X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FSearchIdx.pm;h=1b86f7278e785ecd88dbb5d83eafd33adb0d656b;hb=1b7e935ab1690e28;hp=fd48169d4d7bb73170906f3e007c56da6659848b;hpb=789ac5b0119d8d878380bbe8b3de4c7630460148;p=public-inbox.git diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index fd48169d..1b86f727 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -18,7 +18,7 @@ use Carp qw(croak); use POSIX qw(strftime); use PublicInbox::OverIdx; use PublicInbox::Spawn qw(spawn); -require PublicInbox::Git; +use PublicInbox::Git qw(git_unquote); use Compress::Zlib qw(compress); use constant { @@ -29,25 +29,6 @@ use constant { my $xapianlevels = qr/\A(?:full|medium)\z/; -my %GIT_ESC = ( - a => "\a", - b => "\b", - f => "\f", - n => "\n", - r => "\r", - t => "\t", - v => "\013", -); - -sub git_unquote ($) { - my ($s) = @_; - return $s unless ($s =~ /\A"(.*)"\z/); - $s = $1; - $s =~ s/\\([abfnrtv])/$GIT_ESC{$1}/g; - $s =~ s/\\([0-7]{1,3})/chr(oct($1))/ge; - $s; -} - sub new { my ($class, $ibx, $creat, $part) = @_; my $levels = qr/\A(?:full|medium|basic)\z/; @@ -282,7 +263,7 @@ sub index_body ($$$) { sub add_xapian ($$$$$) { my ($self, $mime, $num, $oid, $mids, $mid0) = @_; my $smsg = PublicInbox::SearchMsg->new($mime); - my $doc = $smsg->{doc}; + my $doc = Search::Xapian::Document->new; my $subj = $smsg->subject; add_val($doc, PublicInbox::Search::TS(), $smsg->ts); my @ds = gmtime($smsg->ds); @@ -384,6 +365,7 @@ sub find_doc_ids { ($db->postlist_begin($termval), $db->postlist_end($termval)); } +# v1 only sub batch_do { my ($self, $termval, $cb) = @_; my $batch_size = 1000; # don't let @ids grow too large to avoid OOM @@ -398,25 +380,33 @@ sub batch_do { } } +# v1 only, where $mid is unique sub remove_message { my ($self, $mid) = @_; my $db = $self->{xdb}; - my $called; $mid = mid_clean($mid); - my $over = $self->{over}; + if (my $over = $self->{over}) { + my $nr = eval { $over->remove_oid(undef, $mid) }; + if ($@) { + warn "failed to remove <$mid> from overview: $@\n"; + } elsif ($nr == 0) { + warn "<$mid> missing for removal from overview\n"; + } + } + return if $self->{indexlevel} !~ $xapianlevels; + my $nr = 0; eval { batch_do($self, 'Q' . $mid, sub { my ($ids) = @_; $db->delete_document($_) for @$ids; - $over->delete_articles($ids) if $over; - $called = 1; + $nr = scalar @$ids; }); }; if ($@) { - warn "failed to remove message <$mid>: $@\n"; - } elsif (!$called) { - warn "cannot remove non-existent <$mid>\n"; + warn "failed to remove <$mid> from Xapian: $@\n"; + } elsif ($nr == 0) { + warn "<$mid> missing for removal from Xapian\n"; } } @@ -439,8 +429,8 @@ sub remove_by_oid { for (; $head != $tail; $head->inc) { my $docid = $head->get_docid; my $doc = $db->get_document($docid); - my $smsg = PublicInbox::SearchMsg->wrap($doc, $mid); - $smsg->load_expand; + my $smsg = PublicInbox::SearchMsg->wrap($mid); + $smsg->load_expand($doc); if ($smsg->{blob} eq $oid) { push(@delete, $docid); } @@ -646,6 +636,7 @@ sub _git_log { --raw -r --no-abbrev/, $range); } +# --is-ancestor requires git 1.8.0+ sub is_ancestor ($$$) { my ($git, $cur, $tip) = @_; return 0 unless $git->check($cur); @@ -666,12 +657,30 @@ sub need_update ($$$) { ($n eq '' || $n > 0); } +# The last git commit we indexed with Xapian or SQLite (msgmap) +# This needs to account for cases where Xapian or SQLite is +# out-of-date with respect to the other. +sub _last_x_commit { + my ($self, $mm) = @_; + my $lm = $mm->last_commit || ''; + my $lx = ''; + if ($self->{indexlevel} =~ $xapianlevels) { + $lx = $self->{xdb}->get_metadata('last_commit') || ''; + } else { + $lx = $lm; + } + # Use last_commit from msgmap if it is older or unset + if (!$lm || ($lx && $lx && is_ancestor($self->{git}, $lm, $lx))) { + $lx = $lm; + } + $lx; +} + # indexes all unindexed messages (v1 only) sub _index_sync { my ($self, $opts) = @_; my $tip = $opts->{ref} || 'HEAD'; - my $reindex = $opts->{reindex}; - my ($mkey, $last_commit, $lx, $xlog); + my ($last_commit, $lx, $xlog); my $git = $self->{git}; $git->batch_prepare; @@ -679,19 +688,8 @@ sub _index_sync { my $mm = _msgmap_init($self); do { $xlog = undef; - $mkey = 'last_commit'; - $last_commit = $xdb->get_metadata('last_commit'); - $lx = $last_commit; - if ($reindex) { - $lx = ''; - $mkey = undef if $last_commit ne ''; - } - - # use last_commit from msgmap if it is older or unset - my $lm = $mm->last_commit || ''; - if (!$lm || ($lm && $lx && is_ancestor($git, $lm, $lx))) { - $lx = $lm; - } + $last_commit = _last_x_commit($self, $mm); + $lx = $opts->{reindex} ? '' : $last_commit; $self->{over}->rollback_lazy; $self->{over}->disconnect; @@ -705,7 +703,7 @@ sub _index_sync { $xlog = _git_log($self, $range); $xdb = $self->begin_txn_lazy; - } while ($xdb->get_metadata('last_commit') ne $last_commit); + } while (_last_x_commit($self, $mm) ne $last_commit); my $dbh = $mm->{dbh} if $mm; my $cb = sub { @@ -719,10 +717,10 @@ sub _index_sync { } $dbh->commit; } - if ($mkey && $newest && $self->{indexlevel} =~ $xapianlevels) { - my $cur = $xdb->get_metadata($mkey); + if ($newest && $self->{indexlevel} =~ $xapianlevels) { + my $cur = $xdb->get_metadata('last_commit'); if (need_update($self, $cur, $newest)) { - $xdb->set_metadata($mkey, $newest); + $xdb->set_metadata('last_commit', $newest); } } $self->commit_txn_lazy;