From 885250c3c289c96764e0eb9f432a389136d07088 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Sat, 15 Aug 2015 09:28:31 +0000
Subject: [PATCH] search: implement index_sync to fixup indexer

We need to make the indexer executable and installable while
we're at it.
---
 Makefile.PL               |  3 ++-
 lib/PublicInbox/Search.pm | 57 +++++++++++++++++++++++++++++++++++++++-
 public-inbox-index        |  0
 3 files changed, 58 insertions(+), 2 deletions(-)
 mode change 100644 => 100755 public-inbox-index

diff --git a/Makefile.PL b/Makefile.PL
index 1ee10896..f302b7c0 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -9,7 +9,8 @@ WriteMakefile(
 	AUTHOR => 'Eric Wong ',
 	ABSTRACT => 'public-inbox server infrastructure',
 	EXE_FILES => [qw/public-inbox-mda public-inbox.cgi
-			public-inbox-learn public-inbox-init/],
+			public-inbox-learn public-inbox-init
+			public-inbox-index/],
 	PREREQ_PM => {
 		# note: we use ssoma(1) and spamc(1), NOT the Perl modules
 		# We also depend on git through ssoma.
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index fe4984e5..15bb9f62 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -226,7 +226,6 @@ sub remove_message {
 	} else {
 		$db->commit_transaction;
 	}
-	$db->commit;
 	$doc_id;
 }
 
@@ -536,4 +535,60 @@ sub enquire {
 	$self->{enquire} ||= Search::Xapian::Enquire->new($self->{xdb});
 }
 
+# Incrementally index messages from the git history into Xapian.
+#
+# $git is expected to be a hash-like object exposing {git_dir}; it is
+# also handed through to index_blob/unindex_blob unchanged.
+#
+# Walks "git log --reverse --raw" from the commit recorded in the
+# 'last_commit' metadata key (or all of HEAD when none is recorded),
+# indexing every added message blob and unindexing every deleted one.
+# 'last_commit' is advanced only once a following commit header is
+# seen, so an interrupted run resumes from the last finished commit.
+sub index_sync {
+	my ($self, $git) = @_;
+	my $db = $self->{xdb};
+	my $latest = $db->get_metadata('last_commit');
+	my $range = length $latest ? "$latest..HEAD" : 'HEAD';
+	$latest = undef;
+
+	my $hex = '[a-f0-9]';
+	my $h40 = $hex .'{40}';
+	# --raw lines for blobs stored at the XX/38-hex-chars path layout
+	my $addmsg = qr!^:000000 100644 \S+ ($h40) A\t${hex}{2}/${hex}{38}$!;
+	my $delmsg = qr!^:100644 000000 ($h40) \S+ D\t${hex}{2}/${hex}{38}$!;
+	my $newcommit = qr!^commit ($h40)!;
+
+	# oldest-first history of everything not yet indexed
+	my @cmd = ('git', "--git-dir=$git->{git_dir}", "log",
+		    qw/--reverse --no-notes --no-color --raw -r --no-abbrev/,
+		    $range);
+
+	open(my $log, '-|', @cmd) or
+		die('open '.join(' ', @cmd) . " pipe failed: $!\n");
+	while (my $line = <$log>) {
+		if ($line =~ $addmsg) {
+			$self->index_blob($git, $1);
+		} elsif ($line =~ $delmsg) {
+			$self->unindex_blob($git, $1);
+		} elsif ($line =~ $newcommit) {
+			my $commit = $1;
+			# the previous commit is fully processed by now
+			if (defined $latest) {
+				$db->set_metadata('last_commit', $latest);
+			}
+			$latest = $commit;
+		}
+	}
+
+	# close() on a pipe also reaps git; a false return means a read
+	# error or non-zero exit, so the final commit may be incomplete
+	# and must not be recorded as done.
+	unless (close $log) {
+		warn join(' ', @cmd) . " failed: \$?=$? \$!=$!\n";
+		return;
+	}
+	$db->set_metadata('last_commit', $latest) if defined $latest;
+}
+
 1;
diff --git a/public-inbox-index b/public-inbox-index
old mode 100644
new mode 100755
-- 
2.44.0