TODO | 3 --- lib/PublicInbox/Search.pm | 5 +++-- lib/PublicInbox/SearchIdx.pm | 15 +++++++++++++++ t/extsearch.t | 7 ++++++- t/v2mda.t | 10 ++++++++-- diff --git a/TODO b/TODO index 5be4b5e37289dbb2e2bd5d00a248194090c4b733..43eee0638f658aa1ce752b474461ba910c3fc53f 100644 --- a/TODO +++ b/TODO @@ -137,9 +137,6 @@ * make "git cat-file --batch" detect unlinked packfiles so we don't have to restart processes (very long-term) -* support searching based on `git-patch-id --stable` to improve - bidirectional mapping of commits <=> emails - * linter to check validity of config file * linter option and WWW endpoint to graph relationships and flows diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 523003b3c269ae42e9ee210e91415b2846735daa..6f9fdde1bc2276cbc0620c794731f179a00750b1 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ # based on notmuch, but with no concept of folders, files or flags # @@ -118,9 +118,10 @@ lid => 'G', # newsGroup (or similar entity), just inside <> dfpre => 'XDFPRE', dfpost => 'XDFPOST', dfblob => 'XDFPRE XDFPOST', + patchid => 'XDFID', ); -my $non_quoted_body = 'XNQ XDFN XDFA XDFB XDFHH XDFCTX XDFPRE XDFPOST'; +my $non_quoted_body = 'XNQ XDFN XDFA XDFB XDFHH XDFCTX XDFPRE XDFPOST XDFID'; my %prob_prefix = ( # for mairix compatibility s => 'S', diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 50e260508a2935dc5ed5bd1e6489cf7fb5be763d..53ec23a5c50c4facfcf1977ea04f2fa4498d8805 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -18,6 +18,7 @@ use PublicInbox::MsgIter; use PublicInbox::IdxStack; use Carp qw(croak carp); use POSIX qw(strftime); +use Fcntl qw(SEEK_SET); use Time::Local qw(timegm); use PublicInbox::OverIdx; use PublicInbox::Spawn qw(spawn); @@ -348,6 +349,20 @@ my ($s, undef) = msg_part_text($part, $ct); defined $s or return; $_[0]->[0] = $part = undef; # free memory + + if ($s =~ /^(?:diff|---|\+\+\+) /ms) { + open(my $fh, '+>:utf8', undef) or die "open: $!"; + open(my $eh, '+>', undef) or die "open: $!"; + $fh->autoflush(1); + print $fh $s or die "print: $!"; + sysseek($fh, 0, SEEK_SET) or die "sysseek: $!"; + my $id = ($self->{ibx} // $self->{eidx})->git->qx( + [qw(patch-id --stable)], + {}, { 0 => $fh, 2 => $eh }); + $id =~ /\A([a-f0-9]{40,})/ and $doc->add_term('XDFID'.$1); + seek($eh, 0, SEEK_SET) or die "seek: $!"; + while (<$eh>) { warn $_ } + } # split off quoted and unquoted blocks: my @sections = PublicInbox::MsgIter::split_quotes($s); diff --git a/t/extsearch.t b/t/extsearch.t index 09cbdabee6c4f0962936bcf39a6cf2fe5ea275a1..2d7375d63f9a6467f3b98b80da2715589ad4b573 100644 --- a/t/extsearch.t +++ b/t/extsearch.t @@ -314,7 +314,12 @@ my $new = $oidx->get_art($max + 1); is($new->{subject}, $eml->header('Subject'), 'new message added'); $es->{xdb}->reopen; - my $mset = $es->mset("mid:$new->{mid}"); + # git patch-id --stable search->mset("patchid:$patchid"); + is($mset->size, 1, 'patchid search works'); + + $mset = $es->mset("mid:$new->{mid}"); is($mset->size, 1, 'previously unseen, now indexed in Xapian'); ok($im->remove($eml), 'remove new message from v2 inbox'); diff --git a/t/v2mda.t b/t/v2mda.t index 3dfc569e1dc9b9e7c20a2eeddb6136d045f5c646..8f2f335d96b8d834cfe5ffd38df13dfc6599595a 100644 --- a/t/v2mda.t +++ b/t/v2mda.t @@ -1,7 +1,8 @@ -# Copyright (C) 2018-2021 all contributors +#!perl -w +# Copyright (C) all contributors # License: AGPL-3.0+ +use v5.10.1; use strict; -use warnings; use Test::More; use Fcntl qw(SEEK_SET); use Cwd; @@ -88,6 +89,11 @@ is($pre->size, 1, 'got one result for dfpre'); $pre = $ibx->search->mset_to_smsg($ibx, $pre); $post = $ibx->search->mset_to_smsg($ibx, $post); is($post->[0]->{blob}, $pre->[0]->{blob}, 'same message in both cases'); + + # git patch-id --stable search->mset("patchid:$patchid"); + is($mset->size, 1, 'patchid search works'); } done_testing();