X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FLeiSearch.pm;h=ceb3624b5e46c59d620191b412e5574841edc6bf;hb=fbc11e24a72f41b0ed7ead30d199288a4d674be4;hp=66c16e0485fd611f7dbad6186301bb920426d8b5;hpb=ba135f3e25bf5d1b3aa3d34e31fefb55ee4c8d29;p=public-inbox.git

diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm
index 66c16e04..ceb3624b 100644
--- a/lib/PublicInbox/LeiSearch.pm
+++ b/lib/PublicInbox/LeiSearch.pm
@@ -1,39 +1,74 @@
-# Copyright (C) 2020 all contributors
+# Copyright (C) 2020-2021 all contributors
 # License: AGPL-3.0+
 
+# read-only counterpart for PublicInbox::LeiStore
 package PublicInbox::LeiSearch;
 use strict;
 use v5.10.1;
 use parent qw(PublicInbox::ExtSearch);
-use PublicInbox::Search;
+use PublicInbox::Search qw(xap_terms);
+use PublicInbox::ContentHash qw(content_digest content_hash);
+use PublicInbox::MID qw(mids mids_in);
 
-sub combined_docid ($$) {
+# get combined docid from over.num:
+# (not generic Xapian, only works with our sharding scheme)
+sub num2docid ($$) {
 	my ($self, $num) = @_;
-	($num - 1) * $self->{nshard} + 1;
+	my $nshard = $self->{nshard};
+	($num - 1) * $nshard + $num % $nshard + 1;
 }
 
 sub msg_keywords {
 	my ($self, $num) = @_; # num_or_mitem
 	my $xdb = $self->xdb; # set {nshard};
-	my $docid = ref($num) ? $num->get_docid : do {
-		# get combined docid from over.num:
-		# (not generic Xapian, only works with our sharding scheme)
-		my $nshard = $self->{nshard};
-		($num - 1) * $nshard + $num % $nshard + 1;
-	};
-	my %kw;
-	eval {
-		my $end = $xdb->termlist_end($docid);
-		my $cur = $xdb->termlist_begin($docid);
-		for (; $cur != $end; $cur++) {
-			$cur->skip_to('K');
-			last if $cur == $end;
-			my $kw = $cur->get_termname;
-			$kw =~ s/\AK//s and $kw{$kw} = undef;
-		}
-	};
+	my $docid = ref($num) ? $num->get_docid : num2docid($self, $num);
+	my $kw = xap_terms('K', $xdb, $docid);
 	warn "E: #$docid ($num): $@\n" if $@;
-	wantarray ? sort(keys(%kw)) : \%kw;
+	wantarray ? sort(keys(%$kw)) : $kw;
+}
+
+# when a message has no Message-IDs at all, this is needed for
+# unsent Draft messages, at least
+sub content_key ($) {
+	my ($eml) = @_;
+	my $dig = content_digest($eml);
+	my $chash = $dig->clone->digest;
+	my $mids = mids_in($eml,
+			qw(Message-ID X-Alt-Message-ID Resent-Message-ID));
+	unless (@$mids) {
+		$eml->{-lei_fake_mid} = $mids->[0] =
+			PublicInbox::Import::digest2mid($dig, $eml);
+	}
+	($chash, $mids);
+}
+
+sub _cmp_1st { # git->cat_async callback
+	my ($bref, $oid, $type, $size, $cmp) = @_; # cmp: [chash, found, smsg]
+	return if defined($cmp->[1]->[0]); # $found->[0]
+	if (content_hash(PublicInbox::Eml->new($bref)) eq $cmp->[0]) {
+		push @{$cmp->[1]}, $cmp->[2]->{num};
+	}
+}
+
+# returns true if $eml is indexed by lei/store and keywords don't match
+sub kw_changed {
+	my ($self, $eml, $new_kw_sorted) = @_;
+	my ($chash, $mids) = content_key($eml);
+	my $over = $self->over;
+	my $git = $self->git;
+	my $found = [];
+	for my $mid (@$mids) {
+		my ($id, $prev);
+		while (my $cur = $over->next_by_mid($mid, \$id, \$prev)) {
+			$git->cat_async($cur->{blob}, \&_cmp_1st,
+					[ $chash, $found, $cur ]);
+			last if scalar(@$found);
+		}
+	}
+	$git->cat_async_wait;
+	my $num = $found->[0] // return;
+	my @cur_kw = msg_keywords($self, $num);
+	join("\0", @$new_kw_sorted) eq join("\0", @cur_kw) ? 0 : 1;
 }
 
 1;