Sergey Matveev's repositories - public-inbox.git/commitdiff
searchidx: xref3 delete support
author Eric Wong <e@80x24.org>
Tue, 27 Oct 2020 07:54:22 +0000 (07:54 +0000)
committer Eric Wong <e@80x24.org>
Sat, 7 Nov 2020 10:18:53 +0000 (10:18 +0000)
Not yet tested, but Perl compiles it!

lib/PublicInbox/SearchIdx.pm

index 5171c610da36a28bd2063baf0f87b06520c53a3a..0458d9c3ff18ec1ff24fd2e84735d5ac2d334241 100644 (file)
@@ -472,29 +472,41 @@ sub remove_xref3 {
        begin_txn_lazy($self);
        my $doc = _get_doc($self, $docid, $oid) or return;
        my $xref3 = PublicInbox::Smsg::xref3(undef, $doc);
+       my %x3 = map { $_ => undef } @$xref3;
        for (grep(/\A\Q$eidx_key\E:[0-9]+:\Q$oid\E\z/, @$xref3)) {
+               delete $x3{$_};
                $doc->remove_term('P' . $_);
        }
-       for my $l ($eml->header_raw('List-Id')) {
-               $l =~ /<([^>]+)>/ or next;
-               my $lid = lc $1;
-               $doc->remove_term('G' . $lid);
-
-               # nb: we don't remove the XL probabilistic terms
-               # since terms may overlap if cross-posted.
-               #
-               # IOW, a message which has both <foo.example.com>
-               # and <bar.example.com> would have overlapping
-               # "XLexample" and "XLcom" as terms and which we
-               # wouldn't know if they're safe to remove if we just
-               # unindex <foo.example.com> while preserving
-               # <bar.example.com>.
-               #
-               # In any case, this entire sub is will likely never
-               # be needed and users using the "l:" prefix are probably
-               # rarer.
+       if (scalar(keys(%x3)) == 0) {
+               $self->{xdb}->delete_document($docid);
+               if (my $del_fh = $self->{del_fh}) { # TODO
+                       print $del_fh $docid, "\n" or die "E: print $!";
+               }
+       } else {
+               if (!grep(/\A\Q$eidx_key\E:/, keys %x3)) {
+                       $doc->remove_term('O'.$eidx_key);
+               }
+               for my $l ($eml->header_raw('List-Id')) {
+                       $l =~ /<([^>]+)>/ or next;
+                       my $lid = lc $1;
+                       $doc->remove_term('G' . $lid);
+
+                       # nb: we don't remove the XL probabilistic terms
+                       # since terms may overlap if cross-posted.
+                       #
+                       # IOW, a message which has both <foo.example.com>
+                       # and <bar.example.com> would have overlapping
+                       # "XLexample" and "XLcom" as terms and which we
+                       # wouldn't know if they're safe to remove if we just
+                       # unindex <foo.example.com> while preserving
+                       # <bar.example.com>.
+                       #
+                       # In any case, this entire sub is will likely never
+                       # be needed and users using the "l:" prefix are probably
+                       # rarer.
+               }
+               $self->{xdb}->replace_document($docid, $doc);
        }
-       $self->{xdb}->replace_document($docid, $doc);
 }
 
 sub get_val ($$) {