X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FSearchIdx.pm;h=aeb363e03d1d57a7ccd3cc7c1ae408cf98d91a05;hb=a46893a2b5dabfdbcf7b593ac19967daecfb1772;hp=6e44887d2651c2b20fef84e8a7d38369badf607e;hpb=9ff904a5b93dcc7989e475e2296ff13fe530d547;p=public-inbox.git diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 6e44887d..aeb363e0 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -22,7 +22,7 @@ require PublicInbox::Git; use Compress::Zlib qw(compress); use constant { - BATCH_BYTES => 10_000_000, + BATCH_BYTES => 1_000_000, DEBUG => !!$ENV{DEBUG}, }; @@ -331,6 +331,13 @@ sub add_message { foreach my $mid (@$mids) { $tg->index_text($mid, 1, 'XM'); + + # because too many Message-IDs are prefixed with + # "Pine.LNX."... + if ($mid =~ /\w{12,}/) { + my @long = ($mid =~ /(\w{3,}+)/g); + $tg->index_text(join(' ', @long), 1, 'XM'); + } $tg->increase_termpos; } $smsg->{to} = $smsg->{cc} = ''; @@ -551,16 +558,11 @@ sub read_log { my %D; my $line; my $newest; - my $mid = '20170114215743.5igbjup6qpsh3jfg@genre.crustytoothpaste.net'; while (defined($line = <$log>)) { if ($line =~ /$addmsg/o) { my $blob = $1; delete $D{$blob} and next; my $mime = do_cat_mail($git, $blob, \$bytes) or next; - my $mids = mids($mime->header_obj); - foreach (@$mids) { - warn "ADD $mid\n" if ($_ eq $mid); - } batch_adjust(\$max, $bytes, $batch_cb, $latest); $add_cb->($self, $mime, $bytes, $blob); } elsif ($line =~ /$delmsg/o) { @@ -574,10 +576,6 @@ sub read_log { # get the leftovers foreach my $blob (keys %D) { my $mime = do_cat_mail($git, $blob, \$bytes) or next; - my $mids = mids($mime->header_obj); - foreach (@$mids) { - warn "DEL $mid\n" if ($_ eq $mid); - } $del_cb->($self, $mime); } $batch_cb->($latest, $newest); @@ -698,6 +696,7 @@ sub _index_sync { } } $self->commit_txn_lazy; + $xdb = _xdb_release($self); # let another process do some work... < if (!$newest) { $xdb = $self->begin_txn_lazy;