$smsg->{blob} = $self->get_mark(":$blob");
$smsg->set_bytes($raw_email, $n);
if (my $oidx = delete $smsg->{-oidx}) { # used by LeiStore
- return if $oidx->blob_exists($smsg->{blob});
+ my @docids = $oidx->blob_exists($smsg->{blob});
+ my @vivify_xvmd;
+ for my $id (@docids) {
+ if (my $cur = $oidx->get_art($id)) {
+ # already imported if bytes > 0
+ return if $cur->{bytes} > 0;
+ push @vivify_xvmd, $id;
+ } else {
+ warn "W: $smsg->{blob} ",
+ "#$id gone (bug?)\n";
+ }
+ }
+ $smsg->{-vivify_xvmd} = \@vivify_xvmd;
}
}
my $ref = $self->{ref};
use PublicInbox::PktOp qw(pkt_do);
# Per-message callback: hands one parsed message to the lei store.
# This is patch text: lines marked `-' are the implementation being
# removed, lines marked `+' are its replacement.
#
# Arguments of the replacement version:
#   $eml           - the message; passed on to xoids_for and to set_eml
#   $lei           - object whose {ale} and {sto} members are used below
#   $mbox_keywords - optional code ref; when true it is called with $eml
#                    and its result is dereferenced as an array ref
sub _import_eml { # MboxReader callback
- my ($eml, $sto, $set_kw) = @_;
- $sto->ipc_do('set_eml', $eml, $set_kw ?
- { kw => PublicInbox::MboxReader::mbox_keywords($eml) } : ());
+ my ($eml, $lei, $mbox_keywords) = @_;
+ my $vmd;
+ if ($mbox_keywords) {
+ my $kw = $mbox_keywords->($eml);
+ $vmd = { kw => $kw } if scalar(@$kw); # stays undef when no keywords were found
+ }
# Look the message up through $lei->{ale} first; the result is forwarded
# as the extra (fourth) argument of the 'set_eml' request.
+ my $xoids = $lei->{ale}->xoids_for($eml);
+ $lei->{sto}->ipc_do('set_eml', $eml, $vmd, $xoids);
}
sub import_done_wait { # dwaitpid callback
sub import_start {
my ($lei) = @_;
my $self = $lei->{imp};
+ $lei->ale;
my $j = $lei->{opt}->{jobs} // scalar(@{$self->{inputs}}) || 1;
if (my $net = $lei->{net}) {
# $j = $net->net_concurrency($j); TODO
sub _import_fh {
my ($lei, $fh, $input, $ifmt) = @_;
- my $set_kw = $lei->{opt}->{kw};
+ my $kw = $lei->{opt}->{kw} ?
+ PublicInbox::MboxReader->can('mbox_keywords') : undef;
eval {
if ($ifmt eq 'eml') {
my $buf = do { local $/; <$fh> } //
error reading $input: $!
my $eml = PublicInbox::Eml->new(\$buf);
- _import_eml($eml, $lei->{sto}, $set_kw);
+ _import_eml($eml, $lei, $kw);
} else { # some mbox (->can already checked in call);
my $cb = PublicInbox::MboxReader->can($ifmt) //
die "BUG: bad fmt=$ifmt";
- $cb->(undef, $fh, \&_import_eml, $lei->{sto}, $set_kw);
+ $cb->(undef, $fh, \&_import_eml, $lei, $kw);
}
};
$lei->child_error(1 << 8, "$input: $@") if $@;
# Imports a message stream taken from slot 0 of $self.
# The slot is removed from $self (via delete) and handed to _import_fh
# together with the label '<stdin>' and the 'in-format' option value.
# The `-' line is the old one-line call; the `+' lines split it in two
# without changing which arguments are passed.
sub import_stdin {
my ($self) = @_;
my $lei = $self->{lei};
- _import_fh($lei, delete $self->{0}, '<stdin>', $lei->{opt}->{'in-format'});
+ my $in = delete $self->{0};
+ _import_fh($lei, $in, '<stdin>', $lei->{opt}->{'in-format'});
}
no warnings 'once'; # the following works even when LeiAuth is lazy-loaded
}
}
-sub xoids_for { # returns { OID => docid } mapping for $eml matches
+# returns { OID => num } mapping for $eml matches
+# The `num' hash value only makes sense from LeiSearch itself
+# and is nonsense from the PublicInbox::LeiALE subclass
+sub xoids_for {
my ($self, $eml, $min) = @_;
my ($chash, $mids) = content_key($eml);
my @overs = ($self->over // $self->overs_all);
}
# Adds $eml to the store and records it in the over/Xapian indexes.
# This is patch text: `-' lines are removed, `+' lines are added.
#
# Arguments (replacement version):
#   $self  - the store object
#   $eml   - the message to add
#   $vmd   - optional metadata; when true it is sent to the shard with an
#            'add_vmd' request for every affected docid
#   $xoids - optional hash ref keyed by OID (new in this patch)
#
# Returns (scalar):
#   nothing    - when $im->add returns false
#   array ref  - docids that were vivified or matched (first two branches)
#   $smsg      - when the message is completely new (last branch)
#
# NOTE(review): $smsg is used below but its declaration is not among the
# visible lines; it is presumably created in a line elided at the hunk
# boundary -- confirm against the full file.
sub add_eml {
- my ($self, $eml, $vmd) = @_;
+ my ($self, $eml, $vmd, $xoids) = @_;
my $im = $self->importer; # may create new epoch
my $eidx = eidx_init($self); # writes ALL.git/objects/info/alternates
my $oidx = $eidx->{oidx}; # PublicInbox::Import::add checks this
$im->add($eml, undef, $smsg) or return; # duplicate returns undef
local $self->{current_info} = $smsg->{blob};
- if (my @docids = _docids_for($self, $eml)) {
+ my $vivify_xvmd = delete($smsg->{-vivify_xvmd}) // []; # exact matches
+ if ($xoids) { # fuzzy matches from externals in ale->xoids_for
+ delete $xoids->{$smsg->{blob}}; # added later
+ if (scalar keys %$xoids) {
# Merge the exact-match docids with every docid that already references
# one of the remaining OIDs; the hash de-duplicates them.
+ my %docids = map { $_ => 1 } @$vivify_xvmd;
+ for my $oid (keys %$xoids) {
+ my @id = $oidx->blob_exists($oid);
+ @docids{@id} = @id;
+ }
+ @$vivify_xvmd = sort { $a <=> $b } keys(%docids); # numeric order
+ }
+ }
+ if (@$vivify_xvmd) {
+ $xoids //= {};
+ $xoids->{$smsg->{blob}} = 1; # re-add the blob removed above so it gets an xref3 row too
+ for my $docid (@$vivify_xvmd) {
+ my $cur = $oidx->get_art($docid);
+ my $idx = $eidx->idx_shard($docid);
+ if (!$cur || $cur->{bytes} == 0) { # really vivifying
+ $smsg->{num} = $docid;
+ $oidx->add_overview($eml, $smsg);
+ $smsg->{-merge_vmd} = 1; # flag is consumed (deleted) by add_xapian
+ $idx->index_eml($eml, $smsg);
+ } else { # lse fuzzy hit off ale
+ $idx->ipc_do('add_eidx_info', $docid, '.', $eml);
+ }
+ for my $oid (keys %$xoids) {
+ $oidx->add_xref3($docid, -1, $oid, '.');
+ }
+ $idx->ipc_do('add_vmd', $docid, $vmd) if $vmd;
+ }
+ $vivify_xvmd;
+ } elsif (my @docids = _docids_for($self, $eml)) {
+ # fuzzy match from within lei/store
for my $docid (@docids) {
my $idx = $eidx->idx_shard($docid);
$oidx->add_xref3($docid, -1, $smsg->{blob}, '.');
$idx->ipc_do('add_vmd', $docid, $vmd) if $vmd;
}
\@docids;
- } else {
+ } else { # totally new message
# Take a fresh docid from the 'eidx_docid' counter, then write the
# overview row, the xref3 row and the shard document for it.
$smsg->{num} = $oidx->adj_counter('eidx_docid', '+');
$oidx->add_overview($eml, $smsg);
$oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.');
my $idx = $eidx->idx_shard($smsg->{num});
$idx->index_eml($eml, $smsg);
- $idx->ipc_do('add_vmd', $smsg->{num}, $vmd ) if $vmd;
+ $idx->ipc_do('add_vmd', $smsg->{num}, $vmd) if $vmd;
$smsg;
}
}
# Adds $eml via add_eml; if add_eml yields undef, falls back to
# set_eml_vmd with the same $eml and $vmd.
# The `+' lines add an optional fourth argument, $xoids, which is passed
# only to add_eml and not to the set_eml_vmd fallback.
sub set_eml {
- my ($self, $eml, $vmd) = @_;
- add_eml($self, $eml, $vmd) // set_eml_vmd($self, $eml, $vmd);
+ my ($self, $eml, $vmd, $xoids) = @_;
+ add_eml($self, $eml, $vmd, $xoids) //
+ set_eml_vmd($self, $eml, $vmd);
}
# set or update keywords for external message, called via ipc_do
my ($self, $oidhex) = @_;
if (wantarray) {
my $sth = $self->dbh->prepare_cached(<<'', undef, 1);
-SELECT docid FROM xref3 WHERE oidbin = ?
+SELECT docid FROM xref3 WHERE oidbin = ? ORDER BY docid ASC
$sth->bind_param(1, pack('H*', $oidhex), SQL_BLOB);
$sth->execute;
use v5.10.1;
use parent qw(PublicInbox::Search PublicInbox::Lock Exporter);
use PublicInbox::Eml;
+use PublicInbox::Search qw(xap_terms);
use PublicInbox::InboxWritable;
use PublicInbox::MID qw(mids_for_index mids);
use PublicInbox::MsgIter;
my $xapianlevels = qr/\A(?:full|medium)\z/;
my $hex = '[a-f0-9]';
my $OID = $hex .'{40,}';
+my @VMD_MAP = (kw => 'K', label => 'L');
our $INDEXLEVELS = qr/\A(?:full|medium|basic)\z/;
sub new {
# Builds the document for $eml with eml2doc and stores it under docid
# $smsg->{num}, replacing whatever document was there.
# The `+' lines add an opt-in merge step: when $smsg carries a true
# -merge_vmd entry (removed from $smsg here) and _get_doc returns the
# existing document, that document's terms for each prefix in @VMD_MAP
# are copied onto the new document as boolean terms before it is written.
sub add_xapian ($$$$) {
my ($self, $eml, $smsg, $mids) = @_;
begin_txn_lazy($self);
+ my $merge_vmd = delete $smsg->{-merge_vmd};
my $doc = eml2doc($self, $eml, $smsg, $mids);
+ if (my $old = $merge_vmd ? _get_doc($self, $smsg->{num}) : undef) {
+ my @x = @VMD_MAP; # work on a copy; splice below consumes it pairwise
+ while (my ($field, $pfx) = splice(@x, 0, 2)) {
# presumably xap_terms returns a hash ref keyed by term with the
# prefix stripped, since $pfx is prepended again below -- TODO confirm
+ my $vals = xap_terms($pfx, $old);
+ $doc->add_boolean_term($pfx.$_) for keys %$vals;
+ }
+ }
$self->{xdb}->replace_document($smsg->{num}, $doc);
}
$self->{xdb}->replace_document($docid, $doc);
}
-my @VMD_MAP = (kw => 'K', label => 'L');
-
sub set_vmd {
my ($self, $docid, $vmd) = @_;
begin_txn_lazy($self);
lei_ok(qw(q --pretty), "m:$m", @inc);
like($lei_out, qr/^ "kw": \["answered", "flagged"\],\n/sm,
'--pretty JSON output shows kw: on one line');
+
+# ensure import on previously external-only message works
+lei_ok('q', "m:$m");
+is_deeply(json_utf8->decode($lei_out), [ undef ],
+ 'to-be-imported message non-existent');
+lei_ok(qw(import -F eml t/x-unknown-alpine.eml));
+is($lei_err, '', 'no errors importing previous external-only message');
+lei_ok('q', "m:$m");
+$res = json_utf8->decode($lei_out);
+is($res->[1], undef, 'got one result');
+is_deeply($res->[0]->{kw}, [ qw(answered flagged) ], 'kw preserved on exact');
+
+# ensure fuzzy match import works, too
+$m = 'multipart@example.com';
+$o = "$ENV{HOME}/fuzz";
+lei_ok('q', '-o', $o, "m:$m", @inc);
+@fn = glob("$o/cur/*");
+scalar(@fn) == 1 or BAIL_OUT "wrote multiple or zero files: ".explain(\@fn);
+rename($fn[0], "$fn[0]S") or BAIL_OUT "rename $!";
+lei_ok('q', '-o', $o, "m:$m");
+is_deeply([glob("$o/cur/*")], [], 'clobbered output results');
+my $eml = eml_load('t/plack-2-txt-bodies.eml');
+$eml->header_set('List-Id', '<list.example.com>');
+my $in = $eml->as_string;
+lei_ok([qw(import -F eml --stdin)], undef, { 0 => \$in, %$lei_opt });
+is($lei_err, '', 'no errors from import');
+lei_ok(qw(q -f mboxrd), "m:$m");
+open $fh, '<', \$lei_out or BAIL_OUT $!;
+my @res;
+PublicInbox::MboxReader->mboxrd($fh, sub { push @res, shift });
+is($res[0]->header('Status'), 'RO', 'seen kw set');
+$res[0]->header_set('Status');
+is_deeply(\@res, [ $eml ], 'imported message matches w/ List-Id');
+
+$eml->header_set('List-Id', '<another.example.com>');
+$in = $eml->as_string;
+lei_ok([qw(import -F eml --stdin)], undef, { 0 => \$in, %$lei_opt });
+is($lei_err, '', 'no errors from 2nd import');
+lei_ok(qw(q -f mboxrd), "m:$m", 'l:another.example.com');
+my @another;
+open $fh, '<', \$lei_out or BAIL_OUT $!;
+PublicInbox::MboxReader->mboxrd($fh, sub { push @another, shift });
+is($another[0]->header('Status'), 'RO', 'seen kw set');
+
}); # test_lei
done_testing;