X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FLeiXSearch.pm;h=5965274ca211396df49e263ec6ade1508e3ba458;hb=refs%2Fheads%2Fmaster;hp=2958d3f910b0986baa2bccf138ee185ae35b8c7f;hpb=b63e13509ce1fb359db4096b33219bcdb978e36d;p=public-inbox.git diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index 2958d3f9..5965274c 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2020-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ # Combine any combination of PublicInbox::Search, @@ -103,13 +103,6 @@ sub smsg_for { $smsg; } -sub recent { - my ($self, $qstr, $opt) = @_; - $opt //= {}; - $opt->{relevance} //= -2; - $self->mset($qstr //= 'z:1..', $opt); -} - sub over {} sub _check_mset_limit ($$$) { @@ -163,8 +156,9 @@ sub mset_progress { } sub l2m_progress { - my ($lei, $nr) = @_; - $lei->{-nr_write} += $nr; + my ($lei, $nr_write, $nr_seen) = @_; + $lei->{-nr_write} += $nr_write; + $lei->{-nr_seen} += $nr_seen; } sub query_one_mset { # for --threads and l2m w/o sort @@ -285,7 +279,7 @@ sub each_remote_eml { # callback for MboxReader->mboxrd my ($res, $kw) = $self->{import_sto}->wq_do('add_eml', $eml); if (ref($res) eq ref($smsg)) { # totally new message $smsg = $res; - $self->{-imported} = 1; + $self->{-sto_imported} = 1; } $smsg->{kw} = $kw; # short-circuit xsmsg_vmd } @@ -376,7 +370,7 @@ sub query_remote_mboxrd { $fh = IO::Uncompress::Gunzip->new($fh, MultiStream => 1); PublicInbox::MboxReader->mboxrd($fh, \&each_remote_eml, $self, $lei, $each_smsg); - if ($self->{import_sto} && delete($self->{-imported})) { + if (delete($self->{-sto_imported})) { my $wait = $self->{import_sto}->wq_do('done'); } $reap_curl->join; @@ -406,9 +400,8 @@ sub query_remote_mboxrd { sub git { $_[0]->{git} // die 'BUG: git uninitialized' } -sub xsearch_done_wait { # dwaitpid callback - my ($arg, $pid) = @_; - my ($wq, $lei) = @$arg; +sub xsearch_done_wait { # awaitpid cb + my ($pid, $wq, $lei) = @_; return if !$?; my $s = $? & 127; return $lei->child_error($?) if $s == 13 || $s == 15; @@ -447,13 +440,16 @@ Error closing $lei->{ovv}->{dst}: \$!=$! \$?=$? } if ($lei->{-progress}) { my $tot = $lei->{-mset_total} // 0; - my $nr = $lei->{-nr_write} // 0; + my $nr_w = $lei->{-nr_write} // 0; + my $d = ($lei->{-nr_seen} // 0) - $nr_w; + my $x = "$tot matches"; + $x .= ", $d duplicates" if $d; if ($l2m) { - my $m = "# $nr written to " . - "$lei->{ovv}->{dst} ($tot matches)"; - $nr ? $lei->qfin($m) : $lei->qerr($m); + my $m = "# $nr_w written to " . + "$lei->{ovv}->{dst} ($x)"; + $nr_w ? $lei->qfin($m) : $lei->qerr($m); } else { - $lei->qerr("# $tot matches"); + $lei->qerr("# $x"); } } $lei->start_mua if $l2m && !$l2m->lock_free; @@ -577,15 +573,15 @@ sub do_query { $l2m->{au_peers} = [ $a_r, $a_w, $b_r, $b_w ]; } $l2m->wq_workers_start('lei2mail', undef, - $lei->oldset, { lei => $lei }); - $l2m->wq_wait_async(\&xsearch_done_wait, $lei); + $lei->oldset, { lei => $lei }, + \&xsearch_done_wait, $lei); pipe($lei->{startq}, $lei->{au_done}) or die "pipe: $!"; fcntl($lei->{startq}, $F_SETPIPE_SZ, 4096) if $F_SETPIPE_SZ; delete $l2m->{au_peers}; } $self->wq_workers_start('lei_xsearch', undef, - $lei->oldset, { lei => $lei }); - $self->wq_wait_async(\&xsearch_done_wait, $lei); + $lei->oldset, { lei => $lei }, + \&xsearch_done_wait, $lei); my $op_c = delete $lei->{pkt_op_c}; delete $lei->{pkt_op_p}; @$end = (); @@ -608,34 +604,40 @@ sub add_uri { require IO::Uncompress::Gunzip; require PublicInbox::LeiCurl; push @{$self->{remotes}}, $uri; + $uri; } else { warn "curl missing, ignoring $uri\n"; + undef; } } +# returns URI or PublicInbox::Inbox-like object sub prepare_external { my ($self, $loc, $boost) = @_; # n.b. already ordered by boost if (ref $loc) { # already a URI, or PublicInbox::Inbox-like object return add_uri($self, $loc) if $loc->can('scheme'); + # fall-through on Inbox-like objects } elsif ($loc =~ m!\Ahttps?://!) { require URI; return add_uri($self, URI->new($loc)); - } elsif (-f "$loc/ei.lock") { + } elsif (-f "$loc/ei.lock" && -d "$loc/ALL.git/objects") { require PublicInbox::ExtSearch; die "`\\n' not allowed in `$loc'\n" if index($loc, "\n") >= 0; $loc = PublicInbox::ExtSearch->new($loc); - } elsif (-f "$loc/inbox.lock" || -d "$loc/public-inbox") { + } elsif ((-f "$loc/inbox.lock" && -d "$loc/all.git/objects") || + (-d "$loc/public-inbox" && -d "$loc/objects")) { die "`\\n' not allowed in `$loc'\n" if index($loc, "\n") >= 0; require PublicInbox::Inbox; # v2, v1 $loc = bless { inboxdir => $loc }, 'PublicInbox::Inbox'; } elsif (!-e $loc) { warn "W: $loc gone, perhaps run: lei forget-external $loc\n"; - return; + return undef; } else { warn "W: $loc ignored, unable to determine external type\n"; - return; + return undef; } push @{$self->{locals}}, $loc; + $loc; } sub _lcat_i { # LeiMailSync->each_src iterator callback