X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FExtMsg.pm;h=49fc1ac5521c1506acb137afaf1f667f666f231d;hb=55b707d788ce13696e4411389583e720ea6dab01;hp=51e7799de8640929be9538713d3f2ee7e1b1081d;hpb=a46893a2b5dabfdbcf7b593ac19967daecfb1772;p=public-inbox.git diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm index 51e7799d..49fc1ac5 100644 --- a/lib/PublicInbox/ExtMsg.pm +++ b/lib/PublicInbox/ExtMsg.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 all contributors +# Copyright (C) 2015-2019 all contributors # License: AGPL-3.0+ # # Used by the web interface to link to messages outside of the our @@ -8,12 +8,12 @@ package PublicInbox::ExtMsg; use strict; use warnings; -use PublicInbox::Hval; -use PublicInbox::MID qw/mid2path/; +use PublicInbox::Hval qw(ascii_html prurl); use PublicInbox::WwwStream; +our $MIN_PARTIAL_LEN = 16; # TODO: user-configurable -our @EXT_URL = ( +our @EXT_URL = map { ascii_html($_) } ( # leading "//" denotes protocol-relative (http:// or https://) '//marc.info/?i=%s', '//www.mail-archive.com/search?l=mid&q=%s', @@ -28,8 +28,13 @@ our @EXT_URL = ( sub PARTIAL_MAX () { 100 } +sub mids_from_mset { # Search::retry_reopen callback + [ map { PublicInbox::SearchMsg::from_mitem($_)->mid } $_[0]->items ]; +} + sub search_partial ($$) { my ($srch, $mid) = @_; + return if length($mid) < $MIN_PARTIAL_LEN; my $opt = { limit => PARTIAL_MAX, mset => 2 }; my @try = ("m:$mid*"); my $chop = $mid; @@ -58,17 +63,33 @@ sub search_partial ($$) { } foreach my $m (@try) { - my $mset = eval { $srch->query($m, $opt) }; - if (ref($@) eq 'Search::Xapian::QueryParserError') { - # If Xapian can't handle the wildcard since it - # has too many results. - next; - } - my @mids = map { - my $doc = $_->get_document; - PublicInbox::SearchMsg->load_doc($doc)->mid; - } $mset->items; - return \@mids if scalar(@mids); + # If Xapian can't handle the wildcard since it + # has too many results. $@ can be + # Search::Xapian::QueryParserError or even: + # "something terrible happened at ../Search/Xapian/Enquire.pm" + my $mset = eval { $srch->query($m, $opt) } or next; + my $mids = $srch->retry_reopen(\&mids_from_mset, $mset); + return $mids if scalar(@$mids); + } +} + +sub ext_msg_i { + my ($other, $arg) = @_; + my ($cur, $mid, $ibxs, $found) = @$arg; + + return if $other->{name} eq $cur->{name} || !$other->base_url; + + my $mm = $other->mm or return; + + # try to find the URL with Msgmap to avoid forking + my $num = $mm->num_for($mid); + if (defined $num) { + push @$found, $other; + } else { + # no point in trying the fork fallback if we + # know Xapian is up-to-date but missing the + # message in the current repo + push @$ibxs, $other; } } @@ -78,28 +99,13 @@ sub ext_msg { my $mid = $ctx->{mid}; eval { require PublicInbox::Msgmap }; - my $have_mm = $@ ? 0 : 1; - my (@ibx, @found); - - $ctx->{www}->{pi_config}->each_inbox(sub { - my ($other) = @_; - return if $other->{name} eq $cur->{name} || !$other->base_url; - - my $mm = $other->mm or return; - - # try to find the URL with Msgmap to avoid forking - my $num = $mm->num_for($mid); - if (defined $num) { - push @found, $other; - } else { - # no point in trying the fork fallback if we - # know Xapian is up-to-date but missing the - # message in the current repo - push @ibx, $other; - } - }); + my $ibxs = []; + my $found = []; + my $arg = [ $cur, $mid, $ibxs, $found ]; + + $ctx->{www}->{pi_config}->each_inbox(\&ext_msg_i, $arg); - return exact($ctx, \@found, $mid) if @found; + return exact($ctx, $found, $mid) if @$found; # fall back to partial MID matching my @partial; @@ -112,8 +118,8 @@ sub ext_msg { } # can't find a partial match in current inbox, try the others: - if (!$n_partial && length($mid) >= 16) { - foreach my $ibx (@ibx) { + if (!$n_partial && length($mid) >= $MIN_PARTIAL_LEN) { + foreach my $ibx (@$ibxs) { $srch = $ibx->search or next; $mids = search_partial($srch, $mid) or next; $n_partial += scalar(@$mids); @@ -165,7 +171,7 @@ sub ext_urls { my $env = $ctx->{env}; my $e = "\nPerhaps try an external site:\n\n"; foreach my $url (@EXT_URL) { - my $u = PublicInbox::Hval::prurl($env, $url); + my $u = prurl($env, $url); my $r = sprintf($u, $href); my $t = sprintf($u, $html); $e .= qq{$t\n};