X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=blobdiff_plain;f=lib%2FPublicInbox%2FExtMsg.pm;h=be29974900792f2023b7b7b97b5144b61ee6538c;hp=167dc5e5d37fe0e2185d2eccbc47ff6f94263131;hb=HEAD;hpb=9bd675d33ad1e49bd2ebe12a1d216216e61380de diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm index 167dc5e5..be299749 100644 --- a/lib/PublicInbox/ExtMsg.pm +++ b/lib/PublicInbox/ExtMsg.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2019 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ # # Used by the web interface to link to messages outside of the our @@ -8,17 +8,17 @@ package PublicInbox::ExtMsg; use strict; use warnings; -use PublicInbox::Hval qw/ascii_html/; -use PublicInbox::MID qw/mid2path/; -use PublicInbox::WwwStream; -our $MIN_PARTIAL_LEN = 16; +use PublicInbox::Hval qw(ascii_html prurl mid_href); +use PublicInbox::WwwStream qw(html_oneshot); +use PublicInbox::Smsg; +our $MIN_PARTIAL_LEN = 14; # for 'XXXXXXXXXX.fsf' msgids gnus generates # TODO: user-configurable our @EXT_URL = map { ascii_html($_) } ( # leading "//" denotes protocol-relative (http:// or https://) '//marc.info/?i=%s', '//www.mail-archive.com/search?l=mid&q=%s', - 'nntp://news.gmane.org/%s', + 'nntp://news.gmane.io/%s', 'https://lists.debian.org/msgid-search/%s', '//docs.FreeBSD.org/cgi/mid.cgi?db=mid&id=%s', 'https://www.w3.org/mid/%s', @@ -30,9 +30,10 @@ our @EXT_URL = map { ascii_html($_) } ( sub PARTIAL_MAX () { 100 } sub search_partial ($$) { - my ($srch, $mid) = @_; + my ($ibx, $mid) = @_; return if length($mid) < $MIN_PARTIAL_LEN; - my $opt = { limit => PARTIAL_MAX, mset => 2 }; + my $srch = $ibx->isrch or return; + my $opt = { limit => PARTIAL_MAX, relevance => -1 }; my @try = ("m:$mid*"); my $chop = $mid; if ($chop =~ s/(\W+)(\w*)\z//) { @@ -64,87 +65,158 @@ sub search_partial ($$) { # has too many results. $@ can be # Search::Xapian::QueryParserError or even: # "something terrible happened at ../Search/Xapian/Enquire.pm" - my $mset = eval { $srch->query($m, $opt) } or next; - + my $mset = eval { $srch->mset($m, $opt) } or next; my @mids = map { - my $doc = $_->get_document; - PublicInbox::SearchMsg->load_doc($doc)->mid; - } $mset->items; + $_->{mid} + } @{$srch->mset_to_smsg($ibx, $mset)}; return \@mids if scalar(@mids); } } -sub ext_msg { - my ($ctx) = @_; - my $cur = $ctx->{-inbox}; - my $mid = $ctx->{mid}; +sub ext_msg_i { + my ($other, $ctx) = @_; - eval { require PublicInbox::Msgmap }; - my $have_mm = $@ ? 0 : 1; - my (@ibx, @found); + return if $other->{name} eq $ctx->{ibx}->{name} || !$other->base_url; - $ctx->{www}->{pi_config}->each_inbox(sub { - my ($other) = @_; - return if $other->{name} eq $cur->{name} || !$other->base_url; + my $mm = $other->mm or return; - my $mm = $other->mm or return; + # try to find the URL with Msgmap to avoid forking + my $num = $mm->num_for($ctx->{mid}); + if (defined $num) { + push @{$ctx->{found}}, $other; + } else { + # no point in trying the fork fallback if we + # know Xapian is up-to-date but missing the + # message in the current repo + push @{$ctx->{again}}, $other; + } +} - # try to find the URL with Msgmap to avoid forking - my $num = $mm->num_for($mid); - if (defined $num) { - push @found, $other; - } else { - # no point in trying the fork fallback if we - # know Xapian is up-to-date but missing the - # message in the current repo - push @ibx, $other; - } - }); +sub ext_msg_step { + my ($pi_cfg, $section, $ctx) = @_; + if (defined($section)) { + return if $section !~ m!\Apublicinbox\.([^/]+)\z!; + my $ibx = $pi_cfg->lookup_name($1) or return; + ext_msg_i($ibx, $ctx); + } else { # undef == "EOF" + finalize_exact($ctx); + } +} - return exact($ctx, \@found, $mid) if @found; +sub ext_msg_ALL ($) { + my ($ctx) = @_; + my $ALL = $ctx->{www}->{pi_cfg}->ALL or return; + my $by_eidx_key = $ctx->{www}->{pi_cfg}->{-by_eidx_key}; + my $cur_key = eval { $ctx->{ibx}->eidx_key } // + return partial_response($ctx); # $cur->{ibx} == $ALL + my %seen = ($cur_key => 1); + my ($id, $prev); + while (my $x = $ALL->over->next_by_mid($ctx->{mid}, \$id, \$prev)) { + my $xr3 = $ALL->over->get_xref3($x->{num}); + for my $k (@$xr3) { + $k =~ s/:[0-9]+:$x->{blob}\z// or next; + next if $k eq $cur_key; + my $ibx = $by_eidx_key->{$k} // next; + $ibx->base_url or next; + push(@{$ctx->{found}}, $ibx) unless $seen{$k}++; + } + } + return exact($ctx) if $ctx->{found}; # fall back to partial MID matching - my @partial; - my $n_partial = 0; - my $srch = $cur->search; - my $mids = search_partial($srch, $mid) if $srch; - if ($mids) { - $n_partial = scalar(@$mids); - push @partial, [ $cur, $mids ]; + for my $ibxish ($ctx->{ibx}, $ALL) { + my $mids = search_partial($ibxish, $ctx->{mid}) or next; + push @{$ctx->{partial}}, [ $ibxish, $mids ]; + last if ($ctx->{n_partial} += scalar(@$mids)) >= PARTIAL_MAX; } + partial_response($ctx); +} +sub ext_msg { + my ($ctx) = @_; + ext_msg_ALL($ctx) // sub { + $ctx->{-wcb} = $_[0]; # HTTP server write callback + + if ($ctx->{env}->{'pi-httpd.async'}) { + require PublicInbox::ConfigIter; + my $iter = PublicInbox::ConfigIter->new( + $ctx->{www}->{pi_cfg}, + \&ext_msg_step, $ctx); + $iter->event_step; + } else { + $ctx->{www}->{pi_cfg}->each_inbox(\&ext_msg_i, $ctx); + finalize_exact($ctx); + } + }; +} + +# called via PublicInbox::DS::event_loop +sub event_step { + my ($ctx, $sync) = @_; # can't find a partial match in current inbox, try the others: - if (!$n_partial && length($mid) >= $MIN_PARTIAL_LEN) { - foreach my $ibx (@ibx) { - $srch = $ibx->search or next; - $mids = search_partial($srch, $mid) or next; - $n_partial += scalar(@$mids); - push @partial, [ $ibx, $mids]; - last if $n_partial >= PARTIAL_MAX; + my $ibx = shift @{$ctx->{again}} or return finalize_partial($ctx); + my $mids = search_partial($ibx, $ctx->{mid}) or + return ($sync ? undef : PublicInbox::DS::requeue($ctx)); + $ctx->{n_partial} += scalar(@$mids); + push @{$ctx->{partial}}, [ $ibx, $mids ]; + $ctx->{n_partial} >= PARTIAL_MAX ? finalize_partial($ctx) + : ($sync ? undef : PublicInbox::DS::requeue($ctx)); +} + +sub finalize_exact { + my ($ctx) = @_; + + return $ctx->{-wcb}->(exact($ctx)) if $ctx->{found}; + + # fall back to partial MID matching + my $mid = $ctx->{mid}; + my $cur = $ctx->{ibx}; + my $mids = search_partial($cur, $mid); + if ($mids) { + $ctx->{n_partial} = scalar(@$mids); + push @{$ctx->{partial}}, [ $cur, $mids ]; + } elsif ($ctx->{again} && length($mid) >= $MIN_PARTIAL_LEN) { + bless $ctx, __PACKAGE__; + if ($ctx->{env}->{'pi-httpd.async'}) { + $ctx->event_step; + return; } + + # synchronous fall-through + $ctx->event_step while @{$ctx->{again}}; } + finalize_partial($ctx); +} +sub _url_pfx ($$) { + my ($ctx, $u) = @_; + (index($u, '://') < 0 && index($u, '/') != 0) ? + "$ctx->{-upfx}../$u" : $u; +} + +sub partial_response ($) { + my ($ctx) = @_; + my $mid = $ctx->{mid}; my $code = 404; - my $h = PublicInbox::Hval->new_msgid($mid); - my $href = $h->{href}; - my $html = $h->as_html; + my $href = mid_href($mid); + my $html = ascii_html($mid); my $title = "<$html> not found"; my $s = "
Message-ID <$html>\nnot found\n";
-	if ($n_partial) {
+	$ctx->{-upfx} //= '../';
+	if (my $n_partial = $ctx->{n_partial}) {
 		$code = 300;
 		my $es = $n_partial == 1 ? '' : 'es';
 		$n_partial .= '+' if ($n_partial == PARTIAL_MAX);
 		$s .= "\n$n_partial partial match$es found:\n\n";
-		my $cur_name = $cur->{name};
-		foreach my $pair (@partial) {
+		my $cur_name = $ctx->{ibx}->{name};
+		foreach my $pair (@{$ctx->{partial}}) {
 			my ($ibx, $res) = @$pair;
-			my $env = $ctx->{env} if $ibx->{name} eq $cur_name;
-			my $u = $ibx->base_url($env) or next;
+			my $e = $ibx->{name} eq $cur_name ? $ctx->{env} : undef;
+			my $u = _url_pfx($ctx, $ibx->base_url($e) // next);
 			foreach my $m (@$res) {
-				my $p = PublicInbox::Hval->new_msgid($m);
-				my $r = $p->{href};
-				my $t = $p->as_html;
-				$s .= qq{$u$t/\n};
+				my $href = mid_href($m);
+				my $html = ascii_html($m);
+				$s .= qq{$u$html/\n};
 			}
 		}
 	}
@@ -155,11 +227,13 @@ sub ext_msg {
 	}
 	$ctx->{-html_tip} = $s .= '
'; $ctx->{-title_html} = $title; - $ctx->{-upfx} = '../'; - PublicInbox::WwwStream->response($ctx, $code); + html_oneshot($ctx, $code); } +sub finalize_partial ($) { $_[0]->{-wcb}->(partial_response($_[0])) } + sub ext_urls { + return ""; my ($ctx, $mid, $href, $html) = @_; # Fall back to external repos if configured @@ -167,7 +241,7 @@ sub ext_urls { my $env = $ctx->{env}; my $e = "\nPerhaps try an external site:\n\n"; foreach my $url (@EXT_URL) { - my $u = PublicInbox::Hval::prurl($env, $url); + my $u = prurl($env, $url); my $r = sprintf($u, $href); my $t = sprintf($u, $html); $e .= qq{$t\n}; @@ -178,24 +252,25 @@ sub ext_urls { } sub exact { - my ($ctx, $found, $mid) = @_; - my $h = PublicInbox::Hval->new_msgid($mid); - my $href = $h->{href}; - my $html = $h->as_html; + my ($ctx) = @_; + my $mid = $ctx->{mid}; + my $found = $ctx->{found}; + my $href = mid_href($mid); + my $html = ascii_html($mid); my $title = "<$html> found in "; my $end = @$found == 1 ? 'another inbox' : 'other inboxes'; $ctx->{-title_html} = $title . $end; - $ctx->{-upfx} = '../'; + $ctx->{-upfx} //= '../'; my $ext_urls = ext_urls($ctx, $mid, $href, $html); my $code = (@$found == 1 && $ext_urls eq '') ? 200 : 300; $ctx->{-html_tip} = join('', "
Message-ID: <$html>\nfound in $end:\n\n",
 				(map {
-					my $u = $_->base_url;
+					my $u = _url_pfx($ctx, $_->base_url);
 					qq($u$html/\n)
 				} @$found),
 			$ext_urls, '
'); - PublicInbox::WwwStream->response($ctx, $code); + html_oneshot($ctx, $code); } 1;