From 34d6aff1ca223521d8137f642a58db077ab70df0 Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Thu, 29 Mar 2018 09:57:56 +0000 Subject: [PATCH] www: cleanup expensive fallback for legacy URLs Back in the day, we compressed long Message-IDs to SHA-1 hexdigests for the URL. This now responds with a 301 redirect in the hopes we can remove these checks some day to reduce overhead. --- lib/PublicInbox/Inbox.pm | 11 ++++++++--- lib/PublicInbox/WWW.pm | 23 +++++++++-------------- t/plack.t | 18 ++++++++++++++++++ 3 files changed, 35 insertions(+), 17 deletions(-) diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 01aa500c..265360d9 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -293,13 +293,18 @@ sub path_check { git($self)->check('HEAD:'.$path); } +sub mid2num($$) { + my ($self, $mid) = @_; + my $mm = mm($self) or return; + $mm->num_for($mid); +} + sub smsg_by_mid ($$) { my ($self, $mid) = @_; my $srch = search($self) or return; # favor the Message-ID we used for the NNTP article number: - my $mm = mm($self) or return; - my $num = $mm->num_for($mid); - $srch->lookup_article($num); + my $num = mid2num($self, $mid); + defined $num ? $srch->lookup_article($num) : undef; } sub msg_by_mid ($$;$) { diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 7bd29732..24e24f1e 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -169,14 +169,15 @@ sub invalid_inbox_mid { return $ret if $ret; $ctx->{mid} = $mid; - if ($mid =~ /\A[a-f0-9]{40}\z/) { - # this is horiffically wasteful for legacy URLs: - if ($mid = mid2blob($ctx)) { - require Email::Simple; - use PublicInbox::MID qw/mid_clean/; - my $s = Email::Simple->new($mid); - $ctx->{mid} = mid_clean($s->header('Message-ID')); - } + my $ibx = $ctx->{-inbox}; + if ($mid =~ m!\A([a-f0-9]{2})([a-f0-9]{38})\z!) 
{ + my ($x2, $x38) = ($1, $2); + # this is horrifically wasteful for legacy URLs: + my $str = $ctx->{-inbox}->msg_by_path("$x2/$x38") or return; + require Email::Simple; + my $s = Email::Simple->new($str); + $mid = PublicInbox::MID::mid_clean($s->header('Message-ID')); + return r301($ctx, $inbox, $mid); } undef; } @@ -208,12 +209,6 @@ sub get_index { } } -# just returns a string ref for the blob in the current ctx -sub mid2blob { - my ($ctx) = @_; - $ctx->{-inbox}->msg_by_mid($ctx->{mid}); -} - # /$INBOX/$MESSAGE_ID/raw -> raw mbox sub get_mid_txt { my ($ctx) = @_; diff --git a/t/plack.t b/t/plack.t index 26b03660..7eb7d7f2 100644 --- a/t/plack.t +++ b/t/plack.t @@ -18,6 +18,7 @@ foreach my $mod (@mods) { } use_ok 'PublicInbox::Import'; use_ok 'PublicInbox::Git'; +my @ls; foreach my $mod (@mods) { use_ok $mod; } { @@ -55,6 +56,8 @@ EOF $im->done; my $rev = `git --git-dir="$maindir" rev-list HEAD`; like($rev, qr/\A[a-f0-9]{40}/, "good revision committed"); + @ls = `git --git-dir="$maindir" ls-tree -r --name-only HEAD`; + chomp @ls; } my $app = eval { local $ENV{PI_CONFIG} = $pi_config; @@ -198,6 +201,21 @@ EOF "$sfx redirected to /mbox.gz"); }); } + test_psgi($app, sub { + my ($cb) = @_; + # for a while, we used to support /$INBOX/$X40/ + # when we "compressed" long Message-IDs to SHA-1 + # Now we're stuck supporting them forever :< + foreach my $path (@ls) { + $path =~ tr!/!!d; + my $from = "http://example.com/test/$path/"; + my $res = $cb->(GET($from)); + is(301, $res->code, 'is permanent redirect'); + like($res->header('Location'), + qr!/test/blah\@example\.com/!, + 'redirect from x40 MIDs works'); + } + }); } done_testing(); -- 2.44.0