]> Sergey Matveev's repositories - public-inbox.git/commitdiff
lei lcat: extract Message-IDs from URLs and show them
authorEric Wong <e@80x24.org>
Tue, 27 Apr 2021 11:07:52 +0000 (11:07 +0000)
committerEric Wong <e@80x24.org>
Wed, 28 Apr 2021 01:28:57 +0000 (21:28 -0400)
It's a wrapper around "lei q" which extracts Message-IDs
from URLs, "<$MSGID>", "id:$MSGID" and attempts to display the
local version of the message.

Its main purpose is to extract Message-IDs out of
commonly-understood URLs to save users bandwidth and time
by displaying the message locally.  When reading from stdin,
it will discard things it doesn't understand, so you can just
pipe an entire "Link: $URL" line to it and it'll attempt to
pluck the Message-ID out of the URL.

MANIFEST
lib/PublicInbox/LEI.pm
lib/PublicInbox/LeiLcat.pm [new file with mode: 0644]
t/lei-lcat.t [new file with mode: 0644]
t/lei_lcat.t [new file with mode: 0644]

index d4e7d66f9f1b0e63bfaf3d6925159a8732eac542..d3b46f8b2a2b463f9c161cef578de0982d25c7d4 100644 (file)
--- a/MANIFEST
+++ b/MANIFEST
@@ -200,6 +200,7 @@ lib/PublicInbox/LeiImport.pm
 lib/PublicInbox/LeiInit.pm
 lib/PublicInbox/LeiInput.pm
 lib/PublicInbox/LeiInspect.pm
+lib/PublicInbox/LeiLcat.pm
 lib/PublicInbox/LeiLsLabel.pm
 lib/PublicInbox/LeiLsSearch.pm
 lib/PublicInbox/LeiLsSync.pm
@@ -400,6 +401,7 @@ t/lei-import-imap.t
 t/lei-import-maildir.t
 t/lei-import-nntp.t
 t/lei-import.t
+t/lei-lcat.t
 t/lei-mirror.t
 t/lei-p2q.t
 t/lei-q-kw.t
@@ -411,6 +413,7 @@ t/lei-tag.t
 t/lei.t
 t/lei_dedupe.t
 t/lei_external.t
+t/lei_lcat.t
 t/lei_mail_sync.t
 t/lei_overview.t
 t/lei_saved_search.t
index effc905ac4d323bdf926d57ac538fd49b3f56bee..ef72758c6d9a060ebe5813a7a8b0fbe8aca437a8 100644 (file)
@@ -149,6 +149,14 @@ our %CMD = ( # sorted in order of importance/use:
 'up' => [ 'OUTPUT|--all', 'update saved search',
        qw(jobs|j=s lock=s@ alert=s@ mua=s verbose|v+ all:s), @c_opt ],
 
+'lcat' => [ '--stdin|MSGID_OR_URL..', 'display local copy of message(s)',
+       'stdin|', # /|\z/ must be first for lone dash
+       # some of these options are ridiculous for lcat
+       @lxs_opt, qw(output|mfolder|o=s format|f=s dedupe|d=s threads|t+
+       sort|s=s reverse|r offset=i jobs|j=s globoff|g augment|a
+       import-before! lock=s@ rsyncable alert=s@ mua=s verbose|v+), @c_opt,
+       opt_dash('limit|n=i', '[0-9]+') ],
+
 'blob' => [ 'OID', 'show a git blob, reconstructing from mail if necessary',
        qw(git-dir=s@ cwd! verbose|v+ mail! oid-a|A=s path-a|a=s path-b|b=s),
        @lxs_opt, @c_opt ],
diff --git a/lib/PublicInbox/LeiLcat.pm b/lib/PublicInbox/LeiLcat.pm
new file mode 100644 (file)
index 0000000..f10452b
--- /dev/null
@@ -0,0 +1,125 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# lcat: local cat, display a local message by Message-ID or blob,
+# extracting from URL necessary
+# "lei lcat <URL|SPEC>"
+package PublicInbox::LeiLcat;
+use strict;
+use v5.10.1;
+use PublicInbox::LeiViewText;
+use URI::Escape qw(uri_unescape);
+use URI;
+use PublicInbox::MID qw($MID_EXTRACT);
+
+sub lcat_redispatch {
+       my ($lei, $out, $op_p) = @_;
+       my $l = bless { %$lei }, ref($lei);
+       delete $l->{sock};
+       $l->{''} = $op_p; # daemon only
+       eval {
+               $l->qerr("# updating $out");
+               up1($l, $out);
+               $l->qerr("# $out done");
+       };
+       $l->err($@) if $@;
+}
+
+sub extract_1 ($$) {
+       my ($lei, $x) = @_;
+       if ($x =~ m!\b([a-z]+?://\S+)!i) {
+               my $u = $1;
+               $u =~ s/[\>\]\)\,\.\;]+\z//;
+               $u = URI->new($u);
+               my $p = $u->path;
+               my $term;
+               if ($p =~ m!([^/]+\@[^/]+)!) { # common msgid pattern
+                       $term = 'mid:'.uri_unescape($1);
+
+                       # is it a URL which returns the full thread?
+                       if ($u->scheme =~ /\Ahttps?/i &&
+                               $p =~ m!/(?:T/?|t/?|t\.mbox\.gz|t\.atom)\b!) {
+
+                               $lei->{mset_opt}->{threads} = 1;
+                       }
+               } elsif ($u->scheme =~ /\Ahttps?/i &&
+                               # some msgids don't have '@', see if it looks like
+                               # a public-inbox URL:
+                               $p =~ m!/([^/]+)/(raw|t/?|T/?|
+                                       t\.mbox\.gz|t\.atom)\z!x) {
+                       $lei->{mset_opt}->{threads} = 1 if $2 && $2 ne 'raw';
+                       $term = 'mid:'.uri_unescape($1);
+               }
+               $term;
+       } elsif ($x =~ $MID_EXTRACT) { # <$MSGID>
+               "mid:$1";
+       } elsif ($x =~ /\b((?:m|mid):\S+)/) { # our own prefixes (and mairix)
+               $1;
+       } elsif ($x =~ /\bid:(\S+)/) { # notmuch convention
+               "mid:$1";
+       } else {
+               undef;
+       }
+}
+
+sub extract_all {
+       my ($lei, @argv) = @_;
+       my $strict = !$lei->{opt}->{stdin};
+       my @q;
+       for my $x (@argv) {
+               if (my $term = extract_1($lei,$x)) {
+                       push @q, $term;
+               } elsif ($strict) {
+                       return $lei->fail(<<"");
+could not extract Message-ID from $x
+
+               }
+       }
+       @q ? join(' OR ', @q) : $lei->fail("no Message-ID in: @argv");
+}
+
+sub _stdin { # PublicInbox::InputPipe::consume callback for --stdin
+       my ($lei) = @_; # $_[1] = $rbuf
+       if (defined($_[1])) {
+               $_[1] eq '' and return eval {
+                       if (my $dfd = $lei->{3}) {
+                               chdir($dfd) or return $lei->fail("fchdir: $!");
+                       }
+                       my @argv = split(/\s+/, $lei->{mset_opt}->{qstr});
+                       $lei->{mset_opt}->{qstr} = extract_all($lei, @argv)
+                               or return;
+                       $lei->_start_query;
+               };
+               $lei->{mset_opt}->{qstr} .= $_[1];
+       } else {
+               $lei->fail("error reading stdin: $!");
+       }
+}
+
+sub lei_lcat {
+       my ($lei, @argv) = @_;
+       my $lxs = $lei->lxs_prepare or return;
+       $lei->ale->refresh_externals($lxs);
+       my $sto = $lei->_lei_store(1);
+       $lei->{lse} = $sto->search;
+       my $opt = $lei->{opt};
+       my %mset_opt = map { $_ => $opt->{$_} } qw(threads limit offset);
+       $mset_opt{asc} = $opt->{'reverse'} ? 1 : 0;
+       $mset_opt{limit} //= 10000;
+       $opt->{sort} //= 'relevance';
+       $mset_opt{relevance} = 1;
+       $lei->{mset_opt} = \%mset_opt;
+       $opt->{'format'} //= 'mboxrd' unless defined($opt->{output});
+       if ($lei->{opt}->{stdin}) {
+               return $lei->fail(<<'') if @argv;
+no args allowed on command-line with --stdin
+
+               require PublicInbox::InputPipe;
+               PublicInbox::InputPipe::consume($lei->{0}, \&_stdin, $lei);
+               return;
+       }
+       $lei->{mset_opt}->{qstr} = extract_all($lei, @argv) or return;
+       $lei->_start_query;
+}
+
+1;
diff --git a/t/lei-lcat.t b/t/lei-lcat.t
new file mode 100644 (file)
index 0000000..e5f0070
--- /dev/null
@@ -0,0 +1,16 @@
+#!perl -w
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict; use v5.10.1; use PublicInbox::TestCommon;
+require_mods(qw(lei));
+
+test_lei(sub {
+       my $in = "\nMessage-id: <qp\@example.com>\n";
+       lei_ok([qw(lcat --stdin)], undef, { 0 => \$in, %$lei_opt });
+       unlike($lei_out, qr/\S/, 'nothing, yet');
+       lei_ok('import', 't/plack-qp.eml');
+       lei_ok([qw(lcat --stdin)], undef, { 0 => \$in, %$lei_opt });
+       like($lei_out, qr/qp\@example\.com/, 'got a result');
+});
+
+done_testing;
diff --git a/t/lei_lcat.t b/t/lei_lcat.t
new file mode 100644 (file)
index 0000000..536abde
--- /dev/null
@@ -0,0 +1,44 @@
+#!perl -w
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+# unit test for "lei lcat" internals, see t/lei-lcat.t for functional test
+use strict;
+use v5.10.1;
+use Test::More;
+use_ok 'PublicInbox::LeiLcat';
+my $cb = \&PublicInbox::LeiLcat::extract_1;
+my $ck = sub {
+       my ($txt, $exp, $t) = @_;
+       my $lei = {};
+       is($cb->($lei, $txt), $exp, $txt);
+       ($t ? is_deeply($lei, { mset_opt => { threads => 1 } }, "-t $exp")
+               : is_deeply($lei, {}, "no -t for $exp")) or diag explain($lei);
+};
+
+for my $txt (qw(https://example.com/inbox/foo@bar/
+               https://example.com/inbox/foo@bar
+               https://example.com/inbox/foo@bar/raw
+               id:foo@bar
+               mid:foo@bar
+               <foo@bar>
+               <https://example.com/inbox/foo@bar>
+               <https://example.com/inbox/foo@bar/raw>
+               <https://example.com/inbox/foo@bar/>
+               <nntp://example.com/foo@bar>)) {
+       $ck->($txt, 'mid:foo@bar');
+}
+
+for my $txt (qw(https://example.com/inbox/foo@bar/T/
+               https://example.com/inbox/foo@bar/t/
+               https://example.com/inbox/foo@bar/t.mbox.gz
+               <https://example.com/inbox/foo@bar/t.atom>
+               <https://example.com/inbox/foo@bar/t/>)) {
+       $ck->($txt, 'mid:foo@bar', '-t');
+}
+
+$ck->('https://example.com/x/foobar/T/', 'mid:foobar', '-t');
+$ck->('https://example.com/x/foobar/raw', 'mid:foobar');
+is($cb->(my $lei = {}, 'asdf'), undef, 'no Message-ID');
+is($cb->($lei = {}, 'm:x'), 'm:x', 'bare m: accepted');
+
+done_testing;