From: Eric Wong Date: Tue, 21 Sep 2021 07:41:54 +0000 (+0000) Subject: lei lcat: support NNTP URLs X-Git-Tag: v1.7.0~309 X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=commitdiff_plain;h=1ed1a566c7d300ab8afb604b2e7c939299318005 lei lcat: support NNTP URLs NNTP URLs are probably more prevalent in public message archives than IMAP URLs. --- diff --git a/lib/PublicInbox/LeiLcat.pm b/lib/PublicInbox/LeiLcat.pm index 1a4a988e..0902c213 100644 --- a/lib/PublicInbox/LeiLcat.pm +++ b/lib/PublicInbox/LeiLcat.pm @@ -11,47 +11,64 @@ use PublicInbox::LeiViewText; use URI::Escape qw(uri_unescape); use PublicInbox::MID qw($MID_EXTRACT); -sub lcat_folder ($$$) { - my ($lei, $lms, $folder) = @_; - $lms //= $lei->lms or return; - my $folders = [ $folder]; +sub lcat_folder ($$;$$) { + my ($lei, $folder, $beg, $end) = @_; + my $lms = $lei->{-lms_ro} //= $lei->lms // return; + my $folders = [ $folder ]; eval { $lms->arg2folder($lei, $folders) }; - if ($@) { - $lei->child_error(0, "# unknown folder: $folder"); - } else { - for my $f (@$folders) { - my $fid = $lms->fid_for($f); - push @{$lei->{lcat_todo}}, { fid => $fid }; - } + return $lei->child_error(0, "# unknown folder: $folder") if $@; + my %range; + if (defined($beg)) { # NNTP article range + $range{min} = $beg; + $range{max} = $end // $beg; + } + for my $f (@$folders) { + my $fid = $lms->fid_for($f); + push @{$lei->{lcat_todo}}, { fid => $fid, %range }; } } sub lcat_imap_uri ($$) { my ($lei, $uri) = @_; - my $lms = $lei->lms or return; - # cf. LeiXsearch->lcat_dump + # cf. LeiXSearch->lcat_dump + my $lms = $lei->{-lms_ro} //= $lei->lms // return; if (defined $uri->uid) { push @{$lei->{lcat_todo}}, $lms->imap_oidhex($lei, $uri); } elsif (defined(my $fid = $lms->fid_for($$uri))) { push @{$lei->{lcat_todo}}, { fid => $fid }; } else { - lcat_folder($lei, $lms, $$uri); + lcat_folder($lei, $$uri); } } +sub lcat_nntp_uri ($$) { + my ($lei, $uri) = @_; + my $mid = $uri->message; # already unescaped by URI::news + return "mid:$mid" if defined($mid); + my $lms = $lei->{-lms_ro} //= $lei->lms // return; + my ($ng, $beg, $end) = $uri->group; + $uri->group($ng); + lcat_folder($lei, $$uri, $beg, $end); + '""'; +} + sub extract_1 ($$) { my ($lei, $x) = @_; - if ($x =~ m!\b(imaps?://[^>]+)!i) { - my $u = $1; - require PublicInbox::URIimap; - lcat_imap_uri($lei, PublicInbox::URIimap->new($u)); - '""'; # blank query, using {lcat_todo} - } elsif ($x =~ m!\b(maildir:.+)!i) { - lcat_folder($lei, undef, $1); + if ($x =~ m!\b(maildir:.+)!i) { + lcat_folder($lei, $1); '""'; # blank query, using {lcat_todo} - } elsif ($x =~ m!\b([a-z]+?://\S+)!i) { - my $u = $1; + } elsif ($x =~ m!\b(([a-z]+)://\S+)!i) { + my ($u, $scheme) = ($1, $2); $u =~ s/[\>\]\)\,\.\;]+\z//; + if ($scheme =~ m!\A(imaps?)\z!i) { + require PublicInbox::URIimap; + lcat_imap_uri($lei, PublicInbox::URIimap->new($u)); + return '""'; # blank query, using {lcat_todo} + } elsif ($scheme =~ m!\A(?:nntps?|s?news)\z!i) { + require PublicInbox::URInntps; + $u = PublicInbox::URInntps->new($u); + return lcat_nntp_uri($lei, $u); + } # http, or something else: require URI; $u = URI->new($u); my $p = $u->path; @@ -93,7 +110,7 @@ sub extract_all { my $strict = !$lei->{opt}->{stdin}; my @q; for my $x (@argv) { - if (my $term = extract_1($lei,$x)) { + if (my $term = extract_1($lei, $x)) { push @q, $term; } elsif ($strict) { return $lei->fail(<<""); @@ -101,6 +118,7 @@ could not extract Message-ID from $x } } + delete $lei->{-lms_ro}; @q ? join(' OR ', @q) : $lei->fail("no Message-ID in: @argv"); } diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm index f83c7de2..522a5ebc 100644 --- a/lib/PublicInbox/LeiMailSync.pm +++ b/lib/PublicInbox/LeiMailSync.pm @@ -197,9 +197,12 @@ INSERT OR IGNORE INTO blob2name (oidbin, fid, name) VALUES (?, ?, ?) sub each_src { my ($self, $folder, $cb, @args) = @_; my $dbh = $self->{dbh} //= dbh_new($self); - my $fid; + my ($fid, @rng); + my $and_ge_le = ''; if (ref($folder) eq 'HASH') { $fid = $folder->{fid} // die "BUG: no `fid'"; + @rng = grep(defined, @$folder{qw(min max)}); + $and_ge_le = 'AND uid >= ? AND uid <= ?' if @rng; } else { $fid = $self->{fmap}->{$folder} //= fid_for($self, $folder) // return; @@ -208,16 +211,17 @@ sub each_src { # minimize implicit txn time to avoid blocking writers by # batching SELECTs. This looks wonky but is necessary since # $cb-> may access the DB on its own. - my $ary = $dbh->selectall_arrayref(<<'', undef, $fid); -SELECT _rowid_,oidbin,uid FROM blob2num WHERE fid = ? + my $ary = $dbh->selectall_arrayref(<<"", undef, $fid, @rng); +SELECT _rowid_,oidbin,uid FROM blob2num WHERE fid = ? $and_ge_le ORDER BY _rowid_ ASC LIMIT 1000 my $min = @$ary ? $ary->[-1]->[0] : undef; while (defined $min) { for my $row (@$ary) { $cb->($row->[1], $row->[2], @args) } - $ary = $dbh->selectall_arrayref(<<'', undef, $fid, $min); -SELECT _rowid_,oidbin,uid FROM blob2num WHERE fid = ? AND _rowid_ > ? + $ary = $dbh->selectall_arrayref(<<"", undef, $fid, @rng, $min); +SELECT _rowid_,oidbin,uid FROM blob2num +WHERE fid = ? $and_ge_le AND _rowid_ > ? ORDER BY _rowid_ ASC LIMIT 1000 $min = @$ary ? $ary->[-1]->[0] : undef; diff --git a/t/lei-import-nntp.t b/t/lei-import-nntp.t index 0b080781..eb1ae312 100644 --- a/t/lei-import-nntp.t +++ b/t/lei-import-nntp.t @@ -25,6 +25,11 @@ test_lei({ tmpdir => $tmpdir }, sub { is(ref(json_utf8->decode($lei_out)), 'ARRAY', 'ls-mail-source JSON'); lei_ok('import', $url); + lei_ok "lcat", "nntp://$host_port/testmessage\@example.com"; + my $local = $lei_out; + lei_ok "lcat", "nntp://example.com/testmessage\@example.com"; + my $remote = $lei_out; + is($local, $remote, 'Message-ID used even from unknown host'); lei_ok(qw(q z:1..)); $out = json_utf8->decode($lei_out); ok(scalar(@$out) > 1, 'got imported messages'); @@ -57,6 +62,11 @@ test_lei({ tmpdir => $tmpdir }, sub { lei_ok('inspect', "$url/$high"); my $x = json_utf8->decode($lei_out); like($x->{$url}->{$high}, qr/\A[a-f0-9]{40,}\z/, 'inspect shows blob'); + lei_ok qw(lcat -f json), "$url/$high"; + my $lcat = json_utf8->decode($lei_out); + is($lcat->[1], undef, 'only one result for lcat'); + is($lcat->[0]->{blob}, $x->{$url}->{$high}, + 'lcat showed correct blob'); lei_ok 'ls-mail-sync'; is($lei_out, "$url\n", 'article number not stored as folder'); @@ -78,6 +88,19 @@ test_lei({ tmpdir => $tmpdir }, sub { is(scalar(grep(/\A[a-f0-9]{40,}\z/, values %{$x->{$url}})), $end - $low + 1, 'all values are git blobs'); + lei_ok qw(lcat -f json), "$url/$low"; + $lcat = json_utf8->decode($lei_out); + is($lcat->[1], undef, 'only one result for lcat'); + is($lcat->[0]->{blob}, $x->{$url}->{$low}, + 'lcat showed correct blob'); + lei_ok qw(lcat -f json), "$url/$low-$end"; + $lcat = json_utf8->decode($lei_out); + pop @$lcat; + for ($low..$end) { + my $tip = shift @$lcat; + is($x->{$url}->{$_}, $tip->{blob}, "blob matches art #$_"); + } + lei_ok 'ls-mail-sync'; is($lei_out, "$url\n", 'article range not stored as folder'); lei_ok qw(q z:0..); my $start = json_utf8->decode($lei_out);