use URI::Escape qw(uri_unescape);
use PublicInbox::MID qw($MID_EXTRACT);
-sub lcat_folder ($$$) {
- my ($lei, $lms, $folder) = @_;
- $lms //= $lei->lms or return;
- my $folders = [ $folder];
+sub lcat_folder ($$;$$) {
+ my ($lei, $folder, $beg, $end) = @_;
+ my $lms = $lei->{-lms_ro} //= $lei->lms // return;
+ my $folders = [ $folder ];
eval { $lms->arg2folder($lei, $folders) };
- if ($@) {
- $lei->child_error(0, "# unknown folder: $folder");
- } else {
- for my $f (@$folders) {
- my $fid = $lms->fid_for($f);
- push @{$lei->{lcat_todo}}, { fid => $fid };
- }
+ return $lei->child_error(0, "# unknown folder: $folder") if $@;
+ my %range;
+ if (defined($beg)) { # NNTP article range
+ $range{min} = $beg;
+ $range{max} = $end // $beg;
+ }
+ for my $f (@$folders) {
+ my $fid = $lms->fid_for($f);
+ push @{$lei->{lcat_todo}}, { fid => $fid, %range };
}
}
sub lcat_imap_uri ($$) {
my ($lei, $uri) = @_;
- my $lms = $lei->lms or return;
- # cf. LeiXsearch->lcat_dump
+ # cf. LeiXSearch->lcat_dump
+ my $lms = $lei->{-lms_ro} //= $lei->lms // return;
if (defined $uri->uid) {
push @{$lei->{lcat_todo}}, $lms->imap_oidhex($lei, $uri);
} elsif (defined(my $fid = $lms->fid_for($$uri))) {
push @{$lei->{lcat_todo}}, { fid => $fid };
} else {
- lcat_folder($lei, $lms, $$uri);
+ lcat_folder($lei, $$uri);
}
}
+sub lcat_nntp_uri ($$) {
+ my ($lei, $uri) = @_;
+ my $mid = $uri->message; # already unescaped by URI::news
+ return "mid:$mid" if defined($mid);
+ my $lms = $lei->{-lms_ro} //= $lei->lms // return;
+ my ($ng, $beg, $end) = $uri->group;
+ $uri->group($ng);
+ lcat_folder($lei, $$uri, $beg, $end);
+ '""';
+}
+
sub extract_1 ($$) {
my ($lei, $x) = @_;
- if ($x =~ m!\b(imaps?://[^>]+)!i) {
- my $u = $1;
- require PublicInbox::URIimap;
- lcat_imap_uri($lei, PublicInbox::URIimap->new($u));
- '""'; # blank query, using {lcat_todo}
- } elsif ($x =~ m!\b(maildir:.+)!i) {
- lcat_folder($lei, undef, $1);
+ if ($x =~ m!\b(maildir:.+)!i) {
+ lcat_folder($lei, $1);
'""'; # blank query, using {lcat_todo}
- } elsif ($x =~ m!\b([a-z]+?://\S+)!i) {
- my $u = $1;
+ } elsif ($x =~ m!\b(([a-z]+)://\S+)!i) {
+ my ($u, $scheme) = ($1, $2);
$u =~ s/[\>\]\)\,\.\;]+\z//;
+ if ($scheme =~ m!\A(imaps?)\z!i) {
+ require PublicInbox::URIimap;
+ lcat_imap_uri($lei, PublicInbox::URIimap->new($u));
+ return '""'; # blank query, using {lcat_todo}
+ } elsif ($scheme =~ m!\A(?:nntps?|s?news)\z!i) {
+ require PublicInbox::URInntps;
+ $u = PublicInbox::URInntps->new($u);
+ return lcat_nntp_uri($lei, $u);
+ } # http, or something else:
require URI;
$u = URI->new($u);
my $p = $u->path;
my $strict = !$lei->{opt}->{stdin};
my @q;
for my $x (@argv) {
- if (my $term = extract_1($lei,$x)) {
+ if (my $term = extract_1($lei, $x)) {
push @q, $term;
} elsif ($strict) {
return $lei->fail(<<"");
}
}
+ delete $lei->{-lms_ro};
@q ? join(' OR ', @q) : $lei->fail("no Message-ID in: @argv");
}
sub each_src {
my ($self, $folder, $cb, @args) = @_;
my $dbh = $self->{dbh} //= dbh_new($self);
- my $fid;
+ my ($fid, @rng);
+ my $and_ge_le = '';
if (ref($folder) eq 'HASH') {
$fid = $folder->{fid} // die "BUG: no `fid'";
+ @rng = grep(defined, @$folder{qw(min max)});
+ $and_ge_le = 'AND uid >= ? AND uid <= ?' if @rng;
} else {
$fid = $self->{fmap}->{$folder} //=
fid_for($self, $folder) // return;
# minimize implicit txn time to avoid blocking writers by
# batching SELECTs. This looks wonky but is necessary since
# $cb-> may access the DB on its own.
- my $ary = $dbh->selectall_arrayref(<<'', undef, $fid);
-SELECT _rowid_,oidbin,uid FROM blob2num WHERE fid = ?
+ my $ary = $dbh->selectall_arrayref(<<"", undef, $fid, @rng);
+SELECT _rowid_,oidbin,uid FROM blob2num WHERE fid = ? $and_ge_le
ORDER BY _rowid_ ASC LIMIT 1000
my $min = @$ary ? $ary->[-1]->[0] : undef;
while (defined $min) {
for my $row (@$ary) { $cb->($row->[1], $row->[2], @args) }
- $ary = $dbh->selectall_arrayref(<<'', undef, $fid, $min);
-SELECT _rowid_,oidbin,uid FROM blob2num WHERE fid = ? AND _rowid_ > ?
+ $ary = $dbh->selectall_arrayref(<<"", undef, $fid, @rng, $min);
+SELECT _rowid_,oidbin,uid FROM blob2num
+WHERE fid = ? $and_ge_le AND _rowid_ > ?
ORDER BY _rowid_ ASC LIMIT 1000
$min = @$ary ? $ary->[-1]->[0] : undef;
is(ref(json_utf8->decode($lei_out)), 'ARRAY', 'ls-mail-source JSON');
lei_ok('import', $url);
+ lei_ok "lcat", "nntp://$host_port/testmessage\@example.com";
+ my $local = $lei_out;
+ lei_ok "lcat", "nntp://example.com/testmessage\@example.com";
+ my $remote = $lei_out;
+ is($local, $remote, 'Message-ID used even from unknown host');
lei_ok(qw(q z:1..));
$out = json_utf8->decode($lei_out);
ok(scalar(@$out) > 1, 'got imported messages');
lei_ok('inspect', "$url/$high");
my $x = json_utf8->decode($lei_out);
like($x->{$url}->{$high}, qr/\A[a-f0-9]{40,}\z/, 'inspect shows blob');
+ lei_ok qw(lcat -f json), "$url/$high";
+ my $lcat = json_utf8->decode($lei_out);
+ is($lcat->[1], undef, 'only one result for lcat');
+ is($lcat->[0]->{blob}, $x->{$url}->{$high},
+ 'lcat showed correct blob');
lei_ok 'ls-mail-sync';
is($lei_out, "$url\n", 'article number not stored as folder');
is(scalar(grep(/\A[a-f0-9]{40,}\z/, values %{$x->{$url}})),
$end - $low + 1, 'all values are git blobs');
+ lei_ok qw(lcat -f json), "$url/$low";
+ $lcat = json_utf8->decode($lei_out);
+ is($lcat->[1], undef, 'only one result for lcat');
+ is($lcat->[0]->{blob}, $x->{$url}->{$low},
+ 'lcat showed correct blob');
+ lei_ok qw(lcat -f json), "$url/$low-$end";
+ $lcat = json_utf8->decode($lei_out);
+ pop @$lcat;
+ for ($low..$end) {
+ my $tip = shift @$lcat;
+ is($x->{$url}->{$_}, $tip->{blob}, "blob matches art #$_");
+ }
+
lei_ok 'ls-mail-sync';
is($lei_out, "$url\n", 'article range not stored as folder');
lei_ok qw(q z:0..); my $start = json_utf8->decode($lei_out);