lib/PublicInbox/LeiImport.pm
lib/PublicInbox/LeiInit.pm
lib/PublicInbox/LeiInput.pm
+lib/PublicInbox/LeiInspect.pm
lib/PublicInbox/LeiLsLabel.pm
lib/PublicInbox/LeiLsSearch.pm
lib/PublicInbox/LeiMailSync.pm
($spec, '<>' => $cb, $GLP_PASS) # for Getopt::Long
}
-sub rel2abs ($$) {
+# rel2abs preserves symlinks in parent, unlike abs_path
+sub rel2abs {
my ($self, $p) = @_;
- return $p if index($p, '/') == 0; # already absolute
+ if (index($p, '/') == 0) { # already absolute
+ $p =~ tr!/!/!s; # squeeze redundant slashes
+ return $p;
+ }
my $pwd = $self->{env}->{PWD};
my $cwd;
if (defined $pwd) {
File::Spec->rel2abs($p, $pwd);
}
+# abs_path resolves symlinks in parent iff all parents exist.
+# When Cwd::abs_path yields undef, fall back to rel2abs (which
+# preserves symlinks in the parent).
+sub abs_path {
+	my ($self, $p) = @_;
+	my $resolved = Cwd::abs_path($p);
+	defined($resolved) ? $resolved : rel2abs($self, $p);
+}
+
sub share_path ($) { # $HOME/.local/share/lei/$FOO
my ($self) = @_;
rel2abs($self, ($self->{env}->{XDG_DATA_HOME} //
'import' => [ 'LOCATION...|--stdin',
'one-time import/update from URL or filesystem',
qw(stdin| offset=i recursive|r exclude=s include|I=s
- lock=s@ in-format|F=s kw! verbose|v+ incremental!), @c_opt ],
+ lock=s@ in-format|F=s kw! verbose|v+ incremental! sync!), @c_opt ],
'convert' => [ 'LOCATION...|--stdin',
'one-time conversion from URL or filesystem to another format',
qw(stdin| in-format|F=s out-format|f=s output|mfolder|o=s
'git-config(1) wrapper for '._config_path($_[0]);
}, qw(config-file|system|global|file|f=s), # for conflict detection
qw(c=s@ C=s@), pass_through('git config') ],
+'inspect' => [ 'ITEMS...', 'inspect lei/store and/or local external',
+ qw(pretty ascii dir=s), @c_opt ],
+
'init' => [ '[DIRNAME]', sub {
"initialize storage, default: ".store_path($_[0]);
}, @c_opt ],
my ($self, $eml, $vmd) = @_;
my $xoids = $self->{lei}->{ale}->xoids_for($eml);
if (my $all_vmd = $self->{all_vmd}) {
- $vmd //= {};
@$vmd{keys %$all_vmd} = values %$all_vmd;
}
$self->{lei}->{sto}->ipc_do('set_eml', $eml, $vmd, $xoids);
sub input_maildir_cb { # maildir_each_eml cb
my ($f, $kw, $eml, $self) = @_;
- input_eml_cb($self, $eml, $self->{-import_kw} ? { kw => $kw } : undef);
+ my $vmd = $self->{-import_kw} ? { kw => $kw } : undef; # keywords only if -import_kw is set
+ if ($self->{-mail_sync}) { # also record which Maildir file this message was read from
+ if ($f =~ m!\A(.+?)/(?:new|cur)/([^/]+)\z!) { # ugh... $1 = path before new/ or cur/, $2 = basename
+ $vmd->{sync_info} = [ "maildir:$1", \(my $n = $2) ]; # basename is passed as a scalar ref; autovivifies $vmd if undef
+ } else {
+ warn "E: $f was not from a Maildir?\n";
+ }
+ }
+ input_eml_cb($self, $eml, $vmd);
}
-sub input_net_cb { # imap_each, nntp_each cb
+sub input_imap_cb { # imap_each cb
my ($url, $uid, $kw, $eml, $self) = @_;
+ my $vmd = $self->{-import_kw} ? { kw => $kw } : undef; # keywords only if -import_kw is set
+ $vmd->{sync_info} = [ $url, $uid ] if $self->{-mail_sync}; # remember source URL + UID; autovivifies $vmd if undef
+ input_eml_cb($self, $eml, $vmd);
+}
+
+sub input_nntp_cb { # nntp_each cb; unlike input_imap_cb, no sync_info is attached
+ my ($url, $num, $kw, $eml, $self) = @_; # $url and $num are unused here
input_eml_cb($self, $eml, $self->{-import_kw} ? { kw => $kw } : undef);
}
return $lei->fail(join("\n", @{$vmd_mod->{err}})) if $vmd_mod->{err};
$self->{all_vmd} = $vmd_mod if scalar keys %$vmd_mod;
$self->prepare_inputs($lei, \@inputs) or return;
+ $self->{-mail_sync} = $lei->{opt}->{sync} // 1;
+
$lei->ale; # initialize for workers to read
my $j = $lei->{opt}->{jobs} // scalar(@{$self->{inputs}}) || 1;
if (my $net = $lei->{net}) {
my $ifmt = lc($lei->{opt}->{'in-format'} // '');
# TODO auto-detect?
if ($input =~ m!\Aimaps?://!i) {
- $lei->{net}->imap_each($input, $self->can('input_net_cb'),
+ $lei->{net}->imap_each($input, $self->can('input_imap_cb') //
+ $self->can('input_net_cb'),
$self, @args);
return;
} elsif ($input =~ m!\A(?:nntps?|s?news)://!i) {
- $lei->{net}->nntp_each($input, $self->can('input_net_cb'),
+ $lei->{net}->nntp_each($input, $self->can('input_nntp_cb') //
+ $self->can('input_net_cb'),
$self, @args);
return;
}
sub prepare_inputs { # returns undef on error
my ($self, $lei, $inputs) = @_;
my $in_fmt = $lei->{opt}->{'in-format'};
+ my $sync = $lei->{opt}->{sync} ? {} : undef; # using LeiMailSync
if ($lei->{opt}->{stdin}) {
@$inputs and return
$lei->fail("--stdin and @$inputs do not mix");
check_input_format($lei) or return;
push @$inputs, '/dev/stdin';
+ push @{$sync->{no}}, '/dev/stdin' if $sync;
}
my $net = $lei->{net}; # NetWriter may be created by l2m
my (@f, @d);
require PublicInbox::NetReader;
$net //= PublicInbox::NetReader->new;
$net->add_url($input);
+ if ($sync) {
+ if ($input =~ m!\Aimaps?://!) {
+ push @{$sync->{ok}}, $input;
+ } else {
+ push @{$sync->{no}}, $input;
+ }
+ }
} elsif ($input_path =~ s/\A([a-z0-9]+)://is) {
my $ifmt = lc $1;
if (($in_fmt // $ifmt) ne $ifmt) {
--in-format=$in_fmt and `$ifmt:' conflict
}
+ if ($sync) {
+ if ($ifmt =~ /\A(?:maildir|mh)\z/i) {
+ push @{$sync->{ok}}, $input;
+ } else {
+ push @{$sync->{no}}, $input;
+ }
+ }
my $devfd = $lei->path_to_fd($input_path) // return;
if ($devfd >= 0 || (-f $input_path || -p _)) {
require PublicInbox::MboxLock;
require PublicInbox::MdirReader;
$ifmt eq 'maildir' or return
$lei->fail("$ifmt not supported");
+ $input = $lei->abs_path($input) if $sync;
} else {
return $lei->fail("Unable to handle $input");
}
$input is `eml', not --in-format=$in_fmt
require PublicInbox::Eml;
+ push @{$sync->{no}}, $input if $sync;
} else {
my $devfd = $lei->path_to_fd($input) // return;
if ($devfd >= 0 || -f $input || -p _) {
- push @f, $input
+ push @{$sync->{no}}, $input if $sync;
+ push @f, $input;
} elsif (-d $input) {
- push @d, $input
+ if ($sync) {
+ $input = $lei->abs_path($input);
+ push @{$sync->{ok}}, $input;
+ }
+ push @d, $input;
} else {
return $lei->fail("Unable to handle $input")
}
if (@d) { # TODO: check for MH vs Maildir, here
require PublicInbox::MdirReader;
}
+ if ($sync && $sync->{no}) {
+ return $lei->fail(<<"") if !$sync->{ok};
+--sync specified but no inputs support it
+
+ # non-fatal if some inputs support sync
+ $lei->err("# --sync will only be used for @{$sync->{ok}}");
+ $lei->err("# --sync is not supported for: @{$sync->{no}}");
+ }
if ($net) {
if (my $err = $net->errors) {
return $lei->fail($err);
--- /dev/null
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "lei inspect" general purpose inspector for stuff in SQLite and
+# Xapian. Will eventually be useful with plain public-inboxes,
+# not just lei/store. This is totally half-baked at the moment
+# but useful for testing.
+package PublicInbox::LeiInspect;
+use strict;
+use v5.10.1;
+use PublicInbox::Config;
+
+# Collect what is known about the blob $oidhex.
+# The result of over->blob_exists is stored under 'lei/store' (only
+# when non-empty) and the result of lms->locations_for under 'sync'
+# (only when true).  Always returns a hashref; it is empty when
+# $lei->{lse} is unset or nothing was found.
+sub inspect_blob ($$) {
+	my ($lei, $oidhex) = @_;
+	my %info;
+	my $lse = $lei->{lse} or return \%info;
+	my @ids = $lse->over->blob_exists($oidhex);
+	$info{'lei/store'} = \@ids if @ids;
+	if (my $lms = $lse->lms) { # lms may be false, skip sync info then
+		my $where = $lms->locations_for($oidhex);
+		$info{sync} = $where if $where;
+	}
+	\%info;
+}
+
+# Gather mail sync statistics for one folder argument.
+# $folder may be an imap:// or imaps:// URL, a "maildir:" or "mh:"
+# prefixed path, or an existing directory (treated as a Maildir).
+# Returns a hashref mapping each matched folder to whatever
+# $lms->location_stats returns for it (may be undef); the hashref is
+# empty when $lei->{lse} or its lms is unavailable.
+# Returns the (false) result of $lei->fail when $folder cannot be
+# interpreted, so callers must check for an undefined result.
+sub inspect_sync_folder ($$) {
+	my ($lei, $folder) = @_;
+	my $ent = {};
+	my $lse = $lei->{lse} or return $ent;
+	my $lms = $lse->lms or return $ent;
+	my @folders;
+	if ($folder =~ m!\Aimaps?://!i) {
+		require PublicInbox::URIimap;
+		my $uri = PublicInbox::URIimap->new($folder)->canonical;
+		if (defined($uri->uidvalidity)) {
+			$folders[0] = $$uri;
+		} else {
+			# no UIDVALIDITY in the argument: keep every known
+			# folder which is identical to $uri once its own
+			# UIDVALIDITY has been copied onto $uri
+			my @maybe = $lms->folders($$uri);
+			@folders = grep {
+				my $u = PublicInbox::URIimap->new($_);
+				$uri->uidvalidity($u->uidvalidity);
+				$$uri eq $$u;
+			} @maybe;
+		}
+	} elsif ($folder =~ m!\A(maildir|mh):(.+)!i) {
+		# copy the captures before calling anything else, and
+		# lowercase the type: the prefix is matched without regard
+		# to case but folder names are looked up verbatim
+		my ($type, $path) = (lc($1), $2);
+		$folders[0] = "$type:".$lei->abs_path($path);
+	} elsif (-d $folder) {
+		$folders[0] = 'maildir:'.$lei->abs_path($folder);
+	} else {
+		# the error was reported by fail; do not fall through to
+		# the "no folders match" note and the stats loop
+		return $lei->fail("$folder not understood");
+	}
+	$lei->qerr("# no folders match $folder (non-fatal)") if !@folders;
+	for my $f (@folders) {
+		$ent->{$f} = $lms->location_stats($f); # may be undef
+	}
+	$ent
+}
+
+# Inspect a single command-line $item and emit the result as JSON
+# via $lei->out, followed by a ',' when $more is true.
+# Returns 1 on success and a false value once $lei->fail was called.
+sub inspect1 ($$$) {
+	my ($lei, $item, $more) = @_;
+	my $ent;
+	if ($item =~ /\Ablob:(.+)/) {
+		$ent = inspect_blob($lei, $1);
+	} elsif ($item =~ m!\Aimaps?://!i ||
+			$item =~ m!\A(?:maildir|mh):!i || -d $item) {
+		# undef means inspect_sync_folder already called fail;
+		# emit nothing for this item
+		$ent = inspect_sync_folder($lei, $item) // return;
+	} else { # TODO: more things
+		return $lei->fail("$item not understood");
+	}
+	$lei->out($lei->{json}->encode($ent));
+	$lei->out(',') if $more;
+	1;
+}
+
+# Entry point for the "inspect" command: every element of @argv is
+# handed to inspect1, which emits one JSON value per item via
+# $lei->out.  The output is wrapped in '[' ... ']' when more than one
+# item is given.  Returns early, without the closing ']', as soon as
+# inspect1 returns false.
+sub lei_inspect {
+	my ($lei, @argv) = @_;
+	$lei->{1}->autoflush(0);
+	my $multi = @argv > 1;
+	$lei->out('[') if $multi;
+	my $json_class = ref(PublicInbox::Config::json());
+	$lei->{json} = $json_class->new->utf8->canonical;
+	# stays undef when the "external" option is defined but false,
+	# or when _lei_store returns nothing
+	my $lse;
+	if ($lei->{opt}->{external} // 1) {
+		my $sto = $lei->_lei_store;
+		$lse = $sto->search if $sto;
+	}
+	$lei->{lse} = $lse;
+	# pretty-print on request or when fd 1 is a terminal
+	my $want_pretty = $lei->{opt}->{pretty} || -t $lei->{1};
+	$lei->{json}->pretty(1)->indent(2) if $want_pretty;
+	while (defined(my $item = shift @argv)) {
+		# scalar(@argv) is the number of items still to come
+		inspect1($lei, $item, scalar(@argv)) or return;
+	}
+	$lei->out(']') if $multi;
+}
+
+1;
$qp
}
+# Returns a PublicInbox::LeiMailSync object for mail_sync.sqlite3
+# beneath {topdir}, or undef if that path is not a regular file.
+sub lms {
+	my ($self) = @_;
+	require PublicInbox::LeiMailSync;
+	my $db = "$self->{topdir}/mail_sync.sqlite3";
+	return undef if !-f $db; # explicit undef, also in list context
+	PublicInbox::LeiMailSync->new($db);
+}
+
1;
\@docids;
}
+# Hand $oidhex and the elements of the arrayref $sync_info to
+# LeiMailSync::set_src.  The LeiMailSync object for mail_sync.sqlite3
+# (beneath the topdir of {priv_eidx}) is created on first use, has
+# lms_begin called on it, and is then cached in $self->{lms}.
+sub set_sync_info ($$$) {
+	my ($self, $oidhex, $sync_info) = @_;
+	my $lms = $self->{lms};
+	if (!defined $lms) {
+		require PublicInbox::LeiMailSync;
+		my $db = "$self->{priv_eidx}->{topdir}/mail_sync.sqlite3";
+		$lms = PublicInbox::LeiMailSync->new($db);
+		$lms->lms_begin;
+		$self->{lms} = $lms; # cache only after lms_begin succeeded
+	}
+	$lms->set_src($oidhex, @$sync_info);
+}
+
sub add_eml {
my ($self, $eml, $vmd, $xoids) = @_;
my $im = $self->importer; # may create new epoch
my ($eidx, $tl) = eidx_init($self); # updates/writes alternates file
my $oidx = $eidx->{oidx}; # PublicInbox::Import::add checks this
my $smsg = bless { -oidx => $oidx }, 'PublicInbox::Smsg';
- $im->add($eml, undef, $smsg) or return; # duplicate returns undef
+ my $im_mark = $im->add($eml, undef, $smsg);
+ if ($vmd && $vmd->{sync_info}) {
+ set_sync_info($self, $smsg->{blob}, $vmd->{sync_info});
+ }
+ $im_mark or return; # duplicate blob returns undef
local $self->{current_info} = $smsg->{blob};
my $vivify_xvmd = delete($smsg->{-vivify_xvmd}) // []; # exact matches
warn $err;
}
}
+ if (my $lms = delete $self->{lms}) {
+ $lms->lms_commit;
+ }
$self->{priv_eidx}->done; # V2Writable::done
xchg_stderr($self);
die $err if $err;
my $host_port = tcp_host_port($sock);
undef $sock;
test_lei({ tmpdir => $tmpdir }, sub {
+ my $url = "imap://$host_port/t.v2.0"; # folder URL shared by all commands below
+
lei_ok(qw(q z:1..));
my $out = json_utf8->decode($lei_out);
is_deeply($out, [ undef ], 'nothing imported, yet');
- lei_ok('import', "imap://$host_port/t.v2.0");
+
+ lei_ok('inspect', $url); # before the first import
+ is_deeply(json_utf8->decode($lei_out), {}, 'no inspect stats, yet');
+
+ lei_ok('import', $url);
+
+ lei_ok('inspect', $url); # same command, now after the import
+ my $inspect = json_utf8->decode($lei_out);
+ my @k = keys %$inspect; # keys are folder URLs, see the like() below
+ is(scalar(@k), 1, 'one URL resolved');
+ like($k[0], qr!\A\Q$url\E;UIDVALIDITY=\d+\z!, 'inspect URL matches');
+ my $stats = $inspect->{$k[0]};
+ is_deeply([ sort keys %$stats ],
+ [ qw(uid.count uid.max uid.min) ], 'keys match');
+ ok($stats->{'uid.min'} < $stats->{'uid.max'}, 'min < max');
+ ok($stats->{'uid.count'} > 0, 'count > 0');
+
lei_ok(qw(q z:1..));
$out = json_utf8->decode($lei_out);
ok(scalar(@$out) > 1, 'got imported messages');
my %r;
for (@$out) { $r{ref($_)}++ }
is_deeply(\%r, { 'HASH' => scalar(@$out) }, 'all hashes');
- lei_ok([qw(tag +kw:seen), "imap://$host_port/t.v2.0"], undef, undef);
+ lei_ok([qw(tag +kw:seen), $url], undef, undef);
my $f = "$ENV{HOME}/.local/share/lei/store/net_last.sqlite3";
ok(-s $f, 'net tracked for redundant imports');
+ lei_ok('inspect', "blob:$out->[5]->{blob}"); # NOTE(review): assumes @$out has at least 6 entries; only "> 1" is asserted above
+ my $x = json_utf8->decode($lei_out);
+ is(ref($x->{'lei/store'}), 'ARRAY', 'lei/store in inspect');
+ is(ref($x->{sync}), 'HASH', 'sync in inspect');
+ is(ref($x->{sync}->{$k[0]}), 'ARRAY', 'UID arrays in inspect'); # keyed by the folder URL found above
});
done_testing;
BAIL_OUT "symlink $md $!";
lei_ok(qw(import), $md, \'import Maildir');
my $imp_err = $lei_err;
+
+	# "lei inspect" must print identical JSON no matter how the
+	# Maildir is named: with or without the "maildir:" prefix, and
+	# with or without -C $ENV{HOME}
+	my %i;
+	lei_ok('inspect', $md);
+	$i{no_type} = $lei_out;
+	lei_ok('inspect', "maildir:$md");
+	$i{with_type} = $lei_out;
+	lei_ok(['inspect', $md], undef, { -C => $ENV{HOME}, %$lei_opt });
+	$i{rel_no_type} = $lei_out;
+	lei_ok(['inspect', "maildir:$md"], undef,
+		{ -C => $ENV{HOME}, %$lei_opt });
+	$i{rel_with_type} = $lei_out;
+	my %v = map { $_ => 1 } values %i; # distinct outputs
+	is(scalar(keys %v), 1, 'inspect handles relative and absolute paths');
+	my $inspect = json_utf8->decode([ keys %v ]->[0]);
+	is_deeply($inspect, {"maildir:$md" => { 'name.count' => 1 }},
+		'inspect maildir: path had expected output');
+
lei_ok(qw(q s:boolean));
my $res = json_utf8->decode($lei_out);
like($res->[0]->{'s'}, qr/use boolean/, 'got expected result')
is_deeply($res->[0]->{kw}, ['seen'], 'keyword set');
is($res->[1], undef, 'only got one result');
+ lei_ok('inspect', "blob:$res->[0]->{blob}");
+ $inspect = json_utf8->decode($lei_out);
+ is(ref(delete $inspect->{"lei/store"}), 'ARRAY', 'lei/store IDs');
+ is_deeply($inspect, { sync => { "maildir:$md" => [ 'x:2,S' ] } },
+ 'maildir sync info as expected');
+
lei_ok(qw(import), $md, \'import Maildir again');
$imp_err = $lei_err;
lei_ok(qw(q -d none s:boolean), \'lei q w/o dedupe');