From b6b86cfd238c170ea3e2c4d4179f06c7af139086 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 24 Apr 2021 09:28:46 +0000 Subject: [PATCH] lei import: keep sync info for Maildir and IMAP folders We aren't using it, yet, but the plan is to be able to use this information to propagate keyword changes back to IMAP and Maildir folders using some to-be-implemented command. "lei inspect" is a half-baked new command to make testing this change easier. It will be updated to support more SQLite+Xapian introspection duties in the future, including public-inbox things independent of lei. --- MANIFEST | 1 + lib/PublicInbox/LEI.pm | 16 ++++-- lib/PublicInbox/LeiImport.pm | 22 ++++++-- lib/PublicInbox/LeiInput.pm | 41 +++++++++++++-- lib/PublicInbox/LeiInspect.pm | 96 +++++++++++++++++++++++++++++++++++ lib/PublicInbox/LeiSearch.pm | 7 +++ lib/PublicInbox/LeiStore.pm | 20 +++++++- t/lei-import-imap.t | 27 +++++++++- t/lei-import-maildir.t | 21 ++++++++ 9 files changed, 238 insertions(+), 13 deletions(-) create mode 100644 lib/PublicInbox/LeiInspect.pm diff --git a/MANIFEST b/MANIFEST index abaf54b0..79d393c5 100644 --- a/MANIFEST +++ b/MANIFEST @@ -199,6 +199,7 @@ lib/PublicInbox/LeiHelp.pm lib/PublicInbox/LeiImport.pm lib/PublicInbox/LeiInit.pm lib/PublicInbox/LeiInput.pm +lib/PublicInbox/LeiInspect.pm lib/PublicInbox/LeiLsLabel.pm lib/PublicInbox/LeiLsSearch.pm lib/PublicInbox/LeiMailSync.pm diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 9f49fc03..39278de6 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -64,9 +64,13 @@ sub opt_dash ($$) { ($spec, '<>' => $cb, $GLP_PASS) # for Getopt::Long } -sub rel2abs ($$) { +# rel2abs preserves symlinks in parent, unlike abs_path +sub rel2abs { my ($self, $p) = @_; - return $p if index($p, '/') == 0; # already absolute + if (index($p, '/') == 0) { # already absolute + $p =~ tr!/!/!s; # squeeze redundant slashes + return $p; + } my $pwd = $self->{env}->{PWD}; my $cwd; if (defined $pwd) { @@ -84,6 
+88,9 @@ sub rel2abs ($$) { File::Spec->rel2abs($p, $pwd); } +# abs_path resolves symlinks in parent iff all parents exist +sub abs_path { Cwd::abs_path($_[1]) // rel2abs(@_) } + sub share_path ($) { # $HOME/.local/share/lei/$FOO my ($self) = @_; rel2abs($self, ($self->{env}->{XDG_DATA_HOME} // @@ -193,7 +200,7 @@ our %CMD = ( # sorted in order of importance/use: 'import' => [ 'LOCATION...|--stdin', 'one-time import/update from URL or filesystem', qw(stdin| offset=i recursive|r exclude=s include|I=s - lock=s@ in-format|F=s kw! verbose|v+ incremental!), @c_opt ], + lock=s@ in-format|F=s kw! verbose|v+ incremental! sync!), @c_opt ], 'convert' => [ 'LOCATION...|--stdin', 'one-time conversion from URL or filesystem to another format', qw(stdin| in-format|F=s out-format|f=s output|mfolder|o=s @@ -205,6 +212,9 @@ our %CMD = ( # sorted in order of importance/use: 'git-config(1) wrapper for '._config_path($_[0]); }, qw(config-file|system|global|file|f=s), # for conflict detection qw(c=s@ C=s@), pass_through('git config') ], +'inspect' => [ 'ITEMS...', 'inspect lei/store and/or local external', + qw(pretty ascii dir=s), @c_opt ], + 'init' => [ '[DIRNAME]', sub { "initialize storage, default: ".store_path($_[0]); }, @c_opt ], diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm index e3c756e8..daaa6753 100644 --- a/lib/PublicInbox/LeiImport.pm +++ b/lib/PublicInbox/LeiImport.pm @@ -13,7 +13,6 @@ sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh my ($self, $eml, $vmd) = @_; my $xoids = $self->{lei}->{ale}->xoids_for($eml); if (my $all_vmd = $self->{all_vmd}) { - $vmd //= {}; @$vmd{keys %$all_vmd} = values %$all_vmd; } $self->{lei}->{sto}->ipc_do('set_eml', $eml, $vmd, $xoids); @@ -31,11 +30,26 @@ sub input_mbox_cb { # MboxReader callback sub input_maildir_cb { # maildir_each_eml cb my ($f, $kw, $eml, $self) = @_; - input_eml_cb($self, $eml, $self->{-import_kw} ? { kw => $kw } : undef); + my $vmd = $self->{-import_kw} ? 
{ kw => $kw } : undef; + if ($self->{-mail_sync}) { + if ($f =~ m!\A(.+?)/(?:new|cur)/([^/]+)\z!) { # ugh... + $vmd->{sync_info} = [ "maildir:$1", \(my $n = $2) ]; + } else { + warn "E: $f was not from a Maildir?\n"; + } + } + input_eml_cb($self, $eml, $vmd); } -sub input_net_cb { # imap_each, nntp_each cb +sub input_imap_cb { # imap_each my ($url, $uid, $kw, $eml, $self) = @_; + my $vmd = $self->{-import_kw} ? { kw => $kw } : undef; + $vmd->{sync_info} = [ $url, $uid ] if $self->{-mail_sync}; + input_eml_cb($self, $eml, $vmd); +} + +sub input_nntp_cb { # nntp_each + my ($url, $num, $kw, $eml, $self) = @_; input_eml_cb($self, $eml, $self->{-import_kw} ? { kw => $kw } : undef); } @@ -61,6 +75,8 @@ sub lei_import { # the main "lei import" method return $lei->fail(join("\n", @{$vmd_mod->{err}})) if $vmd_mod->{err}; $self->{all_vmd} = $vmd_mod if scalar keys %$vmd_mod; $self->prepare_inputs($lei, \@inputs) or return; + $self->{-mail_sync} = $lei->{opt}->{sync} // 1; + $lei->ale; # initialize for workers to read my $j = $lei->{opt}->{jobs} // scalar(@{$self->{inputs}}) || 1; if (my $net = $lei->{net}) { diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm index 0114f5ee..d11d23d4 100644 --- a/lib/PublicInbox/LeiInput.pm +++ b/lib/PublicInbox/LeiInput.pm @@ -83,11 +83,13 @@ sub input_path_url { my $ifmt = lc($lei->{opt}->{'in-format'} // ''); # TODO auto-detect? 
if ($input =~ m!\Aimaps?://!i) { - $lei->{net}->imap_each($input, $self->can('input_net_cb'), + $lei->{net}->imap_each($input, $self->can('input_imap_cb') // + $self->can('input_net_cb'), $self, @args); return; } elsif ($input =~ m!\A(?:nntps?|s?news)://!i) { - $lei->{net}->nntp_each($input, $self->can('input_net_cb'), + $lei->{net}->nntp_each($input, $self->can('input_nntp_cb') // + $self->can('input_net_cb'), $self, @args); return; } @@ -130,11 +132,13 @@ EOM sub prepare_inputs { # returns undef on error my ($self, $lei, $inputs) = @_; my $in_fmt = $lei->{opt}->{'in-format'}; + my $sync = $lei->{opt}->{sync} ? {} : undef; # using LeiMailSync if ($lei->{opt}->{stdin}) { @$inputs and return $lei->fail("--stdin and @$inputs do not mix"); check_input_format($lei) or return; push @$inputs, '/dev/stdin'; + push @{$sync->{no}}, '/dev/stdin' if $sync; } my $net = $lei->{net}; # NetWriter may be created by l2m my (@f, @d); @@ -145,6 +149,13 @@ sub prepare_inputs { # returns undef on error require PublicInbox::NetReader; $net //= PublicInbox::NetReader->new; $net->add_url($input); + if ($sync) { + if ($input =~ m!\Aimaps?://!) 
{ + push @{$sync->{ok}}, $input; + } else { + push @{$sync->{no}}, $input; + } + } } elsif ($input_path =~ s/\A([a-z0-9]+)://is) { my $ifmt = lc $1; if (($in_fmt // $ifmt) ne $ifmt) { @@ -152,6 +163,13 @@ sub prepare_inputs { # returns undef on error --in-format=$in_fmt and `$ifmt:' conflict } + if ($sync) { + if ($ifmt =~ /\A(?:maildir|mh)\z/i) { + push @{$sync->{ok}}, $input; + } else { + push @{$sync->{no}}, $input; + } + } my $devfd = $lei->path_to_fd($input_path) // return; if ($devfd >= 0 || (-f $input_path || -p _)) { require PublicInbox::MboxLock; @@ -162,6 +180,7 @@ sub prepare_inputs { # returns undef on error require PublicInbox::MdirReader; $ifmt eq 'maildir' or return $lei->fail("$ifmt not supported"); + $input = $lei->abs_path($input) if $sync; } else { return $lei->fail("Unable to handle $input"); } @@ -170,12 +189,18 @@ sub prepare_inputs { # returns undef on error $input is `eml', not --in-format=$in_fmt require PublicInbox::Eml; + push @{$sync->{no}}, $input if $sync; } else { my $devfd = $lei->path_to_fd($input) // return; if ($devfd >= 0 || -f $input || -p _) { - push @f, $input + push @{$sync->{no}}, $input if $sync; + push @f, $input; } elsif (-d $input) { - push @d, $input + if ($sync) { + $input = $lei->abs_path($input); + push @{$sync->{ok}}, $input; + } + push @d, $input; } else { return $lei->fail("Unable to handle $input") } @@ -185,6 +210,14 @@ $input is `eml', not --in-format=$in_fmt if (@d) { # TODO: check for MH vs Maildir, here require PublicInbox::MdirReader; } + if ($sync && $sync->{no}) { + return $lei->fail(<<"") if !$sync->{ok}; +--sync specified but no inputs support it + + # non-fatal if some inputs support sync + $lei->err("# --sync will only be used for @{$sync->{ok}}"); + $lei->err("# --sync is not supported for: @{$sync->{no}}"); + } if ($net) { if (my $err = $net->errors) { return $lei->fail($err); diff --git a/lib/PublicInbox/LeiInspect.pm b/lib/PublicInbox/LeiInspect.pm new file mode 100644 index
00000000..6cfc8083 --- /dev/null +++ b/lib/PublicInbox/LeiInspect.pm @@ -0,0 +1,96 @@ +# Copyright (C) 2021 all contributors +# License: AGPL-3.0+ + +# "lei inspect" general purpose inspector for stuff in SQLite and +# Xapian. Will eventually be useful with plain public-inboxes, +# not just lei/store. This is totally half-baked at the moment +# but useful for testing. +package PublicInbox::LeiInspect; +use strict; +use v5.10.1; +use PublicInbox::Config; + +sub inspect_blob ($$) { + my ($lei, $oidhex) = @_; + my $ent = {}; + if (my $lse = $lei->{lse}) { + my @docids = $lse ? $lse->over->blob_exists($oidhex) : (); + $ent->{'lei/store'} = \@docids if @docids; + my $lms = $lse->lms; + if (my $loc = $lms ? $lms->locations_for($oidhex) : undef) { + $ent->{sync} = $loc; + } + } + $ent; +} + +sub inspect_sync_folder ($$) { + my ($lei, $folder) = @_; + my $ent = {}; + my $lse = $lei->{lse} or return $ent; + my $lms = $lse->lms or return $ent; + my @folders; + if ($folder =~ m!\Aimaps?://!i) { + require PublicInbox::URIimap; + my $uri = PublicInbox::URIimap->new($folder)->canonical; + if (defined($uri->uidvalidity)) { + $folders[0] = $$uri; + } else { + my @maybe = $lms->folders($$uri); + @folders = grep { + my $u = PublicInbox::URIimap->new($_); + $uri->uidvalidity($u->uidvalidity); + $$uri eq $$u; + } @maybe; + } + } elsif ($folder =~ m!\A(maildir|mh):(.+)!i) { + my $type = $1; + $folders[0] = "$type:".$lei->abs_path($2); + } elsif (-d $folder) { + $folders[0] = 'maildir:'.$lei->abs_path($folder); + } else { + $lei->fail("$folder not understood"); + } + $lei->qerr("# no folders match $folder (non-fatal)") if !@folders; + for my $f (@folders) { + $ent->{$f} = $lms->location_stats($f); # may be undef + } + $ent +} + +sub inspect1 ($$$) { + my ($lei, $item, $more) = @_; + my $ent; + if ($item =~ /\Ablob:(.+)/) { + $ent = inspect_blob($lei, $1); + } elsif ($item =~ m!\Aimaps?://!i || + $item =~ m!\A(?:maildir|mh):!i || -d $item) { + $ent = inspect_sync_folder($lei, $item); + } 
else { # TODO: more things + return $lei->fail("$item not understood"); + } + $lei->out($lei->{json}->encode($ent)); + $lei->out(',') if $more; + 1; +} + +sub lei_inspect { + my ($lei, @argv) = @_; + $lei->{1}->autoflush(0); + my $multi = scalar(@argv) > 1; + $lei->out('[') if $multi; + $lei->{json} = ref(PublicInbox::Config::json())->new->utf8->canonical; + $lei->{lse} = ($lei->{opt}->{external} // 1) ? do { + my $sto = $lei->_lei_store; + $sto ? $sto->search : undef; + } : undef; + if ($lei->{opt}->{pretty} || -t $lei->{1}) { + $lei->{json}->pretty(1)->indent(2); + } + while (defined(my $x = shift @argv)) { + inspect1($lei, $x, scalar(@argv)) or return; + } + $lei->out(']') if $multi; +} + +1; diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm index ff615d89..cd28a700 100644 --- a/lib/PublicInbox/LeiSearch.pm +++ b/lib/PublicInbox/LeiSearch.pm @@ -137,4 +137,11 @@ sub qparse_new { $qp } +sub lms { + my ($self) = @_; + require PublicInbox::LeiMailSync; + my $f = "$self->{topdir}/mail_sync.sqlite3"; + -f $f ? 
PublicInbox::LeiMailSync->new($f) : undef; +} + 1; diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index f8371abf..1cf7ffc1 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -190,13 +190,28 @@ sub remove_eml_vmd { \@docids; } +sub set_sync_info ($$$) { + my ($self, $oidhex, $sync_info) = @_; + ($self->{lms} //= do { + require PublicInbox::LeiMailSync; + my $f = "$self->{priv_eidx}->{topdir}/mail_sync.sqlite3"; + my $lms = PublicInbox::LeiMailSync->new($f); + $lms->lms_begin; + $lms; + })->set_src($oidhex, @$sync_info); +} + sub add_eml { my ($self, $eml, $vmd, $xoids) = @_; my $im = $self->importer; # may create new epoch my ($eidx, $tl) = eidx_init($self); # updates/writes alternates file my $oidx = $eidx->{oidx}; # PublicInbox::Import::add checks this my $smsg = bless { -oidx => $oidx }, 'PublicInbox::Smsg'; - $im->add($eml, undef, $smsg) or return; # duplicate returns undef + my $im_mark = $im->add($eml, undef, $smsg); + if ($vmd && $vmd->{sync_info}) { + set_sync_info($self, $smsg->{blob}, $vmd->{sync_info}); + } + $im_mark or return; # duplicate blob returns undef local $self->{current_info} = $smsg->{blob}; my $vivify_xvmd = delete($smsg->{-vivify_xvmd}) // []; # exact matches @@ -379,6 +394,9 @@ sub done { warn $err; } } + if (my $lms = delete $self->{lms}) { + $lms->lms_commit; + } $self->{priv_eidx}->done; # V2Writable::done xchg_stderr($self); die $err if $err; diff --git a/t/lei-import-imap.t b/t/lei-import-imap.t index 490ea9be..4a3bd6d8 100644 --- a/t/lei-import-imap.t +++ b/t/lei-import-imap.t @@ -12,10 +12,28 @@ my $td = start_script($cmd, $env, { 3 => $sock }) or BAIL_OUT("-imapd: $?"); my $host_port = tcp_host_port($sock); undef $sock; test_lei({ tmpdir => $tmpdir }, sub { + my $url = "imap://$host_port/t.v2.0"; + lei_ok(qw(q z:1..)); my $out = json_utf8->decode($lei_out); is_deeply($out, [ undef ], 'nothing imported, yet'); - lei_ok('import', "imap://$host_port/t.v2.0"); + + lei_ok('inspect', 
$url); + is_deeply(json_utf8->decode($lei_out), {}, 'no inspect stats, yet'); + + lei_ok('import', $url); + + lei_ok('inspect', $url); + my $inspect = json_utf8->decode($lei_out); + my @k = keys %$inspect; + is(scalar(@k), 1, 'one URL resolved'); + like($k[0], qr!\A\Q$url\E;UIDVALIDITY=\d+\z!, 'inspect URL matches'); + my $stats = $inspect->{$k[0]}; + is_deeply([ sort keys %$stats ], + [ qw(uid.count uid.max uid.min) ], 'keys match'); + ok($stats->{'uid.min'} < $stats->{'uid.max'}, 'min < max'); + ok($stats->{'uid.count'} > 0, 'count > 0'); + lei_ok(qw(q z:1..)); $out = json_utf8->decode($lei_out); ok(scalar(@$out) > 1, 'got imported messages'); @@ -23,9 +41,14 @@ test_lei({ tmpdir => $tmpdir }, sub { my %r; for (@$out) { $r{ref($_)}++ } is_deeply(\%r, { 'HASH' => scalar(@$out) }, 'all hashes'); - lei_ok([qw(tag +kw:seen), "imap://$host_port/t.v2.0"], undef, undef); + lei_ok([qw(tag +kw:seen), $url], undef, undef); my $f = "$ENV{HOME}/.local/share/lei/store/net_last.sqlite3"; ok(-s $f, 'net tracked for redundant imports'); + lei_ok('inspect', "blob:$out->[5]->{blob}"); + my $x = json_utf8->decode($lei_out); + is(ref($x->{'lei/store'}), 'ARRAY', 'lei/store in inspect'); + is(ref($x->{sync}), 'HASH', 'sync in inspect'); + is(ref($x->{sync}->{$k[0]}), 'ARRAY', 'UID arrays in inspect'); }); done_testing; diff --git a/t/lei-import-maildir.t b/t/lei-import-maildir.t index 6706b014..3e3d9188 100644 --- a/t/lei-import-maildir.t +++ b/t/lei-import-maildir.t @@ -12,6 +12,21 @@ test_lei(sub { BAIL_OUT "symlink $md $!"; lei_ok(qw(import), $md, \'import Maildir'); my $imp_err = $lei_err; + + my %i; + lei_ok('inspect', $md); $i{no_type} = $lei_out; + lei_ok('inspect', "maildir:$md"), $i{with_type} = $lei_out; + lei_ok(['inspect', $md], undef, { -C => $ENV{HOME}, %$lei_opt }); + $i{rel_no_type} = $lei_out; + lei_ok(['inspect', "maildir:$md"], undef, + { -C => $ENV{HOME}, %$lei_opt }); + $i{rel_with_type} = $lei_out; + my %v = map { $_ => 1 } values %i; + is(scalar(keys %v), 1, 
'inspect handles relative and absolute paths'); + my $inspect = json_utf8->decode([ keys %v ]->[0]); + is_deeply($inspect, {"maildir:$md" => { 'name.count' => 1 }}, + 'inspect maildir: path had expected output'); + lei_ok(qw(q s:boolean)); my $res = json_utf8->decode($lei_out); like($res->[0]->{'s'}, qr/use boolean/, 'got expected result') @@ -19,6 +34,12 @@ test_lei(sub { is_deeply($res->[0]->{kw}, ['seen'], 'keyword set'); is($res->[1], undef, 'only got one result'); + lei_ok('inspect', "blob:$res->[0]->{blob}"); + $inspect = json_utf8->decode($lei_out); + is(ref(delete $inspect->{"lei/store"}), 'ARRAY', 'lei/store IDs'); + is_deeply($inspect, { sync => { "maildir:$md" => [ 'x:2,S' ] } }, + 'maildir sync info as expected'); + lei_ok(qw(import), $md, \'import Maildir again'); $imp_err = $lei_err; lei_ok(qw(q -d none s:boolean), \'lei q w/o dedupe'); -- 2.44.0