X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FLEI.pm;h=e6f763e1067869e0a7c6d3bb8a90e2b544f45e0d;hb=7fc6e30aeab9925bece4bb00f88bb91af5646aa2;hp=8adf70faf2d70dbcf629e103bdf9fd589b2673da;hpb=5f73b20f0579eb3d070b20cb180208ffe2a40787;p=public-inbox.git diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 8adf70fa..e6f763e1 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -28,14 +28,15 @@ use Time::HiRes qw(stat); # ctime comparisons for config cache use File::Path qw(mkpath); use File::Spec; our $quit = \&CORE::exit; -our ($current_lei, $errors_log, $listener, $oldset, $dir_idle); -my ($recv_cmd, $send_cmd); +our ($current_lei, $errors_log, $listener, $oldset, $dir_idle, + $recv_cmd, $send_cmd); my $GLP = Getopt::Long::Parser->new; $GLP->configure(qw(gnu_getopt no_ignore_case auto_abbrev)); my $GLP_PASS = Getopt::Long::Parser->new; $GLP_PASS->configure(qw(gnu_getopt no_ignore_case auto_abbrev pass_through)); our %PATH2CFG; # persistent for socket daemon +our $MDIR2CFGPATH; # /path/to/maildir => { /path/to/config => [ ino watches ] } # TBD: this is a documentation mechanism to show a subcommand # (may) pass options through to another command: @@ -116,6 +117,12 @@ sub cache_dir ($) { .'/lei'); } +sub url_folder_cache { + my ($self) = @_; + require PublicInbox::SharedKV; # URI => updated_at_sec_ + PublicInbox::SharedKV->new(cache_dir($self).'/uri_folder'); +} + sub ale { my ($self) = @_; $self->{ale} //= do { @@ -127,7 +134,7 @@ sub ale { sub index_opt { # TODO: drop underscore variants everywhere, they're undocumented qw(fsync|sync! jobs|j=i indexlevel|L=s compact - max_size|max-size=s sequential_shard|sequential-shard + max_size|max-size=s sequential-shard batch_size|batch-size=s skip-docdata) } @@ -196,6 +203,8 @@ our %CMD = ( # sorted in order of importance/use: 'ls-label' => [ '', 'list labels', qw(z|0 stats:s), @c_opt ], 'ls-mail-sync' => [ '[FILTER]', 'list mail sync folders', qw(z|0 globoff|g invert-match|v local remote), @c_opt ], +'ls-mail-source' => [ 'URL', 'list IMAP or NNTP mail source folders', + qw(z|0 ascii l url), @c_opt ], 'forget-external' => [ 'LOCATION...|--prune', 'exclude further results from a publicinbox|extindex', qw(prune), @c_opt ], @@ -223,11 +232,12 @@ our %CMD = ( # sorted in order of importance/use: 'remove imported messages from IMAP, Maildirs, and MH', qw(exact! all jobs:i indexed), @c_opt ], -'add-watch' => [ 'LOCATION', 'watch for new messages and flag changes', - qw(import! kw! interval=s recursive|r - exclude=s include=s), @c_opt ], +'add-watch' => [ 'LOCATION...', 'watch for new messages and flag changes', + qw(poll-interval=s state=s recursive|r), @c_opt ], +'rm-watch' => [ 'LOCATION...', 'remove specified watch(es)', + qw(recursive|r), @c_opt ], 'ls-watch' => [ '[FILTER...]', 'list active watches with numbers and status', - qw(format|f=s z), @c_opt ], + qw(l z|0), @c_opt ], 'pause-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote), @c_opt ], 'resume-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote), @c_opt ], 'forget-watch' => [ '{WATCH_NUMBER|--prune}', 'stop and forget a watch', @@ -240,11 +250,13 @@ our %CMD = ( # sorted in order of importance/use: @c_opt ], 'import' => [ 'LOCATION...|--stdin', 'one-time import/update from URL or filesystem', - qw(stdin| offset=i recursive|r exclude=s include|I=s + qw(stdin| offset=i recursive|r exclude=s include|I=s jobs=s new-only lock=s@ in-format|F=s kw! verbose|v+ incremental! mail-sync!), qw(no-torsocks torsocks=s), PublicInbox::LeiQuery::curl_opt(), @c_opt ], 'forget-mail-sync' => [ 'LOCATION...', 'forget sync information for a mail folder', @c_opt ], +'prune-mail-sync' => [ 'LOCATION...|--all', + 'prune dangling sync data for a mail folder', 'all:s', @c_opt ], 'export-kw' => [ 'LOCATION...|--all', 'one-time export of keywords of sync sources', qw(all:s mode=s), @c_opt ], @@ -313,6 +325,7 @@ my %OPTDESC = ( 'path-a|a=s' => 'pre-image pathname associated with OID', 'path-b|b=s' => 'post-image pathname associated with OID', 'git-dir=s@' => 'additional git repository to scan', +'dir=s inspect' => 'specify a inboxdir, extindex topdir or Xapian shard', 'proxy=s' => [ 'PROTO://HOST[:PORT]', # shared with curl(1) "proxy for (e.g. `socks5h://0:9050')" ], 'torsocks=s' => ['VAL|auto|no|yes', @@ -343,6 +356,7 @@ my %OPTDESC = ( "or\xa0`-'\x{a0}for\x{a0}stdout)" ], 'mua=s' => [ 'CMD', "MUA to run on --output Maildir or mbox (e.g.\xa0`mutt\xa0-f\xa0%f')" ], +'new-only import' => 'only import new messages from IMAP source', 'inbox-version=i' => [ 'NUM|1|2', 'force a public-inbox version with --mirror'], @@ -357,7 +371,7 @@ my %OPTDESC = ( 'do not index messages larger than SIZE (default: infinity)' ], 'batch_size|batch-size=s' => [ 'SIZE', 'flush changes to OS after given number of bytes (default: 1m)' ], -'sequential_shard|sequential-shard' => +'sequential-shard' => 'index Xapian shards sequentially for slow storage', 'skip-docdata' => 'drop compatibility w/ public-inbox <1.6 to save ~1.5% space', @@ -379,6 +393,9 @@ my %OPTDESC = ( 'format|f=s ls-search' => ['OUT|json|jsonl|concatjson', 'listing output format' ], 'l ls-search' => 'long listing format', +'l ls-watch' => 'long listing format', +'l ls-mail-source' => 'long listing format', +'url ls-mail-source' => 'show full URL of newsgroup or IMAP folder', 'format|f=s ls-external' => $ls_format, 'limit|n=i@' => ['NUM', 'limit on number of matches (default: 10000)' ], @@ -421,7 +438,7 @@ my %CONFIG_KEYS = ( 'leistore.dir' => 'top-level storage location', ); -my @WQ_KEYS = qw(lxs l2m wq1 ikw); # internal workers +my @WQ_KEYS = qw(lxs l2m ikw pmd wq1 lne); # internal workers sub _drop_wq { my ($self) = @_; @@ -441,6 +458,7 @@ sub x_it ($$) { # make sure client sees stdout before exit $self->{1}->autoflush(1) if $self->{1}; dump_and_clear_log(); + stop_pager($self); if ($self->{pkt_op_p}) { # to top lei-daemon $self->{pkt_op_p}->pkt_do('x_it', $code); } elsif ($self->{sock}) { # to lei(1) client @@ -523,7 +541,7 @@ sub _lei_atfork_child { chdir '/' or die "chdir(/): $!"; close($_) for (grep(defined, delete @$self{qw(0 1 2 sock)})); if (my $cfg = $self->{cfg}) { - delete $cfg->{-lei_store}; + delete @$cfg{qw(-lei_store -watches -lei_note_event)}; } } else { # worker, Net::NNTP (Net::Cmd) uses STDERR directly open STDERR, '+>&='.fileno($self->{2}) or warn "open $!"; @@ -538,8 +556,10 @@ sub _lei_atfork_child { } close $listener if $listener; undef $listener; - undef $dir_idle; + $dir_idle->force_close if $dir_idle; %PATH2CFG = (); + $MDIR2CFGPATH = {}; + eval 'no warnings; undef $PublicInbox::LeiNoteEvent::to_flush'; undef $errors_log; $quit = \&CORE::exit; $self->{-eml_noisy} or # only "lei import" sets this atm @@ -565,19 +585,26 @@ sub pkt_op_pair { $end; } +sub incr { + my ($self, $field, $nr) = @_; + $self->{counters}->{$field} += $nr; +} + sub workers_start { - my ($lei, $wq, $jobs, $ops) = @_; + my ($lei, $wq, $jobs, $ops, $flds) = @_; $ops = { '!' => [ \&fail_handler, $lei ], '|' => [ \&sigpipe_handler, $lei ], 'x_it' => [ \&x_it, $lei ], 'child_error' => [ \&child_error, $lei ], + 'incr' => [ \&incr, $lei ], ($ops ? %$ops : ()), }; $ops->{''} //= [ $wq->can('_lei_wq_eof') || \&wq_eof, $lei ]; my $end = $lei->pkt_op_pair; my $ident = $wq->{-wq_ident} // "lei-$lei->{cmd} worker"; - $wq->wq_workers_start($ident, $jobs, $lei->oldset, { lei => $lei }); + $flds->{lei} = $lei; + $wq->wq_workers_start($ident, $jobs, $lei->oldset, $flds); delete $lei->{pkt_op_p}; my $op_c = delete $lei->{pkt_op_c}; @$end = (); @@ -585,6 +612,15 @@ sub workers_start { ($op_c, $ops); } +# call this when we're ready to wait on events and yield to other clients +sub wait_wq_events { + my ($lei, $op_c, $ops) = @_; + for my $wq (grep(defined, @$lei{qw(ikw pmd)})) { # auxiliary WQs + $wq->wq_close(1); + } + $op_c->{ops} = $ops; +} + sub _help { require PublicInbox::LeiHelp; PublicInbox::LeiHelp::call($_[0], $_[1], \%CMD, \%OPTDESC); @@ -732,11 +768,7 @@ sub dispatch { next if $d eq ''; # same as git(1) chdir $d or return fail($self, "cd $d: $!"); } - if (delete $self->{3}) { # update cwd for rel2abs - opendir my $dh, '.' or - return fail($self, "opendir . $!"); - $self->{3} = $dh; - } + open $self->{3}, '.' or return fail($self, "open . $!"); } $cb->($self, @argv); } elsif (grep(/\A-/, $cmd, @argv)) { # --help or -h only @@ -754,10 +786,12 @@ sub _lei_cfg ($;$) { my $f = _config_path($self); my @st = stat($f); my $cur_st = @st ? pack('dd', $st[10], $st[7]) : ''; # 10:ctime, 7:size - my ($sto, $sto_dir); + my ($sto, $sto_dir, $watches, $lne); if (my $cfg = $PATH2CFG{$f}) { # reuse existing object in common case return ($self->{cfg} = $cfg) if $cur_st eq $cfg->{-st}; - ($sto, $sto_dir) = @$cfg{qw(-lei_store leistore.dir)}; + ($sto, $sto_dir, $watches, $lne) = + @$cfg{qw(-lei_store leistore.dir -watches + -lei_note_event)}; } if (!@st) { unless ($creat) { @@ -778,6 +812,8 @@ sub _lei_cfg ($;$) { eq File::Spec->canonpath($cfg->{'leistore.dir'} // store_path($self))) { $cfg->{-lei_store} = $sto; + $cfg->{-lei_note_event} = $lne; + $cfg->{-watches} = $watches if $watches; } if (scalar(keys %PATH2CFG) > 5) { # FIXME: use inotify/EVFILT_VNODE to detect unlinked configs @@ -786,11 +822,13 @@ sub _lei_cfg ($;$) { } } $self->{cfg} = $PATH2CFG{$f} = $cfg; + refresh_watches($self); + $cfg; } sub _lei_store ($;$) { my ($self, $creat) = @_; - my $cfg = _lei_cfg($self, $creat); + my $cfg = _lei_cfg($self, $creat) // return; $cfg->{-lei_store} //= do { require PublicInbox::LeiStore; my $dir = $cfg->{'leistore.dir'} // store_path($self); @@ -1068,6 +1106,7 @@ sub event_step { } die "unrecognized client signal: $buf"; } + _drop_wq($self); # EOF, client disconnected dclose($self); }; if (my $err = $@) { @@ -1078,10 +1117,11 @@ sub event_step { sub event_step_init { my ($self) = @_; - return if $self->{-event_init_done}++; - if (my $sock = $self->{sock}) { # using DS->EventLoop + my $sock = $self->{sock} or return; + $self->{-event_init_done} //= do { # persist til $ops done $self->SUPER::new($sock, EPOLLIN|EPOLLET); - } + $sock; + }; } sub noop {} @@ -1097,6 +1137,53 @@ sub dump_and_clear_log { } } +sub cfg2lei ($) { + my ($cfg) = @_; + my $lei = bless { env => { %{$cfg->{-env}} } }, __PACKAGE__; + open($lei->{0}, '<&', \*STDIN) or die "dup 0: $!"; + open($lei->{1}, '>>&', \*STDOUT) or die "dup 1: $!"; + open($lei->{2}, '>>&', \*STDERR) or die "dup 2: $!"; + open($lei->{3}, '/') or die "open /: $!"; + chdir($lei->{3}) or die "chdir /': $!"; + my ($x, $y); + socketpair($x, $y, AF_UNIX, SOCK_SEQPACKET, 0) or die "socketpair: $!"; + $lei->{sock} = $x; + require PublicInbox::LeiSelfSocket; + PublicInbox::LeiSelfSocket->new($y); # adds to event loop + $lei; +} + +sub dir_idle_handler ($) { # PublicInbox::DirIdle callback + my ($ev) = @_; # Linux::Inotify2::Event or duck type + my $fn = $ev->fullname; + if ($fn =~ m!\A(.+)/(new|cur)/([^/]+)\z!) { # Maildir file + my ($mdir, $nc, $bn) = ($1, $2, $3); + $nc = '' if $ev->IN_DELETE; + for my $f (keys %{$MDIR2CFGPATH->{$mdir} // {}}) { + my $cfg = $PATH2CFG{$f} // next; + eval { + local %ENV = %{$cfg->{-env}}; + my $lei = cfg2lei($cfg); + $lei->dispatch('note-event', + "maildir:$mdir", $nc, $bn, $fn); + }; + warn "E note-event $f: $@\n" if $@; + } + } + if ($ev->can('cancel') && ($ev->IN_IGNORE || $ev->IN_UNMOUNT)) { + $ev->cancel; + } + if ($fn =~ m!\A(.+)/(?:new|cur)\z! && !-e $fn) { + delete $MDIR2CFGPATH->{$1}; + } + if (!-e $fn) { # config file or Maildir gone + for my $cfgpaths (values %$MDIR2CFGPATH) { + delete $cfgpaths->{$fn}; + } + delete $PATH2CFG{$fn}; + } +} + # lei(1) calls this when it can't connect sub lazy_start { my ($path, $errno, $narg) = @_; @@ -1118,7 +1205,6 @@ sub lazy_start { } umask(077) // die("umask(077): $!"); bind($listener, $addr) or die "bind($path): $!"; - listen($listener, 1024) or die "listen: $!"; $lk->lock_release; undef $lk; my @st = stat($path) or die "stat($path): $!"; @@ -1146,6 +1232,7 @@ sub lazy_start { return if $pid; $0 = "lei-daemon $path"; local %PATH2CFG; + local $MDIR2CFGPATH; $listener->blocking(0); my $exit_code; my $pil = PublicInbox::Listener->new($listener, \&accept_dispatch); @@ -1175,8 +1262,8 @@ sub lazy_start { local $SIG{PIPE} = 'IGNORE'; require PublicInbox::DirIdle; local $dir_idle = PublicInbox::DirIdle->new([$sock_dir], sub { - # just rely on wakeup ot hit PostLoopCallback set below - _dir_idle_handler(@_) if $_[0]->fullname ne $path; + # just rely on wakeup to hit PostLoopCallback set below + dir_idle_handler($_[0]) if $_[0]->fullname ne $path; }, 1); if ($sigfd) { undef $sigfd; # unref, already in DS::DescriptorMap @@ -1232,6 +1319,12 @@ sub busy { 1 } # prevent daemon-shutdown if client is connected # can immediately reread it sub DESTROY { my ($self) = @_; + if (my $counters = delete $self->{counters}) { + for my $k (sort keys %$counters) { + my $nr = $counters->{$k}; + $self->child_error(1 << 8, "$nr $k messages"); + } + } $self->{1}->autoflush(1) if $self->{1}; stop_pager($self); # preserve $? for ->fail or ->x_it code @@ -1258,4 +1351,67 @@ sub wq_eof { # EOF callback for main daemon $wq1->wq_wait_old(\&wq_done_wait, $lei); } +sub watch_state_ok ($) { + my ($state) = $_[-1]; # $_[0] may be $self + $state =~ /\Apause|(?:import|index|tag)-(?:ro|rw)\z/; +} + +sub cancel_maildir_watch ($$) { + my ($d, $cfg_f) = @_; + my $w = delete $MDIR2CFGPATH->{$d}->{$cfg_f}; + scalar(keys %{$MDIR2CFGPATH->{$d}}) or + delete $MDIR2CFGPATH->{$d}; + for my $x (@{$w // []}) { $x->cancel } +} + +sub refresh_watches { + my ($lei) = @_; + my $cfg = _lei_cfg($lei) or return; + my $old = $cfg->{-watches}; + my $watches = $cfg->{-watches} //= {}; + my %seen; + my $cfg_f = $cfg->{'-f'}; + for my $w (grep(/\Awatch\..+\.state\z/, keys %$cfg)) { + my $url = substr($w, length('watch.'), -length('.state')); + require PublicInbox::LeiWatch; + my $lw = $watches->{$url} //= PublicInbox::LeiWatch->new($url); + $seen{$url} = undef; + my $state = $cfg->get_1("watch.$url", 'state'); + if (!watch_state_ok($state)) { + $lei->err("watch.$url.state=$state not supported"); + next; + } + if ($url =~ /\Amaildir:(.+)/i) { + my $d = File::Spec->canonpath($1); + if ($state eq 'pause') { + cancel_maildir_watch($d, $cfg_f); + } elsif (!exists($MDIR2CFGPATH->{$d}->{$cfg_f})) { + my @w = $dir_idle->add_watches( + ["$d/cur", "$d/new"], 1); + push @{$MDIR2CFGPATH->{$d}->{$cfg_f}}, @w if @w; + } + } else { # TODO: imap/nntp/jmap + $lei->child_error(1, + "E: watch $url not supported, yet"); + } + } + if ($old) { # cull old non-existent entries + for my $url (keys %$old) { + next if exists $seen{$url}; + delete $old->{$url}; + if ($url =~ /\Amaildir:(.+)/i) { + my $d = File::Spec->canonpath($1); + cancel_maildir_watch($d, $cfg_f); + } else { # TODO: imap/nntp/jmap + $lei->child_error(1, "E: watch $url TODO"); + } + } + } + if (scalar keys %$watches) { + $cfg->{-env} //= { %{$lei->{env}}, PWD => '/' }; # for cfg2lei + } else { + delete $cfg->{-watches}; + } +} + 1;