X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FLEI.pm;h=b5bdda215008ebd95ba0bb49418e865112f1f44e;hb=3104d7492aa4aee07455dcad7449f786188afdf5;hp=e2f22a7564a4366e46a65a3a0af82833b57f5785;hpb=b436cf6e4794a32b3331a8727d10bf000ba55de2;p=public-inbox.git diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index e2f22a75..b5bdda21 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -22,13 +22,12 @@ use PublicInbox::Syscall qw(SFD_NONBLOCK EPOLLIN EPOLLET); use PublicInbox::Sigfd; use PublicInbox::DS qw(now dwaitpid); use PublicInbox::Spawn qw(spawn popen_rd); -use PublicInbox::OnDestroy; -use Text::Wrap qw(wrap); +use PublicInbox::Lock; use Time::HiRes qw(stat); # ctime comparisons for config cache use File::Path qw(mkpath); use File::Spec; our $quit = \&CORE::exit; -our ($current_lei, $errors_log, $listener); +our ($current_lei, $errors_log, $listener, $oldset); my ($recv_cmd, $send_cmd); my $GLP = Getopt::Long::Parser->new; $GLP->configure(qw(gnu_getopt no_ignore_case auto_abbrev)); @@ -98,80 +97,97 @@ sub _config_path ($) { .'/lei/config'); } -# TODO: generate shell completion + help using %CMD and %OPTDESC +sub index_opt { + # TODO: drop underscore variants everywhere, they're undocumented + qw(fsync|sync! jobs|j=i indexlevel|L=s compact + max_size|max-size=s sequential_shard|sequential-shard + batch_size|batch-size=s skip-docdata) +} + +# we generate shell completion + help using %CMD and %OPTDESC, +# see lei__complete() and PublicInbox::LeiHelp # command => [ positional_args, 1-line description, Getopt::Long option spec ] our %CMD = ( # sorted in order of importance/use: -'q' => [ 'SEARCH_TERMS...', 'search for messages matching terms', qw( - save-as=s output|mfolder|o=s format|f=s dedupe|d=s thread|t augment|a +'q' => [ '--stdin|SEARCH_TERMS...', 'search for messages matching terms', qw( + save-as=s output|mfolder|o=s format|f=s dedupe|d=s threads|t+ augment|a sort|s=s reverse|r offset=i remote! local! external! pretty - mua-cmd|mua=s no-torsocks torsocks=s verbose|v - received-after=s received-before=s sent-after=s sent-since=s), + include|I=s@ exclude=s@ only=s@ jobs|j=s globoff|g stdin| + import-remote! lock=s@ + alert=s@ mua=s no-torsocks torsocks=s verbose|v+ quiet|q C=s@), PublicInbox::LeiQuery::curl_opt(), opt_dash('limit|n=i', '[0-9]+') ], 'show' => [ 'MID|OID', 'show a given object (Message-ID or object ID)', - qw(type=s solve! format|f=s dedupe|d=s thread|t remote local!), + qw(type=s solve! format|f=s dedupe|d=s threads|t remote local! C=s@), pass_through('git show') ], -'add-external' => [ 'URL_OR_PATHNAME', +'add-external' => [ 'LOCATION', 'add/set priority of a publicinbox|extindex for extra matches', - qw(boost=i quiet|q) ], -'ls-external' => [ '[FILTER...]', 'list publicinbox|extindex locations', - qw(format|f=s z|0 local remote quiet|q) ], -'forget-external' => [ 'URL_OR_PATHNAME...|--prune', + qw(boost=i c=s@ mirror=s no-torsocks torsocks=s inbox-version=i), + qw(quiet|q verbose|v+ C=s@), + index_opt(), PublicInbox::LeiQuery::curl_opt() ], +'ls-external' => [ '[FILTER]', 'list publicinbox|extindex locations', + qw(format|f=s z|0 globoff|g invert-match|v local remote C=s@) ], +'forget-external' => [ 'LOCATION...|--prune', 'exclude further results from a publicinbox|extindex', - qw(prune quiet|q) ], + qw(prune quiet|q C=s@) ], 'ls-query' => [ '[FILTER...]', 'list saved search queries', - qw(name-only format|f=s z) ], -'rm-query' => [ 'QUERY_NAME', 'remove a saved search' ], -'mv-query' => [ qw(OLD_NAME NEW_NAME), 'rename a saved search' ], + qw(name-only format|f=s z C=s@) ], +'rm-query' => [ 'QUERY_NAME', 'remove a saved search', qw(C=s@) ], +'mv-query' => [ qw(OLD_NAME NEW_NAME), 'rename a saved search', qw(C=s@) ], -'plonk' => [ '--thread|--from=IDENT', - 'exclude mail matching From: or thread from non-Message-ID searches', - qw(stdin| thread|t from|f=s mid=s oid=s) ], +'plonk' => [ '--threads|--from=IDENT', + 'exclude mail matching From: or threads from non-Message-ID searches', + qw(stdin| threads|t from|f=s mid=s oid=s C=s@) ], 'mark' => [ 'MESSAGE_FLAGS...', - 'set/unset flags on message(s) from stdin', - qw(stdin| oid=s exact by-mid|mid:s) ], + 'set/unset keywords on message(s) from stdin', + qw(stdin| oid=s exact by-mid|mid:s C=s@) ], 'forget' => [ '[--stdin|--oid=OID|--by-mid=MID]', "exclude message(s) on stdin from `q' search results", - qw(stdin| oid=s exact by-mid|mid:s quiet|q) ], + qw(stdin| oid=s exact by-mid|mid:s quiet|q C=s@) ], -'purge-mailsource' => [ 'URL_OR_PATHNAME|--all', +'purge-mailsource' => [ 'LOCATION|--all', 'remove imported messages from IMAP, Maildirs, and MH', - qw(exact! all jobs:i indexed) ], + qw(exact! all jobs:i indexed C=s@) ], # code repos are used for `show' to solve blobs from patch mails -'add-coderepo' => [ 'PATHNAME', 'add or set priority of a git code repo', - qw(boost=i) ], +'add-coderepo' => [ 'DIRNAME', 'add or set priority of a git code repo', + qw(boost=i C=s@) ], 'ls-coderepo' => [ '[FILTER_TERMS...]', - 'list known code repos', qw(format|f=s z) ], -'forget-coderepo' => [ 'PATHNAME', + 'list known code repos', qw(format|f=s z C=s@) ], +'forget-coderepo' => [ 'DIRNAME', 'stop using repo to solve blobs from patches', - qw(prune) ], + qw(prune C=s@) ], -'add-watch' => [ '[URL_OR_PATHNAME]', - 'watch for new messages and flag changes', - qw(import! flags! interval=s recursive|r exclude=s include=s) ], +'add-watch' => [ 'LOCATION', 'watch for new messages and flag changes', + qw(import! kw|keywords|flags! interval=s recursive|r + exclude=s include=s C=s@) ], 'ls-watch' => [ '[FILTER...]', 'list active watches with numbers and status', - qw(format|f=s z) ], -'pause-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote) ], -'resume-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote) ], + qw(format|f=s z C=s@) ], +'pause-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote C=s@) ], +'resume-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote C=s@) ], 'forget-watch' => [ '{WATCH_NUMBER|--prune}', 'stop and forget a watch', - qw(prune) ], + qw(prune C=s@) ], -'import' => [ 'URL_OR_PATHNAME|--stdin', - 'one-shot import/update from URL or filesystem', - qw(stdin| offset=i recursive|r exclude=s include=s !flags), +'import' => [ 'LOCATION...|--stdin', + 'one-time import/update from URL or filesystem', + qw(stdin| offset=i recursive|r exclude=s include|I=s + in-format|F=s kw|keywords|flags! C=s@), + ], +'convert' => [ 'LOCATION...|--stdin', + 'one-time conversion from URL or filesystem to another format', + qw(stdin| in-format|F=s out-format|f=s output|mfolder|o=s quiet|q + kw|keywords|flags! C=s@), ], - 'config' => [ '[...]', sub { 'git-config(1) wrapper for '._config_path($_[0]); }, qw(config-file|system|global|file|f=s), # for conflict detection - pass_through('git config') ], -'init' => [ '[PATHNAME]', sub { - 'initialize storage, default: '._store_path($_[0]); - }, qw(quiet|q) ], + qw(C=s@), pass_through('git config') ], +'init' => [ '[DIRNAME]', sub { + "initialize storage, default: "._store_path($_[0]); + }, qw(quiet|q C=s@) ], 'daemon-kill' => [ '[-SIGNAL]', 'signal the lei-daemon', + # "-C DIR" conflicts with -CHLD, here, and chdir makes no sense, here opt_dash('signal|s=s', '[0-9]+|(?:[A-Z][A-Z0-9]+)') ], 'daemon-pid' => [ '', 'show the PID of the lei-daemon' ], 'help' => [ '[SUBCOMMAND]', 'show help' ], @@ -181,7 +197,7 @@ our %CMD = ( # sorted in order of importance/use: 'reorder-local-store-and-break-history' => [ '[REFNAME]', 'rewrite git history in an attempt to improve compression', - 'gc!' ], + qw(gc! C=s@) ], # internal commands are prefixed with '_' '_complete' => [ '[...]', 'internal shell completion helper', @@ -193,50 +209,93 @@ our %CMD = ( # sorted in order of importance/use: # $spec => [@ALLOWED_VALUES (default is first), $description], # $spec => $description # "$SUB_COMMAND TAB $spec" => as above -my $stdin_formats = [ 'IN|auto|raw|mboxrd|mboxcl2|mboxcl|mboxo', - 'specify message input format' ]; +my $stdin_formats = [ 'MAIL_FORMAT|eml|mboxrd|mboxcl2|mboxcl|mboxo', + 'specify message input format' ]; my $ls_format = [ 'OUT|plain|json|null', 'listing output format' ]; +# we use \x{a0} (non-breaking SP) to avoid wrapping in PublicInbox::LeiHelp my %OPTDESC = ( 'help|h' => 'show this built-in help', +'C=s@' => [ 'DIR', 'chdir to specify to directory' ], 'quiet|q' => 'be quiet', -'verbose|v' => 'be more verbose', +'globoff|g' => "do not match locations using '*?' wildcards ". + "and\xa0'[]'\x{a0}ranges", +'verbose|v+' => 'be more verbose', 'solve!' => 'do not attempt to reconstruct blobs from emails', -'torsocks=s' => ['auto|no|yes', +'torsocks=s' => ['VAL|auto|no|yes', 'whether or not to wrap git and curl commands with torsocks'], 'no-torsocks' => 'alias for --torsocks=no', 'save-as=s' => ['NAME', 'save a search terms by given name'], +'import-remote!' => 'do not memoize remote messages into local store', 'type=s' => [ 'any|mid|git', 'disambiguate type' ], -'dedupe|d=s' => ['STRAT|content|oid|mid|none', +'dedupe|d=s' => ['STRATEGY|content|oid|mid|none', 'deduplication strategy'], -'show thread|t' => 'display entire thread a message belongs to', -'q thread|t' => - 'return all messages in the same thread as the actual match(es)', +'show threads|t' => 'display entire thread a message belongs to', +'q threads|t+' => + 'return all messages in the same threads as the actual match(es)', +'alert=s@' => ['CMD,:WINCH,:bell,', + 'run command(s) or perform ops when done writing to output ' . + '(default: ":WINCH,:bell" with --mua and Maildir/IMAP output, ' . + 'nothing otherwise)' ], + 'augment|a' => 'augment --output destination instead of clobbering', -'output|mfolder|o=s' => [ 'DEST', - "destination (e.g. `/path/to/Maildir', or `-' for stdout)" ], -'mua-cmd|mua=s' => [ 'COMMAND', - "MUA to run on --output Maildir or mbox (e.g. `mutt -f %f'" ], +'output|mfolder|o=s' => [ 'MFOLDER', + "destination (e.g.\xa0`/path/to/Maildir', ". + "or\xa0`-'\x{a0}for\x{a0}stdout)" ], +'mua=s' => [ 'CMD', + "MUA to run on --output Maildir or mbox (e.g.\xa0`mutt\xa0-f\xa0%f')" ], 'show format|f=s' => [ 'OUT|plain|raw|html|mboxrd|mboxcl2|mboxcl', 'message/object output format' ], 'mark format|f=s' => $stdin_formats, 'forget format|f=s' => $stdin_formats, + +'add-external inbox-version=i' => [ 'NUM|1|2', + 'force a public-inbox version with --mirror'], +'add-external mirror=s' => [ 'URL', 'mirror a public-inbox'], + +# public-inbox-index options +'add-external jobs|j=i' => 'set parallelism when indexing after --mirror', +'fsync!' => 'speed up indexing after --mirror, risk index corruption', +'compact' => 'run compact index after mirroring', +'indexlevel|L=s' => [ 'LEVEL|full|medium|basic', + "indexlevel with --mirror (default: full)" ], +'max_size|max-size=s' => [ 'SIZE', + 'do not index messages larger than SIZE (default: infinity)' ], +'batch_size|batch-size=s' => [ 'SIZE', + 'flush changes to OS after given number of bytes (default: 1m)' ], +'sequential_shard|sequential-shard' => + 'index Xapian shards sequentially for slow storage', +'skip-docdata' => + 'drop compatibility w/ public-inbox <1.6 to save ~1.5% space', + 'q format|f=s' => [ 'OUT|maildir|mboxrd|mboxcl2|mboxcl|mboxo|html|json|jsonl|concatjson', 'specify output format, default depends on --output'], +'q exclude=s@' => [ 'LOCATION', + 'exclude specified external(s) from search' ], +'q include|I=s@' => [ 'LOCATION', + 'include specified external(s) in search' ], +'q only=s@' => [ 'LOCATION', + 'only use specified external(s) for search' ], + +'q jobs=s' => [ '[SEARCH_JOBS][,WRITER_JOBS]', + 'control number of search and writer jobs' ], + +'import format|f=s' => $stdin_formats, + 'ls-query format|f=s' => $ls_format, 'ls-external format|f=s' => $ls_format, 'limit|n=i@' => ['NUM', 'limit on number of matches (default: 10000)' ], 'offset=i' => ['OFF', 'search result offset (default: 0)'], -'sort|s=s' => [ 'VAL|received,relevance,docid', - "order of results `--output'-dependent"], -'reverse|r' => [ 'reverse search results' ], # like sort(1) +'sort|s=s' => [ 'VAL|received|relevance|docid', + "order of results is `--output'-dependent"], +'reverse|r' => 'reverse search results', # like sort(1) 'boost=i' => 'increase/decrease priority of results (default: 0)', @@ -256,7 +315,8 @@ my %OPTDESC = ( 'exact!' => 'rely on content match instead of exact header matches', 'by-mid|mid:s' => [ 'MID', 'match only by Message-ID, ignoring contents' ], -'jobs:i' => 'set parallelism level', + +'kw|keywords|flags!' => 'disable/enable importing flags', # xargs, env, use "-0", git(1) uses "-z". We support z|0 everywhere 'z|0' => 'use NUL \\0 instead of newline (CR) to delimit lines', @@ -268,48 +328,92 @@ my %CONFIG_KEYS = ( 'leistore.dir' => 'top-level storage location', ); +my @WQ_KEYS = qw(lxs l2m imp mrr cnv); # internal workers + # pronounced "exit": x_it(1 << 8) => exit(1); x_it(13) => SIGPIPE sub x_it ($$) { my ($self, $code) = @_; # make sure client sees stdout before exit $self->{1}->autoflush(1) if $self->{1}; dump_and_clear_log(); - if (my $sock = $self->{sock}) { - send($sock, "x_it $code", MSG_EOR); - } elsif (my $signum = ($code & 127)) { # oneshot, usually SIGPIPE (13) - $SIG{PIPE} = 'DEFAULT'; # $SIG{$signum} doesn't work - kill $signum, $$; - sleep; # wait for signal - } else { # oneshot + if (my $s = $self->{pkt_op_p} // $self->{sock}) { + send($s, "x_it $code", MSG_EOR); + } elsif ($self->{oneshot}) { # don't want to end up using $? from child processes - for my $f (qw(lxs l2m)) { + for my $f (@WQ_KEYS) { my $wq = delete $self->{$f} or next; $wq->DESTROY; } - $quit->($code >> 8); - } + # cleanup anything that has tempfiles or open file handles + %PATH2CFG = (); + delete @$self{qw(ovv dedupe sto cfg)}; + if (my $signum = ($code & 127)) { # usually SIGPIPE (13) + $SIG{PIPE} = 'DEFAULT'; # $SIG{$signum} doesn't work + kill $signum, $$; + sleep(1) while 1; # wait for signal + } else { + $quit->($code >> 8); + } + } # else ignore if client disconnected } sub err ($;@) { my $self = shift; my $err = $self->{2} // ($self->{pgr} // [])->[2] // *STDERR{GLOB}; - my $eor = (substr($_[-1], -1, 1) eq "\n" ? () : "\n"); - print $err @_, $eor and return; + my @eor = (substr($_[-1]//'', -1, 1) eq "\n" ? () : ("\n")); + print $err @_, @eor and return; my $old_err = delete $self->{2}; - close($old_err) if $! == EPIPE && $old_err;; + close($old_err) if $! == EPIPE && $old_err; $err = $self->{2} = ($self->{pgr} // [])->[2] // *STDERR{GLOB}; - print $err @_, $eor or print STDERR @_, $eor; + print $err @_, @eor or print STDERR @_, @eor; } sub qerr ($;@) { $_[0]->{opt}->{quiet} or err(shift, @_) } +sub fail_handler ($;$$) { + my ($lei, $code, $io) = @_; + for my $f (@WQ_KEYS) { + my $wq = delete $lei->{$f} or next; + $wq->wq_wait_old(undef, $lei) if $wq->wq_kill_old; # lei-daemon + } + close($io) if $io; # needed to avoid warnings on SIGPIPE + x_it($lei, $code // (1 << 8)); +} + +sub sigpipe_handler { # handles SIGPIPE from @WQ_KEYS workers + fail_handler($_[0], 13, delete $_[0]->{1}); +} + +# PublicInbox::OnDestroy callback for SIGINT to take out the entire pgid +sub sigint_reap { + my ($pgid) = @_; + dwaitpid($pgid) if kill('-INT', $pgid); +} + sub fail ($$;$) { my ($self, $buf, $exit_code) = @_; - err($self, $buf); + err($self, $buf) if defined $buf; + # calls fail_handler: + send($self->{pkt_op_p}, '!', MSG_EOR) if $self->{pkt_op_p}; x_it($self, ($exit_code // 1) << 8); undef; } +sub check_input_format ($;$) { + my ($self, $files) = @_; + my $opt_key = 'in-format'; + my $fmt = $self->{opt}->{$opt_key}; + if (!$fmt) { + my $err = $files ? "regular file(s):\n@$files" : '--stdin'; + return fail($self, "--$opt_key unset for $err"); + } + return 1 if $fmt eq 'eml'; + # XXX: should this handle {gz,bz2,xz}? that's currently in LeiToMail + require PublicInbox::MboxReader; + PublicInbox::MboxReader->can($fmt) || + fail($self, "--$opt_key=$fmt unrecognized"); +} + sub out ($;@) { my $self = shift; return if print { $self->{1} // return } @_; # likely @@ -322,184 +426,82 @@ sub out ($;@) { sub puts ($;@) { out(shift, map { "$_\n" } @_) } sub child_error { # passes non-fatal curl exit codes to user - my ($self, $child_error) = @_; # child_error is $? - if (my $sock = $self->{sock}) { # send to lei(1) client - send($sock, "child_error $child_error", MSG_EOR); - } else { # oneshot + my ($self, $child_error, $msg) = @_; # child_error is $? + $self->err($msg) if $msg; + if (my $s = $self->{pkt_op_p} // $self->{sock}) { + # send to the parent lei-daemon or to lei(1) client + send($s, "child_error $child_error", MSG_EOR); + } elsif (!$PublicInbox::DS::in_loop) { $self->{child_error} = $child_error; - } - undef; -} - -sub atfork_prepare_wq { - my ($self, $wq) = @_; - my $tcafc = $wq->{-ipc_atfork_child_close} //= [ $listener // () ]; - if (my $sock = $self->{sock}) { - push @$tcafc, @$self{qw(0 1 2 3)}, $sock; - } - if (my $pgr = $self->{pgr}) { - push @$tcafc, @$pgr[1,2]; - } - if (my $old_1 = $self->{old_1}) { - push @$tcafc, $old_1; - } - for my $f (qw(lxs l2m)) { - my $ipc = $self->{$f} or next; - push @$tcafc, grep { defined } - @$ipc{qw(-wq_s1 -wq_s2 -ipc_req -ipc_res)}; - } -} - -sub io_restore ($$) { - my ($dst, $src) = @_; - for my $i (0..2) { # standard FDs - my $io = delete $src->{$i} or next; - $dst->{$i} = $io; - } - for my $i (3..9) { # named (non-standard) FDs - my $io = $src->{$i} or next; - my @st = stat($io) or die "stat $src.$i ($io): $!"; - my $f = delete $dst->{"dev=$st[0],ino=$st[1]"} // next; - $dst->{$f} = $io; - delete $src->{$i}; - } + } # else noop if client disconnected } -# triggers sigpipe_handler -sub note_sigpipe { +sub note_sigpipe { # triggers sigpipe_handler my ($self, $fd) = @_; close(delete($self->{$fd})); # explicit close silences Perl warning - syswrite($self->{op_pipe}, '!') if $self->{op_pipe}; + send($self->{pkt_op_p}, '|', MSG_EOR) if $self->{pkt_op_p}; x_it($self, 13); } -sub atfork_child_wq { - my ($self, $wq) = @_; - io_restore($self, $wq); - -p $self->{op_pipe} or die 'BUG: {op_pipe} expected'; - io_restore($self->{l2m}, $wq); +sub lei_atfork_child { + my ($self, $persist) = @_; + # we need to explicitly close things which are on stack + if ($persist) { + my @io = delete @$self{qw(0 1 2 sock)}; + unless ($self->{oneshot}) { + close($_) for @io; + } + } else { + delete $self->{0}; + } + delete @$self{qw(cnv)}; + for (delete @$self{qw(3 old_1 au_done)}) { + close($_) if defined($_); + } + if (my $op_c = delete $self->{pkt_op_c}) { + close(delete $op_c->{sock}); + } + if (my $pgr = delete $self->{pgr}) { + close($_) for (@$pgr[1,2]); + } + close $listener if $listener; + undef $listener; %PATH2CFG = (); undef $errors_log; $quit = \&CORE::exit; - $current_lei = $self; # for SIG{__WARN__} -} - -sub io_extract ($;@) { - my ($obj, @fields) = @_; - my @io; - for my $f (@fields) { - my $io = delete $obj->{$f} or next; - my @st = stat($io) or die "W: stat $obj.$f ($io): $!"; - $obj->{"dev=$st[0],ino=$st[1]"} = $f; - push @io, $io; - } - @io + $current_lei = $persist ? undef : $self; # for SIG{__WARN__} } -# usage: ($lei, @io) = $lei->atfork_parent_wq($wq); -sub atfork_parent_wq { - my ($self, $wq) = @_; - my $env = delete $self->{env}; # env is inherited at fork - my $lei = bless { %$self }, ref($self); - for my $f (qw(dedupe ovv)) { - my $tmp = delete($lei->{$f}) or next; - $lei->{$f} = $wq->deep_clone($tmp); - } - $self->{env} = $env; - delete @$lei{qw(3 -lei_store cfg old_1 pgr lxs)}; # keep l2m - my @io = (delete(@$lei{qw(0 1 2)}), - io_extract($lei, qw(sock op_pipe startq))); - my $l2m = $lei->{l2m}; - if ($l2m && $l2m != $wq) { # $wq == lxs - if (my $wq_s1 = $l2m->{-wq_s1}) { - push @io, io_extract($l2m, '-wq_s1'); - $l2m->{-wq_s1} = $wq_s1; - } - $l2m->wq_close(1); - } - ($lei, @io); +sub workers_start { + my ($lei, $wq, $ident, $jobs, $ops) = @_; + $ops = { + '!' => [ $lei->can('fail_handler'), $lei ], + '|' => [ $lei->can('sigpipe_handler'), $lei ], + 'x_it' => [ $lei->can('x_it'), $lei ], + 'child_error' => [ $lei->can('child_error'), $lei ], + %$ops + }; + require PublicInbox::PktOp; + ($lei->{pkt_op_c}, $lei->{pkt_op_p}) = PublicInbox::PktOp->pair($ops); + $wq->wq_workers_start($ident, $jobs, $lei->oldset, { lei => $lei }); + delete $lei->{pkt_op_p}; + my $op = delete $lei->{pkt_op_c}; + $lei->event_step_init; + # oneshot needs $op, daemon-mode uses DS->EventLoop to handle $op + $lei->{oneshot} ? $op : undef; } -sub _help ($;$) { - my ($self, $errmsg) = @_; - my $cmd = $self->{cmd} // 'COMMAND'; - my @info = @{$CMD{$cmd} // [ '...', '...' ]}; - my @top = ($cmd, shift(@info) // ()); - my $cmd_desc = shift(@info); - $cmd_desc = $cmd_desc->($self) if ref($cmd_desc) eq 'CODE'; - my @opt_desc; - my $lpad = 2; - for my $sw (grep { !ref } @info) { # ("prio=s", "z", $GLP_PASS) - my $desc = $OPTDESC{"$cmd\t$sw"} // $OPTDESC{$sw} // next; - my $arg_vals = ''; - ($arg_vals, $desc) = @$desc if ref($desc) eq 'ARRAY'; - - # lower-case is a keyword (e.g. `content', `oid'), - # ALL_CAPS is a string description (e.g. `PATH') - if ($desc !~ /default/ && $arg_vals =~ /\b([a-z]+)[,\|]/) { - $desc .= "\ndefault: `$1'"; - } - my (@vals, @s, @l); - my $x = $sw; - if ($x =~ s/!\z//) { # solve! => --no-solve - $x = "no-$x"; - } elsif ($x =~ s/:.+//) { # optional args: $x = "mid:s" - @vals = (' [', undef, ']'); - } elsif ($x =~ s/=.+//) { # required arg: $x = "type=s" - @vals = (' ', undef); - } # else: no args $x = 'thread|t' - for (split(/\|/, $x)) { # help|h - length($_) > 1 ? push(@l, "--$_") : push(@s, "-$_"); - } - if (!scalar(@vals)) { # no args 'thread|t' - } elsif ($arg_vals =~ s/\A([A-Z_]+)\b//) { # "NAME" - $vals[1] = $1; - } else { - $vals[1] = uc(substr($l[0], 2)); # "--type" => "TYPE" - } - if ($arg_vals =~ /([,\|])/) { - my $sep = $1; - my @allow = split(/\Q$sep\E/, $arg_vals); - my $must = $sep eq '|' ? 'Must' : 'Can'; - @allow = map { "`$_'" } @allow; - my $last = pop @allow; - $desc .= "\n$must be one of: " . - join(', ', @allow) . " or $last"; - } - my $lhs = join(', ', @s, @l) . join('', @vals); - if ($x =~ /\|\z/) { # "stdin|" or "clear|" - $lhs =~ s/\A--/- , --/; - } else { - $lhs =~ s/\A--/ --/; # pad if no short options - } - $lpad = length($lhs) if length($lhs) > $lpad; - push @opt_desc, $lhs, $desc; - } - my $msg = $errmsg ? "E: $errmsg\n" : ''; - $msg .= <{$errmsg ? 2 : 1} } $msg; - x_it($self, $errmsg ? 1 << 8 : 0); # stderr => failure - undef; +sub _help { + require PublicInbox::LeiHelp; + PublicInbox::LeiHelp::call($_[0], $_[1], \%CMD, \%OPTDESC); } sub optparse ($$$) { my ($self, $cmd, $argv) = @_; + # allow _complete --help to complete, not show help + return 1 if substr($cmd, 0, 1) eq '_'; $self->{cmd} = $cmd; - $OPT = $self->{opt} = {}; + $OPT = $self->{opt} //= {}; my $info = $CMD{$cmd} // [ '[...]' ]; my ($proto, undef, @spec) = @$info; my $glp = ref($spec[-1]) eq ref($GLP) ? pop(@spec) : $GLP; @@ -536,12 +538,13 @@ sub optparse ($$$) { } elsif ($var =~ /\A\[-?$POS_ARG\]\z/) { # one optional arg $i++; } elsif ($var =~ /\A.+?\|/) { # required FOO|--stdin + $inf = 1 if index($var, '...') > 0; my @or = split(/\|/, $var); my $ok; for my $o (@or) { if ($o =~ /\A--([a-z0-9\-]+)/) { $ok = defined($OPT->{$1}); - last; + last if $ok; } elsif (defined($argv->[$i])) { $ok = 1; $i++; @@ -567,15 +570,25 @@ sub dispatch { local $current_lei = $self; # for __WARN__ dump_and_clear_log("from previous run\n"); return _help($self, 'no command given') unless defined($cmd); + while ($cmd eq '-C') { # do not support Getopt bundling for this + my $d = shift(@argv) // return fail($self, '-C DIRECTORY'); + push @{$self->{opt}->{C}}, $d; + $cmd = shift(@argv) // return _help($self, 'no command given'); + } my $func = "lei_$cmd"; $func =~ tr/-/_/; if (my $cb = __PACKAGE__->can($func)) { optparse($self, $cmd, \@argv) or return; + if (my $chdir = $self->{opt}->{C}) { + for my $d (@$chdir) { + next if $d eq ''; # same as git(1) + chdir $d or return fail($self, "cd $d: $!"); + } + } $cb->($self, @argv); } elsif (grep(/\A-/, $cmd, @argv)) { # --help or -h only - my $opt = {}; - $GLP->getoptionsfromarray([$cmd, @argv], $opt, qw(help|h)) or - return _help($self, 'bad arguments or options'); + $GLP->getoptionsfromarray([$cmd, @argv], {}, qw(help|h C=s@)) + or return _help($self, 'bad arguments or options'); _help($self); } else { fail($self, "`$cmd' is not an lei command"); @@ -593,16 +606,17 @@ sub _lei_cfg ($;$) { if (!@st) { unless ($creat) { delete $self->{cfg}; - return; + return bless {}, 'PublicInbox::Config'; } my (undef, $cfg_dir, undef) = File::Spec->splitpath($f); -d $cfg_dir or mkpath($cfg_dir) or die "mkpath($cfg_dir): $!\n"; open my $fh, '>>', $f or die "open($f): $!\n"; @st = stat($fh) or die "fstat($f): $!\n"; $cur_st = pack('dd', $st[10], $st[7]); - qerr($self, "I: $f created") if $self->{cmd} ne 'config'; + qerr($self, "# $f created") if $self->{cmd} ne 'config'; } my $cfg = PublicInbox::Config::git_config_dump($f); + bless $cfg, 'PublicInbox::Config'; $cfg->{-st} = $cur_st; $cfg->{'-f'} = $f; $self->{cfg} = $PATH2CFG{$f} = $cfg; @@ -629,13 +643,11 @@ sub lei_mark { sub _config { my ($self, @argv) = @_; - my $env = $self->{env}; - delete local $env->{GIT_CONFIG}; - delete local $ENV{GIT_CONFIG}; + my %env = (%{$self->{env}}, GIT_CONFIG => undef); my $cfg = _lei_cfg($self, 1); my $cmd = [ qw(git config -f), $cfg->{'-f'}, @argv ]; my %rdr = map { $_ => $self->{$_} } (0..2); - waitpid(spawn($cmd, $env, \%rdr), 0); + waitpid(spawn($cmd, \%env, \%rdr), 0); } sub lei_config { @@ -646,6 +658,16 @@ sub lei_config { x_it($self, $?) if $?; } +sub lei_import { + require PublicInbox::LeiImport; + PublicInbox::LeiImport->call(@_); +} + +sub lei_convert { + require PublicInbox::LeiConvert; + PublicInbox::LeiConvert->call(@_); +} + sub lei_init { my ($self, $dir) = @_; my $cfg = _lei_cfg($self, 1); @@ -655,7 +677,7 @@ sub lei_init { my @cur = stat($cur) if defined($cur); $cur = File::Spec->canonpath($cur // $dir); my @dir = stat($dir); - my $exists = "I: leistore.dir=$cur already initialized" if @dir; + my $exists = "# leistore.dir=$cur already initialized" if @dir; if (@cur) { if ($cur eq $dir) { _lei_store($self, 1)->done; @@ -674,7 +696,7 @@ E: leistore.dir=$cur already initialized and it is not $dir } lei_config($self, 'leistore.dir', $dir); _lei_store($self, 1)->done; - $exists //= "I: leistore.dir=$dir newly initialized"; + $exists //= "# leistore.dir=$dir newly initialized"; return qerr($self, $exists); } @@ -694,7 +716,7 @@ sub lei_help { _help($_[0]) } sub lei__complete { my ($self, @argv) = @_; # argv = qw(lei and any other args...) shift @argv; # ignore "lei", the entire command is sent - @argv or return puts $self, grep(!/^_/, keys %CMD), qw(--help -h); + @argv or return puts $self, grep(!/^_/, keys %CMD), qw(--help -h -C); my $cmd = shift @argv; my $info = $CMD{$cmd} // do { # filter matching commands @argv or puts $self, grep(/\A\Q$cmd\E/, keys %CMD); @@ -717,15 +739,18 @@ sub lei__complete { get-color-name get-colorbool); # fall-through } - puts $self, grep(/$re/, map { # generate short/long names + # generate short/long names from Getopt::Long specs + puts $self, grep(/$re/, qw(--help -h -C), map { if (s/[:=].+\z//) { # req/optional args, e.g output|o=i - } else { # negation: solve! => no-solve|solve - s/\A(.+)!\z/no-$1|$1/; + } elsif (s/\+\z//) { # verbose|v+ + } elsif (s/!\z//) { + # negation: solve! => no-solve|solve + s/([\w\-]+)/$1|no-$1/g } map { my $x = length > 1 ? "--$_" : "-$_"; $x eq $cur ? () : $x; - } split(/\|/, $_, -1) # help|h + } grep(!/_/, split(/\|/, $_, -1)) # help|h } grep { $OPTDESC{"$cmd\t$_"} || $OPTDESC{$_} } @spec); } elsif ($cmd eq 'config' && !@argv && !$CONFIG_KEYS{$cur}) { puts $self, grep(/$re/, keys %CONFIG_KEYS); @@ -736,8 +761,7 @@ sub lei__complete { my $opt = quotemeta $1; puts $self, map { my $v = $OPTDESC{$_}; - $v = $v->[0] if ref($v); - my @v = split(/\|/, $v); + my @v = ref($v) ? split(/\|/, $v->[0]) : (); # get rid of ALL CAPS placeholder (e.g "OUT") # (TODO: completion for external paths) shift(@v) if uc($v[0]) eq $v[0]; @@ -762,7 +786,7 @@ sub exec_buf ($$) { sub start_mua { my ($self) = @_; - my $mua = $self->{opt}->{'mua-cmd'} // return; + my $mua = $self->{opt}->{mua} // return; my $mfolder = $self->{ovv}->{dst}; my (@cmd, $replaced); if ($mua =~ /\A(?:mutt|mailx|mail|neomutt)\z/) { @@ -770,38 +794,82 @@ sub start_mua { # TODO: help wanted: other common FOSS MUAs } else { require Text::ParseWords; - my @cmd = Text::ParseWords::shellwords($mua); + @cmd = Text::ParseWords::shellwords($mua); # mutt uses '%f' for open-hook with compressed mbox, we follow @cmd = map { $_ eq '%f' ? ($replaced = $mfolder) : $_ } @cmd; } push @cmd, $mfolder unless defined($replaced); if (my $sock = $self->{sock}) { # lei(1) client process runs it send($sock, exec_buf(\@cmd, {}), MSG_EOR); - } else { # oneshot - $self->{"mua.pid.$self.$$"} = spawn(\@cmd); + } elsif ($self->{oneshot}) { + $self->{"pid.$self.$$"}->{spawn(\@cmd)} = \@cmd; + } + if ($self->{lxs} && $self->{au_done}) { # kick wait_startq + syswrite($self->{au_done}, 'q' x ($self->{lxs}->{jobs} // 0)); + } + $self->{opt}->{quiet} = 1; + delete $self->{-progress}; + delete $self->{opt}->{verbose}; +} + +sub send_exec_cmd { # tell script/lei to execute a command + my ($self, $io, $cmd, $env) = @_; + my $sock = $self->{sock} // die 'lei client gone'; + my $fds = [ map { fileno($_) } @$io ]; + $send_cmd->($sock, $fds, exec_buf($cmd, $env), MSG_EOR); +} + +sub poke_mua { # forces terminal MUAs to wake up and hopefully notice new mail + my ($self) = @_; + my $alerts = $self->{opt}->{alert} // return; + while (my $op = shift(@$alerts)) { + if ($op eq ':WINCH') { + # hit the process group that started the MUA + if ($self->{sock}) { + send($self->{sock}, '-WINCH', MSG_EOR); + } elsif ($self->{oneshot}) { + kill('-WINCH', $$); + } + } elsif ($op eq ':bell') { + out($self, "\a"); + } elsif ($op =~ /(?{sock}) { + send($s, exec_buf($cmd, {}), MSG_EOR); + } elsif ($self->{oneshot}) { + $self->{"pid.$self.$$"}->{spawn($cmd)} = $cmd; + } + } else { + err($self, "W: unsupported --alert=$op"); # non-fatal + } } } # caller needs to "-t $self->{1}" to check if tty sub start_pager { my ($self) = @_; - my $env = $self->{env}; - my $fh = popen_rd([qw(git var GIT_PAGER)], $env); + my $fh = popen_rd([qw(git var GIT_PAGER)]); chomp(my $pager = <$fh> // ''); close($fh) or warn "`git var PAGER' error: \$?=$?"; return if $pager eq 'cat' || $pager eq ''; - # TODO TIOCGWINSZ - my $new_env = { LESS => 'FRX', LV => '-c', COLUMNS => 80 }; + my $new_env = { LESS => 'FRX', LV => '-c' }; $new_env->{MORE} = 'FRX' if $^O eq 'freebsd'; pipe(my ($r, $wpager)) or return warn "pipe: $!"; my $rdr = { 0 => $r, 1 => $self->{1}, 2 => $self->{2} }; - my $pgr = [ undef, @$rdr{1, 2}, $$ ]; - if (my $sock = $self->{sock}) { # lei(1) process runs it + my $pgr = [ undef, @$rdr{1, 2} ]; + my $env = $self->{env}; + if ($self->{sock}) { # lei(1) process runs it delete @$new_env{keys %$env}; # only set iff unset - my $fds = [ map { fileno($_) } @$rdr{0..2} ]; - $send_cmd->($sock, $fds, exec_buf([$pager], $new_env), MSG_EOR); + send_exec_cmd($self, [ @$rdr{0..2} ], [$pager], $new_env); + } elsif ($self->{oneshot}) { + my $cmd = [$pager]; + $self->{"pid.$self.$$"}->{spawn($cmd, $new_env, $rdr)} = $cmd; } else { - $pgr->[0] = spawn([$pager], $new_env, $rdr); + die 'BUG: start_pager w/o socket'; } $self->{1} = $wpager; $self->{2} = $wpager if -t $self->{2}; @@ -815,8 +883,6 @@ sub stop_pager { $self->{2} = $pgr->[2]; # do not restore original stdout, just close it so we error out close(delete($self->{1})) if $self->{1}; - my $pid = $pgr->[0]; - dwaitpid($pid, undef, $self->{sock}) if $pid && $pgr->[3] == $$; } sub accept_dispatch { # Listener {post_accept} callback @@ -826,17 +892,19 @@ sub accept_dispatch { # Listener {post_accept} callback vec(my $rvec = '', fileno($sock), 1) = 1; select($rvec, undef, undef, 60) or return send($sock, 'timed out waiting to recv FDs', MSG_EOR); - my @fds = $recv_cmd->($sock, my $buf, 4096 * 33); # >MAX_ARG_STRLEN + # (4096 * 33) >MAX_ARG_STRLEN + my @fds = $recv_cmd->($sock, my $buf, 4096 * 33) or return; # EOF if (scalar(@fds) == 4) { for my $i (0..3) { my $fd = shift(@fds); open($self->{$i}, '+<&=', $fd) and next; send($sock, "open(+<&=$fd) (FD=$i): $!", MSG_EOR); } - } else { - my $msg = "recv_cmd failed: $!"; - warn $msg; + } elsif (!defined($fds[0])) { + warn(my $msg = "recv_cmd failed: $!"); return send($sock, $msg, MSG_EOR); + } else { + return; } $self->{2}->autoflush(1); # keep stdout buffered until x_it|DESTROY # $ENV_STR = join('', map { "\0$_=$ENV{$_}" } keys %ENV); @@ -858,12 +926,13 @@ sub accept_dispatch { # Listener {post_accept} callback sub dclose { my ($self) = @_; - for my $f (qw(lxs l2m)) { + delete $self->{-progress}; + for my $f (@WQ_KEYS) { my $wq = delete $self->{$f} or next; if ($wq->wq_kill) { - $wq->wq_close + $wq->wq_close(0, undef, $self); } elsif ($wq->wq_kill_old) { - $wq->wq_wait_old($self); + $wq->wq_wait_old(undef, $self); } } close(delete $self->{1}) if $self->{1}; # may reap_compress @@ -899,6 +968,7 @@ sub event_step { sub event_step_init { my ($self) = @_; + return if $self->{-event_init_done}++; if (my $sock = $self->{sock}) { # using DS->EventLoop $self->SUPER::new($sock, EPOLLIN|EPOLLET); } @@ -906,7 +976,7 @@ sub event_step_init { sub noop {} -our $oldset; sub oldset { $oldset } +sub oldset { $oldset } sub dump_and_clear_log { if (defined($errors_log) && -s STDIN && seek(STDIN, 0, SEEK_SET)) { @@ -920,22 +990,27 @@ sub dump_and_clear_log { # lei(1) calls this when it can't connect sub lazy_start { my ($path, $errno, $narg) = @_; - if ($errno == ECONNREFUSED) { - unlink($path) or die "unlink($path): $!"; - } elsif ($errno != ENOENT) { + local ($errors_log, $listener); + ($errors_log) = ($path =~ m!\A(.+?/)[^/]+\z!); + $errors_log .= 'errors.log'; + my $addr = pack_sockaddr_un($path); + my $lk = bless { lock_path => $errors_log }, 'PublicInbox::Lock'; + $lk->lock_acquire; + socket($listener, AF_UNIX, SOCK_SEQPACKET, 0) or die "socket: $!"; + if ($errno == ECONNREFUSED || $errno == ENOENT) { + return if connect($listener, $addr); # another process won + if ($errno == ECONNREFUSED && -S $path) { + unlink($path) or die "unlink($path): $!"; + } + } else { $! = $errno; # allow interpolation to stringify in die die "connect($path): $!"; } - if (eval { require BSD::Resource }) { - my $NOFILE = BSD::Resource::RLIMIT_NOFILE(); - my ($s, $h) = BSD::Resource::getrlimit($NOFILE); - BSD::Resource::setrlimit($NOFILE, $h, $h) if $s < $h; - } umask(077) // die("umask(077): $!"); - local $listener; - socket($listener, AF_UNIX, SOCK_SEQPACKET, 0) or die "socket: $!"; - bind($listener, pack_sockaddr_un($path)) or die "bind($path): $!"; + bind($listener, $addr) or die "bind($path): $!"; listen($listener, 1024) or die "listen: $!"; + $lk->lock_release; + undef $lk; my @st = stat($path) or die "stat($path): $!"; my $dev_ino_expect = pack('dd', $st[0], $st[1]); # dev+ino local $oldset = PublicInbox::DS::block_signals(); @@ -953,9 +1028,6 @@ sub lazy_start { require PublicInbox::Listener; require PublicInbox::EOFpipe; (-p STDOUT) or die "E: stdout must be a pipe\n"; - local $errors_log; - ($errors_log) = ($path =~ m!\A(.+?/)[^/]+\z!); - $errors_log .= 'errors.log'; open(STDIN, '+>>', $errors_log) or die "open($errors_log): $!"; STDIN->autoflush(1); dump_and_clear_log("from previous daemon process:\n"); @@ -1041,6 +1113,8 @@ sub lazy_start { exit($exit_code // 0); } +sub busy { 1 } # prevent daemon-shutdown if client is connected + # for users w/o Socket::Msghdr installed or Inline::C enabled sub oneshot { my ($main_pkg) = @_; @@ -1049,6 +1123,7 @@ sub oneshot { local %PATH2CFG; umask(077) // die("umask(077): $!"); my $self = bless { + oneshot => 1, 0 => *STDIN{GLOB}, 1 => *STDOUT{GLOB}, 2 => *STDERR{GLOB}, @@ -1064,9 +1139,10 @@ sub DESTROY { my ($self) = @_; $self->{1}->autoflush(1) if $self->{1}; stop_pager($self); - if (my $mua_pid = delete $self->{"mua.pid.$self.$$"}) { - waitpid($mua_pid, 0); - } + my $err = $?; + my $oneshot_pids = delete $self->{"pid.$self.$$"} or return; + waitpid($_, 0) for keys %$oneshot_pids; + $? = $err if $err; # preserve ->fail or ->x_it code } 1;