From: Eric Wong Date: Fri, 1 Jan 2021 04:58:39 +0000 (+0000) Subject: Merge remote-tracking branch 'origin/lei' into eidx X-Git-Tag: v1.7.0~1471 X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=commitdiff_plain;h=d49c0789d208e66121bfb68ff0c48d7612a7cd8e;hp=8862c33ae93eea1af6246cd3c7a81e0a122186bf Merge remote-tracking branch 'origin/lei' into eidx * origin/lei: (28 commits) lei: rename proposed "query" command to "q", add JSON output lei_xsearch: cross-(inbox|extindex) search lei: extinbox: start implementing in config file lei: revise output routines lei: support for -$DIGIT and -$SIG CLI switches build: add lei.sh + "make symlink-install" target lei: start working on bash completion lei: drop $SIG{__DIE__}, add oneshot fallbacks lei: restore default __DIE__ handler for event loop on_destroy: generic localized END lei_store: keyword extraction from mbox and Maildir lei_store: relax GIT_COMMITTER_IDENT check lei: micro-optimize startup time lei: rename $client => $self and bless lei: help: show actual paths being operated on lei: support pass-through for `lei config' rename LeiDaemon package to PublicInbox::LEI search: simplify initialization, add ->xdb_shards_flat lei_store: simplify git_epoch_max, slightly lei: support `daemon-env' for modifying long-lived env ... 
--- diff --git a/MANIFEST b/MANIFEST index a4cdedff..a5ff81cf 100644 --- a/MANIFEST +++ b/MANIFEST @@ -62,6 +62,7 @@ ci/README ci/deps.perl ci/profiles.sh ci/run.sh +contrib/completion/lei-completion.bash contrib/css/216dark.css contrib/css/216light.css contrib/css/README @@ -101,6 +102,7 @@ examples/unsubscribe-psgi@.service examples/unsubscribe.milter examples/unsubscribe.psgi examples/varnish-4.vcl +lei.sh lib/PublicInbox/Address.pm lib/PublicInbox/AddressPP.pm lib/PublicInbox/Admin.pm @@ -159,6 +161,11 @@ lib/PublicInbox/InboxIdle.pm lib/PublicInbox/InboxWritable.pm lib/PublicInbox/Isearch.pm lib/PublicInbox/KQNotify.pm +lib/PublicInbox/LEI.pm +lib/PublicInbox/LeiExtinbox.pm +lib/PublicInbox/LeiSearch.pm +lib/PublicInbox/LeiStore.pm +lib/PublicInbox/LeiXSearch.pm lib/PublicInbox/Linkify.pm lib/PublicInbox/Listener.pm lib/PublicInbox/Lock.pm @@ -178,6 +185,7 @@ lib/PublicInbox/NNTP.pm lib/PublicInbox/NNTPD.pm lib/PublicInbox/NNTPdeflate.pm lib/PublicInbox/NewsWWW.pm +lib/PublicInbox/OnDestroy.pm lib/PublicInbox/Over.pm lib/PublicInbox/OverIdx.pm lib/PublicInbox/ProcessPipe.pm @@ -226,6 +234,7 @@ sa_config/Makefile sa_config/README sa_config/root/etc/spamassassin/public-inbox.pre sa_config/user/.spamassassin/user_prefs +script/lei script/public-inbox-compact script/public-inbox-convert script/public-inbox-edit @@ -316,6 +325,10 @@ t/indexlevels-mirror.t t/init.t t/iso-2202-jp.eml t/kqnotify.t +t/lei-oneshot.t +t/lei.t +t/lei_store.t +t/lei_xsearch.t t/linkify.t t/main-bin/spamc t/mda-mime.eml @@ -336,6 +349,7 @@ t/nntpd-v2.t t/nntpd.t t/nodatacow.t t/nulsubject.t +t/on_destroy.t t/over.t t/plack-2-txt-bodies.eml t/plack-attached-patch.eml diff --git a/Makefile.PL b/Makefile.PL index 57592378..924e8dfd 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -223,5 +223,16 @@ Makefile.PL : MANIFEST touch -r MANIFEST \$@ \$(PERLRUN) \$@ +# Install symlinks to ~/bin (which is hopefully in PATH) which point to +# this source tree. 
+# prefix + bindir matches git.git Makefile: +prefix = \$(HOME) +bindir = \$(prefix)/bin +symlink-install : + mkdir -p \$(bindir) + lei=\$\$(realpath lei.sh) && cd \$(bindir) && \\ + for x in \$(EXE_FILES); do \\ + ln -sf "\$\$lei" \$\$(basename "\$\$x"); \\ + done EOF } diff --git a/contrib/completion/lei-completion.bash b/contrib/completion/lei-completion.bash new file mode 100644 index 00000000..67cdd3ed --- /dev/null +++ b/contrib/completion/lei-completion.bash @@ -0,0 +1,11 @@ +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ + +# preliminary bash completion support for lei (Local Email Interface) +# Needs a lot of work, see `lei__complete' in lib/PublicInbox/LEI.pm +_lei() { + COMPREPLY=($(compgen -W "$(lei _complete ${COMP_WORDS[@]})" \ + -- "${COMP_WORDS[COMP_CWORD]}")) + return 0 +} +complete -o filenames -o bashdefault -F _lei lei diff --git a/lei.sh b/lei.sh new file mode 100755 index 00000000..f1510a73 --- /dev/null +++ b/lei.sh @@ -0,0 +1,7 @@ +#!/bin/sh -e +# symlink this file to a directory in PATH to run lei (or anything in script/*) +# without needing perms to install globally. Used by "make symlink-install" +p=$(realpath "$0" || readlink "$0") # neither is POSIX, but common +p=$(dirname "$p") c=$(basename "$0") # both are POSIX +exec ${PERL-perl} -w -I"$p"/lib "$p"/script/"${c%.sh}" "$@" +: this script is too short to copyright diff --git a/lib/PublicInbox/Daemon.pm b/lib/PublicInbox/Daemon.pm index 1762be0b..bdf1dc45 100644 --- a/lib/PublicInbox/Daemon.pm +++ b/lib/PublicInbox/Daemon.pm @@ -1,7 +1,9 @@ # Copyright (C) 2015-2020 all contributors # License: AGPL-3.0+ -# contains common daemon code for the httpd, imapd, and nntpd servers. -# This may be used for read-only IMAP server if we decide to implement it. +# +# Contains common daemon code for the httpd, imapd, and nntpd servers +# and designed for handling thousands of untrusted clients over slow +# and/or lossy connections. 
package PublicInbox::Daemon; use strict; use warnings; diff --git a/lib/PublicInbox/ExtSearch.pm b/lib/PublicInbox/ExtSearch.pm index 7c9586a6..2bcdece6 100644 --- a/lib/PublicInbox/ExtSearch.pm +++ b/lib/PublicInbox/ExtSearch.pm @@ -16,12 +16,12 @@ use DBI qw(:sql_types); # SQL_BLOB use parent qw(PublicInbox::Search); sub new { - my (undef, $topdir) = @_; + my ($class, $topdir) = @_; bless { topdir => $topdir, # xpfx => 'ei15' xpfx => "$topdir/ei".PublicInbox::Search::SCHEMA_VERSION - }, __PACKAGE__; + }, $class; } sub misc { @@ -29,12 +29,6 @@ sub misc { $self->{misc} //= PublicInbox::MiscSearch->new("$self->{xpfx}/misc"); } -# overrides PublicInbox::Search::_xdb -sub _xdb { - my ($self) = @_; - $self->xdb_sharded; -} - # same as per-inbox ->over, for now... sub over { my ($self) = @_; diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index 07e64698..a2d70205 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -935,18 +935,31 @@ sub idx_init { # similar to V2Writable return if $self->{idx_shards}; $self->git->cleanup; - + my $mode = 0644; my $ALL = $self->git->{git_dir}; # ALL.git - PublicInbox::Import::init_bare($ALL) unless -d $ALL; + my $old = -d $ALL; + if ($opt->{-private}) { # LeiStore + $mode = 0600; + if (!$old) { + umask 077; # don't bother restoring + PublicInbox::Import::init_bare($ALL); + $self->git->qx(qw(config core.sharedRepository 0600)); + } + } else { + PublicInbox::Import::init_bare($ALL) unless $old; + } my $info_dir = "$ALL/objects/info"; my $alt = "$info_dir/alternates"; - my $mode = 0644; my (@old, @new, %seen); # seen: st_dev + st_ino if (-e $alt) { open(my $fh, '<', $alt) or die "open $alt: $!"; $mode = (stat($fh))[2] & 07777; while (my $line = <$fh>) { chomp(my $d = $line); + + # expand relative path (/local/ stuff) + substr($d, 0, 3) eq '../' and + $d = "$ALL/objects/$d"; if (my @st = stat($d)) { next if $seen{"$st[0]\0$st[1]"}++; } else { @@ -956,6 +969,22 @@ sub idx_init { 
# similar to V2Writable push @old, $line; } } + + # for LeiStore, and possibly some mirror-only state + if (opendir(my $dh, my $local = "$self->{topdir}/local")) { + # highest numbered epoch first + for my $n (sort { $b <=> $a } map { substr($_, 0, -4) + 0 } + grep(/\A[0-9]+\.git\z/, readdir($dh))) { + my $d = "$local/$n.git/objects"; # absolute path + if (my @st = stat($d)) { + next if $seen{"$st[0]\0$st[1]"}++; + # favor relative paths for rename-friendliness + push @new, "../../local/$n.git/objects\n"; + } else { + warn "W: stat($d) failed: $!\n"; + } + } + } for my $ibx (@{$self->{ibx_list}}) { my $line = $ibx->git->{git_dir} . "/objects\n"; chomp(my $d = $line); diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index e0a84bfd..079afc5f 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -406,6 +406,10 @@ sub add { if ($smsg) { $smsg->{blob} = $self->get_mark(":$blob"); $smsg->{raw_bytes} = $n; + if (my $oidx = delete $smsg->{-oidx}) { # used by LeiStore + return if $oidx->blob_exists($smsg->{blob}); + } + # XXX do we need this? it's in git at this point $smsg->{-raw_email} = \$raw_email; } my $ref = $self->{ref}; diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm new file mode 100644 index 00000000..7002a1f7 --- /dev/null +++ b/lib/PublicInbox/LEI.pm @@ -0,0 +1,776 @@ +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ + +# Backend for `lei' (local email interface). Unlike the C10K-oriented +# PublicInbox::Daemon, this is designed exclusively to handle trusted +# local clients with read/write access to the FS and use as many +# system resources as the local user has access to. 
+package PublicInbox::LEI; +use strict; +use v5.10.1; +use parent qw(PublicInbox::DS PublicInbox::LeiExtinbox); +use Getopt::Long (); +use Socket qw(AF_UNIX SOCK_STREAM pack_sockaddr_un); +use Errno qw(EAGAIN ECONNREFUSED ENOENT); +use POSIX (); +use IO::Handle (); +use Sys::Syslog qw(syslog openlog); +use PublicInbox::Config; +use PublicInbox::Syscall qw($SFD_NONBLOCK EPOLLIN EPOLLONESHOT); +use PublicInbox::Sigfd; +use PublicInbox::DS qw(now); +use PublicInbox::Spawn qw(spawn); +use PublicInbox::OnDestroy; +use Text::Wrap qw(wrap); +use File::Path qw(mkpath); +use File::Spec; +our $quit = \&CORE::exit; +my $GLP = Getopt::Long::Parser->new; +$GLP->configure(qw(gnu_getopt no_ignore_case auto_abbrev)); +my $GLP_PASS = Getopt::Long::Parser->new; +$GLP_PASS->configure(qw(gnu_getopt no_ignore_case auto_abbrev pass_through)); + +our %PATH2CFG; # persistent for socket daemon + +# TBD: this is a documentation mechanism to show a subcommand +# (may) pass options through to another command: +sub pass_through { $GLP_PASS } + +my $OPT; +sub opt_dash { + my ($spec, $re_str) = @_; # 'limit|n=i', '([0-9]+)' + my ($key) = ($spec =~ m/\A([a-z]+)/g); + my $cb = sub { # Getopt::Long "<>" catch-all handler + my ($arg) = @_; + if ($arg =~ /\A-($re_str)\z/) { + $OPT->{$key} = $1; + } else { + die "bad argument for --$key: $arg\n"; + } + }; + ($spec, '<>' => $cb, $GLP_PASS) +} + +sub _store_path ($) { + my ($env) = @_; + File::Spec->rel2abs(($env->{XDG_DATA_HOME} // + ($env->{HOME} // '/nonexistent').'/.local/share') + .'/lei/store', $env->{PWD}); +} + +sub _config_path ($) { + my ($env) = @_; + File::Spec->rel2abs(($env->{XDG_CONFIG_HOME} // + ($env->{HOME} // '/nonexistent').'/.config') + .'/lei/config', $env->{PWD}); +} + +# TODO: generate shell completion + help using %CMD and %OPTDESC +# command => [ positional_args, 1-line description, Getopt::Long option spec ] +our %CMD = ( # sorted in order of importance/use: +'q' => [ 'SEARCH_TERMS...', 'search for messages matching terms', 
qw( + save-as=s output|o=s format|f=s dedupe|d=s thread|t augment|a + sort|s=s@ reverse|r offset=i remote local! extinbox! + since|after=s until|before=s), opt_dash('limit|n=i', '[0-9]+') ], + +'show' => [ 'MID|OID', 'show a given object (Message-ID or object ID)', + qw(type=s solve! format|f=s dedupe|d=s thread|t remote local!), + pass_through('git show') ], + +'add-extinbox' => [ 'URL_OR_PATHNAME', + 'add/set priority of a publicinbox|extindex for extra matches', + qw(boost=i quiet|q) ], +'ls-extinbox' => [ '[FILTER...]', 'list publicinbox|extindex locations', + qw(format|f=s z|0 local remote quiet|q) ], +'forget-extinbox' => [ '{URL_OR_PATHNAME|--prune}', + 'exclude further results from a publicinbox|extindex', + qw(prune quiet|q) ], + +'ls-query' => [ '[FILTER...]', 'list saved search queries', + qw(name-only format|f=s z) ], +'rm-query' => [ 'QUERY_NAME', 'remove a saved search' ], +'mv-query' => [ qw(OLD_NAME NEW_NAME), 'rename a saved search' ], + +'plonk' => [ '--thread|--from=IDENT', + 'exclude mail matching From: or thread from non-Message-ID searches', + qw(stdin| thread|t from|f=s mid=s oid=s) ], +'mark' => [ 'MESSAGE_FLAGS...', + 'set/unset flags on message(s) from stdin', + qw(stdin| oid=s exact by-mid|mid:s) ], +'forget' => [ '[--stdin|--oid=OID|--by-mid=MID]', + "exclude message(s) on stdin from `q' search results", + qw(stdin| oid=s exact by-mid|mid:s quiet|q) ], + +'purge-mailsource' => [ '{URL_OR_PATHNAME|--all}', + 'remove imported messages from IMAP, Maildirs, and MH', + qw(exact! all jobs:i indexed) ], + +# code repos are used for `show' to solve blobs from patch mails +'add-coderepo' => [ 'PATHNAME', 'add or set priority of a git code repo', + qw(boost=i) ], +'ls-coderepo' => [ '[FILTER_TERMS...]', + 'list known code repos', qw(format|f=s z) ], +'forget-coderepo' => [ 'PATHNAME', + 'stop using repo to solve blobs from patches', + qw(prune) ], + +'add-watch' => [ '[URL_OR_PATHNAME]', + 'watch for new messages and flag changes', + qw(import! 
flags! interval=s recursive|r exclude=s include=s) ], +'ls-watch' => [ '[FILTER...]', 'list active watches with numbers and status', + qw(format|f=s z) ], +'pause-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote) ], +'resume-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote) ], +'forget-watch' => [ '{WATCH_NUMBER|--prune}', 'stop and forget a watch', + qw(prune) ], + +'import' => [ '{URL_OR_PATHNAME|--stdin}', + 'one-shot import/update from URL or filesystem', + qw(stdin| offset=i recursive|r exclude=s include=s !flags), + ], + +'config' => [ '[...]', sub { + 'git-config(1) wrapper for '._config_path($_[0]); + }, qw(config-file|system|global|file|f=s), # for conflict detection + pass_through('git config') ], +'init' => [ '[PATHNAME]', sub { + 'initialize storage, default: '._store_path($_[0]); + }, qw(quiet|q) ], +'daemon-kill' => [ '[-SIGNAL]', 'signal the lei-daemon', + opt_dash('signal|s=s', '[0-9]+|(?:[A-Z][A-Z0-9]+)') ], +'daemon-pid' => [ '', 'show the PID of the lei-daemon' ], +'daemon-env' => [ '[NAME=VALUE...]', 'set, unset, or show daemon environment', + qw(clear| unset|u=s@ z|0) ], +'help' => [ '[SUBCOMMAND]', 'show help' ], + +# XXX do we need this? +# 'git' => [ '[ANYTHING...]', 'git(1) wrapper', pass_through('git') ], + +'reorder-local-store-and-break-history' => [ '[REFNAME]', + 'rewrite git history in an attempt to improve compression', + 'gc!' 
], + +# internal commands are prefixed with '_' +'_complete' => [ '[...]', 'internal shell completion helper', + pass_through('everything') ], +); # @CMD + +# switch descriptions, try to keep consistent across commands +# $spec: Getopt::Long option specification +# $spec => [@ALLOWED_VALUES (default is first), $description], +# $spec => $description +# "$SUB_COMMAND TAB $spec" => as above +my $stdin_formats = [ 'IN|auto|raw|mboxrd|mboxcl2|mboxcl|mboxo', + 'specify message input format' ]; +my $ls_format = [ 'OUT|plain|json|null', 'listing output format' ]; + +my %OPTDESC = ( +'help|h' => 'show this built-in help', +'quiet|q' => 'be quiet', +'solve!' => 'do not attempt to reconstruct blobs from emails', +'save-as=s' => ['NAME', 'save a search terms by given name'], + +'type=s' => [ 'any|mid|git', 'disambiguate type' ], + +'dedupe|d=s' => ['STRAT|content|oid|mid', + 'deduplication strategy'], +'show thread|t' => 'display entire thread a message belongs to', +'q thread|t' => + 'return all messages in the same thread as the actual match(es)', +'augment|a' => 'augment --output destination instead of clobbering', + +'output|o=s' => [ 'DEST', + "destination (e.g. `/path/to/Maildir', or `-' for stdout)" ], + +'show format|f=s' => [ 'OUT|plain|raw|html|mboxrd|mboxcl2|mboxcl', + 'message/object output format' ], +'mark format|f=s' => $stdin_formats, +'forget format|f=s' => $stdin_formats, +'q format|f=s' => [ 'OUT|maildir|mboxrd|mboxcl2|mboxcl|html|oid|json', + 'specify output format, default depends on --output'], +'ls-query format|f=s' => $ls_format, +'ls-extinbox format|f=s' => $ls_format, + +'limit|n=i@' => ['NUM', 'limit on number of matches (default: 10000)' ], +'offset=i' => ['OFF', 'search result offset (default: 0)'], + +'sort|s=s@' => [ 'VAL|internaldate,date,relevance,docid', + "order of results `--output'-dependent"], + +'boost=i' => 'increase/decrease priority of results (default: 0)', + +'local' => 'limit operations to the local filesystem', +'local!' 
=> 'exclude results from the local filesystem', +'remote' => 'limit operations to those requiring network access', +'remote!' => 'prevent operations requiring network access', + +'mid=s' => 'specify the Message-ID of a message', +'oid=s' => 'specify the git object ID of a message', + +'recursive|r' => 'scan directories/mailboxes/newsgroups recursively', +'exclude=s' => 'exclude mailboxes/newsgroups based on pattern', +'include=s' => 'include mailboxes/newsgroups based on pattern', + +'exact' => 'operate on exact header matches only', +'exact!' => 'rely on content match instead of exact header matches', + +'by-mid|mid:s' => [ 'MID', 'match only by Message-ID, ignoring contents' ], +'jobs:i' => 'set parallelism level', + +# xargs, env, use "-0", git(1) uses "-z". We support z|0 everywhere +'z|0' => 'use NUL \\0 instead of newline (CR) to delimit lines', + +# note: no "--ignore-environment" / "-i" support like env(1) since that +# is one-shot and this is for a persistent daemon: +'clear|' => 'clear the daemon environment', +'unset|u=s@' => ['NAME', + 'unset matching NAME, may be specified multiple times'], + +'signal|s=s' => [ 'SIG', 'signal to send lei-daemon (default: TERM)' ], +); # %OPTDESC + +my %CONFIG_KEYS = ( + 'leistore.dir' => 'top-level storage location', +); + +sub x_it ($$) { # pronounced "exit" + my ($self, $code) = @_; + if (my $sig = ($code & 127)) { + kill($sig, $self->{pid} // $$); + } else { + $code >>= 8; + if (my $sock = $self->{sock}) { + say $sock "exit=$code"; + } else { # for oneshot + $quit->($code); + } + } +} + +sub puts ($;@) { print { shift->{1} } map { "$_\n" } @_ } + +sub out ($;@) { print { shift->{1} } @_ } + +sub err ($;@) { + print { shift->{2} } @_, (substr($_[-1], -1, 1) eq "\n" ? 
() : "\n"); +} + +sub qerr ($;@) { $_[0]->{opt}->{quiet} or err(shift, @_) } + +sub fail ($$;$) { + my ($self, $buf, $exit_code) = @_; + err($self, $buf); + x_it($self, ($exit_code // 1) << 8); + undef; +} + +sub _help ($;$) { + my ($self, $errmsg) = @_; + my $cmd = $self->{cmd} // 'COMMAND'; + my @info = @{$CMD{$cmd} // [ '...', '...' ]}; + my @top = ($cmd, shift(@info) // ()); + my $cmd_desc = shift(@info); + $cmd_desc = $cmd_desc->($self->{env}) if ref($cmd_desc) eq 'CODE'; + my @opt_desc; + my $lpad = 2; + for my $sw (grep { !ref } @info) { # ("prio=s", "z", $GLP_PASS) + my $desc = $OPTDESC{"$cmd\t$sw"} // $OPTDESC{$sw} // next; + my $arg_vals = ''; + ($arg_vals, $desc) = @$desc if ref($desc) eq 'ARRAY'; + + # lower-case is a keyword (e.g. `content', `oid'), + # ALL_CAPS is a string description (e.g. `PATH') + if ($desc !~ /default/ && $arg_vals =~ /\b([a-z]+)[,\|]/) { + $desc .= "\ndefault: `$1'"; + } + my (@vals, @s, @l); + my $x = $sw; + if ($x =~ s/!\z//) { # solve! => --no-solve + $x = "no-$x"; + } elsif ($x =~ s/:.+//) { # optional args: $x = "mid:s" + @vals = (' [', undef, ']'); + } elsif ($x =~ s/=.+//) { # required arg: $x = "type=s" + @vals = (' ', undef); + } # else: no args $x = 'thread|t' + for (split(/\|/, $x)) { # help|h + length($_) > 1 ? push(@l, "--$_") : push(@s, "-$_"); + } + if (!scalar(@vals)) { # no args 'thread|t' + } elsif ($arg_vals =~ s/\A([A-Z_]+)\b//) { # "NAME" + $vals[1] = $1; + } else { + $vals[1] = uc(substr($l[0], 2)); # "--type" => "TYPE" + } + if ($arg_vals =~ /([,\|])/) { + my $sep = $1; + my @allow = split(/\Q$sep\E/, $arg_vals); + my $must = $sep eq '|' ? 'Must' : 'Can'; + @allow = map { "`$_'" } @allow; + my $last = pop @allow; + $desc .= "\n$must be one of: " . + join(', ', @allow) . " or $last"; + } + my $lhs = join(', ', @s, @l) . 
join('', @vals); + if ($x =~ /\|\z/) { # "stdin|" or "clear|" + $lhs =~ s/\A--/- , --/; + } else { + $lhs =~ s/\A--/ --/; # pad if no short options + } + $lpad = length($lhs) if length($lhs) > $lpad; + push @opt_desc, $lhs, $desc; + } + my $msg = $errmsg ? "E: $errmsg\n" : ''; + $msg .= <{$errmsg ? 2 : 1} } $msg; + x_it($self, $errmsg ? 1 << 8 : 0); # stderr => failure + undef; +} + +sub optparse ($$$) { + my ($self, $cmd, $argv) = @_; + $self->{cmd} = $cmd; + $OPT = $self->{opt} = {}; + my $info = $CMD{$cmd} // [ '[...]' ]; + my ($proto, undef, @spec) = @$info; + my $glp = ref($spec[-1]) eq ref($GLP) ? pop(@spec) : $GLP; + push @spec, qw(help|h); + my $lone_dash; + if ($spec[0] =~ s/\|\z//s) { # "stdin|" or "clear|" allows "-" alias + $lone_dash = $spec[0]; + $OPT->{$spec[0]} = \(my $var); + push @spec, '' => \$var; + } + $glp->getoptionsfromarray($argv, $OPT, @spec) or + return _help($self, "bad arguments or options for $cmd"); + return _help($self) if $OPT->{help}; + + # "-" aliases "stdin" or "clear" + $OPT->{$lone_dash} = ${$OPT->{$lone_dash}} if defined $lone_dash; + + my $i = 0; + my $POS_ARG = '[A-Z][A-Z0-9_]+'; + my ($err, $inf); + my @args = split(/ /, $proto); + for my $var (@args) { + if ($var =~ /\A$POS_ARG\.\.\.\z/o) { # >= 1 args; + $inf = defined($argv->[$i]) and last; + $var =~ s/\.\.\.\z//; + $err = "$var not supplied"; + } elsif ($var =~ /\A$POS_ARG\z/o) { # required arg at $i + $argv->[$i++] // ($err = "$var not supplied"); + } elsif ($var =~ /\.\.\.\]\z/) { # optional args start + $inf = 1; + last; + } elsif ($var =~ /\A\[-?$POS_ARG\]\z/) { # one optional arg + $i++; + } elsif ($var =~ /\A.+?\|/) { # required FOO|--stdin + my @or = split(/\|/, $var); + my $ok; + for my $o (@or) { + if ($o =~ /\A--([a-z0-9\-]+)/) { + $ok = defined($OPT->{$1}); + last; + } elsif (defined($argv->[$i])) { + $ok = 1; + $i++; + last; + } # else continue looping + } + my $last = pop @or; + $err = join(', ', @or) . 
" or $last must be set"; + } else { + warn "BUG: can't parse `$var' in $proto"; + } + last if $err; + } + if (!$inf && scalar(@$argv) > scalar(@args)) { + $err //= 'too many arguments'; + } + $err ? fail($self, "usage: lei $cmd $proto\nE: $err") : 1; +} + +sub dispatch { + my ($self, $cmd, @argv) = @_; + local $SIG{__WARN__} = sub { err($self, @_) }; + return _help($self, 'no command given') unless defined($cmd); + my $func = "lei_$cmd"; + $func =~ tr/-/_/; + if (my $cb = __PACKAGE__->can($func)) { + optparse($self, $cmd, \@argv) or return; + $cb->($self, @argv); + } elsif (grep(/\A-/, $cmd, @argv)) { # --help or -h only + my $opt = {}; + $GLP->getoptionsfromarray([$cmd, @argv], $opt, qw(help|h)) or + return _help($self, 'bad arguments or options'); + _help($self); + } else { + fail($self, "`$cmd' is not an lei command"); + } +} + +sub _lei_cfg ($;$) { + my ($self, $creat) = @_; + my $f = _config_path($self->{env}); + my @st = stat($f); + my $cur_st = @st ? pack('dd', $st[10], $st[7]) : ''; # 10:ctime, 7:size + if (my $cfg = $PATH2CFG{$f}) { # reuse existing object in common case + return ($self->{cfg} = $cfg) if $cur_st eq $cfg->{-st}; + } + if (!@st) { + unless ($creat) { + delete $self->{cfg}; + return; + } + my (undef, $cfg_dir, undef) = File::Spec->splitpath($f); + -d $cfg_dir or mkpath($cfg_dir) or die "mkpath($cfg_dir): $!\n"; + open my $fh, '>>', $f or die "open($f): $!\n"; + @st = stat($fh) or die "fstat($f): $!\n"; + $cur_st = pack('dd', $st[10], $st[7]); + qerr($self, "I: $f created") if $self->{cmd} ne 'config'; + } + my $cfg = PublicInbox::Config::git_config_dump($f); + $cfg->{-st} = $cur_st; + $cfg->{'-f'} = $f; + $self->{cfg} = $PATH2CFG{$f} = $cfg; +} + +sub _lei_store ($;$) { + my ($self, $creat) = @_; + my $cfg = _lei_cfg($self, $creat); + $cfg->{-lei_store} //= do { + require PublicInbox::LeiStore; + PublicInbox::SearchIdx::load_xapian_writable(); + my $dir = $cfg->{'leistore.dir'}; + $dir //= _store_path($self->{env}) if $creat; + return unless 
$dir; + PublicInbox::LeiStore->new($dir, { creat => $creat }); + }; +} + +sub lei_show { + my ($self, @argv) = @_; +} + +sub lei_query { + my ($self, @argv) = @_; +} + +sub lei_mark { + my ($self, @argv) = @_; +} + +sub lei_config { + my ($self, @argv) = @_; + $self->{opt}->{'config-file'} and return fail $self, + "config file switches not supported by `lei config'"; + my $env = $self->{env}; + delete local $env->{GIT_CONFIG}; + my $cfg = _lei_cfg($self, 1); + my $cmd = [ qw(git config -f), $cfg->{'-f'}, @argv ]; + my %rdr = map { $_ => $self->{$_} } (0..2); + require PublicInbox::Import; + PublicInbox::Import::run_die($cmd, $env, \%rdr); +} + +sub lei_init { + my ($self, $dir) = @_; + my $cfg = _lei_cfg($self, 1); + my $cur = $cfg->{'leistore.dir'}; + my $env = $self->{env}; + $dir //= _store_path($env); + $dir = File::Spec->rel2abs($dir, $env->{PWD}); # PWD is symlink-aware + my @cur = stat($cur) if defined($cur); + $cur = File::Spec->canonpath($cur) if $cur; + my @dir = stat($dir); + my $exists = "I: leistore.dir=$cur already initialized" if @dir; + if (@cur) { + if ($cur eq $dir) { + _lei_store($self, 1)->done; + return qerr($self, $exists); + } + + # some folks like symlinks and bind mounts :P + if (@dir && "$cur[0] $cur[1]" eq "$dir[0] $dir[1]") { + lei_config($self, 'leistore.dir', $dir); + _lei_store($self, 1)->done; + return qerr($self, "$exists (as $cur)"); + } + return fail($self, <<""); +E: leistore.dir=$cur already initialized and it is not $dir + + } + lei_config($self, 'leistore.dir', $dir); + _lei_store($self, 1)->done; + $exists //= "I: leistore.dir=$dir newly initialized"; + return qerr($self, $exists); +} + +sub lei_daemon_pid { puts shift, $$ } + +sub lei_daemon_kill { + my ($self) = @_; + my $sig = $self->{opt}->{signal} // 'TERM'; + kill($sig, $$) or fail($self, "kill($sig, $$): $!"); +} + +sub lei_daemon_env { + my ($self, @argv) = @_; + my $opt = $self->{opt}; + if (defined $opt->{clear}) { + %ENV = (); + } elsif (my $u = $opt->{unset}) { + 
delete @ENV{@$u}; + } + if (@argv) { + %ENV = (%ENV, map { split(/=/, $_, 2) } @argv); + } elsif (!defined($opt->{clear}) && !$opt->{unset}) { + my $eor = $opt->{z} ? "\0" : "\n"; + my $buf = ''; + while (my ($k, $v) = each %ENV) { $buf .= "$k=$v$eor" } + out $self, $buf; + } +} + +sub lei_help { _help($_[0]) } + +# Shell completion helper. Used by lei-completion.bash and hopefully +# other shells. Try to do as much here as possible to avoid redundancy +# and improve maintainability. +sub lei__complete { + my ($self, @argv) = @_; # argv = qw(lei and any other args...) + shift @argv; # ignore "lei", the entire command is sent + @argv or return puts $self, grep(!/^_/, keys %CMD), qw(--help -h); + my $cmd = shift @argv; + my $info = $CMD{$cmd} // do { # filter matching commands + @argv or puts $self, grep(/\A\Q$cmd\E/, keys %CMD); + return; + }; + my ($proto, undef, @spec) = @$info; + my $cur = pop @argv; + my $re = defined($cur) ? qr/\A\Q$cur\E/ : qr/./; + if (substr($cur // '-', 0, 1) eq '-') { # --switches + # gross special case since the only git-config options + # Consider moving to a table if we need more special cases + # we use Getopt::Long for are the ones we reject, so these + # are the ones we don't reject: + if ($cmd eq 'config') { + puts $self, grep(/$re/, keys %CONFIG_KEYS); + @spec = qw(add z|null get get-all unset unset-all + replace-all get-urlmatch + remove-section rename-section + name-only list|l edit|e + get-color-name get-colorbool); + # fall-through + } + # TODO: arg support + puts $self, grep(/$re/, map { # generate short/long names + my $eq = ''; + if (s/=.+\z//) { # required arg, e.g. output|o=i + $eq = '='; + } elsif (s/:.+\z//) { # optional arg, e.g. mid:s + } else { # negation: solve! => no-solve|solve + s/\A(.+)!\z/no-$1|$1/; + } + map { + length > 1 ? 
"--$_$eq" : "-$_" + } split(/\|/, $_, -1) # help|h + } grep { $OPTDESC{"$cmd\t$_"} || $OPTDESC{$_} } @spec); + } elsif ($cmd eq 'config' && !@argv && !$CONFIG_KEYS{$cur}) { + puts $self, grep(/$re/, keys %CONFIG_KEYS); + } + # TODO: URLs, pathnames, OIDs, MIDs, etc... See optparse() for + # proto parsing. +} + +sub reap_exec { # dwaitpid callback + my ($self, $pid) = @_; + x_it($self, $?); +} + +sub lei_git { # support passing through random git commands + my ($self, @argv) = @_; + my %rdr = map { $_ => $self->{$_} } (0..2); + my $pid = spawn(['git', @argv], $self->{env}, \%rdr); + PublicInbox::DS::dwaitpid($pid, \&reap_exec, $self); +} + +sub accept_dispatch { # Listener {post_accept} callback + my ($sock) = @_; # ignore other + $sock->blocking(1); + $sock->autoflush(1); + my $self = bless { sock => $sock }, __PACKAGE__; + vec(my $rin = '', fileno($sock), 1) = 1; + # `say $sock' triggers "die" in lei(1) + for my $i (0..2) { + if (select(my $rout = $rin, undef, undef, 1)) { + my $fd = IO::FDPass::recv(fileno($sock)); + if ($fd >= 0) { + my $rdr = ($fd == 0 ? 
'<&=' : '>&='); + if (open(my $fh, $rdr, $fd)) { + $self->{$i} = $fh; + } else { + say $sock "open($rdr$fd) (FD=$i): $!"; + return; + } + } else { + say $sock "recv FD=$i: $!"; + return; + } + } else { + say $sock "timed out waiting to recv FD=$i"; + return; + } + } + # $ARGV_STR = join("]\0[", @ARGV); + # $ENV_STR = join('', map { "$_=$ENV{$_}\0" } keys %ENV); + # $line = "$$\0\0>$ARGV_STR\0\0>$ENV_STR\0\0"; + my ($client_pid, $argv, $env) = do { + local $/ = "\0\0\0"; # yes, 3 NULs at EOL, not 2 + chomp(my $line = <$sock>); + split(/\0\0>/, $line, 3); + }; + my %env = map { split(/=/, $_, 2) } split(/\0/, $env); + if (chdir($env{PWD})) { + $self->{env} = \%env; + $self->{pid} = $client_pid; + eval { dispatch($self, split(/\]\0\[/, $argv)) }; + say $sock $@ if $@; + } else { + say $sock "chdir($env{PWD}): $!"; # implicit close + } +} + +sub noop {} + +# lei(1) calls this when it can't connect +sub lazy_start { + my ($path, $errno) = @_; + if ($errno == ECONNREFUSED) { + unlink($path) or die "unlink($path): $!"; + } elsif ($errno != ENOENT) { + $! 
= $errno; # allow interpolation to stringify in die + die "connect($path): $!"; + } + umask(077) // die("umask(077): $!"); + socket(my $l, AF_UNIX, SOCK_STREAM, 0) or die "socket: $!"; + bind($l, pack_sockaddr_un($path)) or die "bind($path): $!"; + listen($l, 1024) or die "listen: $!"; + my @st = stat($path) or die "stat($path): $!"; + my $dev_ino_expect = pack('dd', $st[0], $st[1]); # dev+ino + pipe(my ($eof_r, $eof_w)) or die "pipe: $!"; + my $oldset = PublicInbox::Sigfd::block_signals(); + require IO::FDPass; + require PublicInbox::Listener; + require PublicInbox::EOFpipe; + (-p STDOUT && -p STDERR) or die "E: stdout+stderr must be pipes\n"; + open(STDIN, '+<', '/dev/null') or die "redirect stdin failed: $!"; + POSIX::setsid() > 0 or die "setsid: $!"; + my $pid = fork // die "fork: $!"; + return if $pid; + $0 = "lei-daemon $path"; + local %PATH2CFG; + $_->blocking(0) for ($l, $eof_r, $eof_w); + $l = PublicInbox::Listener->new($l, \&accept_dispatch, $l); + my $exit_code; + local $quit = sub { + $exit_code //= shift; + my $listener = $l or exit($exit_code); + unlink($path) if defined($path); + # closing eof_w triggers \&noop wakeup + $eof_w = $l = $path = undef; + $listener->close; # DS::close + PublicInbox::DS->SetLoopTimeout(1000); + }; + PublicInbox::EOFpipe->new($eof_r, \&noop, undef); + my $sig = { + CHLD => \&PublicInbox::DS::enqueue_reap, + QUIT => $quit, + INT => $quit, + TERM => $quit, + HUP => \&noop, + USR1 => \&noop, + USR2 => \&noop, + }; + my $sigfd = PublicInbox::Sigfd->new($sig, $SFD_NONBLOCK); + local %SIG = (%SIG, %$sig) if !$sigfd; + if ($sigfd) { # TODO: use inotify/kqueue to detect unlinked sockets + PublicInbox::DS->SetLoopTimeout(5000); + } else { + # wake up every second to accept signals if we don't + # have signalfd or IO::KQueue: + PublicInbox::Sigfd::sig_setmask($oldset); + PublicInbox::DS->SetLoopTimeout(1000); + } + PublicInbox::DS->SetPostLoopCallback(sub { + my ($dmap, undef) = @_; + if (@st = defined($path) ? 
stat($path) : ()) { + if ($dev_ino_expect ne pack('dd', $st[0], $st[1])) { + warn "$path dev/ino changed, quitting\n"; + $path = undef; + } + } elsif (defined($path)) { + warn "stat($path): $!, quitting ...\n"; + undef $path; # don't unlink + $quit->(); + } + return 1 if defined($path); + my $now = now(); + my $n = 0; + for my $s (values %$dmap) { + $s->can('busy') or next; + if ($s->busy($now)) { + ++$n; + } else { + $s->close; + } + } + $n; # true: continue, false: stop + }); + + # STDIN was redirected to /dev/null above, closing STDOUT and + # STDERR will cause the calling `lei' client process to finish + # reading <$daemon> pipe. + open STDOUT, '>&STDIN' or die "redirect stdout failed: $!"; + openlog($path, 'pid', 'user'); + local $SIG{__WARN__} = sub { syslog('warning', "@_") }; + my $owner_pid = $$; + my $on_destroy = PublicInbox::OnDestroy->new(sub { + syslog('crit', "$@") if $@ && $$ == $owner_pid; + }); + open STDERR, '>&STDIN' or die "redirect stderr failed: $!"; + # $daemon pipe to `lei' closed, main loop begins: + PublicInbox::DS->EventLoop; + @$on_destroy = (); # cancel on_destroy if we get here + exit($exit_code // 0); +} + +# for users w/o IO::FDPass +sub oneshot { + my ($main_pkg) = @_; + my $exit = $main_pkg->can('exit'); # caller may override exit() + local $quit = $exit if $exit; + local %PATH2CFG; + umask(077) // die("umask(077): $!"); + dispatch((bless { + 0 => *STDIN{IO}, + 1 => *STDOUT{IO}, + 2 => *STDERR{IO}, + env => \%ENV + }, __PACKAGE__), @ARGV); +} + +1; diff --git a/lib/PublicInbox/LeiExtinbox.pm b/lib/PublicInbox/LeiExtinbox.pm new file mode 100644 index 00000000..c2de7735 --- /dev/null +++ b/lib/PublicInbox/LeiExtinbox.pm @@ -0,0 +1,51 @@ +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ + +# *-extinbox commands of lei +package PublicInbox::LeiExtinbox; +use strict; +use v5.10.1; +use parent qw(Exporter); +our @EXPORT = qw(lei_ls_extinbox lei_add_extinbox lei_forget_extinbox); + +sub lei_ls_extinbox { + my ($self, @argv) = 
@_; + my $stor = $self->_lei_store(0); + my $cfg = $self->_lei_cfg(0); + my $out = $self->{1}; + my ($OFS, $ORS) = $self->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n"); + my (%boost, @loc); + for my $sec (grep(/\Aextinbox\./, @{$cfg->{-section_order}})) { + my $loc = substr($sec, length('extinbox.')); + $boost{$loc} = $cfg->{"$sec.boost"}; + push @loc, $loc; + } + use sort 'stable'; + # highest boost first, but stable for alphabetic tie break + for (sort { $boost{$b} <=> $boost{$a} } sort keys %boost) { + # TODO: use miscidx and show docid so forget/set is easier + print $out $_, $OFS, 'boost=', $boost{$_}, $ORS; + } +} + +sub lei_add_extinbox { + my ($self, $url_or_dir) = @_; + my $cfg = $self->_lei_cfg(1); + if ($url_or_dir !~ m!\Ahttps?://!) { + $url_or_dir = File::Spec->canonpath($url_or_dir); + } + my $new_boost = $self->{opt}->{boost} // 0; + my $key = "extinbox.$url_or_dir.boost"; + my $cur_boost = $cfg->{$key}; + return if defined($cur_boost) && $cur_boost == $new_boost; # idempotent + $self->lei_config($key, $new_boost); + my $stor = $self->_lei_store(1); + # TODO: add to MiscIdx + $stor->done; +} + +sub lei_forget_extinbox { + # TODO +} + +1; diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm new file mode 100644 index 00000000..0b962b11 --- /dev/null +++ b/lib/PublicInbox/LeiSearch.pm @@ -0,0 +1,37 @@ +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ + +package PublicInbox::LeiSearch; +use strict; +use v5.10.1; +use parent qw(PublicInbox::ExtSearch); +use PublicInbox::Search; + +# get combined docid from over.num: +# (not generic Xapian, only works with our sharding scheme) +sub num2docid ($$) { + my ($self, $num) = @_; + my $nshard = $self->{nshard}; + ($num - 1) * $nshard + $num % $nshard + 1; +} + +sub msg_keywords { + my ($self, $num) = @_; # num_or_mitem + my $xdb = $self->xdb; # set {nshard}; + my $docid = ref($num) ? 
$num->get_docid : num2docid($self, $num); + my %kw; + eval { + my $end = $xdb->termlist_end($docid); + my $cur = $xdb->termlist_begin($docid); + for (; $cur != $end; $cur++) { + $cur->skip_to('K'); + last if $cur == $end; + my $kw = $cur->get_termname; + $kw =~ s/\AK//s and $kw{$kw} = undef; + } + }; + warn "E: #$docid ($num): $@\n" if $@; + wantarray ? sort(keys(%kw)) : \%kw; +} + +1; diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm new file mode 100644 index 00000000..553adbc8 --- /dev/null +++ b/lib/PublicInbox/LeiStore.pm @@ -0,0 +1,227 @@ +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ +# +# Local storage (cache/memo) for lei(1), suitable for personal/private +# mail iff on encrypted device/FS. Based on v2, but only deduplicates +# based on git OID. +# +# for xref3, the following are constant: $eidx_key = '.', $xnum = -1 +package PublicInbox::LeiStore; +use strict; +use v5.10.1; +use parent qw(PublicInbox::Lock); +use PublicInbox::SearchIdx qw(crlf_adjust); +use PublicInbox::ExtSearchIdx; +use PublicInbox::Import; +use PublicInbox::InboxWritable; +use PublicInbox::V2Writable; +use PublicInbox::ContentHash qw(content_hash); +use PublicInbox::MID qw(mids); +use PublicInbox::LeiSearch; +use List::Util qw(max); + +sub new { + my (undef, $dir, $opt) = @_; + my $eidx = PublicInbox::ExtSearchIdx->new($dir, $opt); + my $self = bless { priv_eidx => $eidx }, __PACKAGE__; + if ($opt->{creat}) { + PublicInbox::SearchIdx::load_xapian_writable(); + eidx_init($self); + } + $self; +} + +sub git { $_[0]->{priv_eidx}->git } # read-only + +sub packing_factor { $PublicInbox::V2Writable::PACKING_FACTOR } + +sub rotate_bytes { + $_[0]->{rotate_bytes} // ((1024 * 1024 * 1024) / $_[0]->packing_factor) +} + +sub git_pfx { "$_[0]->{priv_eidx}->{topdir}/local" }; + +sub git_epoch_max { + my ($self) = @_; + if (opendir(my $dh, $self->git_pfx)) { + max(map { + substr($_, 0, -4) + 0; # drop ".git" suffix + } grep(/\A[0-9]+\.git\z/, readdir($dh))) // 0; + 
} else { + $!{ENOENT} ? 0 : die("opendir ${\$self->git_pfx}: $!\n"); + } +} + +sub git_ident ($) { + my ($git) = @_; + chomp(my $i = $git->qx(qw(var GIT_COMMITTER_IDENT))); + warn "$git->{git_dir} GIT_COMMITTER_IDENT failed\n" if $?; + $i =~ /\A(.+) <([^>]+)> [0-9]+ [-\+]?[0-9]+$/ ? ($1, $2) : + ('lei user', 'x@example.com') +} + +sub importer { + my ($self) = @_; + my $max; + my $im = $self->{im}; + if ($im) { + return $im if $im->{bytes_added} < $self->rotate_bytes; + + delete $self->{im}; + $im->done; + undef $im; + $self->checkpoint; + $max = $self->git_epoch_max + 1; + } + my $pfx = $self->git_pfx; + $max //= $self->git_epoch_max; + while (1) { + my $latest = "$pfx/$max.git"; + my $old = -e $latest; + my $git = PublicInbox::Git->new($latest); + PublicInbox::Import::init_bare({ git => $git }); + $git->qx(qw(config core.sharedRepository 0600)) if !$old; + my $packed_bytes = $git->packed_bytes; + my $unpacked_bytes = $packed_bytes / $self->packing_factor; + if ($unpacked_bytes >= $self->rotate_bytes) { + $max++; + next; + } + my ($n, $e) = git_ident($git); + $self->{im} = $im = PublicInbox::Import->new($git, $n, $e); + $im->{bytes_added} = int($packed_bytes / $self->packing_factor); + $im->{lock_path} = undef; + $im->{path_type} = 'v2'; + return $im; + } +} + +sub search { + PublicInbox::LeiSearch->new($_[0]->{priv_eidx}->{topdir}); +} + +sub eidx_init { + my ($self) = @_; + my $eidx = $self->{priv_eidx}; + $eidx->idx_init({-private => 1}); + $eidx; +} + +sub _docids_for ($$) { + my ($self, $eml) = @_; + my %docids; + my $chash = content_hash($eml); + my $eidx = eidx_init($self); + my $oidx = $eidx->{oidx}; + my $im = $self->{im}; + for my $mid (@{mids($eml)}) { + my ($id, $prev); + while (my $cur = $oidx->next_by_mid($mid, \$id, \$prev)) { + my $oid = $cur->{blob}; + my $docid = $cur->{num}; + my $bref = $im ? 
$im->cat_blob($oid) : undef; + $bref //= $eidx->git->cat_file($oid) // do { + warn "W: $oid (#$docid) <$mid> not found\n"; + next; + }; + local $self->{current_info} = $oid; + my $x = PublicInbox::Eml->new($bref); + $docids{$docid} = $docid if content_hash($x) eq $chash; + } + } + sort { $a <=> $b } values %docids; +} + +sub set_eml_keywords { + my ($self, $eml, @kw) = @_; + my $eidx = eidx_init($self); + my @docids = _docids_for($self, $eml); + for my $docid (@docids) { + $eidx->idx_shard($docid)->shard_set_keywords($docid, @kw); + } + \@docids; +} + +sub add_eml_keywords { + my ($self, $eml, @kw) = @_; + my $eidx = eidx_init($self); + my @docids = _docids_for($self, $eml); + for my $docid (@docids) { + $eidx->idx_shard($docid)->shard_add_keywords($docid, @kw); + } + \@docids; +} + +sub remove_eml_keywords { + my ($self, $eml, @kw) = @_; + my $eidx = eidx_init($self); + my @docids = _docids_for($self, $eml); + for my $docid (@docids) { + $eidx->idx_shard($docid)->shard_remove_keywords($docid, @kw); + } + \@docids; +} + +# cf: https://doc.dovecot.org/configuration_manual/mail_location/mbox/ +my %status2kw = (F => 'flagged', A => 'answered', R => 'seen', T => 'draft'); +# O (old/non-recent), and D (deleted) aren't in JMAP, +# so probably won't be supported by us. +sub mbox_keywords { + my $eml = $_[-1]; + my $s = "@{[$eml->header_raw('X-Status'),$eml->header_raw('Status')]}"; + my %kw; + $s =~ s/([FART])/$kw{$status2kw{$1}} = 1/sge; + sort(keys %kw); +} + +# cf: https://cr.yp.to/proto/maildir.html +my %c2kw = ('D' => 'draft', F => 'flagged', R => 'answered', S => 'seen'); +sub maildir_keywords { + $_[-1] =~ /:2,([A-Z]+)\z/i ? 
+ sort(map { $c2kw{$_} // () } split(//, $1)) : (); +} + +sub add_eml { + my ($self, $eml, @kw) = @_; + my $eidx = eidx_init($self); + my $oidx = $eidx->{oidx}; + my $smsg = bless { -oidx => $oidx }, 'PublicInbox::Smsg'; + my $im = $self->importer; + $im->add($eml, undef, $smsg) or return; # duplicate returns undef + my $msgref = delete $smsg->{-raw_email}; + $smsg->{bytes} = $smsg->{raw_bytes} + crlf_adjust($$msgref); + + local $self->{current_info} = $smsg->{blob}; + if (my @docids = _docids_for($self, $eml)) { + for my $docid (@docids) { + my $idx = $eidx->idx_shard($docid); + $oidx->add_xref3($docid, -1, $smsg->{blob}, '.'); + $idx->shard_add_eidx_info($docid, '.', $eml); # List-Id + $idx->shard_add_keywords($docid, @kw) if @kw; + } + } else { + $smsg->{num} = $oidx->adj_counter('eidx_docid', '+'); + $oidx->add_overview($eml, $smsg); + $oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.'); + my $idx = $eidx->idx_shard($smsg->{num}); + $idx->index_raw($msgref, $eml, $smsg); + $idx->shard_add_keywords($smsg->{num}, @kw) if @kw; + } + $smsg->{blob} +} + +sub done { + my ($self) = @_; + my $err = ''; + if (my $im = delete($self->{im})) { + eval { $im->done }; + if ($@) { + $err .= "import done: $@\n"; + warn $err; + } + } + $self->{priv_eidx}->done; + die $err if $err; +} + +1; diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm new file mode 100644 index 00000000..1a81b14a --- /dev/null +++ b/lib/PublicInbox/LeiXSearch.pm @@ -0,0 +1,72 @@ +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ + +# Combine any combination of PublicInbox::Search, +# PublicInbox::ExtSearch, and PublicInbox::LeiSearch objects +# into one Xapian DB +package PublicInbox::LeiXSearch; +use strict; +use v5.10.1; +use parent qw(PublicInbox::LeiSearch); + +sub new { + my ($class) = @_; + PublicInbox::Search::load_xapian(); + bless { + qp_flags => $PublicInbox::Search::QP_FLAGS | + PublicInbox::Search::FLAG_PURE_NOT(), + }, $class +} + +sub attach_extinbox { + my 
($self, $ibxish) = @_; # ibxish = ExtSearch or Inbox + if (!$ibxish->can('over')) { + push @{$self->{remotes}}, $ibxish + } + if (delete $self->{xdb}) { # XXX: do we need this? + # clobber existing {xdb} if amending + my $expect = delete $self->{nshard}; + my $shards = delete $self->{shards_flat}; + scalar(@$shards) == $expect or die + "BUG: {nshard}$expect != shards=".scalar(@$shards); + + my $prev = {}; + for my $old_ibxish (@{$self->{shard2ibx}}) { + next if $prev == $old_ibxish; + $prev = $old_ibxish; + my @shards = $old_ibxish->search->xdb_shards_flat; + push @{$self->{shards_flat}}, @shards; + } + my $nr = scalar(@{$self->{shards_flat}}); + $nr == $expect or die + "BUG: reloaded $nr shards, expected $expect" + } + my @shards = $ibxish->search->xdb_shards_flat; + push @{$self->{shards_flat}}, @shards; + push(@{$self->{shard2ibx}}, $ibxish) for (@shards); +} + +# called by PublicInbox::Search::xdb +sub xdb_shards_flat { @{$_[0]->{shards_flat}} } + +# like over->get_art +sub smsg_for { + my ($self, $mitem) = @_; + # cf. 
https://trac.xapian.org/wiki/FAQ/MultiDatabaseDocumentID + my $nshard = $self->{nshard}; + my $docid = $mitem->get_docid; + my $shard = ($docid - 1) % $nshard; + my $num = int(($docid - 1) / $nshard) + 1; + my $smsg = $self->{shard2ibx}->[$shard]->over->get_art($num); + $smsg->{docid} = $docid; + $smsg; +} + +sub recent { + my ($self, $qstr, $opt) = @_; + $opt //= {}; + $opt->{relevance} //= -2; + $self->mset($qstr //= 'bytes:1..', $opt); +} + +1; diff --git a/lib/PublicInbox/ManifestJsGz.pm b/lib/PublicInbox/ManifestJsGz.pm index e02450fa..37ee63d0 100644 --- a/lib/PublicInbox/ManifestJsGz.pm +++ b/lib/PublicInbox/ManifestJsGz.pm @@ -11,7 +11,7 @@ use PublicInbox::Config; use IO::Compress::Gzip qw(gzip); use HTTP::Date qw(time2str); -our $json = PublicInbox::Config::json(); +my $json = PublicInbox::Config::json(); # called by WwwListing sub url_regexp { diff --git a/lib/PublicInbox/OnDestroy.pm b/lib/PublicInbox/OnDestroy.pm new file mode 100644 index 00000000..841f87d4 --- /dev/null +++ b/lib/PublicInbox/OnDestroy.pm @@ -0,0 +1,16 @@ +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ + +package PublicInbox::OnDestroy; + +sub new { + shift; # ($class, $cb, @args) + bless [ @_ ], __PACKAGE__; +} + +sub DESTROY { + my ($cb, @args) = @{$_[0]}; + $cb->(@args) if $cb; +} + +1; diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index dcc2cff3..bc2e3ef4 100644 --- a/lib/PublicInbox/OverIdx.pm +++ b/lib/PublicInbox/OverIdx.pm @@ -688,4 +688,14 @@ DELETE FROM eidxq WHERE docid = ? } +sub blob_exists { + my ($self, $oidhex) = @_; + my $sth = $self->dbh->prepare_cached(<<'', undef, 1); +SELECT COUNT(*) FROM xref3 WHERE oidbin = ? 
+ + $sth->bind_param(1, pack('H*', $oidhex), SQL_BLOB); + $sth->execute; + $sth->fetchrow_array; +} + 1; diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index fb3e9975..58653c9e 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -190,41 +190,28 @@ sub xdir ($;$) { } } -sub xdb_sharded { +# returns all shards as separate Xapian::Database objects w/o combining +sub xdb_shards_flat ($) { my ($self) = @_; - opendir(my $dh, $self->{xpfx}) or return; # not initialized yet - - # We need numeric sorting so shard[0] is first for reading - # Xapian metadata, if needed - my $last = max(grep(/\A[0-9]+\z/, readdir($dh))) // return; + my $xpfx = $self->{xpfx}; my (@xdb, $slow_phrase); - for (0..$last) { - my $shard_dir = "$self->{xpfx}/$_"; - if (-d $shard_dir && -r _) { + load_xapian(); + if ($xpfx =~ m/xapian${\SCHEMA_VERSION}\z/) { + @xdb = ($X{Database}->new($xpfx)); + $self->{qp_flags} |= FLAG_PHRASE() if !-f "$xpfx/iamchert"; + } else { + opendir(my $dh, $xpfx) or return (); # not initialized yet + # We need numeric sorting so shard[0] is first for reading + # Xapian metadata, if needed + my $last = max(grep(/\A[0-9]+\z/, readdir($dh))) // return (); + for (0..$last) { + my $shard_dir = "$self->{xpfx}/$_"; push @xdb, $X{Database}->new($shard_dir); $slow_phrase ||= -f "$shard_dir/iamchert"; - } else { # gaps from missing epochs throw off mdocid() - warn "E: $shard_dir missing or unreadable\n"; - return; } + $self->{qp_flags} |= FLAG_PHRASE() if !$slow_phrase; } - $self->{qp_flags} |= FLAG_PHRASE() if !$slow_phrase; - $self->{nshard} = scalar(@xdb); - my $xdb = shift @xdb; - $xdb->add_database($_) for @xdb; - $xdb; -} - -sub _xdb { - my ($self) = @_; - my $dir = xdir($self, 1); - $self->{qp_flags} //= $QP_FLAGS; - if ($self->{ibx_ver} >= 2) { - xdb_sharded($self); - } else { - $self->{qp_flags} |= FLAG_PHRASE() if !-f "$dir/iamchert"; - $X{Database}->new($dir); - } + @xdb; } # v2 Xapian docids don't conflict, so they're identical to 
@@ -238,37 +225,30 @@ sub mdocid { sub mset_to_artnums { my ($self, $mset) = @_; - my $nshard = $self->{nshard} // 1; + my $nshard = $self->{nshard}; [ map { mdocid($nshard, $_) } $mset->items ]; } sub xdb ($) { my ($self) = @_; $self->{xdb} //= do { - load_xapian(); - $self->_xdb; + $self->{qp_flags} //= $QP_FLAGS; + my @xdb = $self->xdb_shards_flat or return; + $self->{nshard} = scalar(@xdb); + my $xdb = shift @xdb; + $xdb->add_database($_) for @xdb; + $xdb; }; } -sub xpfx_init ($) { - my ($self) = @_; - if ($self->{ibx_ver} == 1) { - $self->{xpfx} .= '/public-inbox/xapian' . SCHEMA_VERSION; - } else { - $self->{xpfx} .= '/xap'.SCHEMA_VERSION; - } -} - sub new { my ($class, $ibx) = @_; ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx"; - my $self = bless { - xpfx => $ibx->{inboxdir}, # for xpfx_init + my $xap = $ibx->version > 1 ? 'xap' : 'public-inbox/xapian'; + bless { + xpfx => "$ibx->{inboxdir}/$xap" . SCHEMA_VERSION, altid => $ibx->{altid}, - ibx_ver => $ibx->version, }, $class; - xpfx_init($self); - $self; } sub reopen { @@ -364,7 +344,7 @@ sub _enquire_once { # retry_reopen callback sub mset_to_smsg { my ($self, $ibx, $mset) = @_; - my $nshard = $self->{nshard} // 1; + my $nshard = $self->{nshard}; my $i = 0; my %order = map { mdocid($nshard, $_) => ++$i } $mset->items; my @msgs = sort { diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index b3361e05..95f4234c 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -1,6 +1,6 @@ # Copyright (C) 2015-2020 all contributors # License: AGPL-3.0+ -# based on notmuch, but with no concept of folders, files or flags +# based on notmuch, but with no concept of folders, files # # Indexes mail with Xapian and our (SQLite-based) ::Msgmap for use # with the web and NNTP interfaces. 
This index maintains thread @@ -54,14 +54,11 @@ sub new { } } $ibx = PublicInbox::InboxWritable->new($ibx); - my $self = bless { - ibx => $ibx, - xpfx => $inboxdir, # for xpfx_init - -altid => $altid, - ibx_ver => $version, - indexlevel => $indexlevel, - }, $class; - $self->xpfx_init; + my $self = PublicInbox::Search->new($ibx); + bless $self, $class; + $self->{ibx} = $ibx; + $self->{-altid} = $altid; + $self->{indexlevel} = $indexlevel; $self->{-set_indexlevel_once} = 1 if $indexlevel eq 'medium'; if ($ibx->{-skip_docdata}) { $self->{-set_skip_docdata_once} = 1; @@ -369,7 +366,7 @@ sub eml2doc ($$$;$) { index_headers($self, $smsg); if (defined(my $eidx_key = $smsg->{eidx_key})) { - $doc->add_boolean_term('O'.$eidx_key); + $doc->add_boolean_term('O'.$eidx_key) if $eidx_key ne '.'; } msg_iter($eml, \&index_xapian, [ $self, $doc ]); index_ids($self, $doc, $eml, $mids); @@ -406,7 +403,7 @@ sub add_xapian ($$$$) { sub _msgmap_init ($) { my ($self) = @_; - die "BUG: _msgmap_init is only for v1\n" if $self->{ibx_ver} != 1; + die "BUG: _msgmap_init is only for v1\n" if $self->{ibx}->version != 1; $self->{mm} //= eval { require PublicInbox::Msgmap; my $rw = $self->{ibx}->{-no_fsync} ? 
2 : 1; @@ -465,7 +462,7 @@ sub add_eidx_info { begin_txn_lazy($self); my $doc = _get_doc($self, $docid) or return; term_generator($self)->set_document($doc); - $doc->add_boolean_term('O'.$eidx_key); + $doc->add_boolean_term('O'.$eidx_key) if $eidx_key ne '.'; index_list_id($self, $doc, $eml); $self->{xdb}->replace_document($docid, $doc); } @@ -499,6 +496,47 @@ sub remove_eidx_info { $self->{xdb}->replace_document($docid, $doc); } +sub set_keywords { + my ($self, $docid, @kw) = @_; + begin_txn_lazy($self); + my $doc = _get_doc($self, $docid) or return; + my %keep = map { $_ => 1 } @kw; + my %add = %keep; + my @rm; + my $end = $doc->termlist_end; + for (my $cur = $doc->termlist_begin; $cur != $end; $cur++) { + $cur->skip_to('K'); + last if $cur == $end; + my $kw = $cur->get_termname; + $kw =~ s/\AK//s or next; + $keep{$kw} ? delete($add{$kw}) : push(@rm, $kw); + } + return unless (scalar(@rm) + scalar(keys %add)); + $doc->remove_term('K'.$_) for @rm; + $doc->add_boolean_term('K'.$_) for (keys %add); + $self->{xdb}->replace_document($docid, $doc); +} + +sub add_keywords { + my ($self, $docid, @kw) = @_; + begin_txn_lazy($self); + my $doc = _get_doc($self, $docid) or return; + $doc->add_boolean_term('K'.$_) for @kw; + $self->{xdb}->replace_document($docid, $doc); +} + +sub remove_keywords { + my ($self, $docid, @kw) = @_; + begin_txn_lazy($self); + my $doc = _get_doc($self, $docid) or return; + my $replace; + eval { + $doc->remove_term('K'.$_); + $replace = 1 + } for @kw; + $self->{xdb}->replace_document($docid, $doc) if $replace; +} + sub smsg_from_doc ($) { my ($doc) = @_; my $data = $doc->get_data or return; diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm index 2e654769..87b0bad6 100644 --- a/lib/PublicInbox/SearchIdxShard.pm +++ b/lib/PublicInbox/SearchIdxShard.pm @@ -89,6 +89,12 @@ sub shard_worker_loop ($$$$$) { my ($len, $docid, $eidx_key) = split(/ /, $line, 3); $self->remove_eidx_info($docid, $eidx_key, eml($r, $len)); + } 
elsif ($line =~ s/\A=K (\d+) //) { + $self->set_keywords($1 + 0, split(/ /, $line)); + } elsif ($line =~ s/\A-K (\d+) //) { + $self->remove_keywords($1 + 0, split(/ /, $line)); + } elsif ($line =~ s/\A\+K (\d+) //) { + $self->add_keywords($1 + 0, split(/ /, $line)); } elsif ($line =~ s/\AO ([^\n]+)//) { my $over_fn = $1; $over_fn =~ tr/\0/\n/; @@ -210,6 +216,33 @@ sub shard_remove { } } +sub shard_set_keywords { + my ($self, $docid, @kw) = @_; + if (my $w = $self->{w}) { # triggers remove_by_docid in a shard child + print $w "=K $docid @kw\n" or die "failed to write: $!"; + } else { # same process + $self->set_keywords($docid, @kw); + } +} + +sub shard_remove_keywords { + my ($self, $docid, @kw) = @_; + if (my $w = $self->{w}) { # triggers remove_by_docid in a shard child + print $w "-K $docid @kw\n" or die "failed to write: $!"; + } else { # same process + $self->remove_keywords($docid, @kw); + } +} + +sub shard_add_keywords { + my ($self, $docid, @kw) = @_; + if (my $w = $self->{w}) { # triggers remove_by_docid in a shard child + print $w "+K $docid @kw\n" or die "failed to write: $!"; + } else { # same process + $self->add_keywords($docid, @kw); + } +} + sub shard_over_check { my ($self, $over) = @_; if (my $w = $self->{w}) { # triggers remove_by_docid in a shard child diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index 299b9c6a..338e760c 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -75,6 +75,10 @@ sub require_mods { my $maybe = pop @mods if $mods[-1] =~ /\A[0-9]+\z/; my @need; while (my $mod = shift(@mods)) { + if ($mod eq 'json') { + $mod = 'Cpanel::JSON::XS||JSON::MaybeXS||'. 
+ 'JSON||JSON::PP' + } if ($mod eq 'Search::Xapian') { if (eval { require PublicInbox::Search } && PublicInbox::Search::load_xapian()) { @@ -164,7 +168,7 @@ sub run_script_exit { die RUN_SCRIPT_EXIT; } -my %cached_scripts; +our %cached_scripts; sub key2sub ($) { my ($key) = @_; $cached_scripts{$key} //= do { @@ -257,6 +261,7 @@ sub run_script ($;$$) { my $orig_io = _prepare_redirects($fhref); _run_sub($sub, $key, \@argv); _undo_redirects($orig_io); + select STDOUT; } # slurp the redirects back into user-supplied strings diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 567582c5..edb8ba57 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -24,7 +24,7 @@ use File::Temp (); my $OID = qr/[a-f0-9]{40,}/; # an estimate of the post-packed size to the raw uncompressed size -my $PACKING_FACTOR = 0.4; +our $PACKING_FACTOR = 0.4; # SATA storage lags behind what CPUs are capable of, so relying on # nproc(1) can be misleading and having extra Xapian shards is a @@ -73,13 +73,7 @@ sub count_shards ($) { delete $ibx->{search}; $srch->{nshard} // 0 } else { # ExtSearchIdx - $self->{nshard} // do { - if ($self->xdb_sharded) { - $self->{nshard} // die 'BUG: {nshard} unset'; - } else { - 0; - } - } + $self->{nshard} ||= scalar($self->xdb_shards_flat); } } diff --git a/script/lei b/script/lei new file mode 100755 index 00000000..ceaf1e00 --- /dev/null +++ b/script/lei @@ -0,0 +1,76 @@ +#!perl -w +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ +use strict; +use v5.10.1; +use Socket qw(AF_UNIX SOCK_STREAM pack_sockaddr_un); +if (my ($sock, $pwd) = eval { + require IO::FDPass; # will try to use a daemon to reduce load time + my $path = do { + my $runtime_dir = ($ENV{XDG_RUNTIME_DIR} // '') . 
'/lei'; + if ($runtime_dir eq '/lei') { + require File::Spec; + $runtime_dir = File::Spec->tmpdir."/lei-$<"; + } + unless (-d $runtime_dir) { + require File::Path; + File::Path::mkpath($runtime_dir, 0, 0700); + } + "$runtime_dir/sock"; + }; + my $addr = pack_sockaddr_un($path); + socket(my $sock, AF_UNIX, SOCK_STREAM, 0) or die "socket: $!"; + unless (connect($sock, $addr)) { # start the daemon if not started + my $cmd = [ $^X, qw[-MPublicInbox::LEI + -E PublicInbox::LEI::lazy_start(@ARGV)], + $path, $! + 0 ]; + my $env = { PERL5LIB => join(':', @INC) }; + pipe(my ($daemon, $w)) or die "pipe: $!"; + my $opt = { 1 => $w, 2 => $w }; + require PublicInbox::Spawn; + my $pid = PublicInbox::Spawn::spawn($cmd, $env, $opt); + $opt = $w = undef; + while (<$daemon>) { warn $_ } # EOF when STDERR is redirected + waitpid($pid, 0) or warn <<""; +lei-daemon could not start, PID:$pid exited with \$?=$? + + # try connecting again anyways, unlink+bind may be racy + unless (connect($sock, $addr)) { + die <<""; +connect($path): $! (after attempted daemon start) +Falling back to (slow) one-shot mode + + } + } + require Cwd; + my $cwd = Cwd::fastcwd() // die "fastcwd(PWD=".($ENV{PWD}//'').": $!"; + my $pwd = $ENV{PWD} // ''; + if ($pwd ne $cwd) { # prefer ENV{PWD} if it's a symlink to real cwd + my @st_cwd = stat($cwd) or die "stat(cwd=$cwd): $!"; + my @st_pwd = stat($pwd); # PWD invalid, use cwd + # make sure st_dev/st_ino match for {PWD} to be valid + $pwd = $cwd if (!@st_pwd || $st_pwd[1] != $st_cwd[1] || + $st_pwd[0] != $st_cwd[0]); + } else { + $pwd = $cwd; + } + ($sock, $pwd); +}) { # IO::FDPass, $sock, $pwd are all available: + local $ENV{PWD} = $pwd; + my $buf = "$$\0\0>" . join("]\0[", @ARGV) . 
"\0\0>"; + while (my ($k, $v) = each %ENV) { $buf .= "$k=$v\0" } + $buf .= "\0\0"; + select $sock; + $| = 1; # unbuffer selected $sock + IO::FDPass::send(fileno($sock), $_) for (0..2); + print $sock $buf or die "print(sock, buf): $!"; + while ($buf = <$sock>) { + $buf =~ /\Aexit=([0-9]+)\n\z/ and exit($1 + 0); + die $buf; + } +} else { # for systems lacking IO::FDPass + # don't warn about IO::FDPass since it's not commonly installed + warn $@ if $@ && index($@, 'IO::FDPass') < 0; + require PublicInbox::LEI; + PublicInbox::LEI::oneshot(__PACKAGE__); +} diff --git a/t/extsearch.t b/t/extsearch.t index fb31b0ab..ffbc10e2 100644 --- a/t/extsearch.t +++ b/t/extsearch.t @@ -8,9 +8,8 @@ use PublicInbox::Config; use PublicInbox::Search; use PublicInbox::InboxWritable; use Fcntl qw(:seek); -my $json = PublicInbox::Config::json() or plan skip_all => 'JSON missing'; require_git(2.6); -require_mods(qw(DBD::SQLite Search::Xapian)); +require_mods(qw(json DBD::SQLite Search::Xapian)); use_ok 'PublicInbox::ExtSearch'; use_ok 'PublicInbox::ExtSearchIdx'; use_ok 'PublicInbox::OverIdx'; diff --git a/t/lei-oneshot.t b/t/lei-oneshot.t new file mode 100644 index 00000000..3b8e412d --- /dev/null +++ b/t/lei-oneshot.t @@ -0,0 +1,25 @@ +#!perl -w +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ +use strict; +use v5.10.1; +use PublicInbox::TestCommon; +$PublicInbox::TestCommon::cached_scripts{'lei-oneshot'} //= do { + eval <<'EOF'; +package LeiOneshot; +use strict; +use subs qw(exit); +*exit = \&PublicInbox::TestCommon::run_script_exit; +sub main { +# the below "line" directive is a magic comment, see perlsyn(1) manpage +# line 1 "lei-oneshot" + require PublicInbox::LEI; + PublicInbox::LEI::oneshot(__PACKAGE__); + 0; +} +1; +EOF + LeiOneshot->can('main'); +}; +local $ENV{TEST_LEI_ONESHOT} = '1'; +require './t/lei.t'; diff --git a/t/lei.t b/t/lei.t new file mode 100644 index 00000000..a95a0efc --- /dev/null +++ b/t/lei.t @@ -0,0 +1,306 @@ +#!perl -w +# Copyright (C) 2020 all 
contributors +# License: AGPL-3.0+ +use strict; +use v5.10.1; +use Test::More; +use PublicInbox::TestCommon; +use PublicInbox::Config; +use File::Path qw(rmtree); +require_git 2.6; +require_mods(qw(json DBD::SQLite Search::Xapian)); +my $LEI = 'lei'; +my $opt = { 1 => \(my $out = ''), 2 => \(my $err = '') }; +my $lei = sub { + my ($cmd, $env, $xopt) = @_; + $out = $err = ''; + if (!ref($cmd)) { + ($env, $xopt) = grep { (!defined) || ref } @_; + $cmd = [ grep { defined && !ref } @_ ]; + } + run_script([$LEI, @$cmd], $env, $xopt // $opt); +}; + +my ($home, $for_destroy) = tmpdir(); +delete local $ENV{XDG_DATA_HOME}; +delete local $ENV{XDG_CONFIG_HOME}; +local $ENV{XDG_RUNTIME_DIR} = "$home/xdg_run"; +local $ENV{HOME} = $home; +local $ENV{FOO} = 'BAR'; +mkdir "$home/xdg_run", 0700 or BAIL_OUT "mkdir: $!"; +my $home_trash = [ "$home/.local", "$home/.config" ]; +my $cleanup = sub { rmtree([@$home_trash, @_]) }; +my $config_file = "$home/.config/lei/config"; +my $store_dir = "$home/.local/share/lei"; + +my $test_help = sub { + ok(!$lei->([], undef, $opt), 'no args fails'); + is($? >> 8, 1, '$? is 1'); + is($out, '', 'nothing in stdout'); + like($err, qr/^usage:/sm, 'usage in stderr'); + + for my $arg (['-h'], ['--help'], ['help'], [qw(daemon-pid --help)]) { + $out = $err = ''; + ok($lei->($arg, undef, $opt), "lei @$arg"); + like($out, qr/^usage:/sm, "usage in stdout (@$arg)"); + is($err, '', "nothing in stderr (@$arg)"); + } + + for my $arg ([''], ['--halp'], ['halp'], [qw(daemon-pid --halp)]) { + $out = $err = ''; + ok(!$lei->($arg, undef, $opt), "lei @$arg"); + is($? >> 8, 1, '$? set correctly'); + isnt($err, '', 'something in stderr'); + is($out, '', 'nothing in stdout'); + } + ok($lei->(qw(init -h), undef, $opt), 'init -h'); + like($out, qr! \Q$home\E/\.local/share/lei/store\b!, + 'actual path shown in init -h'); + ok($lei->(qw(init -h), { XDG_DATA_HOME => '/XDH' }, $opt), + 'init with XDG_DATA_HOME'); + like($out, qr! 
/XDH/lei/store\b!, 'XDG_DATA_HOME in init -h'); + is($err, '', 'no errors from init -h'); + + ok($lei->(qw(config -h), undef, $opt), 'config-h'); + like($out, qr! \Q$home\E/\.config/lei/config\b!, + 'actual path shown in config -h'); + ok($lei->(qw(config -h), { XDG_CONFIG_HOME => '/XDC' }, $opt), + 'config with XDG_CONFIG_HOME'); + like($out, qr! /XDC/lei/config\b!, 'XDG_CONFIG_HOME in config -h'); + is($err, '', 'no errors from config -h'); +}; + +my $ok_err_info = sub { + my ($msg) = @_; + is(grep(!/^I:/, split(/^/, $err)), 0, $msg) or + diag "$msg: err=$err"; + $err = ''; +}; + +my $test_init = sub { + $cleanup->(); + ok($lei->(['init'], undef, $opt), 'init w/o args'); + $ok_err_info->('after init w/o args'); + ok($lei->(['init'], undef, $opt), 'idempotent init w/o args'); + $ok_err_info->('after idempotent init w/o args'); + + ok(!$lei->(['init', "$home/x"], undef, $opt), + 'init conflict'); + is(grep(/^E:/, split(/^/, $err)), 1, 'got error on conflict'); + ok(!-e "$home/x", 'nothing created on conflict'); + $cleanup->(); + + ok($lei->(['init', "$home/x"], undef, $opt), 'init conflict resolved'); + $ok_err_info->('init w/ arg'); + ok($lei->(['init', "$home/x"], undef, $opt), 'init idempotent w/ path'); + $ok_err_info->('init idempotent w/ arg'); + ok(-d "$home/x", 'created dir'); + $cleanup->("$home/x"); + + ok(!$lei->(['init', "$home/x", "$home/2" ], undef, $opt), + 'too many args fails'); + like($err, qr/too many/, 'noted excessive'); + ok(!-e "$home/x", 'x not created on excessive'); + for my $d (@$home_trash) { + my $base = (split(m!/!, $d))[-1]; + ok(!-d $d, "$base not created"); + } + is($out, '', 'nothing in stdout on init failure'); +}; + +my $test_config = sub { + $cleanup->(); + ok($lei->([qw(config a.b c)], undef, $opt), 'config set var'); + is($out.$err, '', 'no output on var set'); + ok($lei->([qw(config -l)], undef, $opt), 'config -l'); + is($err, '', 'no errors on listing'); + is($out, "a.b=c\n", 'got expected output'); + ok(!$lei->([qw(config 
-f), "$home/.config/f", qw(x.y z)], undef, $opt), + 'config set var with -f fails'); + like($err, qr/not supported/, 'not supported noted'); + ok(!-f "$home/config/f", 'no file created'); +}; + +my $setup_publicinboxes = sub { + state $done = ''; + return if $done eq $home; + use PublicInbox::InboxWritable; + for my $V (1, 2) { + run_script([qw(-init -Lmedium), "-V$V", "t$V", + '--newsgroup', "t.$V", + "$home/t$V", "http://example.com/t$V", + "t$V\@example.com" ]) or BAIL_OUT "init v$V"; + } + my $cfg = PublicInbox::Config->new; + my $seen = 0; + $cfg->each_inbox(sub { + my ($ibx) = @_; + my $im = PublicInbox::InboxWritable->new($ibx)->importer(0); + my $V = $ibx->version; + my @eml = glob('t/*.eml'); + push(@eml, 't/data/0001.patch') if $V == 2; + for (@eml) { + next if $_ eq 't/psgi_v2-old.eml'; # dup mid + $im->add(eml_load($_)) or BAIL_OUT "v$V add $_"; + $seen++; + } + $im->done; + if ($V == 1) { + run_script(['-index', $ibx->{inboxdir}]) or + BAIL_OUT 'index v1'; + } + }); + $done = $home; + $seen || BAIL_OUT 'no imports'; +}; + +my $test_extinbox = sub { + $setup_publicinboxes->(); + $cleanup->(); + $lei->('ls-extinbox'); + is($out.$err, '', 'ls-extinbox no output, yet'); + ok(!-e $config_file && !-e $store_dir, + 'nothing created by ls-extinbox'); + + my $cfg = PublicInbox::Config->new; + $cfg->each_inbox(sub { + my ($ibx) = @_; + ok($lei->(qw(add-extinbox -q), $ibx->{inboxdir}), + 'added extinbox'); + is($out.$err, '', 'no output'); + }); + ok(-s $config_file && -e $store_dir, + 'add-extinbox created config + store'); + my $lcfg = PublicInbox::Config->new($config_file); + $cfg->each_inbox(sub { + my ($ibx) = @_; + is($lcfg->{"extinbox.$ibx->{inboxdir}.boost"}, 0, + "configured boost on $ibx->{name}"); + }); + $lei->('ls-extinbox'); + like($out, qr/boost=0\n/s, 'ls-extinbox has output'); +}; + +my $test_lei_common = sub { + $test_help->(); + $test_config->(); + $test_init->(); + $test_extinbox->(); +}; + +my $test_lei_oneshot = $ENV{TEST_LEI_ONESHOT}; 
+SKIP: { + last SKIP if $test_lei_oneshot; + require_mods(qw(IO::FDPass Cwd), 46); + my $sock = "$ENV{XDG_RUNTIME_DIR}/lei/sock"; + + ok(run_script([qw(lei daemon-pid)], undef, $opt), 'daemon-pid'); + is($err, '', 'no error from daemon-pid'); + like($out, qr/\A[0-9]+\n\z/s, 'pid returned') or BAIL_OUT; + chomp(my $pid = $out); + ok(kill(0, $pid), 'pid is valid'); + ok(-S $sock, 'sock created'); + + $test_lei_common->(); + + $out = ''; + ok(run_script([qw(lei daemon-pid)], undef, $opt), 'daemon-pid'); + chomp(my $pid_again = $out); + is($pid, $pid_again, 'daemon-pid idempotent'); + + $out = ''; + ok(run_script([qw(lei daemon-env -0)], undef, $opt), 'show env'); + is($err, '', 'no errors in env dump'); + my @env = split(/\0/, $out); + is(scalar grep(/\AHOME=\Q$home\E\z/, @env), 1, 'env has HOME'); + is(scalar grep(/\AFOO=BAR\z/, @env), 1, 'env has FOO=BAR'); + is(scalar grep(/\AXDG_RUNTIME_DIR=/, @env), 1, 'has XDG_RUNTIME_DIR'); + + $out = ''; + ok(run_script([qw(lei daemon-env -u FOO)], undef, $opt), 'unset'); + is($out.$err, '', 'no output for unset'); + ok(run_script([qw(lei daemon-env -0)], undef, $opt), 'show again'); + is($err, '', 'no errors in env dump'); + @env = split(/\0/, $out); + is(scalar grep(/\AFOO=BAR\z/, @env), 0, 'env unset FOO'); + + $out = ''; + ok(run_script([qw(lei daemon-env -u FOO -u HOME -u XDG_RUNTIME_DIR)], + undef, $opt), 'unset multiple'); + is($out.$err, '', 'no errors output for unset'); + ok(run_script([qw(lei daemon-env -0)], undef, $opt), 'show again'); + is($err, '', 'no errors in env dump'); + @env = split(/\0/, $out); + is(scalar grep(/\A(?:HOME|XDG_RUNTIME_DIR)=/, @env), 0, 'env unset@'); # any value counts, not only empty + $out = ''; + ok(run_script([qw(lei daemon-env -)], undef, $opt), 'clear env'); + is($out.$err, '', 'no output'); + ok(run_script([qw(lei daemon-env)], undef, $opt), 'env is empty'); + is($out, '', 'env cleared'); + + ok(run_script([qw(lei daemon-kill)], undef, $opt), 'daemon-kill'); + is($out, '', 'no output from daemon-kill'); + is($err, 
'', 'no error from daemon-kill'); + for (0..100) { + kill(0, $pid) or last; + tick(); + } + ok(!-S $sock, 'sock gone'); + ok(!kill(0, $pid), 'pid gone after stop'); + + ok(run_script([qw(lei daemon-pid)], undef, $opt), 'daemon-pid'); + chomp(my $new_pid = $out); + ok(kill(0, $new_pid), 'new pid is running'); + ok(-S $sock, 'sock exists again'); + + $out = $err = ''; + for my $sig (qw(-0 -CHLD)) { + ok(run_script([qw(lei daemon-kill), $sig ], undef, $opt), + "handles $sig"); + } + is($out.$err, '', 'no output on innocuous signals'); + ok(run_script([qw(lei daemon-pid)], undef, $opt), 'daemon-pid'); + chomp $out; + is($out, $new_pid, 'PID unchanged after -0/-CHLD'); + + if ('socket inaccessible') { + chmod 0000, $sock or BAIL_OUT "chmod 0000: $!"; + $out = $err = ''; + ok(run_script([qw(lei help)], undef, $opt), + 'connect fail, one-shot fallback works'); + like($err, qr/\bconnect\(/, 'connect error noted'); + like($out, qr/^usage: /, 'help output works'); + chmod 0700, $sock or BAIL_OUT "chmod 0700: $!"; + } + if ('oneshot on cwd gone') { + my $cwd = Cwd::fastcwd() or BAIL_OUT "fastcwd: $!"; + my $d = "$home/to-be-removed"; + mkdir $d or BAIL_OUT "mkdir($d) $!"; + chdir $d or BAIL_OUT "chdir($d) $!"; + if (rmdir($d)) { + $out = $err = ''; + ok(run_script([qw(lei help)], undef, $opt), + 'cwd fail, one-shot fallback works'); + } else { + $err = "rmdir=$!"; + } + chdir $cwd or BAIL_OUT "chdir($cwd) $!"; + like($err, qr/cwd\(/, 'cwd error noted'); + like($out, qr/^usage: /, 'help output still works'); + } + + unlink $sock or BAIL_OUT "unlink($sock) $!"; + for (0..100) { + kill('CHLD', $new_pid) or last; + tick(); + } + ok(!kill(0, $new_pid), 'daemon exits after unlink'); + # success over socket, can't test without + $test_lei_common = undef; +}; + +require_ok 'PublicInbox::LEI'; +$LEI = 'lei-oneshot' if $test_lei_oneshot; +$test_lei_common->() if $test_lei_common; + +done_testing; diff --git a/t/lei_store.t b/t/lei_store.t new file mode 100644 index 00000000..03ab5af6 
--- /dev/null +++ b/t/lei_store.t @@ -0,0 +1,88 @@ +#!perl -w +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ +use strict; +use v5.10.1; +use Test::More; +use PublicInbox::TestCommon; +require_mods(qw(DBD::SQLite Search::Xapian)); +require_git 2.6; +require_ok 'PublicInbox::LeiStore'; +require_ok 'PublicInbox::ExtSearch'; +my ($home, $for_destroy) = tmpdir(); +my $opt = { 1 => \(my $out = ''), 2 => \(my $err = '') }; +my $store_dir = "$home/lst"; +my $lst = PublicInbox::LeiStore->new($store_dir, { creat => 1 }); +ok($lst, '->new'); +my $oid = $lst->add_eml(eml_load('t/data/0001.patch')); +like($oid, qr/\A[0-9a-f]+\z/, 'add returned OID'); +my $eml = eml_load('t/data/0001.patch'); +is($lst->add_eml($eml), undef, 'idempotent'); +$lst->done; +is_deeply([$lst->mbox_keywords($eml)], [], 'no keywords'); +$eml->header_set('Status', 'RO'); +is_deeply([$lst->mbox_keywords($eml)], ['seen'], 'seen extracted'); +$eml->header_set('X-Status', 'A'); +is_deeply([$lst->mbox_keywords($eml)], [qw(answered seen)], + 'seen+answered extracted'); +$eml->header_set($_) for qw(Status X-Status); + +is_deeply([$lst->maildir_keywords('/foo:2,')], [], 'Maildir no keywords'); +is_deeply([$lst->maildir_keywords('/foo:2,S')], ['seen'], 'Maildir seen'); +is_deeply([$lst->maildir_keywords('/foo:2,RS')], ['answered', 'seen'], + 'Maildir answered + seen'); +is_deeply([$lst->maildir_keywords('/foo:2,RSZ')], ['answered', 'seen'], + 'Maildir answered + seen w/o Z'); +{ + my $es = $lst->search; + my $msgs = $es->over->query_xover(0, 1000); + is(scalar(@$msgs), 1, 'one message'); + is($msgs->[0]->{blob}, $oid, 'blob matches'); + my $mset = $es->mset("mid:$msgs->[0]->{mid}"); + is($mset->size, 1, 'search works'); + is_deeply($es->mset_to_artnums($mset), [ $msgs->[0]->{num} ], + 'mset_to_artnums'); + my @kw = $es->msg_keywords(($mset->items)[0]); + is_deeply(\@kw, [], 'no flags'); +} + +for my $parallel (0, 1) { + $lst->{priv_eidx}->{parallel} = $parallel; + my $docids = 
$lst->set_eml_keywords($eml, qw(seen draft)); + is(scalar @$docids, 1, 'set keywords on one doc'); + $lst->done; + my @kw = $lst->search->msg_keywords($docids->[0]); + is_deeply(\@kw, [qw(draft seen)], 'kw matches'); + + $docids = $lst->add_eml_keywords($eml, qw(seen draft)); + $lst->done; + is(scalar @$docids, 1, 'idempotently added keywords to doc'); + @kw = $lst->search->msg_keywords($docids->[0]); + is_deeply(\@kw, [qw(draft seen)], 'kw matches after noop'); + + $docids = $lst->remove_eml_keywords($eml, qw(seen draft)); + is(scalar @$docids, 1, 'removed from one doc'); + $lst->done; + @kw = $lst->search->msg_keywords($docids->[0]); + is_deeply(\@kw, [], 'kw matches after remove'); + + $docids = $lst->remove_eml_keywords($eml, qw(answered)); + is(scalar @$docids, 1, 'removed from one doc (idempotently)'); + $lst->done; + @kw = $lst->search->msg_keywords($docids->[0]); + is_deeply(\@kw, [], 'kw matches after remove (idempotent)'); + + $docids = $lst->add_eml_keywords($eml, qw(answered)); + is(scalar @$docids, 1, 'added to empty doc'); + $lst->done; + @kw = $lst->search->msg_keywords($docids->[0]); + is_deeply(\@kw, ['answered'], 'kw matches after add'); + + $docids = $lst->set_eml_keywords($eml); + is(scalar @$docids, 1, 'set to clobber'); + $lst->done; + @kw = $lst->search->msg_keywords($docids->[0]); + is_deeply(\@kw, [], 'set clobbers all'); +} + +done_testing; diff --git a/t/lei_xsearch.t b/t/lei_xsearch.t new file mode 100644 index 00000000..c41213bd --- /dev/null +++ b/t/lei_xsearch.t @@ -0,0 +1,73 @@ +#!perl -w +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ +use strict; +use v5.10.1; +use Test::More; +use List::Util qw(shuffle max); +use PublicInbox::TestCommon; +use PublicInbox::ExtSearchIdx; +use PublicInbox::Eml; +use PublicInbox::InboxWritable; +require_mods(qw(DBD::SQLite Search::Xapian)); +require_git 2.6; +require_ok 'PublicInbox::LeiXSearch'; +my ($home, $for_destroy) = tmpdir(); +my @ibx; +for my $V (1..2) { + for my $i (3..6) { + my 
$ibx = PublicInbox::InboxWritable->new({ + inboxdir => "$home/v$V-$i", + name => "test-v$V-$i", + version => $V, + indexlevel => 'medium', + -primary_address => "v$V-$i\@example.com", + }, { nproc => int(rand(8)) + 1 }); + push @ibx, $ibx; + my $im = $ibx->importer(0); + for my $j (0..9) { + my $eml = PublicInbox::Eml->new(<<EOF); # NOTE(review): From: address and Message-ID value reconstructed after extraction loss - confirm against upstream +From: x\@example.com +To: $ibx->{-primary_address} +Date: Fri, 02 Oct 1993 0$V:0$i:0$j +0000 +Subject: v${V}i${i}j$j +Message-ID: <v${V}i${i}j$j\@example> + +${V}er ${i}on j$j +EOF + $im->add($eml); + } + $im->done; + } +} +my $first = shift @ibx; is($first->{name}, 'test-v1-3', 'first plucked'); +my $last = pop @ibx; is($last->{name}, 'test-v2-6', 'last plucked'); +my $eidx = PublicInbox::ExtSearchIdx->new("$home/eidx"); +$eidx->attach_inbox($first); +$eidx->attach_inbox($last); +$eidx->eidx_sync({fsync => 0}); +my $es = PublicInbox::ExtSearch->new("$home/eidx"); +my $lxs = PublicInbox::LeiXSearch->new; +for my $ibxish (shuffle($es, @ibx)) { + $lxs->attach_extinbox($ibxish); +} +my $nr = $lxs->xdb->get_doccount; +my $mset = $lxs->mset('d:19931002..19931003', { limit => $nr }); +is($mset->size, $nr, 'got all messages'); +my @msgs; +for my $mi ($mset->items) { + if (my $smsg = $lxs->smsg_for($mi)) { + push @msgs, $smsg; + } else { + diag "E: ${\$mi->get_docid} missing"; + } +} +is(scalar(@msgs), $nr, 'smsgs retrieved for all'); + +$mset = $lxs->recent(undef, { limit => 1 }); +is($mset->size, 1, 'one result'); +my $max = max(map { $_->{docid} } @msgs); +is($lxs->smsg_for(($mset->items)[0])->{docid}, $max, + 'got highest docid'); + +done_testing; diff --git a/t/on_destroy.t b/t/on_destroy.t new file mode 100644 index 00000000..8b85b48e --- /dev/null +++ b/t/on_destroy.t @@ -0,0 +1,25 @@ +#!perl -w +use strict; +use v5.10.1; +use Test::More; +require_ok 'PublicInbox::OnDestroy'; +my @x; +my $od = PublicInbox::OnDestroy->new(sub { push @x, 'hi' }); +is_deeply(\@x, [], 'not called, yet'); +undef $od; +is_deeply(\@x, [ 'hi' ], 'no args works'); +$od = PublicInbox::OnDestroy->new(sub { $x[0] = $_[0] }, 
'bye'); +is_deeply(\@x, [ 'hi' ], 'nothing changed while alive'); +undef $od; +is_deeply(\@x, [ 'bye' ], 'arg passed'); +$od = PublicInbox::OnDestroy->new(sub { @x = @_ }, qw(x y)); +undef $od; +is_deeply(\@x, [ 'x', 'y' ], '2 args passed'); + +if (my $nr = $ENV{TEST_LEAK_NR}) { + for (0..$nr) { + $od = PublicInbox::OnDestroy->new(sub { @x = @_ }, qw(x y)); + } +} + +done_testing; diff --git a/t/www_listing.t b/t/www_listing.t index 63613371..94c1e5bb 100644 --- a/t/www_listing.t +++ b/t/www_listing.t @@ -7,14 +7,12 @@ use Test::More; use PublicInbox::Spawn qw(which); use PublicInbox::TestCommon; use PublicInbox::Import; -require_mods(qw(URI::Escape Plack::Builder Digest::SHA +require_mods(qw(json URI::Escape Plack::Builder Digest::SHA IO::Compress::Gzip IO::Uncompress::Gunzip HTTP::Tiny)); require PublicInbox::WwwListing; require PublicInbox::ManifestJsGz; -my $json = do { - no warnings 'once'; - $PublicInbox::ManifestJsGz::json; -} or plan skip_all => "JSON module missing"; +use PublicInbox::Config; +my $json = PublicInbox::Config::json(); use_ok 'PublicInbox::Git';