Copyright
---------
-Copyright 2013-2020 all contributors <meta@public-inbox.org>
+Copyright 2013-2021 all contributors <meta@public-inbox.org>
License: AGPL-3.0+ <http://www.gnu.org/licenses/agpl-3.0.txt>
#!/usr/bin/perl -w
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# prints a manpage to stdout
use strict;
public-inbox-imapd\n
public-inbox-nntpd]
-# Copyright 2020 all contributors <meta@public-inbox.org>
+# Copyright 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
| public-inbox-nntpd |
+--------------------+
-# Copyright 2020 all contributors <meta@public-inbox.org>
+# Copyright 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-# Copyright (C) 2013-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2013-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
all::
#!/usr/bin/perl -w
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Generates NEWS, NEWS.atom, and NEWS.html files using release emails
# this uses unstable internal APIs of public-inbox, and this script
);
$ibx->{-primary_address} = $addr;
my $ctx = {
- -inbox => $ibx,
+ ibx => $ibx,
-upfx => "$base_url/",
-hr => 1,
};
}
sub html_end {
- print $out <<EOF or die;
- git clone $PublicInbox::WwwStream::CODE_URL
-</pre></body></html>
-EOF
+ for (@$PublicInbox::WwwStream::CODE_URL) {
+ print $out " git clone $_\n" or die;
+ }
+ print $out "</pre></body></html>\n" or die;
}
sub atom_start {
# WwwAtomStream stats this dir for mtime
my $astream = PublicInbox::WwwAtomStream->new($ctx);
delete $astream->{emit_header};
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
my $title = PublicInbox::WwwAtomStream::title_tag($ibx->description);
my $updated = PublicInbox::WwwAtomStream::feed_updated($mtime);
print $out <<EOF or die;
=head1 COPYRIGHT
-Copyright 2018-2020 all contributors L<mailto:meta@public-inbox.org>
+Copyright 2018-2021 all contributors L<mailto:meta@public-inbox.org>
License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
=head1 COPYRIGHT
-Copyright 2016-2020 all contributors L<mailto:meta@public-inbox.org>
+Copyright 2016-2021 all contributors L<mailto:meta@public-inbox.org>
License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
=head1 COPYRIGHT
-Copyright 2013-2020 all contributors L<mailto:meta@public-inbox.org>
+Copyright 2013-2021 all contributors L<mailto:meta@public-inbox.org>
License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
=head1 COPYRIGHT
-Copyright 2013-2020 all contributors L<mailto:meta@public-inbox.org>
+Copyright 2013-2021 all contributors L<mailto:meta@public-inbox.org>
License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
=head1 COPYRIGHT
-Copyright 2019-2020 all contributors L<mailto:meta@public-inbox.org>
+Copyright 2019-2021 all contributors L<mailto:meta@public-inbox.org>
License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
--- /dev/null
+% public-inbox developer manual
+
+=head1 NAME
+
+public-inbox extindex format description
+
+=head1 DESCRIPTION
+
+The extindex is an index-only evolution of the per-inbox
+SQLite and Xapian indices used by L<public-inbox-v2-format(5)>
+and L<public-inbox-v1-format(5)>. It exists to facilitate
+searches across multiple inboxes as well as to reduce index
+space when messages are cross-posted to several existing
+inboxes.
+
+It transparently indexes messages across any combination of v1 and v2
+inboxes and data about inboxes themselves.
+
+=head1 DIRECTORY LAYOUT
+
+While inspired by v2, there is no git blob storage nor
+C<msgmap.sqlite3> DB.
+
+Instead, there is an C<ALL.git> (all caps) git repo which treats
+every indexed v1 inbox or v2 epoch as a git alternate.
+
+As with v2 inboxes, it uses C<over.sqlite3> and Xapian "shards"
+for WWW and IMAP use. Several exclusive new tables are added
+to deal with L</XREF3 DEDUPLICATION> and metadata.
+
+Unlike v1 and v2 inboxes, it is NOT designed to map to a NNTP
+newsgroup. Thus it lacks C<msgmap.sqlite3> to enforce the
+unique Message-ID requirement of NNTP.
+
+=head2 INDEX OVERVIEW AND DEFINITIONS
+
+ $SCHEMA_VERSION - DB schema version (for Xapian)
+ $SHARD - Integer starting with 0 based on parallelism
+
+ foo/ # "foo" is the name of the index
+ - ei.lock # lock file to protect global state
+ - ALL.git # empty, alternates for inboxes
+ - ei$SCHEMA_VERSION/$SHARD # per-shard Xapian DB
+ - ei$SCHEMA_VERSION/over.sqlite3 # overview DB for WWW, IMAP
+ - ei$SCHEMA_VERSION/misc # misc Xapian DB
+
+File and directory names are intentionally different from
+analogous v2 names to ensure extindex and v2 inboxes can
+easily be distinguished from each other.
+
+=head2 XREF3 DEDUPLICATION
+
+Due to cross-posted messages being the norm in the large Linux kernel
+development community and Xapian indices being the primary consumer of
+storage, it makes sense to deduplicate indexing as much as possible.
+
+The internal storage format is based on the NNTP "Xref" tuple,
+but with the addition of a third element: the git blob OID.
+Thus the triple is expressed in string form as:
+
+ $NEWSGROUP_NAME:$ARTICLE_NUM:$OID
+
+If no C<newsgroup> is configured for an inbox, the C<inboxdir>
+of the inbox is used.
+
+This data is stored in the C<xref3> table of over.sqlite3.
+
+=head2 misc XAPIAN DB
+
+In addition to the numeric Xapian shards for indexing messages,
+there is a new, in-development Xapian index for storing data
+about inboxes themselves and other non-message data. This
+index allows us to speed up operations involving hundreds or
+thousands of inboxes.
+
+=head1 BENEFITS
+
+In addition to providing cross-inbox search capabilities, it can
+also replace per-inbox Xapian shards (but not per-inbox
+over.sqlite3). This allows reduction in disk space, open file
+handles, and associated memory use.
+
+=head1 CAVEATS
+
+Relocating v1 and v2 inboxes on the filesystem will require
+extindex to be garbage-collected and/or reindexed.
+
+Configuring and maintaining stable C<newsgroup> names before any
+messages are indexed from every inbox can avoid expensive
+reindexing and rely exclusively on GC.
+
+=head1 LOCKING
+
+L<flock(2)> locking exclusively locks the empty ei.lock file
+for all non-atomic operations.
+
+=head1 THANKS
+
+Thanks to the Linux Foundation for sponsoring the development
+and testing.
+
+=head1 COPYRIGHT
+
+Copyright 2020-2021 all contributors L<mailto:meta@public-inbox.org>
+
+License: AGPL-3.0+ L<http://www.gnu.org/licenses/agpl-3.0.txt>
+
+=head1 SEE ALSO
+
+L<public-inbox-v2-format(5)>
=head1 COPYRIGHT
-Copyright 2013-2020 all contributors L<mailto:meta@public-inbox.org>
+Copyright 2013-2021 all contributors L<mailto:meta@public-inbox.org>
License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
=head1 COPYRIGHT
-Copyright 2020 all contributors L<mailto:meta@public-inbox.org>
+Copyright 2020-2021 all contributors L<mailto:meta@public-inbox.org>
License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
Available in public-inbox 1.6.0+.
+=item --update-extindex=EXTINDEX, -E
+
+Update the given external index (L<public-inbox-extindex-format(5)>).
+Either the configured section name (e.g. C<all>) or a directory name
+may be specified.
+
+Defaults to C<all> if C<[extindex "all"]> is configured,
+otherwise no external indices are updated.
+
+May be specified multiple times in rare cases where multiple
+external indices are configured.
+
+=item --no-update-extindex
+
+Do not update the C<all> external index by default. This negates
+all uses of C<-E> / C<--update-extindex=> on the command-line.
+
=back
=head1 FILES
=head1 COPYRIGHT
-Copyright 2016-2020 all contributors L<mailto:meta@public-inbox.org>
+Copyright 2016-2021 all contributors L<mailto:meta@public-inbox.org>
License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
=head1 SEE ALSO
-L<Search::Xapian>, L<DBD::SQLite>
+L<Search::Xapian>, L<DBD::SQLite>, L<public-inbox-extindex-format(5)>
=head1 COPYRIGHT
-Copyright 2019-2020 all contributors L<mailto:meta@public-inbox.org>
+Copyright 2019-2021 all contributors L<mailto:meta@public-inbox.org>
License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
=head1 COPYRIGHT
-Copyright 2019-2020 all contributors L<mailto:meta@public-inbox.org>
+Copyright 2019-2021 all contributors L<mailto:meta@public-inbox.org>
License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
=head1 COPYRIGHT
-Copyright 2013-2020 all contributors L<mailto:meta@public-inbox.org>
+Copyright 2013-2021 all contributors L<mailto:meta@public-inbox.org>
License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
=head1 COPYRIGHT
-Copyright 2013-2020 all contributors L<mailto:meta@public-inbox.org>
+Copyright 2013-2021 all contributors L<mailto:meta@public-inbox.org>
License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
=head1 COPYRIGHT
-Copyright 2016-2020 all contributors L<mailto:meta@public-inbox.org>
+Copyright 2016-2021 all contributors L<mailto:meta@public-inbox.org>
License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
=head1 COPYRIGHT
-Copyright 2019-2020 all contributors L<mailto:meta@public-inbox.org>
+Copyright 2019-2021 all contributors L<mailto:meta@public-inbox.org>
License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
=head1 COPYRIGHT
-Copyright 2020 all contributors L<mailto:meta@public-inbox.org>
+Copyright 2020-2021 all contributors L<mailto:meta@public-inbox.org>
License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
=head1 COPYRIGHT
-Copyright 2013-2020 all contributors L<mailto:meta@public-inbox.org>
+Copyright 2013-2021 all contributors L<mailto:meta@public-inbox.org>
License: AGPL-3.0+ L<http://www.gnu.org/licenses/agpl-3.0.txt>
=head1 COPYRIGHT
-Copyright 2018-2020 all contributors L<mailto:meta@public-inbox.org>
+Copyright 2018-2021 all contributors L<mailto:meta@public-inbox.org>
License: AGPL-3.0+ L<http://www.gnu.org/licenses/agpl-3.0.txt>
=head1 COPYRIGHT
-Copyright 2016-2020 all contributors L<mailto:meta@public-inbox.org>
+Copyright 2016-2021 all contributors L<mailto:meta@public-inbox.org>
License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
=head1 COPYRIGHT
-Copyright 2019-2020 all contributors L<mailto:meta@public-inbox.org>
+Copyright 2019-2021 all contributors L<mailto:meta@public-inbox.org>
License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
=head1 COPYRIGHT
-Copyright 2019-2020 all contributors L<mailto:meta@public-inbox.org>
+Copyright 2019-2021 all contributors L<mailto:meta@public-inbox.org>
License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
#!/usr/bin/perl -w
use strict;
-# Copyright 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
print <<EOF;
1036 => 'Standard for Interchange of USENET Messages',
5536 => 'Netnews Article Format',
5537 => 'Netnews Architecture and Protocols',
+ 1738 => 'Uniform resource locators',
+ 5092 => 'IMAP URL scheme',
+ 5538 => 'NNTP URI schemes',
6048 => 'NNTP additions to LIST command (TODO)',
8054 => 'NNTP compression',
4642 => 'NNTP TLS',
#!/usr/bin/env perl
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Stupid script to make HTML from preformatted, utf-8 text versions,
---------------------------------------
This is for folks who want to setup their own public-inbox instance.
-Clients should use normal git-clone/git-fetch, or NNTP clients
+Clients should use normal git-clone/git-fetch, IMAP or NNTP clients
if they want to import mail into their personal inboxes.
public-inbox is developed on Debian GNU/Linux systems and will
* Git (1.8.0+, 2.6+ for writing v2 inboxes)
* Perl 5.10.1+
-* DBD::SQLite (needed for NNTP, message threading, and v2 inboxes)
+* DBD::SQLite (needed for IMAP, NNTP, message threading, and v2 inboxes)
To accept incoming mail into a public inbox, you'll likely want:
- DBD::SQLite deb: libdbd-sqlite3-perl
pkg: p5-DBD-SQLite
rpm: perl-DBD-SQLite
- (for v2, NNTP, or gzipped mboxes)
+ (for v2, IMAP, NNTP, or gzipped mboxes)
- Search::Xapian deb: libsearch-xapian-perl
pkg: p5-Search-Xapian
rpm: perl-Search-Xapian
- (HTTP search)
+ (HTTP and IMAP search)
- Net::Server deb: libnet-server-perl
pkg: p5-Net-Server
rpm: perl-Net-Server
- (for HTTP/NNTP background daemons,
+ (for HTTP/IMAP/NNTP background daemons,
not needed as systemd services or
foreground servers)
- Email::Address::XS deb: libemail-address-xs-perl
pkg: p5-Email-Address-XS
(correct parsing of tricky email
- addresses, phrases and comments)
+ addresses, phrases and comments,
+ required for IMAP)
+
+- Parse::RecDescent deb: libparse-recdescent-perl
+ pkg: p5-Parse-RecDescent
+ rpm: perl-Parse-RecDescent
+ (optional, for public-inbox-imapd(1))
+
- Plack::Middleware::ReverseProxy deb: libplack-middleware-reverseproxy-perl
pkg: p5-Plack-Middleware-ReverseProxy
- Linux::Inotify2 deb: liblinux-inotify2-perl
rpm: perl-Linux-Inotify2
- (for public-inbox-watch on Linux)
+ (for public-inbox-watch and -imapd on Linux)
- IO::Compress (::Gzip) deb: perl-modules (or libio-compress-perl)
pkg: perl5
Copyright
---------
-Copyright 2013-2020 all contributors <meta@public-inbox.org>
+Copyright 2013-2021 all contributors <meta@public-inbox.org>
License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
Documentation/public-inbox-convert.pod
Documentation/public-inbox-daemon.pod
Documentation/public-inbox-edit.pod
+Documentation/public-inbox-extindex-format.pod
Documentation/public-inbox-httpd.pod
Documentation/public-inbox-imapd.pod
Documentation/public-inbox-index.pod
ci/deps.perl
ci/profiles.sh
ci/run.sh
+contrib/completion/lei-completion.bash
contrib/css/216dark.css
contrib/css/216light.css
contrib/css/README
examples/unsubscribe.milter
examples/unsubscribe.psgi
examples/varnish-4.vcl
+lei.sh
lib/PublicInbox/Address.pm
lib/PublicInbox/AddressPP.pm
lib/PublicInbox/Admin.pm
lib/PublicInbox/AdminEdit.pm
lib/PublicInbox/AltId.pm
lib/PublicInbox/Cgit.pm
+lib/PublicInbox/CmdIPC4.pm
lib/PublicInbox/CompressNoop.pm
lib/PublicInbox/Config.pm
lib/PublicInbox/ConfigIter.pm
lib/PublicInbox/Eml.pm
lib/PublicInbox/EmlContentFoo.pm
lib/PublicInbox/ExtMsg.pm
+lib/PublicInbox/ExtSearch.pm
+lib/PublicInbox/ExtSearchIdx.pm
lib/PublicInbox/FakeInotify.pm
lib/PublicInbox/Feed.pm
lib/PublicInbox/Filter/Base.pm
lib/PublicInbox/Filter/RubyLang.pm
lib/PublicInbox/Filter/SubjectTag.pm
lib/PublicInbox/Filter/Vger.pm
+lib/PublicInbox/Gcf2.pm
+lib/PublicInbox/Gcf2Client.pm
lib/PublicInbox/GetlineBody.pm
lib/PublicInbox/Git.pm
lib/PublicInbox/GitAsyncCat.pm
lib/PublicInbox/IMAPTracker.pm
lib/PublicInbox/IMAPdeflate.pm
lib/PublicInbox/IMAPsearchqp.pm
+lib/PublicInbox/IPC.pm
lib/PublicInbox/IdxStack.pm
lib/PublicInbox/Import.pm
lib/PublicInbox/In2Tie.pm
lib/PublicInbox/Inbox.pm
lib/PublicInbox/InboxIdle.pm
lib/PublicInbox/InboxWritable.pm
+lib/PublicInbox/Isearch.pm
lib/PublicInbox/KQNotify.pm
+lib/PublicInbox/LEI.pm
+lib/PublicInbox/LeiDedupe.pm
+lib/PublicInbox/LeiExternal.pm
+lib/PublicInbox/LeiOverview.pm
+lib/PublicInbox/LeiQuery.pm
+lib/PublicInbox/LeiSearch.pm
+lib/PublicInbox/LeiStore.pm
+lib/PublicInbox/LeiToMail.pm
+lib/PublicInbox/LeiXSearch.pm
lib/PublicInbox/Linkify.pm
lib/PublicInbox/Listener.pm
lib/PublicInbox/Lock.pm
lib/PublicInbox/ManifestJsGz.pm
lib/PublicInbox/Mbox.pm
lib/PublicInbox/MboxGz.pm
+lib/PublicInbox/MboxReader.pm
+lib/PublicInbox/MiscIdx.pm
+lib/PublicInbox/MiscSearch.pm
lib/PublicInbox/MsgIter.pm
lib/PublicInbox/MsgTime.pm
lib/PublicInbox/Msgmap.pm
lib/PublicInbox/NNTPD.pm
lib/PublicInbox/NNTPdeflate.pm
lib/PublicInbox/NewsWWW.pm
+lib/PublicInbox/OnDestroy.pm
+lib/PublicInbox/OpPipe.pm
lib/PublicInbox/Over.pm
lib/PublicInbox/OverIdx.pm
lib/PublicInbox/ProcessPipe.pm
lib/PublicInbox/SearchQuery.pm
lib/PublicInbox/SearchThread.pm
lib/PublicInbox/SearchView.pm
+lib/PublicInbox/SharedKV.pm
lib/PublicInbox/Sigfd.pm
lib/PublicInbox/Smsg.pm
lib/PublicInbox/SolverGit.pm
lib/PublicInbox/WwwStream.pm
lib/PublicInbox/WwwText.pm
lib/PublicInbox/Xapcmd.pm
+lib/PublicInbox/gcf2_libgit2.h
sa_config/Makefile
sa_config/README
sa_config/root/etc/spamassassin/public-inbox.pre
sa_config/user/.spamassassin/user_prefs
+script/lei
script/public-inbox-compact
script/public-inbox-convert
script/public-inbox-edit
+script/public-inbox-extindex
script/public-inbox-httpd
script/public-inbox-imapd
script/public-inbox-index
t/altid_v2.t
t/cgi.t
t/check-www-inbox.perl
+t/cmd_ipc.t
t/config.t
t/config_limiter.t
t/content_hash.t
t/eml_content_disposition.t
t/eml_content_type.t
t/epoll.t
+t/extsearch.t
t/fail-bin/spamc
t/fake_inotify.t
t/feed.t
t/filter_rubylang.t
t/filter_subjecttag.t
t/filter_vger.t
+t/gcf2.t
+t/gcf2_client.t
t/git-http-backend.psgi
t/git.fast-import-data
t/git.t
t/indexlevels-mirror-v1.t
t/indexlevels-mirror.t
t/init.t
+t/ipc.t
t/iso-2202-jp.eml
t/kqnotify.t
+t/lei-oneshot.t
+t/lei.t
+t/lei_dedupe.t
+t/lei_external.t
+t/lei_overview.t
+t/lei_store.t
+t/lei_to_mail.t
+t/lei_xsearch.t
t/linkify.t
t/main-bin/spamc
+t/mbox_reader.t
t/mda-mime.eml
t/mda.t
t/mda_filter_rubylang.t
t/mid.t
t/mime.t
+t/miscsearch.t
t/msg_iter-nested.eml
t/msg_iter-order.eml
t/msg_iter.t
t/nntpd.t
t/nodatacow.t
t/nulsubject.t
+t/on_destroy.t
t/over.t
t/plack-2-txt-bodies.eml
t/plack-attached-patch.eml
t/search-amsg.eml
t/search-thr-index.t
t/search.t
+t/shared_kv.t
t/sigfd.t
t/solve/0001-simple-mod.patch
t/solve/0002-rename-with-modifications.patch
t/xcpdb-reshard.t
xt/cmp-msgstr.t
xt/cmp-msgview.t
+xt/create-many-inboxes.t
xt/eml_check_limits.t
xt/git-http-backend.t
xt/git_async_cmp.t
xt/httpd-async-stream.t
xt/imapd-mbsync-oimap.t
xt/imapd-validate.t
+xt/lei-sigpipe.t
xt/mem-imapd-tls.t
xt/mem-msgview.t
xt/msgtime_cmp.t
#!/usr/bin/perl -w
-# Copyright (C) 2013-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2013-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use ExtUtils::MakeMaker;
# Only syntax-check the SpamAssassin plugin files when Mail::SpamAssassin
# itself is loadable; otherwise `perl -c` on them would fail.
@syn = grep(!/SaPlugin/, @syn) if !eval { require Mail::SpamAssassin };
$v->{syn_files} = \@syn;
# one "<file>.syntax" make target per file that gets a syntax check
$v->{my_syntax} = [map { "$_.syntax" } @syn];
-$v->{-m1} = [ map { (split('/'))[-1] } @EXE_FILES ];
+my @no_pod;
+$v->{-m1} = [ map {
+ my $x = (split('/'))[-1];
+ my $pod = "Documentation/$x.pod";
+ if (-f $pod) {
+ $x;
+ } else {
+ warn "W: $pod missing\n";
+ push @no_pod, $x;
+ ();
+ }
+ } @EXE_FILES ];
$v->{-m5} = [ qw(public-inbox-config public-inbox-v1-format
- public-inbox-v2-format) ];
+ public-inbox-v2-format public-inbox-extindex-format) ];
$v->{-m7} = [ qw(public-inbox-overview public-inbox-tuning) ];
$v->{-m8} = [ qw(public-inbox-daemon) ];
my @sections = (1, 5, 7, 8);
$mod =~ s/\.\w+\z//;
"lib/PublicInbox/$_" => "blib/man3/PublicInbox::$mod.\$(MAN3EXT)"
} qw(Git.pm Import.pm WWW.pod SaPlugin/ListMirror.pod);
+my $warn_no_pod = @no_pod ? "\n\t\@echo W: missing .pod: @no_pod\n" : '';
WriteMakefile(
NAME => 'PublicInbox', # n.b. camel-case is not our choice
-include Documentation/include.mk
$TGTS
+check-man ::$warn_no_pod
+
# syntax checks are currently GNU make only:
%.syntax :: %
@\$(PERL) -w -I lib -c \$<
touch -r MANIFEST \$@
\$(PERLRUN) \$@
+# Install symlinks to ~/bin (which is hopefully in PATH) which point to
+# this source tree.
+# prefix + bindir matches git.git Makefile:
+prefix = \$(HOME)
+bindir = \$(prefix)/bin
+symlink-install :
+ mkdir -p \$(bindir)
+ lei=\$\$(realpath lei.sh) && cd \$(bindir) && \\
+ for x in \$(EXE_FILES); do \\
+ ln -sf "\$\$lei" \$\$(basename "\$\$x"); \\
+ done
+
+update-copyrights :
+ \@case '\$(GNULIB_PATH)' in '') echo >&2 GNULIB_PATH unset; false;; esac
+ git ls-files | UPDATE_COPYRIGHT_HOLDER='all contributors' \\
+ UPDATE_COPYRIGHT_USE_INTERVALS=2 \\
+ xargs \$(GNULIB_PATH)/build-aux/update-copyright
EOF
}
public-inbox implements the sharing of an email inbox via git to
complement or replace traditional mailing lists. Readers may
-read via NNTP, Atom feeds or HTML archives.
+read via NNTP, IMAP, Atom feeds or HTML archives.
public-inbox spawned around three main ideas:
failures.
public-inbox uses the "pull" model. Casual readers may
-follow the list via NNTP, Atom feed or HTML archives.
+follow the list via NNTP, IMAP, Atom feed or HTML archives.
If a reader loses interest, they simply stop following.
* stores email in git, readers may have a complete archive of the inbox
-* Atom feed and NNTP allows casual readers to follow via feed reader
+* Atom feed, IMAP, and NNTP allow casual readers to follow via local tools
* uses only well-documented and easy-to-implement data formats
Requirements for reading:
-* any software capable of NNTP or following Atom feeds
+* any software capable of IMAP, NNTP or following Atom feeds
Any basic web browser will do for the HTML archives.
We primarily develop on w3m to maximize accessibility.
git clone https://public-inbox.org/public-inbox.git
git clone https://repo.or.cz/public-inbox.git
+ torsocks git clone http://ou63pmih66umazou.onion/public-inbox.git
torsocks git clone http://hjrcffqmbrq6wope.onion/public-inbox
See below for contact info.
tangentially related projects we depend on (e.g. git developers
on git@vger.kernel.org).
-The archives are readable via NNTP or HTTP:
+The archives are readable via IMAP, NNTP or HTTP:
- nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
+ nntps://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
+ imaps://news.public-inbox.org/inbox.comp.mail.public-inbox.meta.0
https://public-inbox.org/meta/
+AUTH=ANONYMOUS is supported for IMAP, but any username + password works
+
And as Tor hidden services:
http://hjrcffqmbrq6wope.onion/meta/
nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta
+ imap://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta.0
You may also clone all messages via git:
Copyright
---------
-Copyright 2013-2020 all contributors <meta@public-inbox.org>
+Copyright 2013-2021 all contributors <meta@public-inbox.org>
License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
This program is free software: you can redistribute it and/or modify
#!/usr/bin/perl -w
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Helper script for installing/uninstalling packages for CI use
# Intended for use on non-production chroots or VMs since it
#!/bin/sh
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Prints OS-specific package profiles to stdout (one per-newline) to use
#!/bin/sh
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
set -e
SUDO=${SUDO-'sudo'} PERL=${PERL-'perl'} MAKE=${MAKE-'make'}
--- /dev/null
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# preliminary bash completion support for lei (Local Email Interface)
+# Needs a lot of work, see `lei__complete' in lib/PublicInbox/LEI.pm
+_lei() {
+ COMPREPLY=($(compgen -W "$(lei _complete ${COMP_WORDS[@]})" \
+ -- "${COMP_WORDS[COMP_CWORD]}"))
+ return 0
+}
+complete -o default -o bashdefault -F _lei lei
# This is not needed for mlmmj since mlmmj uses SMTP:
# non_smtpd_milters = local:/var/spool/postfix/unsubscribe/unsubscribe.sock
-Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
--- Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+-- Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
-- License: GPLv2 or later <https://www.gnu.org/licenses/gpl-2.0.txt>
-- This commit filter maps a subject line to a search URL of a public-inbox
-- disclaimer: written by someone who does not know Lua.
--- Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+-- Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
-- License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
--
-- This filter accesses the PublicInbox::WwwHighlight PSGI endpoint
#!/usr/bin/perl -w
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
#
# PublicInbox::Cgit may be used independently of WWW.
use Plack::Builder;
use PublicInbox::Cgit;
use PublicInbox::Config;
-my $pi_config = PublicInbox::Config->new;
-my $cgit = PublicInbox::Cgit->new($pi_config);
+my $pi_cfg = PublicInbox::Config->new;
+my $cgit = PublicInbox::Cgit->new($pi_cfg);
builder {
eval { enable 'ReverseProxy' };
#!/usr/bin/perl -w
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Usage: plackup [OPTIONS] /path/to/this/file
#!/usr/bin/perl -w
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
#
# NewsWWW may be used independently of WWW. This can be useful
#!/usr/bin/perl -w
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
# License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
# Note: this is part of our test suite, update t/plack.t if this changes
# Usage: plackup [OPTIONS] /path/to/this/file
#!/usr/bin/perl -w
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
#!/usr/bin/perl -w
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
# This should not require any other PublicInbox code, but may use
# PublicInbox::Config if ~/.public-inbox/config exists or
--- /dev/null
+#!/bin/sh -e
+# symlink this file to a directory in PATH to run lei (or anything in script/*)
+# without needing perms to install globally. Used by "make symlink-install"
+p=$(realpath "$0" || readlink "$0") # neither is POSIX, but common
+p=$(dirname "$p") c=$(basename "$0") # both are POSIX
+exec ${PERL-perl} -w -I"$p"/lib "$p"/script/"${c%.sh}" "$@"
+: this script is too short to copyright
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
package PublicInbox::Address;
use strict;
-use warnings;
+use v5.10.1;
+use parent 'Exporter';
+our @EXPORT_OK = qw(pairs);
sub xs_emails {
grep { defined } map { $_->address() } parse_email_addresses($_[0])
} parse_email_addresses($_[0]);
}
+sub xs_pairs { # for JMAP, RFC 8621 section 4.1.2.3
+ [ map { # LHS (name) may be undef
+ [ $_->phrase // $_->comment, $_->address ]
+ } parse_email_addresses($_[0]) ];
+}
+
eval {
require Email::Address::XS;
Email::Address::XS->import(qw(parse_email_addresses));
*emails = \&xs_emails;
*names = \&xs_names;
+ *pairs = \&xs_pairs;
};
if ($@) {
require PublicInbox::AddressPP;
*emails = \&PublicInbox::AddressPP::emails;
*names = \&PublicInbox::AddressPP::names;
+ *pairs = \&PublicInbox::AddressPP::pairs;
}
1;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
package PublicInbox::AddressPP;
use strict;
}
sub names {
+ # split by address and post-address comment
my @p = split(/<?([^@<>]+)\@[\w\.\-]+>?\s*(\(.*?\))?(?:,\s*|\z)/,
$_[0]);
my @ret;
@ret;
}
+sub pairs { # for JMAP, RFC 8621 section 4.1.2.3
+ my ($s) = @_;
+ [ map {
+ my $addr = $_;
+ if ($s =~ s/\A\s*(.*?)\s*<\Q$addr\E>\s*(.*?)\s*(?:,|\z)// ||
+ $s =~ s/\A\s*(.*?)\s*\Q$addr\E\s*(.*?)\s*(?:,|\z)//) {
+ my ($phrase, $comment) = ($1, $2);
+ $phrase =~ tr/\r\n\t / /s;
+ $phrase =~ s/\A['"\s]*//;
+ $phrase =~ s/['"\s]*\z//;
+ $phrase =~ s/\s*<*\s*\z//;
+ $phrase = undef if $phrase !~ /\S/;
+ $comment = ($comment =~ /\((.*?)\)/) ? $1 : undef;
+ [ $phrase // $comment, $addr ]
+ } else {
+ ();
+ }
+ } emails($s) ];
+}
+
1;
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# common stuff for administrative command-line tools
package PublicInbox::Admin;
use strict;
use parent qw(Exporter);
-use Cwd qw(abs_path);
-use POSIX ();
-our @EXPORT_OK = qw(resolve_repo_dir setup_signals);
+our @EXPORT_OK = qw(setup_signals);
use PublicInbox::Config;
use PublicInbox::Inbox;
use PublicInbox::Spawn qw(popen_rd);
+*rel2abs_collapsed = \&PublicInbox::Config::rel2abs_collapsed;
sub setup_signals {
my ($cb, $arg) = @_; # optional
+ require POSIX;
# we call exit() here instead of _exit() so DESTROY methods
# get called (e.g. File::Temp::Dir and PublicInbox::Msgmap)
};
}
-sub resolve_repo_dir {
+sub resolve_inboxdir { # returns an absolute inbox path; sets $$ver to 2 or 1 if $ver is given
my ($cd, $ver) = @_;
- my $prefix = defined $cd ? $cd : './';
- if (-d $prefix && -f "$prefix/inbox.lock") { # v2
- $$ver = 2 if $ver;
- return abs_path($prefix);
+ my $try = $cd // '.'; # start from the current directory when $cd is undef
+ my $root_dev_ino; # "st_dev\0st_ino" of '/', filled in on first use
+ while (1) { # favor v2, first
+ if (-f "$try/inbox.lock") { # inbox.lock is treated as the v2 marker
+ $$ver = 2 if $ver;
+ return rel2abs_collapsed($try);
+ } elsif (-d $try) {
+ my @try = stat _; # reuses the stat buffer from the -d test
+ $root_dev_ino //= do {
+ my @root = stat('/') or die "stat /: $!\n";
+ "$root[0]\0$root[1]";
+ };
+ last if "$try[0]\0$try[1]" eq $root_dev_ino; # reached '/': stop looking for v2
+ $try .= '/..'; # continue, cd up
+ } else {
+ die "`$try' is not a directory\n";
+ }
}
+ # try v1 bare git dirs
my $cmd = [ qw(git rev-parse --git-dir) ];
my $fh = popen_rd($cmd, undef, {-C => $cd});
my $dir = do { local $/; <$fh> };
- close $fh or die "error in ".join(' ', @$cmd)." (cwd:$cd): $!\n";
+ close $fh or die "error in @$cmd (cwd:${\($cd // '.')}): $!\n"; # $cd may be undef, show '.' instead
chomp $dir;
$$ver = 1 if $ver;
- return abs_path($cd) if ($dir eq '.' && defined $cd);
- abs_path($dir);
+ rel2abs_collapsed($dir eq '.' ? ($cd // $dir) : $dir); # '.' from git means $cd itself is the git dir
}
# for unconfigured inboxes
name => $name,
address => [ "$name\@example.com" ],
inboxdir => $dir,
- # TODO: consumers may want to warn on this:
- #-unconfigured => 1,
+ # consumers (-convert) warn on this:
+ -unconfigured => 1,
});
}
}
my $min_ver = $opt->{-min_inbox_version} || 0;
+ # lookup inboxes by st_dev + st_ino instead of {inboxdir} pathnames,
+ # pathnames are not unique due to symlinks and bind mounts
my (@old, @ibxs);
- my %dir2ibx;
- if ($cfg) {
+ if ($opt->{all}) {
$cfg->each_inbox(sub {
my ($ibx) = @_;
- my $path = abs_path($ibx->{inboxdir});
- if (defined($path)) {
- $dir2ibx{$path} = $ibx;
+ if (-e $ibx->{inboxdir}) {
+ push(@ibxs, $ibx) if $ibx->version >= $min_ver;
} else {
- warn <<EOF;
-W: $ibx->{name} $ibx->{inboxdir}: $!
-EOF
+ warn "W: $ibx->{name} $ibx->{inboxdir}: $!\n";
}
});
- }
- if ($opt->{all}) {
- my @all = values %dir2ibx;
- @all = grep { $_->version >= $min_ver } @all;
- push @ibxs, @all;
} else { # directories specified on the command-line
- my $i = 0;
my @dirs = @$argv;
- push @dirs, '.' unless @dirs;
- foreach (@dirs) {
- my $v;
- my $dir = resolve_repo_dir($_, \$v);
- if ($v < $min_ver) {
+ push @dirs, '.' if !@dirs && $opt->{-use_cwd};
+ my %s2i; # "st_dev\0st_ino" => array index
+ for (my $i = 0; $i <= $#dirs; $i++) {
+ my $dir = $dirs[$i];
+ my @st = stat($dir) or die "stat($dir): $!\n";
+ $dir = $dirs[$i] = resolve_inboxdir($dir, \(my $ver));
+ if ($ver >= $min_ver) {
+ $s2i{"$st[0]\0$st[1]"} //= $i;
+ } else {
push @old, $dir;
- next;
}
- my $ibx = $dir2ibx{$dir} ||= unconfigured_ibx($dir, $i);
- $i++;
- push @ibxs, $ibx;
}
+ my $done = \'done';
+ eval {
+ $cfg->each_inbox(sub {
+ my ($ibx) = @_;
+ return if $ibx->version < $min_ver;
+ my $dir = $ibx->{inboxdir};
+ if (my @s = stat $dir) {
+ my $i = delete($s2i{"$s[0]\0$s[1]"})
+ // return;
+ $ibxs[$i] = $ibx;
+ die $done if !keys(%s2i);
+ } else {
+ warn "W: $ibx->{name} $dir: $!\n";
+ }
+ });
+ };
+ die $@ if $@ && $@ ne $done;
+ for my $i (sort { $a <=> $b } values %s2i) {
+ $ibxs[$i] = unconfigured_ibx($dirs[$i], $i);
+ }
+ @ibxs = grep { defined } @ibxs; # duplicates are undef
}
if (@old) {
die "-V$min_ver inboxes not supported by $0\n\t",
sub index_inbox {
my ($ibx, $im, $opt) = @_;
+ require PublicInbox::InboxWritable;
my $jobs = delete $opt->{jobs} if $opt;
if (my $pr = $opt->{-progress}) {
$pr->("indexing $ibx->{inboxdir} ...\n");
}
local %SIG = %SIG;
setup_signals(\&index_terminate, $ibx);
+ my $warn_cb = $SIG{__WARN__} // \&CORE::warn;
+ my $idx = { current_info => $ibx->{inboxdir} };
+ my $warn_ignore = PublicInbox::InboxWritable->can('warn_ignore');
+ local $SIG{__WARN__} = sub {
+ return if $warn_ignore->(@_);
+ $warn_cb->($idx->{current_info}, ': ', @_);
+ };
if (ref($ibx) && $ibx->version == 2) {
eval { require PublicInbox::V2Writable };
die "v2 requirements not met: $@\n" if $@;
} else {
my $n = $v2w->{shards};
if ($jobs < ($n + 1) && !$opt->{reshard}) {
- warn
-"Unable to respect --jobs=$jobs on index, inbox was created with $n shards\n";
+ warn <<EOM;
+Unable to respect --jobs=$jobs on index, inbox was created with $n shards
+EOM
}
}
}
- my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ };
- local $SIG{__WARN__} = sub {
- $warn_cb->($v2w->{current_info}, ': ', @_);
- };
- $v2w->index_sync($opt);
+ $idx = $v2w;
} else {
require PublicInbox::SearchIdx;
- my $s = PublicInbox::SearchIdx->new($ibx, 1);
- $s->index_sync($opt);
+ $idx = PublicInbox::SearchIdx->new($ibx, 1);
}
+ $idx->index_sync($opt);
+ $idx->{nidx} // 0; # returns number processed
}
sub progress_prepare ($) {
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# common stuff between -edit, -purge (and maybe -learn in the future)
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Used for giving serial numbers to messages. This can be tied to
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# wrapper for cgit(1) and git-http-backend(1) for browsing and
use PublicInbox::WwwStatic qw(r);
sub locate_cgit ($) {
- my ($pi_config) = @_;
- my $cgit_bin = $pi_config->{'publicinbox.cgitbin'};
- my $cgit_data = $pi_config->{'publicinbox.cgitdata'};
+ my ($pi_cfg) = @_;
+ my $cgit_bin = $pi_cfg->{'publicinbox.cgitbin'};
+ my $cgit_data = $pi_cfg->{'publicinbox.cgitdata'};
# /var/www/htdocs/cgit is the default install path from cgit.git
# /usr/{lib,share}/cgit is where Debian puts cgit
}
sub new {
- my ($class, $pi_config) = @_;
- my ($cgit_bin, $cgit_data) = locate_cgit($pi_config);
+ my ($class, $pi_cfg) = @_;
+ my ($cgit_bin, $cgit_data) = locate_cgit($pi_cfg);
my $self = bless {
cmd => [ $cgit_bin ],
cgit_data => $cgit_data,
- pi_config => $pi_config,
+ pi_cfg => $pi_cfg,
}, $class;
- $pi_config->fill_all; # fill in -code_repos mapped to inboxes
+ $pi_cfg->fill_all; # fill in -code_repos mapped to inboxes
# some cgit repos may not be mapped to inboxes, so ensure those exist:
- my $code_repos = $pi_config->{-code_repos};
- foreach my $k (keys %$pi_config) {
+ my $code_repos = $pi_cfg->{-code_repos};
+ foreach my $k (keys %$pi_cfg) {
$k =~ /\Acoderepo\.(.+)\.dir\z/ or next;
- my $dir = $pi_config->{$k};
+ my $dir = $pi_cfg->{$k};
$code_repos->{$1} ||= PublicInbox::Git->new($dir);
}
while (my ($nick, $repo) = each %$code_repos) {
$self->{"\0$nick"} = $repo;
}
- my $cgit_static = $pi_config->{-cgit_static};
+ my $cgit_static = $pi_cfg->{-cgit_static};
my $static = join('|', map { quotemeta $_ } keys %$cgit_static);
$self->{static} = qr/\A($static)\z/;
$self;
my $rdr = input_prepare($env) or return r(500);
my $qsp = PublicInbox::Qspawn->new($self->{cmd}, $cgi_env, $rdr);
- my $limiter = $self->{pi_config}->limiter('-cgit');
+ my $limiter = $self->{pi_cfg}->limiter('-cgit');
$qsp->psgi_return($env, $limiter, $parse_cgi_headers);
}
--- /dev/null
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# callers should use PublicInbox::CmdIPC4->can('send_cmd4') (or recv_cmd4)
+# first choice for script/lei front-end and 2nd choice for lei backend
+# libsocket-msghdr-perl is in Debian but not many other distros as of 2021.
+package PublicInbox::CmdIPC4;
+use strict;
+use v5.10.1;
+use Socket qw(SOL_SOCKET SCM_RIGHTS);
+BEGIN { eval { # if Socket::MsgHdr fails to load, neither sub below gets defined
+require Socket::MsgHdr; # XS
+no warnings 'once';
+
+# 3 FDs per-sendmsg(2) + buffer
+*send_cmd4 = sub ($$$$) { # (sock, fds, buf, flags) = @_;
+ my ($sock, $fds, undef, $flags) = @_; # buf is read via $_[2] below
+ my $mh = Socket::MsgHdr->new(buf => $_[2]);
+ $mh->cmsghdr(SOL_SOCKET, SCM_RIGHTS, # FDs go out as SCM_RIGHTS ancillary data
+ pack('i' x scalar(@$fds), @$fds)); # one native int per FD
+ Socket::MsgHdr::sendmsg($sock, $mh, $flags); # result of sendmsg is the return value
+};
+
+*recv_cmd4 = sub ($$$) { # (sock, buf, len); returns the list of received FDs
+ my ($s, undef, $len) = @_; # $_[1] = destination buffer
+ my $mh = Socket::MsgHdr->new(buflen => $len, controllen => 256);
+ my $r = Socket::MsgHdr::recvmsg($s, $mh, 0) // return ($_[1] = undef); # undef result: clear caller's buffer too
+ $_[1] = $mh->buf; # written through the @_ alias into the caller's variable
+ return () if $r == 0; # nothing read: no FDs to hand back
+ my (undef, undef, $data) = $mh->cmsghdr; # only the data field is used
+ defined($data) ? unpack('i' x (length($data) / 4), $data) : (); # assumes 4-byte native int -- TODO confirm
+};
+
+} } # /eval /BEGIN
+
+1;
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Provide the same methods as Compress::Raw::Zlib::Deflate but
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Used throughout the project for reading configuration
$self->{-by_list_id} = {};
$self->{-by_name} = {};
$self->{-by_newsgroup} = {};
+ $self->{-by_eidx_key} = {};
$self->{-no_obfuscate} = {};
$self->{-limiters} = {};
$self->{-code_repos} = {}; # nick => PublicInbox::Git object
$self->{-by_name}->{$name} // _fill($self, "publicinbox.$name");
}
+sub lookup_ei { # returns the object for [extindex "$name"], or undef
+ my ($self, $name) = @_;
+ $self->{-ei_by_name}->{$name} //= _fill_ei($self, "extindex.$name"); # only defined results are cached
+}
+
+# special case for [extindex "all"]
+sub ALL { lookup_ei($_[0], 'all') } # $_[0] is the config object
+
sub each_inbox {
my ($self, $cb, @arg) = @_;
# may auto-vivify if config file is non-existent:
sub config_fh_parse ($$$) {
my ($fh, $rs, $fs) = @_;
- my %rv;
- my (%section_seen, @section_order);
+ my (%rv, %seen, @section_order, $line, $k, $v, $section, $cur, $i);
local $/ = $rs;
- while (defined(my $line = <$fh>)) {
- chomp $line;
- my ($k, $v) = split($fs, $line, 2);
- my ($section) = ($k =~ /\A(\S+)\.[^\.]+\z/);
- unless (defined $section_seen{$section}) {
- $section_seen{$section} = 1;
- push @section_order, $section;
- }
-
- my $cur = $rv{$k};
- if (defined $cur) {
+ while (defined($line = <$fh>)) { # perf critical with giant configs
+ $i = index($line, $fs);
+ $k = substr($line, 0, $i);
+ $v = substr($line, $i + 1, -1); # chop off $fs
+ $section = substr($k, 0, rindex($k, '.'));
+ $seen{$section} //= push(@section_order, $section);
+
+ if (defined($cur = $rv{$k})) {
if (ref($cur) eq "ARRAY") {
push @$cur, $v;
} else {
sub git_config_dump {
my ($file) = @_;
return {} unless -e $file;
- my @cmd = (qw/git config -z -l --includes/, "--file=$file");
- my $cmd = join(' ', @cmd);
- my $fh = popen_rd(\@cmd);
+ my $cmd = [ qw(git config -z -l --includes), "--file=$file" ];
+ my $fh = popen_rd($cmd);
my $rv = config_fh_parse($fh, "\0", "\n");
- close $fh or die "failed to close ($cmd) pipe: $?";
+ close $fh or die "failed to close (@$cmd) pipe: $?";
$rv;
}
}
}
+# abs_path resolves symlinks, so we want to avoid it if rel2abs
+# is sufficient and doesn't leave "/.." or "/../"
+sub rel2abs_collapsed { # returns an absolute path with no "/.." component
+ require File::Spec;
+ my $p = File::Spec->rel2abs($_[-1]); # path is the last argument (presumably so method-style calls work too)
+ return $p if substr($p, -3, 3) ne '/..' && index($p, '/../') < 0; # nothing to collapse, symlinks stay as-is
+ require Cwd; # loaded only when ".." has to be resolved
+ Cwd::abs_path($p);
+}
+
sub _fill {
my ($self, $pfx) = @_;
my $ibx = {};
}
}
- # backwards compatibility:
- $ibx->{inboxdir} //= $self->{"$pfx.mainrepo"};
- if (($ibx->{inboxdir} // '') =~ /\n/s) {
- warn "E: `$ibx->{inboxdir}' must not contain `\\n'\n";
+ # "mainrepo" is backwards compatibility:
+ my $dir = $ibx->{inboxdir} //= $self->{"$pfx.mainrepo"} // return;
+ if (index($dir, "\n") >= 0) {
+ warn "E: `$dir' must not contain `\\n'\n";
return;
}
foreach my $k (qw(obfuscate)) {
}
}
- return unless defined($ibx->{inboxdir});
- my $name = $pfx;
- $name =~ s/\Apublicinbox\.//;
-
+ my $name = substr($pfx, length('publicinbox.'));
if (!valid_inbox_name($name)) {
warn "invalid inbox name: '$name'\n";
return;
}
$ibx->{name} = $name;
- $ibx->{-pi_config} = $self;
+ $ibx->{-pi_cfg} = $self;
$ibx = PublicInbox::Inbox->new($ibx);
foreach (@{$ibx->{address}}) {
my $lc_addr = lc($_);
$self->{-by_list_id}->{lc($list_id)} = $ibx;
}
}
- if (my $ng = $ibx->{newsgroup}) {
- $self->{-by_newsgroup}->{$ng} = $ibx;
+ if (defined(my $ngname = $ibx->{newsgroup})) {
+ if (ref($ngname)) {
+ delete $ibx->{newsgroup};
+ warn 'multiple newsgroups not supported: '.
+ join(', ', @$ngname). "\n";
+ # Newsgroup name needs to be compatible with RFC 3977
+ # wildmat-exact and RFC 3501 (IMAP) ATOM-CHAR.
+ # Leave out a few chars likely to cause problems or conflicts:
+ # '|', '<', '>', ';', '#', '$', '&',
+ } elsif ($ngname =~ m![^A-Za-z0-9/_\.\-\~\@\+\=:]! ||
+ $ngname eq '') {
+ delete $ibx->{newsgroup};
+ warn "newsgroup name invalid: `$ngname'\n";
+ } else {
+ # PublicInbox::NNTPD does stricter ->nntp_usable
+ # checks, keep this lean for startup speed
+ $self->{-by_newsgroup}->{$ngname} = $ibx;
+ }
+ }
+ unless (defined $ibx->{newsgroup}) { # for ->eidx_key
+ my $abs = rel2abs_collapsed($dir);
+ if ($abs ne $dir) {
+ warn "W: `$dir' canonicalized to `$abs'\n";
+ $ibx->{inboxdir} = $abs;
+ }
}
$self->{-by_name}->{$name} = $ibx;
if ($ibx->{obfuscate}) {
push @$repo_objs, $repo if $repo;
}
}
+ if (my $es = ALL($self)) {
+ require PublicInbox::Isearch;
+ $ibx->{isrch} = PublicInbox::Isearch->new($ibx, $es);
+ }
+ $self->{-by_eidx_key}->{$ibx->eidx_key} = $ibx;
+}
- $ibx
+sub _fill_ei ($$) { # ($self, "extindex.NAME"); returns a PublicInbox::ExtSearch or undef
+ my ($self, $pfx) = @_;
+ require PublicInbox::ExtSearch; # loaded lazily, on first lookup
+ my $d = $self->{"$pfx.topdir"};
+ defined($d) && -d $d ? PublicInbox::ExtSearch->new($d) : undef; # undef if topdir is unset or not a directory
}
sub urlmatch {
}
}
+sub json { # returns a JSON encoder object, or undef if no JSON module can be loaded
+ state $json; # kept across calls once defined
+ $json //= do {
+ for my $mod (qw(Cpanel::JSON::XS JSON::MaybeXS JSON JSON::PP)) { # tried in this order, first success wins
+ eval "require $mod" or next;
+ # ->ascii encodes non-ASCII to "\uXXXX"
+ $json = $mod->new->ascii(1) and last;
+ }
+ $json; # still undef when every require failed
+ };
+}
+
1;
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Intended for PublicInbox::DS->EventLoop in read-only daemons
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Unstable internal API.
# (tmpio = [ GLOB, offset, [ length ] ])
package PublicInbox::DS;
use strict;
+use v5.10.1;
+use parent qw(Exporter);
use bytes;
-use POSIX qw(WNOHANG);
+use POSIX qw(WNOHANG sigprocmask SIG_SETMASK);
use IO::Handle qw();
use Fcntl qw(SEEK_SET :DEFAULT O_APPEND);
use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC);
-use parent qw(Exporter);
-our @EXPORT_OK = qw(now msg_more);
-use 5.010_001;
use Scalar::Util qw(blessed);
use PublicInbox::Syscall qw(:epoll);
use PublicInbox::Tmpfile;
use Errno qw(EAGAIN EINVAL);
use Carp qw(confess carp);
+our @EXPORT_OK = qw(now msg_more dwaitpid);
my $nextq; # queue for next_tick
my $wait_pids; # list of [ pid, callback, callback_arg ]
$PostLoopCallback, # subref to call at the end of each loop, if defined (global)
$LoopTimeout, # timeout of event loop in milliseconds
- $DoneInit, # if we've done the one-time module init yet
@Timers, # timers
$in_loop,
);
=cut
sub Reset {
+ $in_loop = undef; # first in case DESTROY callbacks use this
%DescriptorMap = ();
- $in_loop = $wait_pids = $later_queue = $reap_armed = undef;
+ $wait_pids = $later_queue = $reap_armed = undef;
$EXPMAP = {};
$nextq = $ToClose = $later_timer = $exp_timer = undef;
$LoopTimeout = -1; # no timeout by default
@Timers = ();
$PostLoopCallback = undef;
- $DoneInit = 0;
$_io = undef; # closes real $Epoll FD
$Epoll = undef; # may call DSKQXS::DESTROY
-
- *EventLoop = *FirstTimeEventLoop;
}
=head2 C<< CLASS->SetLoopTimeout( $timeout ) >>
immediately.
=cut
-sub SetLoopTimeout {
- return $LoopTimeout = $_[1] + 0;
-}
+sub SetLoopTimeout { $LoopTimeout = $_[1] + 0 }
=head2 C<< PublicInbox::DS::add_timer( $seconds, $coderef, $arg) >>
fcntl($_io, F_SETFD, $fl | FD_CLOEXEC);
}
+# caller sets return value to $Epoll
sub _InitPoller
{
- return if $DoneInit;
- $DoneInit = 1;
-
if (PublicInbox::Syscall::epoll_defined()) {
- $Epoll = epoll_create();
- set_cloexec($Epoll) if (defined($Epoll) && $Epoll >= 0);
+ my $fd = epoll_create();
+ set_cloexec($fd) if (defined($fd) && $fd >= 0);
+ $fd;
} else {
my $cls;
for (qw(DSKQXS DSPoll)) {
last if eval "require $cls";
}
$cls->import(qw(epoll_ctl epoll_wait));
- $Epoll = $cls->new;
+ $cls->new;
}
- *EventLoop = *EpollEventLoop;
}
=head2 C<< CLASS->EventLoop() >>
C<PostLoopCallback> below for how to exit the loop.
=cut
-sub FirstTimeEventLoop {
- my $class = shift;
-
- _InitPoller();
-
- EventLoop($class);
-}
sub now () { clock_gettime(CLOCK_MONOTONIC) }
my $timeout = int(($Timers[0][0] - $now) * 1000) + 1;
# -1 is an infinite timeout, so prefer a real timeout
- return $timeout if $LoopTimeout == -1;
+ ($LoopTimeout < 0 || $LoopTimeout >= $timeout) ? $timeout : $LoopTimeout;
+}
+
+sub sig_setmask { sigprocmask(SIG_SETMASK, @_) or die "sigprocmask: $!" }
- # otherwise pick the lower of our regular timeout and time until
- # the next timer
- return $LoopTimeout if $LoopTimeout < $timeout;
- return $timeout;
+sub block_signals () { # blocks every signal; returns the previous mask for sig_setmask()
+ my $oldset = POSIX::SigSet->new; # receives the mask in effect before the change
+ my $newset = POSIX::SigSet->new;
+ $newset->fillset or die "fillset: $!"; # set now contains all signals
+ sig_setmask($newset, $oldset); # dies if sigprocmask fails
+ $oldset;
}
# We can't use waitpid(-1) safely here since it can hit ``, system(),
$reap_armed = undef;
my $tmp = $wait_pids or return;
$wait_pids = undef;
+ my $oldset = block_signals();
foreach my $ary (@$tmp) {
my ($pid, $cb, $arg) = @$ary;
my $ret = waitpid($pid, WNOHANG);
if ($ret == 0) {
push @$wait_pids, $ary; # autovivifies @$wait_pids
- } elsif ($cb) {
- eval { $cb->($arg, $pid) };
+ } elsif ($ret == $pid) {
+ if ($cb) {
+ eval { $cb->($arg, $pid) };
+ warn "E: dwaitpid($pid) in_loop: $@" if $@;
+ }
+ } else {
+ warn "waitpid($pid, WNOHANG) = $ret, \$!=$!, \$?=$?";
}
}
- # we may not be done, yet, and could've missed/masked a SIGCHLD:
- $reap_armed //= requeue(\&reap_pids) if $wait_pids;
+ sig_setmask($oldset);
}
# reentrant SIGCHLD handler (since reap_pids is not reentrant)
$PostLoopCallback ? $PostLoopCallback->(\%DescriptorMap) : 1;
}
-sub EpollEventLoop {
+sub EventLoop {
+ $Epoll //= _InitPoller();
local $in_loop = 1;
+ my @events;
do {
- my @events;
- my $i;
my $timeout = RunTimers();
# get up to 1000 events
- my $evcount = epoll_wait($Epoll, 1000, $timeout, \@events);
- for ($i=0; $i<$evcount; $i++) {
+ epoll_wait($Epoll, 1000, $timeout, \@events);
+ for my $fd (@events) {
# it's possible epoll_wait returned many events, including some at the end
# that ones in the front triggered unregister-interest actions. if we
# can't find the %sock entry, it's because we're no longer interested
# in that event.
- $DescriptorMap{$events[$i]->[0]}->event_step;
+ $DescriptorMap{$fd}->event_step;
}
} while (PostEventLoop());
_run_later();
$self->{sock} = $sock;
my $fd = fileno($sock);
- _InitPoller();
-
+ $Epoll //= _InitPoller();
retry:
if (epoll_ctl($Epoll, EPOLL_CTL_ADD, $fd, $ev)) {
if ($! == EINVAL && ($ev & EPOLLEXCLUSIVE)) {
}
}
-# must be called with eval, PublicInbox::DS may not be loaded (see t/qspawn.t)
-sub dwaitpid ($$$) {
- die "Not in EventLoop\n" unless $in_loop;
- push @$wait_pids, [ @_ ]; # [ $pid, $cb, $arg ]
-
- # We could've just missed our SIGCHLD, cover it, here:
- enqueue_reap();
+sub dwaitpid ($;$$) { # ($pid, $cb, $arg): reap $pid, then call $cb->($arg, $pid) if $cb is given
+ my ($pid, $cb, $arg) = @_;
+ if ($in_loop) { # inside EventLoop: queue it rather than blocking in waitpid
+ push @$wait_pids, [ $pid, $cb, $arg ];
+ # We could've just missed our SIGCHLD, cover it, here:
+ enqueue_reap();
+ } else { # no event loop running: wait synchronously
+ my $ret = waitpid($pid, 0);
+ if ($ret == $pid) {
+ if ($cb) {
+ eval { $cb->($arg, $pid) }; # callback errors are reported below, not propagated
+ carp "E: dwaitpid($pid) !in_loop: $@" if $@;
+ }
+ } else { # waitpid did not return $pid
+ carp "waitpid($pid, 0) = $ret, \$!=$!, \$?=$?";
+ }
+ }
}
sub _run_later () {
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# Licensed the same as Danga::Socket (and Perl5)
# License: GPL-1.0+ or Artistic-1.0-Perl
# <https://www.gnu.org/licenses/gpl-1.0.txt>
use IO::KQueue;
use Errno qw(EAGAIN);
use PublicInbox::Syscall qw(EPOLLONESHOT EPOLLIN EPOLLOUT EPOLLET
- EPOLL_CTL_ADD EPOLL_CTL_MOD EPOLL_CTL_DEL $SFD_NONBLOCK);
+ EPOLL_CTL_ADD EPOLL_CTL_MOD EPOLL_CTL_DEL SFD_NONBLOCK);
our @EXPORT_OK = qw(epoll_ctl epoll_wait);
sub EV_DISPATCH () { 0x0080 }
sub TIEHANDLE { # similar to signalfd()
my ($class, $signo, $flags) = @_;
my $self = $class->new;
- $self->{timeout} = ($flags & $SFD_NONBLOCK) ? 0 : -1;
+ $self->{timeout} = ($flags & SFD_NONBLOCK) ? 0 : -1;
my $kq = $self->{kq};
$kq->EV_SET($_, EVFILT_SIGNAL, EV_ADD) for @$signo;
$self;
}
}
# caller only cares for $events[$i]->[0]
- scalar(@$events);
+ $_ = $_->[0] for @$events;
}
# kqueue is close-on-fork (not exec), so we must not close it
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# Licensed the same as Danga::Socket (and Perl5)
# License: GPL-1.0+ or Artistic-1.0-Perl
# <https://www.gnu.org/licenses/gpl-1.0.txt>
my $fd = $pset[$i++];
my $revents = $pset[$i++] or next;
delete($self->{$fd}) if $self->{$fd} & EPOLLONESHOT;
- push @$events, [ $fd ];
+ push @$events, $fd;
}
my $nevents = scalar @$events;
if ($n != $nevents) {
warn "BUG? poll() returned $n, but got $nevents";
}
}
- $n;
}
1;
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-# contains common daemon code for the httpd, imapd, and nntpd servers.
-# This may be used for read-only IMAP server if we decide to implement it.
+#
+# Contains common daemon code for the httpd, imapd, and nntpd servers
+# and designed for handling thousands of untrusted clients over slow
+# and/or lossy connections.
package PublicInbox::Daemon;
use strict;
use warnings;
use POSIX qw(WNOHANG :signal_h);
use Socket qw(IPPROTO_TCP SOL_SOCKET);
sub SO_ACCEPTFILTER () { 0x1000 }
-use Cwd qw/abs_path/;
STDOUT->autoflush(1);
STDERR->autoflush(1);
use PublicInbox::DS qw(now);
-use PublicInbox::Syscall qw($SFD_NONBLOCK);
+use PublicInbox::Syscall qw(SFD_NONBLOCK);
require PublicInbox::Listener;
use PublicInbox::EOFpipe;
use PublicInbox::Sigfd;
+use PublicInbox::GitAsyncCat;
my @CMD;
my ($set_user, $oldset);
my (@cfg_listen, $stdout, $stderr, $group, $user, $pid_file, $daemonize);
sub daemon_prepare ($) {
my ($default_listen) = @_;
my $listener_names = {}; # sockname => IO::Handle
- $oldset = PublicInbox::Sigfd::block_signals();
+ $oldset = PublicInbox::DS::block_signals();
@CMD = ($0, @ARGV);
my ($prog) = ($CMD[0] =~ m!([^/]+)\z!g);
my $help = <<EOF;
sub daemonize () {
if ($daemonize) {
+ require Cwd;
foreach my $i (0..$#ARGV) {
my $arg = $ARGV[$i];
next unless -e $arg;
- $ARGV[$i] = abs_path($arg);
+ $ARGV[$i] = Cwd::abs_path($arg);
}
check_absolute('stdout', $stdout);
check_absolute('stderr', $stderr);
};
if ($daemonize) {
- my $pid = fork;
- die "could not fork: $!\n" unless defined $pid;
+ my $pid = fork // die "fork: $!";
exit if $pid;
open(STDIN, '+<', '/dev/null') or
open STDOUT, '>&STDIN' or die "redirect stdout failed: $!\n";
open STDERR, '>&STDIN' or die "redirect stderr failed: $!\n";
POSIX::setsid();
- $pid = fork;
- die "could not fork: $!\n" unless defined $pid;
+ $pid = fork // die "fork: $!";
exit if $pid;
}
return unless defined $pid_file;
foreach my $fd (3..$end) {
my $s = IO::Handle->new_from_fd($fd, 'r');
if (my $k = sockname($s)) {
- if ($s->blocking) {
- $s->blocking(0);
- warn <<"";
+ my $prev_was_blocking = $s->blocking(0);
+ warn <<"" if $prev_was_blocking;
Inherited socket (fd=$fd) is blocking, making it non-blocking.
Set 'NonBlocking = true' in the systemd.service unit to avoid stalled
processes when multiple service instances start.
- }
$listener_names->{$k} = $s;
push @rv, $s;
} else {
}
sub kill_workers ($) {
- my ($s) = @_;
-
- while (my ($pid, $id) = each %pids) {
- kill $s, $pid;
- }
+ my ($sig) = @_;
+ kill $sig, keys(%pids);
}
sub upgrade_aborted ($) {
CHLD => \&reap_children,
};
my $sigfd = PublicInbox::Sigfd->new($sig, 0);
- local %SIG = (%SIG, %$sig) if !$sigfd;
- PublicInbox::Sigfd::sig_setmask($oldset) if !$sigfd;
+ local @SIG{keys %$sig} = values(%$sig) unless $sigfd;
+ PublicInbox::DS::sig_setmask($oldset) if !$sigfd;
while (1) { # main loop
my $n = scalar keys %pids;
unless (@listeners) {
}
my $want = $worker_processes - 1;
if ($n <= $want) {
- PublicInbox::Sigfd::block_signals() if !$sigfd;
+ PublicInbox::DS::block_signals() if !$sigfd;
for my $i ($n..$want) {
+ my $seed = rand(0xffffffff);
my $pid = fork;
if (!defined $pid) {
warn "failed to fork worker[$i]: $!\n";
} elsif ($pid == 0) {
+ srand($seed);
+ eval { Net::SSLeay::randomize() };
$set_user->() if $set_user;
return $p0; # run normal work code
} else {
$pids{$pid} = $i;
}
}
- PublicInbox::Sigfd::sig_setmask($oldset) if !$sigfd;
+ PublicInbox::DS::sig_setmask($oldset) if !$sigfd;
}
if ($sigfd) { # Linux and IO::KQueue users:
# this calls epoll_create:
PublicInbox::Listener->new($_, $tls_cb || $post_accept)
} @listeners;
- my $sigfd = PublicInbox::Sigfd->new($sig, $SFD_NONBLOCK);
- local %SIG = (%SIG, %$sig) if !$sigfd;
+ my $sigfd = PublicInbox::Sigfd->new($sig, SFD_NONBLOCK);
+ local @SIG{keys %$sig} = values(%$sig) unless $sigfd;
if (!$sigfd) {
# wake up every second to accept signals if we don't
# have signalfd or IO::KQueue:
- PublicInbox::Sigfd::sig_setmask($oldset);
+ PublicInbox::DS::sig_setmask($oldset);
PublicInbox::DS->SetLoopTimeout(1000);
}
PublicInbox::DS->EventLoop;
daemon_prepare($default);
my $af_default = $default =~ /:8080\z/ ? 'httpready' : undef;
my $for_destroy = daemonize();
+
+ # localize GCF2C for tests:
+ local $PublicInbox::GitAsyncCat::GCF2C;
+
daemon_loop($refresh, $post_accept, $tlsd, $af_default);
PublicInbox::DS->Reset;
# ->DESTROY runs when $for_destroy goes out-of-scope
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Used by public-inbox-watch for Maildir (and possibly MH in the future)
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# An EXAMINE-able, PublicInbox::Inbox-like object for IMAP. Some
package PublicInbox::DummyInbox;
use strict;
-sub created_at { 0 } # Msgmap::created_at
+sub uidvalidity { 0 } # Msgmap::created_at
sub mm { shift }
sub uid_range { [] } # Over::uid_range
sub subscribe_unlock { undef };
no warnings 'once';
-*max = \&created_at;
+*max = \&uidvalidity;
*query_xover = \&uid_range;
*over = \&mm;
-*search = *unsubscribe_unlock =
+*isrch = *search = *unsubscribe_unlock =
*get_art = *description = *base_url = \&subscribe_unlock;
1;
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
package PublicInbox::EOFpipe;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Emergency Maildir delivery for MDA
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Lazy MIME parser, it still slurps the full message but keeps short
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# Copyright (C) 2004- Simon Cozens, Casey West, Ricardo SIGNES
# This library is free software; you can redistribute it and/or modify
# it under the same terms as Perl itself.
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Used by the web interface to link to messages outside of the our
sub search_partial ($$) {
my ($ibx, $mid) = @_;
return if length($mid) < $MIN_PARTIAL_LEN;
- my $srch = $ibx->search or return;
- my $opt = { limit => PARTIAL_MAX, mset => 2 };
+ my $srch = $ibx->search or return; # NOT ->isrch, we already try ->ALL
+ my $opt = { limit => PARTIAL_MAX, relevance => -1 };
my @try = ("m:$mid*");
my $chop = $mid;
if ($chop =~ s/(\W+)(\w*)\z//) {
sub ext_msg_i {
my ($other, $ctx) = @_;
- return if $other->{name} eq $ctx->{-inbox}->{name} || !$other->base_url;
+ return if $other->{name} eq $ctx->{ibx}->{name} || !$other->base_url;
my $mm = $other->mm or return;
}
}
+# Try to resolve $ctx->{mid} through the configured ->ALL object before
+# falling back to iterating over every inbox.  Returns nothing when ->ALL
+# is unset (ext_msg then uses the sub on the right side of its `//'),
+# otherwise returns whatever exact() or partial_response() returns.
+sub ext_msg_ALL ($) {
+ my ($ctx) = @_;
+ my $ALL = $ctx->{www}->{pi_cfg}->ALL or return;
+ my $by_eidx_key = $ctx->{www}->{pi_cfg}->{-by_eidx_key};
+ my $cur_key = eval { $ctx->{ibx}->eidx_key } //
+ return partial_response($ctx); # $cur->{ibx} == $ALL
+ my %seen = ($cur_key => 1);
+ my ($id, $prev);
+ while (my $x = $ALL->over->next_by_mid($ctx->{mid}, \$id, \$prev)) {
+ my $xr3 = $ALL->over->get_xref3($x->{num});
+ for my $k (@$xr3) {
+ # strip the ":$NUM:$BLOB" suffix, leaving only the eidx_key
+ $k =~ s/:[0-9]+:$x->{blob}\z// or next;
+ next if $k eq $cur_key;
+ my $ibx = $by_eidx_key->{$k} // next;
+ my $url = $ibx->base_url or next; # skip inboxes without a URL
+ push(@{$ctx->{found}}, $ibx) unless $seen{$k}++;
+ }
+ }
+ return exact($ctx) if $ctx->{found};
+
+ # fall back to partial MID matching
+ for my $ibxish ($ctx->{ibx}, $ALL) {
+ my $mids = search_partial($ibxish, $ctx->{mid}) or next;
+ push @{$ctx->{partial}}, [ $ibxish, $mids ];
+ last if ($ctx->{n_partial} += scalar(@$mids)) >= PARTIAL_MAX;
+ }
+ partial_response($ctx);
+}
+
sub ext_msg {
my ($ctx) = @_;
- sub {
+ ext_msg_ALL($ctx) // sub {
$ctx->{-wcb} = $_[0]; # HTTP server write callback
if ($ctx->{env}->{'pi-httpd.async'}) {
require PublicInbox::ConfigIter;
my $iter = PublicInbox::ConfigIter->new(
- $ctx->{www}->{pi_config},
+ $ctx->{www}->{pi_cfg},
\&ext_msg_step, $ctx);
$iter->event_step;
} else {
- $ctx->{www}->{pi_config}->each_inbox(\&ext_msg_i, $ctx);
+ $ctx->{www}->{pi_cfg}->each_inbox(\&ext_msg_i, $ctx);
finalize_exact($ctx);
}
};
# fall back to partial MID matching
my $mid = $ctx->{mid};
- my $cur = $ctx->{-inbox};
+ my $cur = $ctx->{ibx};
my $mids = search_partial($cur, $mid);
if ($mids) {
$ctx->{n_partial} = scalar(@$mids);
finalize_partial($ctx);
}
-sub finalize_partial {
+sub partial_response ($) {
my ($ctx) = @_;
my $mid = $ctx->{mid};
my $code = 404;
my $es = $n_partial == 1 ? '' : 'es';
$n_partial .= '+' if ($n_partial == PARTIAL_MAX);
$s .= "\n$n_partial partial match$es found:\n\n";
- my $cur_name = $ctx->{-inbox}->{name};
+ my $cur_name = $ctx->{ibx}->{name};
foreach my $pair (@{$ctx->{partial}}) {
my ($ibx, $res) = @$pair;
my $env = $ctx->{env} if $ibx->{name} eq $cur_name;
$ctx->{-html_tip} = $s .= '</pre>';
$ctx->{-title_html} = $title;
$ctx->{-upfx} = '../';
- $ctx->{-wcb}->(html_oneshot($ctx, $code));
+ html_oneshot($ctx, $code);
}
+# pass the partial-match response to the HTTP server write callback
+sub finalize_partial ($) {
+	my ($ctx) = @_;
+	my @res = partial_response($ctx);
+	$ctx->{-wcb}->(@res);
+}
+
sub ext_urls {
my ($ctx, $mid, $href, $html) = @_;
--- /dev/null
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Read-only external (detached) index for cross inbox search.
+# This is a read-only counterpart to PublicInbox::ExtSearchIdx
+# and behaves like PublicInbox::Inbox AND PublicInbox::Search
+package PublicInbox::ExtSearch;
+use strict;
+use v5.10.1;
+use PublicInbox::Over;
+use PublicInbox::Inbox;
+use PublicInbox::MiscSearch;
+use DBI qw(:sql_types); # SQL_BLOB
+
+# for ->reopen, ->mset, ->mset_to_artnums
+use parent qw(PublicInbox::Search);
+
+# $topdir is the top-level extindex directory; the misc index and
+# over.sqlite3 are looked up under "$topdir/ei$SCHEMA_VERSION"
+sub new {
+	my ($class, $topdir) = @_;
+	# e.g. "$topdir/ei15"
+	my $xpfx = "$topdir/ei".PublicInbox::Search::SCHEMA_VERSION;
+	my $self = { topdir => $topdir, xpfx => $xpfx };
+	bless $self, $class;
+}
+
+# lazily-created PublicInbox::MiscSearch handle for "$xpfx/misc"
+sub misc {
+	my ($self) = @_;
+	return $self->{misc} if defined $self->{misc};
+	$self->{misc} = PublicInbox::MiscSearch->new("$self->{xpfx}/misc");
+}
+
+# same as per-inbox ->over, for now...
+# lazily-created PublicInbox::Over handle for "$xpfx/over.sqlite3"
+sub over {
+	my ($self) = @_;
+	return $self->{over} if defined $self->{over};
+	$self->{over} = PublicInbox::Over->new("$self->{xpfx}/over.sqlite3");
+}
+
+# lazily-created PublicInbox::Git handle for "$topdir/ALL.git"
+sub git {
+	my ($self) = @_;
+	return $self->{git} if defined $self->{git};
+	$self->{git} = PublicInbox::Git->new("$self->{topdir}/ALL.git");
+}
+
+# returns a hashref of { $NEWSGROUP_NAME => $ART_NO } using the `xref3' table
+#
+# $xibx is the inbox being served (its {newsgroup} is looked up as the
+# eidx_key) and $xsmsg is the message within it ({blob} is the hex OID,
+# {num} the article number).  Warns and returns nothing when either the
+# inbox or the message cannot be found in this extindex.
+sub nntp_xref_for { # NNTP only
+	my ($self, $xibx, $xsmsg) = @_;
+	my $dbh = over($self)->dbh;
+
+	my $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT ibx_id FROM inboxes WHERE eidx_key = ? LIMIT 1
+
+	$sth->execute($xibx->{newsgroup});
+	my $xibx_id = $sth->fetchrow_array // do {
+		warn "W: `$xibx->{newsgroup}' not found in $self->{topdir}\n";
+		return;
+	};
+
+	$sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT docid FROM xref3 WHERE oidbin = ? AND xnum = ? AND ibx_id = ? LIMIT 1
+
+	$sth->bind_param(1, pack('H*', $xsmsg->{blob}), SQL_BLOB);
+
+	# NNTP::cmd_over can set {num} to zero according to RFC 3977 8.3.2
+	$sth->bind_param(2, $xsmsg->{num} || $xsmsg->{-orig_num});
+	$sth->bind_param(3, $xibx_id);
+	$sth->execute;
+	my $docid = $sth->fetchrow_array // do {
+		warn <<EOF;
+W: `$xibx->{newsgroup}:$xsmsg->{num}' not found in $self->{topdir}
+EOF
+		return;
+	};
+
+	# every other inbox which references the same document:
+	$sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT ibx_id,xnum FROM xref3 WHERE docid = ? AND ibx_id != ?
+
+	$sth->execute($docid, $xibx_id);
+	my $rows = $sth->fetchall_arrayref;
+
+	my $eidx_key_sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT eidx_key FROM inboxes WHERE ibx_id = ? LIMIT 1
+
+	my %xref = map {
+		my ($ibx_id, $xnum) = @$_;
+
+		$eidx_key_sth->execute($ibx_id);
+		my $eidx_key = $eidx_key_sth->fetchrow_array;
+
+		# only include if there's a newsgroup name: skip keys
+		# containing '/' and skip ibx_ids with no `inboxes' row
+		# (undef) instead of inserting an undef hash key
+		defined($eidx_key) && index($eidx_key, '/') < 0 ?
+			($eidx_key => $xnum) : ()
+	} @$rows;
+	$xref{$xibx->{newsgroup}} = $xsmsg->{num};
+	\%xref;
+}
+
+# always returns undef; also aliased below for other always-undef methods
+sub mm { undef }
+
+# returns an empty hashref
+sub altid_map { {} }
+
+# Returns the cached {description}, filling it from "$topdir/description"
+# via PublicInbox::Inbox::cat_desc on first use; a fixed placeholder
+# string is returned while the result is still undefined.
+sub description {
+	my ($self) = @_;
+	unless (defined $self->{description}) {
+		my $path = "$self->{topdir}/description";
+		$self->{description} = PublicInbox::Inbox::cat_desc($path);
+	}
+	$self->{description} // '$EXTINDEX_DIR/description missing';
+}
+
+# placeholder: always an empty arrayref
+sub cloneurl { [] } # TODO
+
+# placeholder URL, not derived from any configuration
+sub base_url { 'https://example.com/TODO/' }
+# always an empty arrayref
+sub nntp_url { [] }
+
+# the glob assignments below only mention each name once
+no warnings 'once';
+# reuse PublicInbox::Inbox implementations directly
+*smsg_eml = \&PublicInbox::Inbox::smsg_eml;
+*smsg_by_mid = \&PublicInbox::Inbox::smsg_by_mid;
+*msg_by_mid = \&PublicInbox::Inbox::msg_by_mid;
+*modified = \&PublicInbox::Inbox::modified;
+*recent = \&PublicInbox::Inbox::recent;
+
+*max_git_epoch = *nntp_usable = *msg_by_path = \&mm; # undef
+# ->isrch and ->search both call PublicInbox::Search::reopen on this object
+*isrch = *search = \&PublicInbox::Search::reopen;
+
+1;
--- /dev/null
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Detached/external index cross inbox search indexing support
+# read-write counterpart to PublicInbox::ExtSearch
+#
+# It's based on the same ideas as public-inbox-v2-format(5) using
+# over.sqlite3 for dedupe and sharded Xapian. msgmap.sqlite3 is
+# missing, so there is no Message-ID conflict resolution, meaning
+# no NNTP support for now.
+#
+# v2 has a 1:1 mapping of index:inbox or msgmap for NNTP support.
+# This is intended to be an M:N index:inbox mapping, but it'll likely
+# be 1:N in common practice (M==1)
+
+package PublicInbox::ExtSearchIdx;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::ExtSearch PublicInbox::Lock);
+use Carp qw(croak carp);
+use Sys::Hostname qw(hostname);
+use POSIX qw(strftime);
+use PublicInbox::Search;
+use PublicInbox::SearchIdx qw(prepare_stack is_ancestor is_bad_blob);
+use PublicInbox::OverIdx;
+use PublicInbox::MiscIdx;
+use PublicInbox::MID qw(mids);
+use PublicInbox::V2Writable;
+use PublicInbox::InboxWritable;
+use PublicInbox::ContentHash qw(content_hash);
+use PublicInbox::Eml;
+use PublicInbox::DS qw(now);
+use DBI qw(:sql_types); # SQL_BLOB
+
+# Constructor: $dir is the top-level extindex directory, $opt is a hashref
+# from which {indexlevel} (default: 'full'), {creat} and {fsync} are read.
+# Dies on an invalid indexlevel or on indexlevel=basic.
+# The first argument is ignored; the object is blessed into this package.
+sub new {
+ my (undef, $dir, $opt) = @_;
+ my $l = $opt->{indexlevel} // 'full';
+ $l !~ $PublicInbox::SearchIdx::INDEXLEVELS and
+ die "invalid indexlevel=$l\n";
+ $l eq 'basic' and die "E: indexlevel=basic not yet supported\n";
+ my $self = bless {
+ xpfx => "$dir/ei".PublicInbox::Search::SCHEMA_VERSION,
+ topdir => $dir,
+ creat => $opt->{creat},
+ ibx_map => {}, # (newsgroup//inboxdir) => $ibx
+ ibx_list => [],
+ indexlevel => $l,
+ transact_bytes => 0,
+ total_bytes => 0,
+ current_info => '',
+ parallel => 1,
+ lock_path => "$dir/ei.lock",
+ }, __PACKAGE__;
+ # prefer the existing shard count, otherwise pick one via nproc_shards
+ $self->{shards} = $self->count_shards || nproc_shards($opt->{creat});
+ my $oidx = PublicInbox::OverIdx->new("$self->{xpfx}/over.sqlite3");
+ # {-no_fsync} is set on both objects unless $opt->{fsync} is true
+ $self->{-no_fsync} = $oidx->{-no_fsync} = 1 if !$opt->{fsync};
+ $self->{oidx} = $oidx;
+ $self
+}
+
+# Register $ibx under its eidx_key unless that key is already mapped;
+# returns the inbox object stored for the key.
+sub attach_inbox {
+	my ($self, $ibx) = @_;
+	my $key = $ibx->eidx_key;
+	my $map = $self->{ibx_map};
+	return $map->{$key} if defined $map->{$key};
+	push @{$self->{ibx_list}}, $ibx;
+	$map->{$key} = $ibx;
+}
+
+# each_inbox callback: arguments arrive as ($ibx, $self)
+sub _ibx_attach {
+	my $ibx = $_[0];
+	my $self = $_[1];
+	attach_inbox($self, $ibx);
+}
+
+# Remember $cfg in {cfg} and attach every inbox it iterates over.
+sub attach_config {
+	my ($self, $cfg) = @_;
+	$self->{cfg} = $cfg;
+	my $cb = \&_ibx_attach;
+	$cfg->each_inbox($cb, $self);
+}
+
+# Add the size of $req->{new_smsg} to the running {transact_bytes} total
+# and request a checkpoint once the total reaches {batch_bytes}.
+sub check_batch_limit ($) {
+	my ($req) = @_;
+	my $self = $req->{self};
+	my $total = ($self->{transact_bytes} += $req->{new_smsg}->{bytes});
+
+	# set flag for PublicInbox::V2Writable::index_todo:
+	$total >= $self->{batch_bytes} and ${$req->{need_checkpoint}} = 1;
+}
+
+# Handle a message ($req->{eml}, $req->{oid}) matching the already-indexed
+# $smsg.  With {new_smsg} set, record one more xref3 row and the eidx info
+# for the current inbox; otherwise drop this inbox's xref3 row for $oid.
+sub do_xpost ($$) {
+ my ($req, $smsg) = @_;
+ my $self = $req->{self};
+ my $docid = $smsg->{num};
+ my $idx = $self->idx_shard($docid);
+ my $oid = $req->{oid};
+ my $xibx = $req->{ibx};
+ my $eml = $req->{eml};
+ my $eidx_key = $xibx->eidx_key;
+ if (my $new_smsg = $req->{new_smsg}) { # 'm' on cross-posted message
+ my $xnum = $req->{xnum};
+ $self->{oidx}->add_xref3($docid, $xnum, $oid, $eidx_key);
+ $idx->ipc_do('add_eidx_info', $docid, $eidx_key, $eml);
+ check_batch_limit($req);
+ } else { # 'd'
+ my $rm_eidx_info;
+ my $nr = $self->{oidx}->remove_xref3($docid, $oid, $eidx_key,
+ \$rm_eidx_info);
+ # NOTE(review): $nr == 0 presumably means no xref3 rows remain
+ if ($nr == 0) {
+ $self->{oidx}->eidxq_del($docid);
+ $idx->ipc_do('xdb_remove', $docid);
+ } elsif ($rm_eidx_info) {
+ $idx->ipc_do('remove_eidx_info',
+ $docid, $eidx_key, $eml);
+ $self->{oidx}->eidxq_add($docid); # yes, add
+ }
+ }
+}
+
+# called by V2Writable::sync_prepare
+sub artnum_max {
+	my ($self) = @_;
+	$self->{oidx}->eidx_max;
+}
+
+# Index a message under a newly allocated docid.
+# Consumes (deletes) {eml} and {ibx} from $req; dies if {new_smsg} or
+# {ibx} is unset.
+sub index_unseen ($) {
+ my ($req) = @_;
+ my $new_smsg = $req->{new_smsg} or die 'BUG: {new_smsg} unset';
+ my $eml = delete $req->{eml};
+ $new_smsg->populate($eml, $req);
+ my $self = $req->{self};
+ # the docid comes from the `eidx_docid' counter
+ my $docid = $self->{oidx}->adj_counter('eidx_docid', '+');
+ $new_smsg->{num} = $docid;
+ my $idx = $self->idx_shard($docid);
+ $self->{oidx}->add_overview($eml, $new_smsg);
+ my $oid = $new_smsg->{blob};
+ my $ibx = delete $req->{ibx} or die 'BUG: {ibx} unset';
+ $self->{oidx}->add_xref3($docid, $req->{xnum}, $oid, $ibx->eidx_key);
+ $idx->index_eml($eml, $new_smsg, $ibx->eidx_key);
+ check_batch_limit($req);
+}
+
+# Called by do_step once every Message-ID in the request was checked:
+# dispatches to do_xpost for each entry in {indexed}, or to index_unseen
+# when only {new_smsg} exists, then records {cur_cmt} via {latest_cmt}.
+sub do_finalize ($) {
+ my ($req) = @_;
+ if (my $indexed = $req->{indexed}) {
+ do_xpost($req, $_) for @$indexed;
+ } elsif (exists $req->{new_smsg}) { # totally unseen message
+ index_unseen($req);
+ } else {
+ # `d' message was already unindexed in the v1/v2 inboxes,
+ # so it's too noisy to warn, here.
+ }
+ # cur_cmt may be undef for unindex_oid, set by V2Writable::index_todo
+ if (defined(my $cur_cmt = $req->{cur_cmt})) {
+ ${$req->{latest_cmt}} = $cur_cmt;
+ }
+}
+
+# Walks $req->{mids}; for each Message-ID every existing {oidx} entry is
+# fetched via git->cat_async and handed to ck_existing, which re-enters
+# this sub.  do_finalize runs once {mids} is exhausted.
+sub do_step ($) { # main iterator for adding messages to the index
+ my ($req) = @_;
+ my $self = $req->{self} // die 'BUG: {self} missing';
+ while (1) {
+ # {next_arg} holds the next_by_mid arguments for the current MID
+ if (my $next_arg = $req->{next_arg}) {
+ if (my $smsg = $self->{oidx}->next_by_mid(@$next_arg)) {
+ $req->{cur_smsg} = $smsg;
+ $self->git->cat_async($smsg->{blob},
+ \&ck_existing, $req);
+ return; # ck_existing calls do_step
+ }
+ delete $req->{cur_smsg};
+ delete $req->{next_arg};
+ }
+ my $mid = shift(@{$req->{mids}});
+ last unless defined $mid;
+ my ($id, $prev);
+ $req->{next_arg} = [ $mid, \$id, \$prev ];
+ # loop again
+ }
+ do_finalize($req);
+}
+
+# Called when the blob of $req->{cur_smsg} is gone from git.
+# If other xref3 entries remain for the document, drop the xref3 for the
+# missing blob and point the over entry at the OID of the first remaining
+# entry; otherwise delete the document from over entirely.
+sub _blob_missing ($) { # called when req->{cur_smsg}->{blob} is bad
+	my ($req) = @_;
+	my $smsg = $req->{cur_smsg} or die 'BUG: {cur_smsg} missing';
+	my $self = $req->{self};
+	my $xref3 = $self->{oidx}->get_xref3($smsg->{num});
+	my @keep = grep(!/:$smsg->{blob}\z/, @$xref3);
+	if (@keep) {
+		# xref3 entries end with ":$OIDHEX"
+		$keep[0] =~ /:([a-f0-9]{40,}+)\z/ or
+			die "BUG: xref $keep[0] has no OID";
+		my $oidhex = $1;
+		$self->{oidx}->remove_xref3($smsg->{num}, $smsg->{blob});
+		$self->{oidx}->update_blob($smsg, $oidhex);
+	} else {
+		$self->{oidx}->delete_by_num($smsg->{num});
+	}
+}
+
+# Compares the content hash of an already-indexed blob with {chash} of
+# the incoming message; matches are queued in {indexed} for do_xpost.
+# Always continues iteration via do_step.
+sub ck_existing { # git->cat_async callback
+ my ($bref, $oid, $type, $size, $req) = @_;
+ my $smsg = $req->{cur_smsg} or die 'BUG: {cur_smsg} missing';
+ if ($type eq 'missing') {
+ _blob_missing($req);
+ } elsif (!is_bad_blob($oid, $type, $size, $smsg->{blob})) {
+ my $self = $req->{self} // die 'BUG: {self} missing';
+ local $self->{current_info} = "$self->{current_info} $oid";
+ my $cur = PublicInbox::Eml->new($bref);
+ if (content_hash($cur) eq $req->{chash}) {
+ push @{$req->{indexed}}, $smsg; # for do_xpost
+ } # else { index_unseen later }
+ }
+ do_step($req);
+}
+
+# Is the message visible in the inbox currently being indexed?
+# Returns its article number if so, undef otherwise.
+# Side effect: fills in {eml}, {chash} and {mids} of $req from $bref.
+sub cur_ibx_xnum ($$) {
+	my ($req, $bref) = @_;
+	my $ibx = $req->{ibx} or die 'BUG: current {ibx} missing';
+
+	my $eml = $req->{eml} = PublicInbox::Eml->new($bref);
+	$req->{chash} = content_hash($eml);
+	my $mids = $req->{mids} = mids($eml);
+	foreach my $mid (my @copy = @$mids) { # iterate over a copy
+		last unless defined $mid;
+		my ($id, $prev);
+		while (my $x = $ibx->over->next_by_mid($mid, \$id, \$prev)) {
+			next if $x->{blob} ne $req->{oid};
+			return $x->{num};
+		}
+	}
+	undef;
+}
+
+# Creates {new_smsg} for the fetched blob and enters do_step, but only
+# if the blob is sane and cur_ibx_xnum finds it in the current inbox.
+sub index_oid { # git->cat_async callback for 'm'
+ my ($bref, $oid, $type, $size, $req) = @_;
+ my $self = $req->{self};
+ local $self->{current_info} = "$self->{current_info} $oid";
+ return if is_bad_blob($oid, $type, $size, $req->{oid});
+ my $new_smsg = $req->{new_smsg} = bless {
+ blob => $oid,
+ }, 'PublicInbox::Smsg';
+ $new_smsg->set_bytes($$bref, $size);
+ # not (or no longer) in the current inbox: nothing to index
+ defined($req->{xnum} = cur_ibx_xnum($req, $bref)) or return;
+ ++${$req->{nr}};
+ do_step($req);
+}
+
+# Enters do_step without {new_smsg}, so do_xpost takes its 'd' branch;
+# does nothing if the blob is bad or still present in the current inbox.
+sub unindex_oid { # git->cat_async callback for 'd'
+ my ($bref, $oid, $type, $size, $req) = @_;
+ my $self = $req->{self};
+ local $self->{current_info} = "$self->{current_info} $oid";
+ return if is_bad_blob($oid, $type, $size, $req->{oid});
+ return if defined(cur_ibx_xnum($req, $bref)); # was re-added
+ do_step($req);
+}
+
+# overrides V2Writable::last_commits, called by sync_ranges via sync_prepare
+# Returns an arrayref with one eidx_meta value per epoch (0..{epoch_max}),
+# looked up under the key "lc-v2:$EIDX_KEY//$UIDVALIDITY;$EPOCH"
+sub last_commits {
+	my ($self, $sync) = @_;
+	my $ibx = $sync->{ibx};
+	my $ekey = $ibx->eidx_key;
+	my $uv = $ibx->uidvalidity;
+	my @heads = map {
+		scalar($self->{oidx}->eidx_meta("lc-v2:$ekey//$uv;$_"));
+	} (0..$sync->{epoch_max});
+	\@heads;
+}
+
+# Returns a short reason string if $ibx cannot be indexed (one of ->mm,
+# ->uidvalidity or ->over is undefined), or undef if it is usable.
+sub _ibx_index_reject ($) {
+	my ($ibx) = @_;
+	my @checks = (
+		[ mm => 'unindexed, no msgmap.sqlite3' ],
+		[ uidvalidity => 'no UIDVALIDITY' ],
+		[ over => 'unindexed, no over.sqlite3' ],
+	);
+	for my $ck (@checks) {
+		my ($meth, $why) = @$ck;
+		defined($ibx->$meth) or return $why;
+	}
+	undef;
+}
+
+# Index new messages of one inbox into this extindex.
+# Returns a "W: ..." or "E: ..." string describing why the inbox was
+# skipped, or undef once its todo list was processed.
+sub _sync_inbox ($$$) {
+ my ($self, $sync, $ibx) = @_;
+ my $ekey = $ibx->eidx_key;
+ if (defined(my $err = _ibx_index_reject($ibx))) {
+ return "W: skipping $ekey ($err)";
+ }
+ $sync->{ibx} = $ibx;
+ $sync->{nr} = \(my $nr = 0);
+ my $v = $ibx->version;
+ if ($v == 2) {
+ # NOTE: bare return (no message) when there is no epoch
+ $sync->{epoch_max} = $ibx->max_git_epoch // return;
+ sync_prepare($self, $sync); # or return # TODO: once MiscIdx is stable
+ } elsif ($v == 1) {
+ my $uv = $ibx->uidvalidity;
+ # last commit previously recorded for this v1 inbox, if any
+ my $lc = $self->{oidx}->eidx_meta("lc-v1:$ekey//$uv");
+ my $head = $ibx->mm->last_commit //
+ return "E: $ibx->{inboxdir} is not indexed";
+ my $stk = prepare_stack($sync, $lc ? "$lc..$head" : $head);
+ my $unit = { stack => $stk, git => $ibx->git };
+ push @{$sync->{todo}}, $unit;
+ } else {
+ return "E: $ekey unsupported inbox version (v$v)";
+ }
+ for my $unit (@{delete($sync->{todo}) // []}) {
+ last if $sync->{quit};
+ index_todo($self, $sync, $unit);
+ }
+ $self->{midx}->index_ibx($ibx) unless $sync->{quit};
+ $ibx->git->cleanup; # done with this inbox, now
+ undef;
+}
+
+# Drop all xref3 rows tying $docid to the inbox $ibx_id.  If other xref3
+# entries remain, the doc is queued for reindexing; otherwise it is
+# removed from its Xapian shard.
+sub gc_unref_doc ($$$$) {
+ my ($self, $ibx_id, $eidx_key, $docid) = @_;
+ my $dbh = $self->{oidx}->dbh;
+
+ # for debug/info purposes, oids may no longer be accessible
+ my $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT oidbin FROM xref3 WHERE docid = ? AND ibx_id = ?
+
+ $sth->execute($docid, $ibx_id);
+ my @oid = map { unpack('H*', $_->[0]) } @{$sth->fetchall_arrayref};
+
+ $dbh->prepare_cached(<<'')->execute($docid, $ibx_id);
+DELETE FROM xref3 WHERE docid = ? AND ibx_id = ?
+
+ my $remain = $self->{oidx}->get_xref3($docid);
+ if (scalar(@$remain)) {
+ $self->{oidx}->eidxq_add($docid); # enqueue for reindex
+ for my $oid (@oid) {
+ warn "I: unref #$docid $eidx_key $oid\n";
+ }
+ } else {
+ warn "I: remove #$docid $eidx_key @oid\n";
+ $self->idx_shard($docid)->ipc_do('xdb_remove', $docid);
+ }
+}
+
+# Garbage-collect inboxes which are in the `inboxes' table but no longer
+# in {ibx_map}: their misc entry, xref3 rows, `inboxes' row and last-commit
+# (lc-*) metadata are removed, then every shard is checked against `over'
+# and stale xref3 rows are deleted.  Requires a prior ->attach_config.
+sub eidx_gc {
+	my ($self, $opt) = @_;
+	$self->{cfg} or die "E: GC requires ->attach_config\n";
+	$opt->{-idx_gc} = 1;
+	$self->idx_init($opt); # acquire lock via V2Writable::_idx_init
+
+	my $dbh = $self->{oidx}->dbh;
+	my $x3_doc = $dbh->prepare('SELECT docid FROM xref3 WHERE ibx_id = ?');
+	my $ibx_ck = $dbh->prepare('SELECT ibx_id,eidx_key FROM inboxes');
+
+	# SQLite LIKE has no default escape character, so the backslash
+	# escapes applied to the pattern below need an explicit ESCAPE
+	my $lc_i = $dbh->prepare(<<'');
+SELECT key FROM eidx_meta WHERE key LIKE ? ESCAPE ?
+
+	$ibx_ck->execute;
+	while (my ($ibx_id, $eidx_key) = $ibx_ck->fetchrow_array) {
+		next if $self->{ibx_map}->{$eidx_key};
+		$self->{midx}->remove_eidx_key($eidx_key);
+		warn "I: deleting messages for $eidx_key...\n";
+		$x3_doc->execute($ibx_id);
+		while (defined(my $docid = $x3_doc->fetchrow_array)) {
+			gc_unref_doc($self, $ibx_id, $eidx_key, $docid);
+		}
+		$dbh->prepare_cached(<<'')->execute($ibx_id);
+DELETE FROM inboxes WHERE ibx_id = ?
+
+		# drop last_commit info
+		my $pat = $eidx_key;
+		# escape LIKE wildcards and the escape character itself
+		$pat =~ s/([_%\\])/\\$1/g;
+		$lc_i->execute("lc-%:$pat//%", '\\');
+		while (my ($key) = $lc_i->fetchrow_array) {
+			# LIKE is only a coarse filter, verify the exact key
+			next if $key !~ m!\Alc-v[1-9]+:\Q$eidx_key\E//!;
+			warn "I: removing $key\n";
+			$dbh->prepare_cached(<<'')->execute($key);
+DELETE FROM eidx_meta WHERE key = ?
+
+		}
+
+		warn "I: $eidx_key removed\n";
+	}
+
+	# it's not real unless it's in `over', we use parallelism here,
+	# shards will be reading directly from over, so commit
+	$self->{oidx}->commit_lazy;
+	$self->{oidx}->begin_lazy;
+
+	for my $idx (@{$self->{idx_shards}}) {
+		warn "I: cleaning up shard #$idx->{shard}\n";
+		$idx->shard_over_check($self->{oidx});
+	}
+	my $nr = $dbh->do(<<'');
+DELETE FROM xref3 WHERE docid NOT IN (SELECT num FROM over)
+
+	warn "I: eliminated $nr stale xref3 entries\n" if $nr != 0;
+
+	done($self);
+}
+
+# Map $smsg->{ibx_id} (deleted from $smsg as a side effect) to the inbox
+# object in {ibx_list} using $sync->{id2pos}; dies if any step fails.
+sub _ibx_for ($$$) {
+	my ($self, $sync, $smsg) = @_;
+	my $ibx_id = delete($smsg->{ibx_id});
+	defined($ibx_id) or die '{ibx_id} unset';
+	my $pos = $sync->{id2pos}->{$ibx_id};
+	defined($pos) or die "$ibx_id no pos";
+	my $ibx = $self->{ibx_list}->[$pos];
+	defined($ibx) or die "BUG: ibx for $smsg->{blob} not mapped";
+	$ibx;
+}
+
+# Returns true if the soft RLIMIT_NOFILE looks too low to keep a DB
+# handle open for every inbox in {ibx_list} (plus 64 for other uses).
+# The limit is read via BSD::Resource if available, `ulimit -n' otherwise.
+# The result is cached in {-fd_constrained}; an undeterminable limit is
+# treated as constrained.
+sub _fd_constrained ($) {
+	my ($self) = @_;
+	$self->{-fd_constrained} //= do {
+		my $soft;
+		if (eval { require BSD::Resource; 1 }) {
+			my $NOFILE = BSD::Resource::RLIMIT_NOFILE();
+			($soft, undef) = BSD::Resource::getrlimit($NOFILE);
+		} else {
+			$soft = `sh -c 'ulimit -n'`;
+			chomp($soft) if defined($soft);
+		}
+		if (defined($soft) && $soft eq 'unlimited') {
+			0; # `ulimit -n' may print this instead of a number
+		} elsif (defined($soft) && $soft =~ /\A[0-9]/) {
+			my $want = scalar(@{$self->{ibx_list}}) + 64; # estimate
+			my $ret = $want > $soft;
+			if ($ret) {
+				warn <<EOF;
+RLIMIT_NOFILE=$soft insufficient (want: $want), will close DB handles early
+EOF
+			}
+			$ret;
+		} else {
+			warn "Unable to determine RLIMIT_NOFILE: $@\n";
+			1;
+		}
+	};
+}
+
+# Final step of reindexing one docid: $smsg/$eml are the last blob read
+# by _reindex_oid, $req->{by_chash} groups all blobs read for the docid by
+# content hash.  Blobs sharing the hash of $smsg stay with the docid; any
+# other hash group is detached and reindexed through reindex_unseen.
+sub _reindex_finalize ($$$) {
+ my ($req, $smsg, $eml) = @_;
+ my $sync = $req->{sync};
+ my $self = $sync->{self};
+ my $by_chash = delete $req->{by_chash} or die 'BUG: no {by_chash}';
+ my $nr = scalar(keys(%$by_chash)) or die 'BUG: no content hashes';
+ my $orig_smsg = $req->{orig_smsg} // die 'BUG: no {orig_smsg}';
+ my $docid = $smsg->{num} = $orig_smsg->{num};
+ $self->{oidx}->add_overview($eml, $smsg); # may rethread
+ check_batch_limit({ %$sync, new_smsg => $smsg });
+ my $chash0 = $smsg->{chash} // die "BUG: $smsg->{blob} no {chash}";
+ my $stable = delete($by_chash->{$chash0}) //
+ die "BUG: $smsg->{blob} chash missing";
+ my $idx = $self->idx_shard($docid);
+ my $top_smsg = pop @$stable;
+ $top_smsg == $smsg or die 'BUG: top_smsg != smsg';
+ my $ibx = _ibx_for($self, $sync, $smsg);
+ $idx->index_eml($eml, $smsg, $ibx->eidx_key);
+ # remaining same-hash blobs only contribute their eidx info (headers)
+ for my $x (reverse @$stable) {
+ $ibx = _ibx_for($self, $sync, $x);
+ my $hdr = delete $x->{hdr} // die 'BUG: no {hdr}';
+ $idx->ipc_do('add_eidx_info', $docid, $ibx->eidx_key, $hdr);
+ }
+ return if $nr == 1; # likely, all good
+
+ warn "W: #$docid split into $nr due to deduplication change\n";
+ my @todo;
+ for my $ary (values %$by_chash) {
+ for my $x (reverse @$ary) {
+ warn "removing #$docid xref3 $x->{blob}\n";
+ my $n = $self->{oidx}->remove_xref3($docid, $x->{blob});
+ die "BUG: $x->{blob} invalidated #$docid" if $n == 0;
+ }
+ # one representative per hash group is reindexed as unseen
+ my $x = pop(@$ary) // die "BUG: #$docid {by_chash} empty";
+ $x->{num} = delete($x->{xnum}) // die '{xnum} unset';
+ $ibx = _ibx_for($self, $sync, $x);
+ if (my $over = $ibx->over) {
+ my $e = $over->get_art($x->{num});
+ $e->{blob} eq $x->{blob} or die <<EOF;
+$x->{blob} != $e->{blob} (${\$ibx->eidx_key}:$e->{num});
+EOF
+ push @todo, $ibx, $e;
+ $over->dbh_close if _fd_constrained($self);
+ } else {
+ die "$ibx->{inboxdir}: over.sqlite3 unusable: $!\n";
+ }
+ }
+ undef $by_chash;
+ while (my ($ibx, $e) = splice(@todo, 0, 2)) {
+ reindex_unseen($self, $sync, $ibx, $e);
+ }
+}
+
+# Reads the blobs listed in $req->{xr3r} one at a time ({ix} is the
+# current index; each entry is [ ibx_id, xnum, oidhex ]), grouping them
+# by content hash in {by_chash}.  Calls _reindex_finalize after the last
+# entry was read.
+sub _reindex_oid { # git->cat_async callback
+ my ($bref, $oid, $type, $size, $req) = @_;
+ my $sync = $req->{sync};
+ my $self = $sync->{self};
+ my $orig_smsg = $req->{orig_smsg} // die 'BUG: no {orig_smsg}';
+ my $expect_oid = $req->{xr3r}->[$req->{ix}]->[2];
+ my $docid = $orig_smsg->{num};
+ if (is_bad_blob($oid, $type, $size, $expect_oid)) {
+ # drop the xref3 of the bad blob and move on to the next one
+ my $remain = $self->{oidx}->remove_xref3($docid, $expect_oid);
+ if ($remain == 0) {
+ warn "W: #$docid gone or corrupted\n";
+ $self->idx_shard($docid)->ipc_do('xdb_remove', $docid);
+ } elsif (my $next_oid = $req->{xr3r}->[++$req->{ix}]->[2]) {
+ $self->git->cat_async($next_oid, \&_reindex_oid, $req);
+ } else {
+ warn "BUG: #$docid gone (UNEXPECTED)\n";
+ $self->idx_shard($docid)->ipc_do('xdb_remove', $docid);
+ }
+ return;
+ }
+ my $ci = $self->{current_info};
+ local $self->{current_info} = "$ci #$docid $oid";
+ my $re_smsg = bless { blob => $oid }, 'PublicInbox::Smsg';
+ $re_smsg->set_bytes($$bref, $size);
+ my $eml = PublicInbox::Eml->new($bref);
+ # reuse the {ds} and {ts} values of the original smsg
+ $re_smsg->populate($eml, { autime => $orig_smsg->{ds},
+ cotime => $orig_smsg->{ts} });
+ my $chash = content_hash($eml);
+ $re_smsg->{chash} = $chash;
+ $re_smsg->{xnum} = $req->{xr3r}->[$req->{ix}]->[1];
+ $re_smsg->{ibx_id} = $req->{xr3r}->[$req->{ix}]->[0];
+ $re_smsg->{hdr} = $eml->header_obj;
+ push @{$req->{by_chash}->{$chash}}, $re_smsg;
+ if (my $next_oid = $req->{xr3r}->[++$req->{ix}]->[2]) {
+ $self->git->cat_async($next_oid, \&_reindex_oid, $req);
+ } else { # last $re_smsg is the highest priority xref3
+ local $self->{current_info} = "$ci #$docid";
+ _reindex_finalize($req, $re_smsg, $eml);
+ }
+}
+
+# Start reindexing the document $smsg->{num}: load its xref3 rows, order
+# them and begin fetching the referenced blobs with _reindex_oid.
+# A document without any xref3 rows is removed from over and Xapian.
+sub _reindex_smsg ($$$) {
+ my ($self, $sync, $smsg) = @_;
+ my $docid = $smsg->{num};
+ my $xr3 = $self->{oidx}->get_xref3($docid, 1);
+ if (scalar(@$xr3) == 0) { # _reindex_check_stale should've covered this
+ warn <<"";
+BUG? #$docid $smsg->{blob} is not referenced by inboxes during reindex
+
+ $self->{oidx}->delete_by_num($docid);
+ $self->idx_shard($docid)->ipc_do('xdb_remove', $docid);
+ return;
+ }
+
+ # we sort {xr3r} in the reverse order of {ibx_list} so we can
+ # hit the common case in _reindex_finalize without rereading
+ # from git (or holding multiple messages in memory).
+ my $id2pos = $sync->{id2pos}; # index in {ibx_list}
+ @$xr3 = sort {
+ $id2pos->{$b->[0]} <=> $id2pos->{$a->[0]}
+ ||
+ $b->[1] <=> $a->[1] # break ties with {xnum}
+ } @$xr3;
+ # convert the third column (binary OID) to hex for git
+ @$xr3 = map { [ $_->[0], $_->[1], unpack('H*', $_->[2]) ] } @$xr3;
+ my $req = { orig_smsg => $smsg, sync => $sync, xr3r => $xr3, ix => 0 };
+ $self->git->cat_async($xr3->[$req->{ix}]->[2], \&_reindex_oid, $req);
+}
+
+# True if a checkpoint was requested via {need_checkpoint} or if the
+# current time is past $sync->{next_check}.
+sub checkpoint_due ($) {
+	my ($sync) = @_;
+	my $need = ${$sync->{need_checkpoint}};
+	return $need if $need;
+	now() > $sync->{next_check};
+}
+
+# Returns "$HOSTNAME-$MACHINE_ID", computed once per process.
+sub host_ident () {
+ # I've copied FS images and only changed the hostname before,
+ # so prepend hostname. Use `state' since a BOFH can change
+ # these while this process is running and we always want to be
+ # able to release locks taken by this process.
+ state $retval = hostname . '-' . do {
+ my $m; # machine-id(5) is systemd
+ if (open(my $fh, '<', '/etc/machine-id')) { $m = <$fh> }
+ # (g)hostid(1) is in GNU coreutils, kern.hostid is most BSDs
+ chomp($m ||= `{ sysctl -n kern.hostid ||
+ hostid || ghostid; } 2>/dev/null`
+ || "no-machine-id-or-hostid-on-$^O");
+ $m;
+ };
+}
+
+# Release the `eidxq_lock' taken by _eidxq_take, if this process holds it.
+# Returns 1 if the lock value was cleared (the caller still has to
+# commit), nothing/undef otherwise.
+sub eidxq_release {
+ my ($self) = @_;
+ my $expect = delete($self->{-eidxq_locked}) or return;
+ # the lock value starts with the PID of the process which took it
+ my ($owner_pid, undef) = split(/-/, $expect);
+ return if $owner_pid != $$; # shards may fork
+ my $oidx = $self->{oidx};
+ $oidx->begin_lazy;
+ my $cur = $oidx->eidx_meta('eidxq_lock') // '';
+ if ($cur eq $expect) {
+ $oidx->eidx_meta('eidxq_lock', '');
+ return 1;
+ } elsif ($cur ne '') {
+ warn "E: eidxq_lock($expect) stolen by $cur\n";
+ } else {
+ warn "E: eidxq_lock($expect) released by another process\n";
+ }
+ undef;
+}
+
+# release our eidxq_lock (if any) and commit only if it was released
+sub DESTROY {
+ my ($self) = @_;
+ eidxq_release($self) and $self->{oidx}->commit_lazy;
+}
+
+# Store "$PID-$TIME-$EUID-$HOST_IDENT" as the `eidxq_lock' value and
+# remember it in {-eidxq_locked}; returns that value.
+sub _eidxq_take ($) {
+	my ($self) = @_;
+	my $val = join('-', $$, time, $>, host_ident());
+	$self->{oidx}->eidx_meta('eidxq_lock', $val);
+	$self->{-eidxq_locked} = $val;
+}
+
+# Try to take the `eidxq_lock' stored in eidx_meta.
+# Returns the lock value if we hold it (newly taken, already ours, or
+# taken over from a dead process of the same user on this host), and
+# nothing/undef if another holder appears to be alive or cannot be
+# checked from here.
+sub eidxq_lock_acquire ($) {
+	my ($self) = @_;
+	my $oidx = $self->{oidx};
+	$oidx->begin_lazy;
+	my $cur = $oidx->eidx_meta('eidxq_lock') || return _eidxq_take($self);
+	if (my $locked = $self->{-eidxq_locked}) { # be lazy
+		return $locked if $locked eq $cur;
+	}
+	# the ident comes last since it may itself contain `-'
+	my ($pid, $time, $euid, $ident) = split(/-/, $cur, 4);
+	# %H is POSIX, %k is a non-standard extension
+	my $t = strftime('%Y-%m-%d %H:%M:%S', gmtime($time));
+	if ($euid == $> && $ident eq host_ident) {
+		if (kill(0, $pid)) {
+			warn <<EOM; return;
+I: PID:$pid (re)indexing Xapian since $t, it will continue our work
+EOM
+		}
+		if ($!{ESRCH}) {
+			warn "I: eidxq_lock is stale ($cur), clobbering\n";
+			return _eidxq_take($self);
+		}
+		warn "E: kill(0, $pid) failed: $!\n"; # fall-through:
+	}
+	my $fn = $oidx->dbh->sqlite_db_filename;
+	warn <<EOF;
+W: PID:$pid, UID:$euid on $ident is indexing Xapian since $t
+W: If this is unexpected, delete `eidxq_lock' from the `eidx_meta' table:
+W:	sqlite3 $fn 'DELETE FROM eidx_meta WHERE key = "eidxq_lock"'
+EOF
+	undef;
+}
+
+# Reindex every docid queued in the `eidxq' table, in ascending order.
+# Does nothing unless the eidxq lock can be acquired and the queue is
+# non-empty.  The iteration restarts with fresh statement handles after
+# every checkpoint.
+sub eidxq_process ($$) { # for reindexing
+ my ($self, $sync) = @_;
+
+ return unless eidxq_lock_acquire($self);
+ my $dbh = $self->{oidx}->dbh;
+ my $tot = $dbh->selectrow_array('SELECT COUNT(*) FROM eidxq') or return;
+ ${$sync->{nr}} = 0;
+ local $sync->{-regen_fmt} = "%u/$tot\n";
+ my $pr = $sync->{-opt}->{-progress};
+ if ($pr) {
+ my $min = $dbh->selectrow_array('SELECT MIN(docid) FROM eidxq');
+ my $max = $dbh->selectrow_array('SELECT MAX(docid) FROM eidxq');
+ $pr->("Xapian indexing $min..$max (total=$tot)\n");
+ }
+ # map each inbox's {-ibx_id} to its position in {ibx_list}
+ $sync->{id2pos} //= do {
+ my %id2pos;
+ my $pos = 0;
+ $id2pos{$_->{-ibx_id}} = $pos++ for @{$self->{ibx_list}};
+ \%id2pos;
+ };
+ my ($del, $iter);
+restart:
+ $del = $dbh->prepare('DELETE FROM eidxq WHERE docid = ?');
+ $iter = $dbh->prepare('SELECT docid FROM eidxq ORDER BY docid ASC');
+ $iter->execute;
+ while (defined(my $docid = $iter->fetchrow_array)) {
+ last if $sync->{quit};
+ if (my $smsg = $self->{oidx}->get_art($docid)) {
+ _reindex_smsg($self, $sync, $smsg);
+ } else {
+ warn "E: #$docid does not exist in over\n";
+ }
+ $del->execute($docid);
+ ++${$sync->{nr}};
+
+ if (checkpoint_due($sync)) {
+ # drop all handles before the checkpoint
+ $dbh = $del = $iter = undef;
+ reindex_checkpoint($self, $sync); # release lock
+ $dbh = $self->{oidx}->dbh;
+ goto restart;
+ }
+ }
+ $self->git->async_wait_all;
+ $pr->("reindexed ${$sync->{nr}}/$tot\n") if $pr;
+}
+
+# Fills in {eml}, {new_smsg}, {chash} and {mids} of $req from the fetched
+# blob, then hands the request over to do_step.
+sub _reindex_unseen { # git->cat_async callback
+ my ($bref, $oid, $type, $size, $req) = @_;
+ return if is_bad_blob($oid, $type, $size, $req->{oid});
+ my $self = $req->{self} // die 'BUG: {self} unset';
+ local $self->{current_info} = "$self->{current_info} $oid";
+ my $new_smsg = bless { blob => $oid, }, 'PublicInbox::Smsg';
+ $new_smsg->set_bytes($$bref, $size);
+ my $eml = $req->{eml} = PublicInbox::Eml->new($bref);
+ $req->{new_smsg} = $new_smsg;
+ $req->{chash} = content_hash($eml);
+ $req->{mids} = mids($eml); # do_step iterates through this
+ do_step($req); # enter the normal indexing flow
+}
+
+# --reindex may catch totally unseen messages, this handles them
+# $xsmsg is the message entry from $ibx; its blob is fetched via the
+# extindex git object and processed by _reindex_unseen.
+sub reindex_unseen ($$$$) {
+ my ($self, $sync, $ibx, $xsmsg) = @_;
+ my $req = {
+ %$sync, # has {self}
+ autime => $xsmsg->{ds},
+ cotime => $xsmsg->{ts},
+ oid => $xsmsg->{blob},
+ ibx => $ibx,
+ xnum => $xsmsg->{num},
+ # {mids} and {chash} will be filled in at _reindex_unseen
+ };
+ warn "I: reindex_unseen ${\$ibx->eidx_key}:$req->{xnum}:$req->{oid}\n";
+ $self->git->cat_async($xsmsg->{blob}, \&_reindex_unseen, $req);
+}
+
+# Walk the overview of $ibx in slices of article numbers and compare each
+# message against xref3: messages without a matching xref3 row are
+# indexed immediately via reindex_unseen, known ones have their docids
+# queued in eidxq for a later reindex.
+sub _reindex_check_unseen ($$$) {
+ my ($self, $sync, $ibx) = @_;
+ my $ibx_id = $ibx->{-ibx_id};
+ my $slice = 1000;
+ my ($beg, $end) = (1, $slice);
+
+ # first, check if we missed any messages in target $ibx
+ my $msgs;
+ my $pr = $sync->{-opt}->{-progress};
+ my $ekey = $ibx->eidx_key;
+ local $sync->{-regen_fmt} =
+ "$ekey checking unseen %u/".$ibx->over->max."\n";
+ ${$sync->{nr}} = 0;
+
+ while (scalar(@{$msgs = $ibx->over->query_xover($beg, $end)})) {
+ ${$sync->{nr}} = $beg;
+ # the next slice starts after the last article number seen
+ $beg = $msgs->[-1]->{num} + 1;
+ $end = $beg + $slice;
+ if (checkpoint_due($sync)) {
+ reindex_checkpoint($self, $sync); # release lock
+ }
+
+ my $inx3 = $self->{oidx}->dbh->prepare_cached(<<'', undef, 1);
+SELECT DISTINCT(docid) FROM xref3 WHERE
+ibx_id = ? AND xnum = ? AND oidbin = ?
+
+ for my $xsmsg (@$msgs) {
+ my $oidbin = pack('H*', $xsmsg->{blob});
+ $inx3->bind_param(1, $ibx_id);
+ $inx3->bind_param(2, $xsmsg->{num});
+ $inx3->bind_param(3, $oidbin, SQL_BLOB);
+ $inx3->execute;
+ my $docids = $inx3->fetchall_arrayref;
+ # index messages which were totally missed
+ # the first time around ASAP:
+ if (scalar(@$docids) == 0) {
+ reindex_unseen($self, $sync, $ibx, $xsmsg);
+ } else { # already seen, reindex later
+ for my $r (@$docids) {
+ $self->{oidx}->eidxq_add($r->[0]);
+ }
+ }
+ last if $sync->{quit};
+ }
+ last if $sync->{quit};
+ }
+}
+
+# Walk the xref3 rows of $ibx in batches (ordered by docid) and delete
+# rows whose article no longer exists in the inbox or whose blob differs.
+# Documents left without any xref3 row are removed entirely, others are
+# queued in eidxq for reindexing.
+sub _reindex_check_stale ($$$) {
+ my ($self, $sync, $ibx) = @_;
+ my $min = 0;
+ my $pr = $sync->{-opt}->{-progress};
+ my $fetching;
+ my $ekey = $ibx->eidx_key;
+ local $sync->{-regen_fmt} =
+ "$ekey check stale/missing %u/".$ibx->over->max."\n";
+ ${$sync->{nr}} = 0;
+ do {
+ if (checkpoint_due($sync)) {
+ reindex_checkpoint($self, $sync); # release lock
+ }
+ # now, check if there's stale xrefs
+ my $iter = $self->{oidx}->dbh->prepare_cached(<<'', undef, 1);
+SELECT docid,xnum,oidbin FROM xref3 WHERE ibx_id = ? AND docid > ?
+ORDER BY docid,xnum ASC LIMIT 10000
+
+ $iter->execute($ibx->{-ibx_id}, $min);
+ $fetching = undef; # stays undef if this batch has no rows
+
+ while (my ($docid, $xnum, $oidbin) = $iter->fetchrow_array) {
+ return if $sync->{quit};
+ ${$sync->{nr}} = $xnum;
+
+ # $min is the lower docid bound for the next batch
+ $fetching = $min = $docid;
+ my $smsg = $ibx->over->get_art($xnum);
+ my $oidhex = unpack('H*', $oidbin);
+ my $err;
+ if (!$smsg) {
+ $err = 'stale';
+ } elsif ($smsg->{blob} ne $oidhex) {
+ $err = "mismatch (!= $smsg->{blob})";
+ } else {
+ next; # likely, all good
+ }
+ # current_info already has eidx_key
+ warn "$xnum:$oidhex (#$docid): $err\n";
+ my $del = $self->{oidx}->dbh->prepare_cached(<<'');
+DELETE FROM xref3 WHERE ibx_id = ? AND xnum = ? AND oidbin = ?
+
+ $del->bind_param(1, $ibx->{-ibx_id});
+ $del->bind_param(2, $xnum);
+ $del->bind_param(3, $oidbin, SQL_BLOB);
+ $del->execute;
+
+ # get_xref3 over-fetches, but this is a rare path:
+ my $xr3 = $self->{oidx}->get_xref3($docid);
+ my $idx = $self->idx_shard($docid);
+ if (scalar(@$xr3) == 0) { # all gone
+ $self->{oidx}->delete_by_num($docid);
+ $self->{oidx}->eidxq_del($docid);
+ $idx->ipc_do('xdb_remove', $docid);
+ } else { # enqueue for reindex of remaining messages
+ $idx->ipc_do('remove_eidx_info',
+ $docid, $ibx->eidx_key);
+ $self->{oidx}->eidxq_add($docid); # yes, add
+ }
+ }
+ } while (defined $fetching);
+}
+
+# Reindex one inbox: run the "unseen" pass, then (unless asked to quit)
+# the "stale" pass. An inbox rejected by _ibx_index_reject() only gets a
+# warning. $self->{current_info} holds the inbox key for the duration.
+sub _reindex_inbox ($$$) {
+	my ($self, $sync, $ibx) = @_;
+	my $ekey = $ibx->eidx_key;
+	local $self->{current_info} = $ekey;
+	my $why = _ibx_index_reject($ibx);
+	if (!defined($why)) {
+		_reindex_check_unseen($self, $sync, $ibx);
+		$sync->{quit} or _reindex_check_stale($self, $sync, $ibx);
+	} else {
+		warn "W: cannot reindex $ekey ($why)\n";
+	}
+	# won't need these for a bit
+	delete @$ibx{qw(over mm search git)};
+}
+
+# Full --reindex driver: take the eidxq lock, run _reindex_inbox() on
+# every configured inbox, wait for outstanding git requests, then
+# process the queue (skipped when $sync->{quit} is set).
+sub eidx_reindex {
+	my ($self, $sync) = @_;
+
+	# acquire eidxq_lock early because full reindex takes forever
+	# and incremental -extindex processes can run during our checkpoints
+	unless (eidxq_lock_acquire($self)) {
+		warn "E: aborting --reindex\n";
+		return;
+	}
+	for my $ibx (@{$self->{ibx_list}}) {
+		_reindex_inbox($self, $sync, $ibx);
+		$sync->{quit} and last;
+	}
+	# ensure eidxq gets filled completely
+	$self->git->async_wait_all;
+	$sync->{quit} or eidxq_process($self, $sync);
+}
+
+# Run _sync_inbox() for $ibx, drop the inbox's {mm} and {over} entries
+# afterwards, and warn with whatever error string _sync_inbox() returned.
+sub sync_inbox {
+	my ($self, $sync, $ibx) = @_;
+	my $err = _sync_inbox($self, $sync, $ibx);
+	delete @{$ibx}{'mm', 'over'};
+	defined($err) and warn $err, "\n";
+}
+
+# Index (and optionally reindex) every inbox in $self->{ibx_list}.
+#
+# $opt is the option hashref; keys read here are {reindex} (deleted once
+# consumed) and {scan} (defaults to true). Returns the $sync state
+# hashref so eidx_watch can keep using it.
+#
+# While running, warnings are prefixed with $self->{current_info} and
+# SIGUSR1 requests a checkpoint; QUIT/INT/TERM go to the callback built
+# by PublicInbox::SearchIdx::quit_cb (this sub then checks $sync->{quit}).
+sub eidx_sync { # main entry point
+ my ($self, $opt) = @_;
+
+ my $warn_cb = $SIG{__WARN__} || \&CORE::warn;
+ local $self->{current_info} = '';
+ local $SIG{__WARN__} = sub {
+ $warn_cb->($self->{current_info}, ': ', @_);
+ };
+ $self->idx_init($opt); # acquire lock via V2Writable::_idx_init
+ $self->{oidx}->rethread_prepare($opt);
+ my $sync = {
+ need_checkpoint => \(my $need_checkpoint = 0),
+ check_intvl => 10,
+ next_check => now() + 10,
+ -opt => $opt,
+ # DO NOT SET {reindex} here, it's incompatible with reused
+ # V2Writable code, reindex is totally different here
+ # compared to v1/v2 inboxes because we have multiple histories
+ self => $self,
+ -regen_fmt => "%u/?\n",
+ };
+ local $SIG{USR1} = sub { $need_checkpoint = 1 };
+ my $quit = PublicInbox::SearchIdx::quit_cb($sync);
+ local $SIG{QUIT} = $quit;
+ local $SIG{INT} = $quit;
+ local $SIG{TERM} = $quit;
+ # make sure every inbox has its numeric id in the over DB cached
+ for my $ibx (@{$self->{ibx_list}}) {
+ $ibx->{-ibx_id} //= $self->{oidx}->ibx_id($ibx->eidx_key);
+ }
+ # delete() so the reused V2Writable code never sees {reindex}
+ if (delete($opt->{reindex})) {
+ local $sync->{checkpoint_unlocks} = 1;
+ eidx_reindex($self, $sync);
+ }
+
+ # don't use $_ here, it'll get clobbered by reindex_checkpoint
+ if ($opt->{scan} // 1) {
+ for my $ibx (@{$self->{ibx_list}}) {
+ last if $sync->{quit};
+ sync_inbox($self, $sync, $ibx);
+ }
+ }
+ $self->{oidx}->rethread_done($opt) unless $sync->{quit};
+ eidxq_process($self, $sync) unless $sync->{quit};
+
+ # these two run even when quitting
+ eidxq_release($self);
+ done($self);
+ $sync; # for eidx_watch
+}
+
+# Record the latest indexed commit of the current unit under a per-inbox
+# (and, for v2, per-epoch) key in the eidx_meta table.
+# Does nothing without $sync->{unit} or without a latest commit, and
+# leaves the stored value alone when it is an ancestor of the latest
+# commit with zero commits in between.
+sub update_last_commit { # overrides V2Writable
+	my ($self, $sync, $stk) = @_;
+	my $unit = $sync->{unit} // return;
+	my $latest_cmt = $stk ? $stk->{latest_cmt} : ${$sync->{latest_cmt}};
+	return unless defined($latest_cmt);
+	my $ibx = $sync->{ibx} or die 'BUG: {ibx} missing';
+	my $ekey = $ibx->eidx_key;
+	my $uv = $ibx->uidvalidity;
+	my $epoch = $unit->{epoch};
+	my $ver = $ibx->version;
+	my $meta_key;
+	if ($ver == 2) {
+		defined($epoch) or die 'No {epoch} for v2 unit';
+		$meta_key = "lc-v2:$ekey//$uv;$epoch";
+	} elsif ($ver == 1) {
+		defined($epoch) and die 'Unexpected {epoch} for v1 unit';
+		$meta_key = "lc-v1:$ekey//$uv";
+	} else {
+		die "Unsupported inbox version: $ver";
+	}
+	my $last = $self->{oidx}->eidx_meta($meta_key);
+	if (defined($last) && is_ancestor($self->git, $last, $latest_cmt)) {
+		my $n = $unit->{git}->qx(qw(rev-list --count),
+					"$last..$latest_cmt");
+		chomp($n);
+		# nothing new since $last, keep the stored value
+		return if $n ne '' && $n == 0;
+	}
+	$self->{oidx}->eidx_meta($meta_key, $latest_cmt);
+}
+
+# with_umask callback: run the V2Writable initialisation, then attach a
+# PublicInbox::MiscIdx instance as $self->{midx}.
+sub _idx_init { # with_umask callback
+	my $self = shift;
+	my ($opt) = @_;
+	PublicInbox::V2Writable::_idx_init($self, $opt);
+	$self->{midx} = PublicInbox::MiscIdx->new($self);
+}
+
+# Prepare the external index for writing. Returns immediately when
+# $self->{idx_shards} is already set.
+#
+# Steps, in order:
+# 1. create the ALL.git bare repo if it does not exist yet
+# (with umask 077 and core.sharedRepository=0600 for $opt->{-private})
+# 2. rebuild ALL.git/objects/info/alternates from: the existing file,
+# $self->{topdir}/local/*.git epochs, and every inbox in {ibx_list};
+# entries are de-duplicated by st_dev + st_ino
+# 3. start shards/transactions (parallel_init, _idx_init, begin_lazy, ...)
+sub idx_init { # similar to V2Writable
+ my ($self, $opt) = @_;
+ return if $self->{idx_shards};
+
+ $self->git->cleanup;
+ my $mode = 0644;
+ my $ALL = $self->git->{git_dir}; # ALL.git
+ my $old = -d $ALL;
+ if ($opt->{-private}) { # LeiStore
+ $mode = 0600;
+ if (!$old) {
+ umask 077; # don't bother restoring
+ PublicInbox::Import::init_bare($ALL);
+ $self->git->qx(qw(config core.sharedRepository 0600));
+ }
+ } else {
+ PublicInbox::Import::init_bare($ALL) unless $old;
+ }
+ my $info_dir = "$ALL/objects/info";
+ my $alt = "$info_dir/alternates";
+ my (@old, @new, %seen); # seen: st_dev + st_ino
+ if (-e $alt) {
+ open(my $fh, '<', $alt) or die "open $alt: $!";
+ # an existing alternates file dictates the mode used when rewriting
+ $mode = (stat($fh))[2] & 07777;
+ while (my $line = <$fh>) {
+ chomp(my $d = $line);
+
+ # expand relative path (/local/ stuff)
+ substr($d, 0, 3) eq '../' and
+ $d = "$ALL/objects/$d";
+ if (my @st = stat($d)) {
+ next if $seen{"$st[0]\0$st[1]"}++;
+ } else {
+ warn "W: stat($d) failed (from $alt): $!\n";
+ # unreachable entries are only dropped when -idx_gc is set
+ next if $opt->{-idx_gc};
+ }
+ # keep the original (unexpanded, newline-terminated) line
+ push @old, $line;
+ }
+ }
+
+ # for LeiStore, and possibly some mirror-only state
+ if (opendir(my $dh, my $local = "$self->{topdir}/local")) {
+ # highest numbered epoch first
+ for my $n (sort { $b <=> $a } map { substr($_, 0, -4) + 0 }
+ grep(/\A[0-9]+\.git\z/, readdir($dh))) {
+ my $d = "$local/$n.git/objects"; # absolute path
+ if (my @st = stat($d)) {
+ next if $seen{"$st[0]\0$st[1]"}++;
+ # favor relative paths for rename-friendliness
+ push @new, "../../local/$n.git/objects\n";
+ } else {
+ warn "W: stat($d) failed: $!\n";
+ }
+ }
+ }
+ for my $ibx (@{$self->{ibx_list}}) {
+ my $line = $ibx->git->{git_dir} . "/objects\n";
+ chomp(my $d = $line);
+ if (my @st = stat($d)) {
+ next if $seen{"$st[0]\0$st[1]"}++;
+ } else {
+ warn "W: stat($d) failed (from $ibx->{inboxdir}): $!\n";
+ next if $opt->{-idx_gc};
+ }
+ push @new, $line;
+ }
+ # the file is only rewritten when something new was found
+ if (scalar @new) {
+ push @old, @new;
+ my $o = \@old;
+ PublicInbox::V2Writable::write_alternates($info_dir, $mode, $o);
+ }
+ $self->parallel_init($self->{indexlevel});
+ $self->with_umask(\&_idx_init, $self, $opt);
+ $self->{oidx}->begin_lazy;
+ $self->{oidx}->eidx_prep;
+ $self->git->batch_prepare;
+ $self->{midx}->begin_txn;
+}
+
+# Timer callback used in --watch mode: clear the armed timer slot,
+# process and release the eidxq, checkpoint, then requeue $self so that
+# event_step runs.
+sub _watch_commit { # PublicInbox::DS::add_timer callback
+ my ($self) = @_;
+ delete $self->{-commit_timer};
+ eidxq_process($self, $self->{-watch_sync});
+ eidxq_release($self);
+ # {-regen_fmt} is removed for the checkpoint only and restored when
+ # this sub returns
+ delete local $self->{-watch_sync}->{-regen_fmt};
+ reindex_checkpoint($self, $self->{-watch_sync});
+
+ # call event_step => done unless commit_timer is armed
+ PublicInbox::DS::requeue($self);
+}
+
+# InboxIdle callback: index whatever changed in $ibx and make sure a
+# commit timer is armed ('commit-interval' option, default 10).
+# Returns the (possibly pre-existing) timer stored in {-commit_timer}.
+sub on_inbox_unlock { # called by PublicInbox::InboxIdle
+	my ($self, $ibx) = @_;
+	my $opt = $self->{-watch_sync}->{-opt};
+	my $pr = $opt->{-progress};
+	my $ekey = $ibx->eidx_key;
+	local $0 = "sync $ekey";
+	if ($pr) {
+		$pr->("indexing $ekey\n");
+	}
+	$self->idx_init($opt);
+	sync_inbox($self, $self->{-watch_sync}, $ibx);
+
+	# an already-armed timer is left alone
+	return $self->{-commit_timer} if defined($self->{-commit_timer});
+	my $secs = $opt->{'commit-interval'} // 10;
+	$self->{-commit_timer} =
+		PublicInbox::DS::add_timer($secs, \&_watch_commit, $self);
+}
+
+# SIGHUP handler for --watch: forget the current inbox list/map and any
+# pending resync queue, load a fresh PublicInbox::Config, attach it and
+# refresh the idler. Only warns when $self->{cfg} is not set.
+sub eidx_reload { # -extindex --watch SIGHUP handler
+	my ($self, $idler) = @_;
+	return warn "reload not supported without --all\n" unless $self->{cfg};
+
+	my $pr = $self->{-watch_sync}->{-opt}->{-progress};
+	$pr->('reloading ...') if $pr;
+	delete $self->{-resync_queue};
+	# empty the containers in place instead of replacing them
+	@{$self->{ibx_list}} = ();
+	%{$self->{ibx_map}} = ();
+	delete $self->{-watch_sync}->{id2pos};
+	my $cfg = PublicInbox::Config->new;
+	attach_config($self, $cfg);
+	$idler->refresh($cfg);
+	$pr->(" done\n") if $pr;
+}
+
+# SIGUSR1 handler for --watch: queue a copy of the inbox list for
+# resync (unless a queue already exists) and wake up ->event_step.
+sub eidx_resync_start ($) { # -extindex --watch SIGUSR1 handler
+	my ($self) = @_;
+	unless (defined $self->{-resync_queue}) {
+		$self->{-resync_queue} = [ @{$self->{ibx_list}} ];
+	}
+	PublicInbox::DS::requeue($self); # trigger our ->event_step
+}
+
+# Requeue callback. With a resync queue: sync one inbox per call and
+# requeue, committing once the queue is drained. Without one: call
+# done() unless a commit timer is still armed.
+sub event_step { # PublicInbox::DS::requeue callback
+	my ($self) = @_;
+	my $queue = $self->{-resync_queue};
+	if (!$queue) {
+		$self->{-commit_timer} or done($self);
+	} elsif (my $ibx = shift(@$queue)) {
+		on_inbox_unlock($self, $ibx);
+		PublicInbox::DS::requeue($self);
+	} else { # queue drained
+		delete $self->{-resync_queue};
+		_watch_commit($self);
+	}
+}
+
+# Main loop of --watch mode: subscribe to inbox unlock events, run one
+# initial eidx_sync(), then enter the PublicInbox::DS event loop until
+# $sync->{quit} is set.
+#
+# During the initial scan HUP/USR1/TSTP/QUIT/INT/TERM only produce a
+# warning here (eidx_sync localizes some of them itself). Afterwards the
+# handlers in $sig take over, through PublicInbox::Sigfd when available
+# or plain %SIG plus a 1000ms loop timeout otherwise.
+sub eidx_watch { # public-inbox-extindex --watch main loop
+ my ($self, $opt) = @_;
+ local %SIG = %SIG;
+ for my $sig (qw(HUP USR1 TSTP QUIT INT TERM)) {
+ $SIG{$sig} = sub { warn "SIG$sig ignored while scanning\n" };
+ }
+ require PublicInbox::InboxIdle;
+ require PublicInbox::DS;
+ require PublicInbox::Syscall;
+ require PublicInbox::Sigfd;
+ my $idler = PublicInbox::InboxIdle->new($self->{cfg});
+ # without a config, inboxes are handed to the idler one by one
+ if (!$self->{cfg}) {
+ $idler->watch_inbox($_) for @{$self->{ibx_list}};
+ }
+ $_->subscribe_unlock(__PACKAGE__, $self) for @{$self->{ibx_list}};
+ my $pr = $opt->{-progress};
+ $pr->("performing initial scan ...\n") if $pr;
+ my $sync = eidx_sync($self, $opt); # initial sync
+ return if $sync->{quit};
+ my $oldset = PublicInbox::DS::block_signals();
+ local $self->{current_info} = '';
+ my $cb = $SIG{__WARN__} || \&CORE::warn;
+ local $SIG{__WARN__} = sub { $cb->($self->{current_info}, ': ', @_) };
+ my $sig = {
+ HUP => sub { eidx_reload($self, $idler) },
+ USR1 => sub { eidx_resync_start($self) },
+ TSTP => sub { kill('STOP', $$) },
+ };
+ my $quit = PublicInbox::SearchIdx::quit_cb($sync);
+ $sig->{QUIT} = $sig->{INT} = $sig->{TERM} = $quit;
+ my $sigfd = PublicInbox::Sigfd->new($sig,
+ $PublicInbox::Syscall::SFD_NONBLOCK);
+ # fall back to ordinary Perl signal handlers
+ %SIG = (%SIG, %$sig) if !$sigfd;
+ local $self->{-watch_sync} = $sync; # for ->on_inbox_unlock
+ if (!$sigfd) {
+ # wake up every second to accept signals if we don't
+ # have signalfd or IO::KQueue:
+ PublicInbox::DS::sig_setmask($oldset);
+ PublicInbox::DS->SetLoopTimeout(1000);
+ }
+ # the callback yields false once quit is requested
+ PublicInbox::DS->SetPostLoopCallback(sub { !$sync->{quit} });
+ $pr->("initial scan complete, entering event loop\n") if $pr;
+ PublicInbox::DS->EventLoop; # calls InboxIdle->event_step
+ done($self);
+}
+
+# Borrow implementations from V2Writable / InboxWritable by aliasing
+# their subs into this package; each glob is only assigned once, hence
+# the "once" warning category is disabled.
+no warnings 'once';
+*done = \&PublicInbox::V2Writable::done;
+*with_umask = \&PublicInbox::InboxWritable::with_umask;
+*parallel_init = \&PublicInbox::V2Writable::parallel_init;
+*nproc_shards = \&PublicInbox::V2Writable::nproc_shards;
+*sync_prepare = \&PublicInbox::V2Writable::sync_prepare;
+*index_todo = \&PublicInbox::V2Writable::index_todo;
+*count_shards = \&PublicInbox::V2Writable::count_shards;
+*atfork_child = \&PublicInbox::V2Writable::atfork_child;
+*idx_shard = \&PublicInbox::V2Writable::idx_shard;
+*reindex_checkpoint = \&PublicInbox::V2Writable::reindex_checkpoint;
+
+1;
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# for systems lacking Linux::Inotify2 or IO::KQueue, just emulates
-# Copyright (C) 2013-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2013-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Used for generating Atom feeds for web-accessible mailing list archives.
sub generate_thread_atom {
my ($ctx) = @_;
- my $msgs = $ctx->{msgs} = $ctx->{-inbox}->over->get_thread($ctx->{mid});
+ my $msgs = $ctx->{msgs} = $ctx->{ibx}->over->get_thread($ctx->{mid});
return _no_thread() unless @$msgs;
PublicInbox::WwwAtomStream->response($ctx, 200, \&generate_i);
}
# if the 'r' query parameter is given, it is a legacy permalink
# which we must continue supporting:
my $qp = $ctx->{qp};
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
if ($qp && !$qp->{r} && $ibx->over) {
return PublicInbox::View::index_topics($ctx);
}
sub recent_msgs {
my ($ctx) = @_;
- my $ibx = $ctx->{-inbox};
- my $max = $ibx->{feedmax};
+ my $ibx = $ctx->{ibx};
+ my $max = $ibx->{feedmax} // 25;
return PublicInbox::View::paginate_recent($ctx, $max) if $ibx->over;
# only for rare v1 inboxes which aren't indexed at all
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# base class for creating per-list or per-project filters
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Filter for importing some archives from gmane
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Dumb filter for blindly accepting everything
-# Copyright (C) 2017-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2017-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Filter for lists.ruby-lang.org trailers
my ($class, %opts) = @_;
my $altid = delete $opts{-altid};
my $self = $class->SUPER::new(%opts);
- my $ibx = $self->{-inbox};
+ my $ibx = $self->{ibx};
# altid = serial:ruby-core:file=msgmap.sqlite3
if (!$altid && $ibx && $ibx->{altid}) {
$altid ||= $ibx->{altid}->[0];
-# Copyright (C) 2017-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2017-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Filter for various [tags] in subjects
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Filter for vger.kernel.org list trailer
--- /dev/null
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# backend for a git-cat-file-workalike based on libgit2,
+# other libgit2 stuff may go here, too.
+package PublicInbox::Gcf2;
+use strict;
+use PublicInbox::Spawn qw(which popen_rd);
+use Fcntl qw(LOCK_EX);
+use IO::Handle; # autoflush
+my (%CFG, $c_src, $lockfh);
+# Compile-time setup for Inline::C: open the lock file inside
+# PERL_INLINE_DIRECTORY, ask pkg-config for libgit2 flags, slurp the C
+# source shipped next to this module, and take an exclusive lock.
+# Dies when PERL_INLINE_DIRECTORY is unset, when the C source cannot be
+# opened or read, or when pkg-config does not know libgit2.
+BEGIN {
+ # PublicInbox::Spawn will set PERL_INLINE_DIRECTORY
+ # to ~/.cache/public-inbox/inline-c if it exists
+ my $inline_dir = $ENV{PERL_INLINE_DIRECTORY} //
+ die 'PERL_INLINE_DIRECTORY not defined';
+ my $f = "$inline_dir/.public-inbox.lock";
+ open $lockfh, '>', $f or die "failed to open $f: $!\n";
+ my $pc = which($ENV{PKG_CONFIG} // 'pkg-config');
+ my ($dir) = (__FILE__ =~ m!\A(.+?)/[^/]+\z!);
+ # stderr of pkg-config is discarded
+ my $rdr = {};
+ open $rdr->{2}, '>', '/dev/null' or die "open /dev/null: $!";
+ for my $x (qw(libgit2)) {
+ my $l = popen_rd([$pc, '--libs', $x], undef, $rdr);
+ $l = do { local $/; <$l> };
+ # NOTE(review): relies on $? being set once the pipe handle is
+ # released by the assignment above -- confirm against popen_rd
+ next if $?;
+ my $c = popen_rd([$pc, '--cflags', $x], undef, $rdr);
+ $c = do { local $/; <$c> };
+ next if $?;
+
+ # note: we name C source files .h to prevent
+ # ExtUtils::MakeMaker from automatically trying to
+ # build them.
+ my $f = "$dir/gcf2_$x.h";
+ if (open(my $fh, '<', $f)) {
+ chomp($l, $c);
+ local $/;
+ defined($c_src = <$fh>) or die "read $f: $!\n";
+ $CFG{LIBS} = $l;
+ $CFG{CCFLAGSEX} = $c;
+ last;
+ } else {
+ die "E: $f: $!\n";
+ }
+ }
+ die "E: libgit2 not installed\n" unless $c_src;
+
+ # CentOS 7.x ships Inline 0.53, 0.64+ has built-in locking
+ # ($f is the lock file path again here, the loop's $f is out of scope)
+ flock($lockfh, LOCK_EX) or die "LOCK_EX failed on $f: $!\n";
+}
+
+# we use Capitalized and ALLCAPS for compatibility with old Inline::C
+use Inline C => Config => %CFG, BOOT => 'git_libgit2_init();';
+use Inline C => $c_src;
+undef $c_src;
+undef %CFG;
+undef $lockfh;
+
+# Register $objdir with $gcf2, preceded by every absolute path listed
+# in $objdir/info/alternates (a missing or unreadable alternates file
+# is silently skipped).
+sub add_alt ($$) {
+	my ($gcf2, $objdir) = @_;
+
+	# libgit2 (tested 0.27.7+dfsg.1-0.2 and 0.28.3+dfsg.1-1~bpo10+1
+	# in Debian) doesn't handle relative epochs properly when nested
+	# multiple levels. Add all the absolute paths to workaround it,
+	# since $EXTINDEX_DIR/ALL.git/objects/info/alternates uses absolute
+	# paths to reference $V2INBOX_DIR/all.git/objects and
+	# $V2INBOX_DIR/all.git/objects/info/alternates uses relative paths
+	# to refer to $V2INBOX_DIR/git/$EPOCH.git/objects
+	#
+	# See https://bugs.debian.org/975607
+	if (open(my $fh, '<', "$objdir/info/alternates")) {
+		my @lines = <$fh>; # read everything before registering
+		for my $alt (@lines) {
+			next unless $alt =~ m!^/!;
+			chomp($alt);
+			$gcf2->add_alternate($alt);
+		}
+	}
+	$gcf2->add_alternate($objdir);
+}
+
+# Usage: $^X -MPublicInbox::Gcf2 -e PublicInbox::Gcf2::loop
+# (see lib/PublicInbox/Gcf2Client.pm)
+#
+# Reads "$oid $git_dir" request lines from STDIN and has cat_oid()
+# answer on fd 1. An object which cannot be found is retried once with
+# a fresh handle before "$oid missing" is printed.
+sub loop () {
+	my $gcf2 = new();
+	my %seen;
+	STDERR->autoflush(1);
+	STDOUT->autoflush(1);
+
+	while (<STDIN>) {
+		chomp;
+		my ($oid, $git_dir) = split(/ /, $_, 2);
+		add_alt($gcf2, "$git_dir/objects") unless $seen{$git_dir}++;
+		next if $gcf2->cat_oid(1, $oid);
+
+		# retry once if missing. We only get unabbreviated OIDs
+		# from SQLite or Xapian DBs, here, so malicious clients
+		# can't trigger excessive retries:
+		warn "I: $$ $oid missing, retrying in $git_dir\n";
+
+		# start over with a fresh handle knowing only this repo
+		$gcf2 = new();
+		%seen = ($git_dir => 1);
+		add_alt($gcf2, "$git_dir/objects");
+
+		if (!$gcf2->cat_oid(1, $oid)) {
+			warn "W: $$ $oid missing after retry\n";
+			print "$oid missing\n"; # mimic git-cat-file
+		} else {
+			warn "I: $$ $oid found after retry\n";
+		}
+	}
+}
+
+1;
--- /dev/null
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# connects public-inbox processes to PublicInbox::Gcf2::loop()
+package PublicInbox::Gcf2Client;
+use strict;
+use parent qw(PublicInbox::DS);
+use PublicInbox::Git;
+use PublicInbox::Gcf2; # fails if Inline::C or libgit2-dev isn't available
+use PublicInbox::Spawn qw(spawn);
+use Socket qw(AF_UNIX SOCK_STREAM);
+use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
+# fields:
+# sock => socket to Gcf2::loop
+# The rest of these fields are compatible with what PublicInbox::Git
+# uses, for code-sharing
+# pid => PID of Gcf2::loop process
+# pid.owner => process which spawned {pid}
+# in => same as {sock}, for compatibility with PublicInbox::Git
+# inflight => array (see PublicInbox::Git)
+# cat_rbuf => scalarref, may be non-existent or empty
+# Spawn a PublicInbox::Gcf2::loop child process and return a client
+# object watching our end of a socketpair (EPOLLIN|EPOLLET).
+#
+# Called as a plain function, not a method.
+# $rdr (optional): hashref of extra redirects handed to spawn(); keys 0
+# and 1 are always pointed at the child's end of the socketpair. The
+# caller's hash is left untouched.
+# Dies if socketpair() fails.
+sub new {
+	my ($rdr) = @_;
+	my $self = bless {}, __PACKAGE__;
+	# ensure the child process has the same @INC we do:
+	my $env = { PERL5LIB => join(':', @INC) };
+	my ($s1, $s2);
+	socketpair($s1, $s2, AF_UNIX, SOCK_STREAM, 0) or die "socketpair $!";
+	# work on a shallow copy: storing $s2 in the caller's hash would
+	# modify the caller's argument and keep the child's end of the
+	# socket referenced in this process for as long as that hash lives
+	$rdr = { %{$rdr // {}} };
+	$rdr->{0} = $rdr->{1} = $s2;
+	my $cmd = [$^X, qw[-MPublicInbox::Gcf2 -e PublicInbox::Gcf2::loop]];
+	$self->{'pid.owner'} = $$;
+	$self->{pid} = spawn($cmd, $env, $rdr);
+	$s1->blocking(0);
+	$self->{inflight} = [];
+	$self->{in} = $s1;
+	$self->SUPER::new($s1, EPOLLIN|EPOLLET);
+}
+
+# Close our socket first, then defer to PublicInbox::Git::fail with the
+# remaining arguments.
+sub fail {
+	my ($self, @why) = @_;
+	$self->close; # PublicInbox::DS::close
+	PublicInbox::Git::fail($self, @why);
+}
+
+# Queue one request: write $req to the child and remember ($req, $cb,
+# $arg) in {inflight} for cat_async_step. Closes (and returns) when
+# {inflight} is gone; calls ->fail when the write fails and {sock} is
+# no longer set.
+sub gcf2_async ($$$;$) {
+	my ($self, $req, $cb, $arg) = @_;
+	my $inflight = $self->{inflight};
+	return $self->close unless $inflight;
+
+	# {wbuf} is rare, I hope:
+	$self->{wbuf} and cat_async_step($self, $inflight);
+
+	unless ($self->write($req) || $self->{sock}) {
+		$self->fail("gcf2c write: $!");
+	}
+	push @$inflight, $req, $cb, $arg;
+}
+
+# ensure PublicInbox::Git::cat_async_step never calls cat_async_retry
+# (the empty body never yields a true value)
+sub alternates_changed {}
+
+# DS->EventLoop will call this
+# Flushes pending writes, then handles one response via cat_async_step
+# when requests are in flight. Requeues itself while more requests or
+# buffered data ({cat_rbuf}) remain.
+sub event_step {
+ my ($self) = @_;
+ $self->flush_write;
+ # NOTE(review): execution continues past this close -- confirm that
+ # {inflight} is emptied or removed by then
+ $self->close if !$self->{in} || !$self->{sock}; # process died
+ my $inflight = $self->{inflight};
+ if ($inflight && @$inflight) {
+ cat_async_step($self, $inflight);
+ return $self->close unless $self->{in}; # process died
+
+ # ok, more to do, requeue for fairness
+ $self->requeue if @$inflight || exists($self->{cat_rbuf});
+ }
+}
+
+# Drop {sock} (needed when destroyed outside of the EventLoop), then
+# let PublicInbox::Git::DESTROY do the rest.
+sub DESTROY {
+	my $self = shift;
+	delete $self->{sock}; # if outside EventLoop
+	PublicInbox::Git::DESTROY($self);
+}
+
+no warnings 'once';
+
+# share PublicInbox::Git's cat_async_step instead of duplicating it;
+# it is called as a plain function within this package
+*cat_async_step = \&PublicInbox::Git::cat_async_step;
+
+1;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Wrap a pipe or file for PSGI streaming response bodies and calls the
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
# License: GPLv2 or later <https://www.gnu.org/licenses/gpl-2.0.txt>
#
# Used to read files from a git repository without excessive forking.
use parent qw(Exporter);
use POSIX ();
use IO::Handle; # ->autoflush
-use Errno qw(EINTR);
+use Errno qw(EINTR EAGAIN);
use File::Glob qw(bsd_glob GLOB_NOSORT);
+use File::Spec ();
use Time::HiRes qw(stat);
use PublicInbox::Spawn qw(popen_rd);
use PublicInbox::Tmpfile;
+use IO::Poll qw(POLLIN);
use Carp qw(croak);
+use Digest::SHA ();
+use PublicInbox::DS qw(dwaitpid);
our @EXPORT_OK = qw(git_unquote git_quote);
our $PIPE_BUFSIZ = 65536; # Linux default
our $in_cleanup;
+our $RDTIMEO = 60_000; # milliseconds
use constant MAX_INFLIGHT =>
(($^O eq 'linux' ? 4096 : POSIX::_POSIX_PIPE_BUF()) * 3)
sub last_check_err {
my ($self) = @_;
my $fh = $self->{err_c} or return;
- sysseek($fh, 0, 0) or fail($self, "sysseek failed: $!");
+ sysseek($fh, 0, 0) or $self->fail("sysseek failed: $!");
defined(sysread($fh, my $buf, -s $fh)) or
- fail($self, "sysread failed: $!");
+ $self->fail("sysread failed: $!");
$buf;
}
if ($self->{$pid}) {
if (defined $err) { # "err_c"
my $fh = $self->{$err};
- sysseek($fh, 0, 0) or fail($self, "sysseek failed: $!");
- truncate($fh, 0) or fail($self, "truncate failed: $!");
+ sysseek($fh, 0, 0) or $self->fail("sysseek failed: $!");
+ truncate($fh, 0) or $self->fail("truncate failed: $!");
}
return;
}
my ($out_r, $out_w);
- pipe($out_r, $out_w) or fail($self, "pipe failed: $!");
+ pipe($out_r, $out_w) or $self->fail("pipe failed: $!");
my @cmd = (qw(git), "--git-dir=$self->{git_dir}",
qw(-c core.abbrev=40 cat-file), $batch);
my $redir = { 0 => $out_r };
if ($err) {
my $id = "git.$self->{git_dir}$batch.err";
- my $fh = tmpfile($id) or fail($self, "tmpfile($id): $!");
+ my $fh = tmpfile($id) or $self->fail("tmpfile($id): $!");
$self->{$err} = $fh;
$redir->{2} = $fh;
}
my ($in_r, $p) = popen_rd(\@cmd, undef, $redir);
$self->{$pid} = $p;
+ $self->{"$pid.owner"} = $$;
$out_w->autoflush(1);
if ($^O eq 'linux') { # 1031: F_SETPIPE_SZ
fcntl($out_w, 1031, 4096);
$self->{$in} = $in_r;
}
+# poll the given filehandle for readability, using $RDTIMEO
+# (milliseconds) as the timeout
+sub poll_in ($) { IO::Poll::_poll($RDTIMEO, fileno($_[0]), my $ev = POLLIN) }
+
sub my_read ($$$) {
my ($fh, $rbuf, $len) = @_;
my $left = $len - length($$rbuf);
$r = sysread($fh, $$rbuf, $PIPE_BUFSIZ, length($$rbuf));
if ($r) {
$left -= $r;
+ } elsif (defined($r)) { # EOF
+ return 0;
} else {
- next if (!defined($r) && $! == EINTR);
- return $r;
+ next if ($! == EAGAIN and poll_in($fh));
+ next if $! == EINTR; # may be set by sysread or poll_in
+ return; # unrecoverable error
}
}
\substr($$rbuf, 0, $len, '');
if ((my $n = index($$rbuf, "\n")) >= 0) {
return substr($$rbuf, 0, $n + 1, '');
}
- my $r = sysread($fh, $$rbuf, $PIPE_BUFSIZ, length($$rbuf));
- next if $r || (!defined($r) && $! == EINTR);
- return defined($r) ? '' : undef; # EOF or error
+ my $r = sysread($fh, $$rbuf, $PIPE_BUFSIZ, length($$rbuf))
+ and next;
+
+ # return whatever's left on EOF
+ return substr($$rbuf, 0, length($$rbuf)+1, '') if defined($r);
+
+ next if ($! == EAGAIN and poll_in($fh));
+ next if $! == EINTR; # may be set by sysread or poll_in
+ return; # unrecoverable error
}
}
for (my $i = 0; $i < @$inflight; $i += 3) {
$buf .= "$inflight->[$i]\n";
}
- print { $self->{out} } $buf or fail($self, "write error: $!");
+ print { $self->{out} } $buf or $self->fail("write error: $!");
unshift(@$inflight, \$req, $cb, $arg); # \$ref to indicate retried
cat_async_step($self, $inflight); # take one step
my $rbuf = delete($self->{cat_rbuf}) // \(my $new = '');
my ($bref, $oid, $type, $size);
my $head = my_readline($self->{in}, $rbuf);
+ # ->fail may be called via Gcf2Client.pm
if ($head =~ /^([0-9a-f]{40,}) (\S+) ([0-9]+)$/) {
($oid, $type, $size) = ($1, $2, $3 + 0);
$bref = my_read($self->{in}, $rbuf, $size + 1) or
- fail($self, defined($bref) ? 'read EOF' : "read: $!");
- chop($$bref) eq "\n" or fail($self, 'LF missing after blob');
- } elsif ($head =~ / missing$/) {
+ $self->fail(defined($bref) ? 'read EOF' : "read: $!");
+ chop($$bref) eq "\n" or $self->fail('LF missing after blob');
+ } elsif ($head =~ s/ missing\n//s) {
+ $oid = $head;
# ref($req) indicates it's already been retried
- if (!ref($req) && !$in_cleanup && alternates_changed($self)) {
+ # -gcf2 retries internally, so it never hits this path:
+ if (!ref($req) && !$in_cleanup && $self->alternates_changed) {
return cat_async_retry($self, $inflight,
$req, $cb, $arg);
}
$type = 'missing';
- $oid = ref($req) ? $$req : $req;
+ $oid = ref($req) ? $$req : $req if $oid eq '';
} else {
- fail($self, "Unexpected result from async git cat-file: $head");
+ my $err = $! ? " ($!)" : '';
+ $self->fail("bad result from async cat-file: $head$err");
}
- eval { $cb->($bref, $oid, $type, $size, $arg) };
$self->{cat_rbuf} = $rbuf if $$rbuf ne '';
+ eval { $cb->($bref, $oid, $type, $size, $arg) };
warn "E: $oid: $@\n" if $@;
}
sub cat_async_wait ($) {
my ($self) = @_;
- my $inflight = delete $self->{inflight} or return;
+ my $inflight = $self->{inflight} or return;
while (scalar(@$inflight)) {
cat_async_step($self, $inflight);
}
my ($self, $inflight_c) = @_;
die 'BUG: inflight empty or odd' if scalar(@$inflight_c) < 3;
my ($req, $cb, $arg) = splice(@$inflight_c, 0, 3);
- my $rbuf = delete($self->{rbuf_c}) // \(my $new = '');
+ my $rbuf = delete($self->{chk_rbuf}) // \(my $new = '');
chomp(my $line = my_readline($self->{in_c}, $rbuf));
my ($hex, $type, $size) = split(/ /, $line);
# https://public-inbox.org/git/20190118033845.s2vlrb3wd3m2jfzu@dcvr/T/
if ($hex eq 'dangling' || $hex eq 'notdir' || $hex eq 'loop') {
my $ret = my_read($self->{in_c}, $rbuf, $type + 1);
- fail($self, defined($ret) ? 'read EOF' : "read: $!") if !$ret;
+ $self->fail(defined($ret) ? 'read EOF' : "read: $!") if !$ret;
}
+ $self->{chk_rbuf} = $rbuf if $$rbuf ne '';
eval { $cb->($hex, $type, $size, $arg, $self) };
warn "E: check($req) $@\n" if $@;
- $self->{rbuf_c} = $rbuf if $$rbuf ne '';
}
sub check_async_wait ($) {
my ($self) = @_;
- my $inflight_c = delete $self->{inflight_c} or return;
+ my $inflight_c = $self->{inflight_c} or return;
while (scalar(@$inflight_c)) {
check_async_step($self, $inflight_c);
}
sub check_async ($$$$) {
my ($self, $oid, $cb, $arg) = @_;
my $inflight_c = $self->{inflight_c} // check_async_begin($self);
- if (scalar(@$inflight_c) >= MAX_INFLIGHT) {
+ while (scalar(@$inflight_c) >= MAX_INFLIGHT) {
check_async_step($self, $inflight_c);
}
- print { $self->{out_c} } $oid, "\n" or fail($self, "write error: $!");
+ print { $self->{out_c} } $oid, "\n" or $self->fail("write error: $!");
push(@$inflight_c, $oid, $cb, $arg);
}
sub _destroy {
my ($self, $rbuf, $in, $out, $pid, $err) = @_;
- my $p = delete $self->{$pid} or return;
delete @$self{($rbuf, $in, $out)};
delete $self->{$err} if $err; # `err_c'
- # PublicInbox::DS may not be loaded
- eval { PublicInbox::DS::dwaitpid($p, undef, undef) };
- waitpid($p, 0) if $@; # wait synchronously if not in event loop
+ # GitAsyncCat::event_step may delete {pid}
+ my $p = delete $self->{$pid} or return;
+ dwaitpid($p) if $$ == $self->{"$pid.owner"};
}
sub cat_async_abort ($) {
my ($self) = @_;
- my $inflight = delete $self->{inflight} or die 'BUG: not in async';
+ if (my $inflight = $self->{inflight}) {
+ while (@$inflight) {
+ my ($req, $cb, $arg) = splice(@$inflight, 0, 3);
+ $req =~ s/ .*//; # drop git_dir for Gcf2Client
+ eval { $cb->(undef, $req, undef, undef, $arg) };
+ warn "E: $req: $@ (in abort)\n" if $@;
+ }
+ delete $self->{cat_rbuf};
+ delete $self->{inflight};
+ }
cleanup($self);
}
-sub fail {
+sub fail { # may be augmented in subclasses
my ($self, $msg) = @_;
- $self->{inflight} ? cat_async_abort($self) : cleanup($self);
- croak("git $self->{git_dir}: $msg");
+ cat_async_abort($self);
+ croak(ref($self) . ' ' . ($self->{git_dir} // '') . ": $msg");
}
+# $git->popen(qw(show f00)); # or
+# $git->popen(qw(show f00), { GIT_CONFIG => ... }, { 2 => ... });
sub popen {
- my ($self, @cmd) = @_;
- @cmd = ('git', "--git-dir=$self->{git_dir}", @cmd);
- popen_rd(\@cmd);
+ my ($self, $cmd) = splice(@_, 0, 2);
+ $cmd = [ 'git', "--git-dir=$self->{git_dir}",
+ ref($cmd) ? @$cmd : ($cmd, grep { defined && !ref } @_) ];
+ popen_rd($cmd, grep { !defined || ref } @_); # env and opt
}
+# same args as popen above
sub qx {
- my ($self, @cmd) = @_;
- my $fh = $self->popen(@cmd);
- local $/ = "\n";
- return <$fh> if wantarray;
- local $/;
- <$fh>
+ my $self = shift;
+ my $fh = $self->popen(@_);
+ if (wantarray) {
+ local $/ = "\n";
+ my @ret = <$fh>;
+ close $fh; # caller should check $?
+ @ret;
+ } else {
+ local $/;
+ my $ret = <$fh>;
+ close $fh; # caller should check $?
+ $ret;
+ }
+}
+
+# check_async and cat_async may trigger the other, so ensure they're
+# both completely done by using this: keep draining both queues until
+# neither {inflight_c} nor {inflight} has entries left.
+sub async_wait_all ($) {
+	my ($self) = @_;
+	for (;;) {
+		my $chk = $self->{inflight_c} // [];
+		my $cat = $self->{inflight} // [];
+		last unless @$chk || @$cat;
+		$self->check_async_wait;
+		$self->cat_async_wait;
+	}
+}
# returns true if there are pending "git cat-file" processes
my ($self) = @_;
local $in_cleanup = 1;
delete $self->{async_cat};
- check_async_wait($self);
- cat_async_wait($self);
+ async_wait_all($self);
+ delete $self->{inflight};
+ delete $self->{inflight_c};
_destroy($self, qw(cat_rbuf in out pid));
_destroy($self, qw(chk_rbuf in_c out_c pid_c err_c));
!!($self->{pid} || $self->{pid_c});
}
+
# assuming a well-maintained repo, this should be a somewhat
# accurate estimation of its size
# TODO: show this in the WWW UI as a hint to potential cloners
sub cat_async_begin {
my ($self) = @_;
- cleanup($self) if alternates_changed($self);
- batch_prepare($self);
+ cleanup($self) if $self->alternates_changed;
+ $self->batch_prepare;
die 'BUG: already in async' if $self->{inflight};
$self->{inflight} = [];
}
sub cat_async ($$$;$) {
my ($self, $oid, $cb, $arg) = @_;
my $inflight = $self->{inflight} // cat_async_begin($self);
- if (scalar(@$inflight) >= MAX_INFLIGHT) {
+ while (scalar(@$inflight) >= MAX_INFLIGHT) {
cat_async_step($self, $inflight);
}
-
- print { $self->{out} } $oid, "\n" or fail($self, "write error: $!");
+ print { $self->{out} } $oid, "\n" or $self->fail("write error: $!");
push(@$inflight, $oid, $cb, $arg);
}
-# this is safe to call inside $cb, but not guaranteed to enqueue
-# returns true if successful, undef if not.
sub async_prefetch {
my ($self, $oid, $cb, $arg) = @_;
- if (defined($self->{async_cat}) && (my $inflight = $self->{inflight})) {
+ if (my $inflight = $self->{inflight}) {
# we could use MAX_INFLIGHT here w/o the halving,
# but lets not allow one client to monopolize a git process
if (scalar(@$inflight) < int(MAX_INFLIGHT/2)) {
print { $self->{out} } $oid, "\n" or
- fail($self, "write error: $!");
+ $self->fail("write error: $!");
return push(@$inflight, $oid, $cb, $arg);
}
}
$modified || time;
}
+# for grokmirror, which doesn't read gitweb.description
+# templates/hooks--update.sample and git-multimail in git.git
+# only match "Unnamed repository", not the full contents of
+# templates/this--description in git.git
+#
+# Build one manifest entry (a hashref) for this repository:
+# fingerprint: SHA-1 hex digest of the "git show-ref" output
+# modified: result of modified($self)
+# owner: gitweb.owner config value, undef when empty
+# description: first line of $GIT_DIR/description; falls back to
+# "Unnamed repository", or "$default_desc [epoch $epoch]"
+# when $epoch is defined and the description is unnamed
+# reference: directory owning the sole alternate object dir, undef
+# unless exactly one alternate is configured
+# Returns nothing when closing the show-ref pipe fails.
+sub manifest_entry {
+	my ($self, $epoch, $default_desc) = @_;
+	my $fh = $self->popen('show-ref');
+	my $dig = Digest::SHA->new(1);
+	while (read($fh, my $buf, 65536)) {
+		$dig->add($buf);
+	}
+	close $fh or return; # empty, uninitialized git repo
+	undef $fh; # for open, below
+	my $git_dir = $self->{git_dir};
+	my $ent = {
+		fingerprint => $dig->hexdigest,
+		reference => undef,
+		modified => modified($self),
+	};
+	chomp(my $owner = $self->qx('config', 'gitweb.owner'));
+	utf8::decode($owner);
+	$ent->{owner} = $owner eq '' ? undef : $owner;
+	my $desc = '';
+	if (open($fh, '<', "$git_dir/description")) {
+		local $/ = "\n";
+		# readline yields undef for an empty file; fall back to ''
+		# so chomp/eq below don't warn about uninitialized values
+		$desc = <$fh> // '';
+		chomp($desc);
+		utf8::decode($desc);
+	}
+	$desc = 'Unnamed repository' if $desc eq '';
+	if (defined $epoch && $desc =~ /\AUnnamed repository/) {
+		$desc = "$default_desc [epoch $epoch]";
+	}
+	$ent->{description} = $desc;
+	if (open($fh, '<', "$git_dir/objects/info/alternates")) {
+		# n.b.: GitPython doesn't seem to handle comments or C-quoted
+		# strings like native git does; and we don't for now, either.
+		local $/ = "\n";
+		chomp(my @alt = <$fh>);
+
+		# grokmirror only supports 1 alternate for "reference"
+		if (scalar(@alt) == 1) {
+			my $objdir = "$git_dir/objects";
+			my $ref = File::Spec->rel2abs($alt[0], $objdir);
+			# strip the last path component (the alternate's
+			# object directory), keeping its parent
+			$ref =~ s!/[^/]+/?\z!!;
+			$ent->{reference} = $ref;
+		}
+	}
+	$ent;
+}
+
1;
__END__
=pod
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# internal class used by PublicInbox::Git + PublicInbox::DS
# This parses the output pipe of "git cat-file --batch"
-#
-# Note: this does NOT set the non-blocking flag, we expect `git cat-file'
-# to be a local process, and git won't start writing a blob until it's
-# fully read. So minimize context switching and read as much as possible
-# and avoid holding a buffer in our heap any longer than it has to live.
package PublicInbox::GitAsyncCat;
use strict;
use parent qw(PublicInbox::DS Exporter);
+use POSIX qw(WNOHANG);
use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
-our @EXPORT = qw(git_async_cat);
+our @EXPORT = qw(git_async_cat git_async_prefetch);
+use PublicInbox::Git ();
+
+our $GCF2C; # singleton PublicInbox::Gcf2Client
-sub _add {
- my ($class, $git) = @_;
- $git->batch_prepare;
- my $self = bless { git => $git }, $class;
- $self->SUPER::new($git->{in}, EPOLLIN|EPOLLET);
- \undef; # this is a true ref()
+sub close {
+ my ($self) = @_;
+ if (my $git = delete $self->{git}) {
+ $git->cat_async_abort;
+ }
+ $self->SUPER::close; # PublicInbox::DS::close
}
sub event_step {
my ($self) = @_;
- my $git = $self->{git};
+ my $git = $self->{git} or return;
return $self->close if ($git->{in} // 0) != ($self->{sock} // 1);
my $inflight = $git->{inflight};
if ($inflight && @$inflight) {
$git->cat_async_step($inflight);
- $self->requeue if @$inflight || exists $git->{cat_rbuf};
+
+ # child death?
+ if (($git->{in} // 0) != ($self->{sock} // 1)) {
+ $self->close;
+ } elsif (@$inflight || exists $git->{cat_rbuf}) {
+ # ok, more to do, requeue for fairness
+ $self->requeue;
+ }
+ } elsif ((my $pid = waitpid($git->{pid}, WNOHANG)) > 0) {
+ # May happen if the child process is killed by a BOFH
+ # (or segfaults)
+ delete $git->{pid};
+ warn "E: git $pid exited with \$?=$?\n";
+ $self->close;
}
}
sub git_async_cat ($$$$) {
my ($git, $oid, $cb, $arg) = @_;
- $git->cat_async($oid, $cb, $arg);
- $git->{async_cat} //= _add(__PACKAGE__, $git);
+ if ($GCF2C //= eval {
+ require PublicInbox::Gcf2Client;
+ PublicInbox::Gcf2Client::new();
+ } // 0) { # 0: do not retry if libgit2 or Inline::C are missing
+ $GCF2C->gcf2_async(\"$oid $git->{git_dir}\n", $cb, $arg);
+ \undef;
+ } else { # read-only end of git-cat-file pipe
+ $git->cat_async($oid, $cb, $arg);
+ $git->{async_cat} //= do {
+ my $self = bless { git => $git }, __PACKAGE__;
+ $git->{in}->blocking(0);
+ $self->SUPER::new($git->{in}, EPOLLIN|EPOLLET);
+ \undef; # this is a true ref()
+ };
+ }
+}
+
+# Opportunistic variant of git_async_cat: safe to call from inside $cb,
+# but the request is only queued when there is room for it.
+# Returns true if the request was queued, undef if not.
+sub git_async_prefetch {
+	my ($git, $oid, $cb, $arg) = @_;
+	if ($GCF2C) {
+		# presumably {wbuf} means writes are already backed up
+		return undef if $GCF2C->{wbuf};
+		my $req = "$oid $git->{git_dir}\n";
+		return $GCF2C->gcf2_async(\$req, $cb, $arg); # true
+	}
+	my $inflight = $git->{async_cat} ? $git->{inflight} : undef;
+	return undef unless $inflight;
+
+	# we could use MAX_INFLIGHT here w/o the halving,
+	# but lets not allow one client to monopolize a git process
+	my $limit = int(PublicInbox::Git::MAX_INFLIGHT/2);
+	return undef if @$inflight >= $limit;
+	print { $git->{out} } $oid, "\n" or
+		$git->fail("write error: $!");
+	push(@$inflight, $oid, $cb, $arg);
}
1;
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
package PublicInbox::GitCredential;
use strict;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# when no endpoints match, fallback to this and serve a static file
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# In public-inbox <=1.5.0, public-inbox-httpd favored "getline"
if (!defined($oid)) {
# it's possible to have TOCTOU if an admin runs
# public-inbox-(edit|purge), just move onto the next message
- warn "E: $smsg->{blob} missing in $self->{-inbox}->{inboxdir}\n";
+ warn "E: $smsg->{blob} missing in $self->{ibx}->{inboxdir}\n";
return $http->next_step($self->can('async_next'));
}
$smsg->{blob} eq $oid or bail($self, "BUG: $smsg->{blob} != $oid");
sub smsg_blob {
my ($self, $smsg) = @_;
- git_async_cat($self->{-inbox}->git, $smsg->{blob},
+ git_async_cat($self->{ibx}->git, $smsg->{blob},
\&async_blob_cb, $self);
}
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Generic PSGI server for convenience. It aims to provide
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# wraps a listen socket for HTTP and links it to the PSGI app in
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# XXX This is a totally unstable API for public-inbox internal use only
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# I have no idea how stable or safe this is for handling untrusted
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# represents a header value in various forms. Used for HTML generation
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Each instance of this represents an IMAP client connected to
}
my $pre;
if (!$self->{wbuf} && (my $nxt = $msgs->[0])) {
- $pre = $ibx->git->async_prefetch($nxt->{blob},
+ $pre = git_async_prefetch($ibx->git, $nxt->{blob},
\&fetch_blob_cb, $fetch_arg);
}
fetch_run_ops($self, $smsg, $bref, $ops, $partial);
1; # more
}
-sub parse_query ($$) {
+sub parse_imap_query ($$) {
my ($self, $query) = @_;
my $q = PublicInbox::IMAPsearchqp::parse($self, $query);
if (ref($q)) {
$q;
}
-sub refill_xap ($$$$) {
- my ($self, $uids, $range_info, $q) = @_;
- my ($beg, $end) = @$range_info;
- my $srch = $self->{ibx}->search;
- my $opt = { mset => 2, limit => 1000 };
- my $mset = $srch->mset("$q uid:$beg..$end", $opt);
- @$uids = @{$srch->mset_to_artnums($mset)};
- if (@$uids) {
- $range_info->[0] = $uids->[-1] + 1; # update $beg
- return; # possibly more
- }
- 0; # all done
-}
-
-sub search_xap_range { # long_response
- my ($self, $tag, $q, $range_info, $want_msn) = @_;
- my $uids = [];
- if (defined(my $err = refill_xap($self, $uids, $range_info, $q))) {
- $err ||= 'OK Search done';
- $self->write("\r\n$tag $err\r\n");
- return;
- }
- msn_convert($self, $uids) if $want_msn;
- $self->msg_more(join(' ', '', @$uids));
- 1; # more
-}
-
sub search_common {
my ($self, $tag, $query, $want_msn) = @_;
my $ibx = $self->{ibx} or return "$tag BAD No mailbox selected\r\n";
- my $q = parse_query($self, $query);
+ my $q = parse_imap_query($self, $query);
return "$tag $q\r\n" if !ref($q);
my ($sql, $range_info) = delete @$q{qw(sql range_info)};
if (!scalar(keys %$q)) { # overview.sqlite3
long_response($self, \&search_uid_range,
$tag, $sql, $range_info, $want_msn);
} elsif ($q = $q->{xap}) {
- $self->{ibx}->search or
+ my $srch = $self->{ibx}->isrch or
return "$tag BAD search not available for mailbox\r\n";
- $self->msg_more('* SEARCH');
- long_response($self, \&search_xap_range,
- $tag, $q, $range_info, $want_msn);
+ my $opt = {
+ relevance => -1,
+ limit => UID_SLICE,
+ uid_range => $range_info
+ };
+ my $mset = $srch->mset($q, $opt);
+ my $uids = $srch->mset_to_artnums($mset, $opt);
+ msn_convert($self, $uids) if scalar(@$uids) && $want_msn;
+ "* SEARCH @$uids\r\n$tag OK Search done\r\n";
} else {
"$tag BAD Error\r\n";
}
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# represents an IMAPD (currently a singleton),
err => \*STDERR,
out => \*STDOUT,
# accept_tls => { SSL_server => 1, ..., SSL_reuse_ctx => ... }
- # pi_config => PublicInbox::Config
+ # pi_cfg => PublicInbox::Config
# idler => PublicInbox::InboxIdle
}, $class;
}
-sub imapd_refresh_ibx { # pi_config->each_inbox cb
+sub imapd_refresh_ibx { # pi_cfg->each_inbox cb
my ($ibx, $imapd) = @_;
my $ngname = $ibx->{newsgroup} or return;
- if (ref $ngname) {
- warn 'multiple newsgroups not supported: '.
- join(', ', @$ngname). "\n";
- return;
- } elsif ($ngname =~ m![^a-z0-9/_\.\-\~\@\+\=:]! ||
- $ngname =~ /\.[0-9]+\z/) {
+
+ # We require lower-case since IMAP mailbox names are
+ # case-insensitive (but -nntpd matches INN in being
+ # case-sensitive)
+ if ($ngname =~ m![^a-z0-9/_\.\-\~\@\+\=:]! ||
+ # don't confuse with 50K slices
+ $ngname =~ /\.[0-9]+\z/) {
warn "mailbox name invalid: newsgroup=`$ngname'\n";
return;
}
$ibx->over or return;
$ibx->{over} = undef;
- my $mm = $ibx->mm or return;
- $ibx->{mm} = undef;
# RFC 3501 2.3.1.1 - "A good UIDVALIDITY value to use in
# this case is a 32-bit representation of the creation
# date/time of the mailbox"
- defined($ibx->{uidvalidity} = $mm->created_at) or return;
- PublicInbox::IMAP::ensure_slices_exist($imapd, $ibx, $mm->max // 0);
+ eval { $ibx->uidvalidity };
+ my $mm = delete($ibx->{mm}) or return;
+ defined($ibx->{uidvalidity}) or return;
+ PublicInbox::IMAP::ensure_slices_exist($imapd, $ibx, $mm->max);
# preload to avoid fragmentation:
$ibx->description;
}
sub imapd_refresh_finalize {
- my ($imapd, $pi_config) = @_;
+ my ($imapd, $pi_cfg) = @_;
my $mailboxes;
if (my $next = delete $imapd->{imapd_next}) {
$imapd->{mailboxes} = delete $next->{mailboxes};
qq[* LIST (\\Has${no}Children) "." $u\r\n]
} keys %$mailboxes
];
- $imapd->{pi_config} = $pi_config;
+ $imapd->{pi_cfg} = $pi_cfg;
if (my $idler = $imapd->{idler}) {
- $idler->refresh($pi_config);
+ $idler->refresh($pi_cfg);
}
}
-sub imapd_refresh_step { # pi_config->iterate_start cb
- my ($pi_config, $section, $imapd) = @_;
+sub imapd_refresh_step { # pi_cfg->iterate_start cb
+ my ($pi_cfg, $section, $imapd) = @_;
if (defined($section)) {
return if $section !~ m!\Apublicinbox\.([^/]+)\z!;
- my $ibx = $pi_config->lookup_name($1) or return;
+ my $ibx = $pi_cfg->lookup_name($1) or return;
imapd_refresh_ibx($ibx, $imapd->{imapd_next});
} else { # undef == "EOF"
- imapd_refresh_finalize($imapd, $pi_config);
+ imapd_refresh_finalize($imapd, $pi_cfg);
}
}
sub refresh_groups {
my ($self, $sig) = @_;
- my $pi_config = PublicInbox::Config->new;
+ my $pi_cfg = PublicInbox::Config->new;
if ($sig) { # SIGHUP is handled through the event loop
$self->{imapd_next} = { dummies => {}, mailboxes => {} };
- my $iter = PublicInbox::ConfigIter->new($pi_config,
+ my $iter = PublicInbox::ConfigIter->new($pi_cfg,
\&imapd_refresh_step, $self);
$iter->event_step;
} else { # initial start is synchronous
$self->{dummies} = {};
- $pi_config->each_inbox(\&imapd_refresh_ibx, $self);
- imapd_refresh_finalize($self, $pi_config);
+ $pi_cfg->each_inbox(\&imapd_refresh_ibx, $self);
+ imapd_refresh_finalize($self, $pi_cfg);
}
}
sub idler_start {
- $_[0]->{idler} //= PublicInbox::InboxIdle->new($_[0]->{pi_config});
+ $_[0]->{idler} //= PublicInbox::InboxIdle->new($_[0]->{pi_cfg});
}
1;
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
package PublicInbox::IMAPTracker;
use strict;
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# TODO: reduce duplication from PublicInbox::NNTPdeflate
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# IMAP search query parser. cf RFC 3501
my ($self, $item) = @_;
my $ts = yyyymmdd($item);
my $end = $ts + 86399; # no leap day
- push @{$self->{xap}}, "ts:$ts..$end";
+ push @{$self->{xap}}, "rt:$ts..$end";
my $sql = $self->{sql} or return 1;
$$sql .= " AND ts >= $ts AND ts <= $end";
}
sub BEFORE {
my ($self, $item) = @_;
my $ts = yyyymmdd($item);
- push @{$self->{xap}}, "ts:..$ts";
+ push @{$self->{xap}}, "rt:..$ts";
my $sql = $self->{sql} or return 1;
$$sql .= " AND ts <= $ts";
}
sub SINCE {
my ($self, $item) = @_;
my $ts = yyyymmdd($item);
- push @{$self->{xap}}, "ts:$ts..";
+ push @{$self->{xap}}, "rt:$ts..";
my $sql = $self->{sql} or return 1;
$$sql .= " AND ts >= $ts";
}
--- /dev/null
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# base class for remote IPC calls and workqueues, requires Storable or Sereal
+package PublicInbox::IPC;
+use strict;
+use v5.10.1;
+use Carp qw(confess croak);
+use PublicInbox::DS qw(dwaitpid);
+use PublicInbox::Spawn;
+use POSIX qw(mkfifo WNOHANG);
+use Socket qw(AF_UNIX MSG_EOR SOCK_STREAM);
+use Errno qw(EMSGSIZE);
+use File::Temp 0.19 (); # 0.19 for ->newdir
+# undef if calling Socket::SOCK_SEQPACKET dies; wq_workers_start
+# refuses to start workers in that case
+my $SEQPACKET = eval { Socket::SOCK_SEQPACKET() }; # portable enough?
+# upper bound on bytes ipc_async keeps in flight in the request pipe
+use constant PIPE_BUF => $^O eq 'linux' ? 4096 : POSIX::_POSIX_PIPE_BUF();
+my $WQ_MAX_WORKERS = 4096;
+# Sereal encoder/decoder objects; $enc also doubles as the
+# "a serializer is available" flag once Storable is loaded below
+my ($enc, $dec);
+# ->import at BEGIN turns sereal_*_with_object into custom ops on 5.14+
+# and eliminates method call overhead
+BEGIN {
+	eval {
+		require Sereal::Encoder;
+		require Sereal::Decoder;
+		Sereal::Encoder->import('sereal_encode_with_object');
+		Sereal::Decoder->import('sereal_decode_with_object');
+		($enc, $dec) = (Sereal::Encoder->new, Sereal::Decoder->new);
+	};
+};
+
+# define freeze() and thaw() in this package: Sereal when it loaded
+# above, otherwise Storable; neither is defined if both are missing
+if ($enc && $dec) { # should be custom ops
+	*freeze = sub ($) { sereal_encode_with_object $enc, $_[0] };
+	*thaw = sub ($) { sereal_decode_with_object $dec, $_[0], my $ret };
+} else {
+	eval { # some distros have Storable as a separate package from Perl
+		require Storable;
+		Storable->import(qw(freeze thaw));
+		$enc = 1;
+	} // warn("Storable (part of Perl) missing: $@\n");
+}
+
+# recv_cmd4/send_cmd4 come from PublicInbox::Spawn when it provides
+# them, otherwise from PublicInbox::CmdIPC4
+my $recv_cmd = PublicInbox::Spawn->can('recv_cmd4');
+my $send_cmd = PublicInbox::Spawn->can('send_cmd4') // do {
+	require PublicInbox::CmdIPC4;
+	$recv_cmd //= PublicInbox::CmdIPC4->can('recv_cmd4');
+	PublicInbox::CmdIPC4->can('send_cmd4');
+};
+
+# Reads one record of the form "$length\n$payload" from $r and returns
+# the thawed payload.  Returns nothing at EOF; croaks on a malformed
+# header, a read error or a short read.
+sub _get_rec ($) {
+	my ($rd) = @_;
+	my $hdr = <$rd>;
+	return unless defined $hdr;
+	croak "no LF byte in $hdr" if chop($hdr) ne "\n";
+	my $got = read($rd, my $buf, $hdr);
+	croak "read error: $!" unless defined $got;
+	croak "short read: $got != $hdr" if $got != $hdr;
+	thaw($buf);
+}
+
+# Serializes $ref and frames it as "$length\n$payload" for _get_rec.
+sub _pack_rec ($) {
+	my ($ref) = @_;
+	my $frozen = freeze($ref);
+	join("\n", length($frozen), $frozen);
+}
+
+# Writes $ref to $w as one framed record; croaks if print fails.
+sub _send_rec ($$) {
+	my ($wr, $ref) = @_;
+	my $rec = _pack_rec($ref);
+	print $wr $rec or croak "print: $!";
+}
+
+# Sends a worker's reply over $w: $ret normally, or the exception $exc
+# wrapped in a PublicInbox::IPC::Die object when $exc is true.
+sub ipc_return ($$$) {
+	my ($w, $ret, $exc) = @_;
+	my $reply = $exc ? bless(\$exc, 'PublicInbox::IPC::Die') : $ret;
+	_send_rec($w, $reply);
+}
+
+# Worker-side request loop.  Reads [ $wantarray, $sub, @args ] records
+# from $r_req until _get_rec stops returning one, calls
+# $self->$sub(@args) in the requested context and, unless the client
+# asked for no reply, writes the result or exception to $w_res.
+sub ipc_worker_loop ($$$) {
+	my ($self, $r_req, $w_res) = @_;
+	local $/ = "\n"; # _get_rec reads its length header with readline
+	while (my $req = _get_rec($r_req)) {
+		my ($wantarray, $sub, @args) = @$req;
+		if ($wantarray) { # list context
+			my @ret = eval { $self->$sub(@args) };
+			ipc_return($w_res, \@ret, $@);
+		} elsif (defined($wantarray)) { # '' => wantscalar
+			my $ret = eval { $self->$sub(@args) };
+			ipc_return($w_res, \$ret, $@);
+		} else {
+			# no waiting if client doesn't care,
+			# this is the overwhelmingly likely case
+			eval { $self->$sub(@args) };
+			warn "$$ die: $@ (from nowait $sub)\n" if $@;
+		}
+	}
+}
+
+# starts a worker if Sereal or Storable is installed
+#
+# Forks one worker process and connects it to $self with two pipes:
+# requests flow parent => child, responses flow child => parent.
+# $ident becomes the child's $0.  $oldset, when given, is used as the
+# signal mask instead of calling PublicInbox::DS::block_signals, and
+# the parent then does not call sig_setmask itself.
+# Returns nothing when no serializer is available or when this process
+# already spawned a worker; the child process never returns.
+sub ipc_worker_spawn {
+	my ($self, $ident, $oldset) = @_;
+	return unless $enc; # no Sereal or Storable
+	return if ($self->{-ipc_ppid} // -1) == $$; # idempotent
+	# anything left here was set up by a different process
+	delete(@$self{qw(-ipc_req -ipc_res -ipc_ppid -ipc_pid)});
+	pipe(my ($r_req, $w_req)) or die "pipe: $!";
+	pipe(my ($r_res, $w_res)) or die "pipe: $!";
+	my $sigset = $oldset // PublicInbox::DS::block_signals();
+	$self->ipc_atfork_prepare;
+	# drawn before fork so the child can reseed its RNG below
+	my $seed = rand(0xffffffff);
+	my $pid = fork // die "fork: $!";
+	if ($pid == 0) {
+		srand($seed);
+		eval { PublicInbox::DS->Reset };
+		delete @$self{qw(-wq_s1 -wq_workers -wq_ppid)};
+		# drop the parent's ends of both pipes
+		$w_req = $r_res = undef;
+		$w_res->autoflush(1);
+		$SIG{$_} = 'IGNORE' for (qw(TERM INT QUIT));
+		local $0 = $ident;
+		PublicInbox::DS::sig_setmask($sigset);
+		my $on_destroy = $self->ipc_atfork_child;
+		eval { ipc_worker_loop($self, $r_req, $w_res) };
+		die "worker $ident PID:$$ died: $@\n" if $@;
+		exit;
+	}
+	PublicInbox::DS::sig_setmask($sigset) unless $oldset;
+	# drop the child's ends of both pipes
+	$r_req = $w_res = undef;
+	$w_req->autoflush(1);
+	$self->{-ipc_req} = $w_req;
+	$self->{-ipc_res} = $r_res;
+	$self->{-ipc_ppid} = $$;
+	$self->{-ipc_pid} = $pid;
+}
+
+# dwaitpid callback: warns about a worker that exited with a non-zero
+# status, unless it was terminated by signal 15.
+sub ipc_worker_reap { # dwaitpid callback
+	my ($self, $pid) = @_;
+	my $status = $?;
+	my $signo = $status & 127;
+	# SIGTERM (15) is our default exit signal
+	warn "PID:$pid died with \$?=$status\n" if $status && $signo != 15;
+}
+
+# Hands every PID that wq_close($self, $nohang) set aside for this
+# process to dwaitpid, and forgets the list.
+sub wq_wait_old {
+	my ($self) = @_;
+	my $old = delete $self->{"-wq_old_pids.$$"};
+	return unless $old;
+	for my $pid (@$old) {
+		dwaitpid($pid, \&ipc_worker_reap, $self);
+	}
+}
+
+# for base class, override in sub classes
+# Hook called as a method right before forking in ipc_worker_spawn,
+# wq_workers_start and wq_worker_incr; does nothing here.
+sub ipc_atfork_prepare {}
+
+# Called in a freshly forked worker: closes every handle listed in
+# $self->{-ipc_atfork_child_close}, removing the list.  Returns nothing
+# when there is no list, undef otherwise.
+sub ipc_atfork_child {
+	my ($self) = @_;
+	my $handles = delete $self->{-ipc_atfork_child_close};
+	return unless $handles;
+	foreach my $fh (@$handles) {
+		close($fh);
+	}
+	undef;
+}
+
+# idempotent, can be called regardless of whether worker is active or not
+#
+# Forgets the worker's PID and pipes; the worker is only handed to
+# dwaitpid by the process that spawned it.
+sub ipc_worker_stop {
+	my ($self) = @_;
+	my ($pid, $ppid) = delete(@$self{qw(-ipc_pid -ipc_ppid)});
+	my ($w_req, $r_res) = delete(@$self{qw(-ipc_req -ipc_res)});
+	unless ($w_req || $r_res) {
+		die "unexpected PID:$pid without IPC pipes" if $pid;
+		return; # idempotent
+	}
+	$pid or die 'no PID with IPC pipes';
+	# release our references to both pipe ends
+	$w_req = $r_res = undef;
+
+	return if $ppid != $$;
+	dwaitpid($pid, \&ipc_worker_reap, $self);
+}
+
+# use this if we have multiple readers reading curl or "pigz -dc"
+# and writing to the same store
+#
+# Creates (once) a PublicInbox::Lock on path $f, stores it in
+# $self->{-ipc_lock} and returns it.
+sub ipc_lock_init {
+	my ($self, $f) = @_;
+	require PublicInbox::Lock;
+	return $self->{-ipc_lock} if defined $self->{-ipc_lock};
+	$self->{-ipc_lock} = bless({ lock_path => $f }, 'PublicInbox::Lock');
+}
+
+# Collects replies for requests queued by ipc_async, oldest first, and
+# runs each request's callback with the reply.  Stops after $max
+# replies; pass -1 to drain everything in flight.
+sub ipc_async_wait ($$) {
+	my ($self, $max) = @_; # max == -1 to wait for all
+	my $inflight = $self->{-async_inflight};
+	return unless $inflight;
+	my $rd = $self->{-ipc_res} or die 'BUG: no ipc_res';
+	while (@$inflight) {
+		my ($sub, $bytes, $cb, $cb_arg) = splice(@$inflight, 0, 4);
+		my $ret = _get_rec($rd);
+		defined($ret) or
+			die "no response on $sub (req.size=$bytes)";
+		$self->{-async_inflight_bytes} -= $bytes;
+
+		# a failing callback must not stop the remaining replies
+		eval { $cb->($cb_arg, $ret) };
+		warn "E: $sub callback error: $@\n" if $@;
+		$max--;
+		return if $max == 0;
+	}
+}
+
+# call $self->$sub(@args), on a worker if ipc_worker_spawn was used
+#
+# The caller's context selects the protocol: void context sends the
+# request without waiting for a reply; scalar or list context first
+# drains pending ipc_async replies, then blocks for the result and
+# rethrows an exception raised in the worker.
+sub ipc_do {
+	my ($self, $sub, @args) = @_;
+	my $wr = $self->{-ipc_req};
+	return $self->$sub(@args) unless $wr; # run locally
+
+	# run in worker; $guard is kept alive until we leave this sub
+	my $lk = $self->{-ipc_lock};
+	my $guard = $lk ? $lk->lock_for_scope : undef;
+	unless (defined(wantarray)) { # likely, fire-and-forget into pipe
+		return _send_rec($wr, [ undef , $sub, @args ]);
+	}
+	my $rd = $self->{-ipc_res} or die 'BUG: no ipc_res';
+	ipc_async_wait($self, -1);
+	_send_rec($wr, [ wantarray, $sub, @args ]);
+	my $ret = _get_rec($rd) // die "no response on $sub";
+	die $$ret if ref($ret) eq 'PublicInbox::IPC::Die';
+	wantarray ? @$ret : $$ret;
+}
+
+# Queues $self->$sub(@$sub_args) on the worker (always in list context)
+# and arranges for $cb->($cb_arg, $ret) to run once the reply is read
+# by ipc_async_wait.  $ret is an arrayref of return values, or a
+# PublicInbox::IPC::Die object wrapping the exception.  Without a
+# worker, the call and the callback both run immediately in-process.
+sub ipc_async {
+	my ($self, $sub, $sub_args, $cb, $cb_arg) = @_;
+	if (my $w_req = $self->{-ipc_req}) { # run in worker
+		my $rec = _pack_rec([ 1, $sub, @$sub_args ]);
+		my $cur_bytes = \($self->{-async_inflight_bytes} //= 0);
+		# Keep at most PIPE_BUF bytes of requests in flight.  Only
+		# wait while something IS in flight: a single record larger
+		# than PIPE_BUF can never fit, and ipc_async_wait has
+		# nothing to drain once the queue is empty.
+		while ($$cur_bytes &&
+				($$cur_bytes + length($rec)) > PIPE_BUF) {
+			ipc_async_wait($self, 1);
+		}
+		my $ipc_lock = $self->{-ipc_lock};
+		my $lock = $ipc_lock ? $ipc_lock->lock_for_scope : undef;
+		print $w_req $rec or croak "print: $!";
+		$$cur_bytes += length($rec);
+		push @{$self->{-async_inflight}},
+			$sub, length($rec), $cb, $cb_arg;
+	} else {
+		my $ret = [ eval { $self->$sub(@$sub_args) } ];
+		if (my $exc = $@) {
+			$ret = ( bless(\$exc, 'PublicInbox::IPC::Die') );
+		}
+		eval { $cb->($cb_arg, $ret) };
+		warn "E: $sub callback error: $@\n" if $@;
+	}
+}
+
+# needed when there's multiple IPC workers and the parent forking
+# causes newer siblings to inherit older siblings sockets
+#
+# Drops this object's worker PID and pipes in the calling process.
+# Dies if the recorded worker PID is the calling process itself.
+sub ipc_sibling_atfork_child {
+	my ($self) = @_;
+	my ($pid, undef) = delete(@$self{qw(-ipc_pid -ipc_ppid)});
+	delete(@$self{qw(-ipc_req -ipc_res)});
+	# $pid is undef if no worker was ever spawned for $self
+	($pid // -1) == $$ and
+		die "BUG: $$ ipc_atfork_child called on itself";
+}
+
+# Receives one request (plus any passed file descriptors) from $s2,
+# exposes the descriptors as $self->{0}, $self->{1}, ... and runs the
+# requested method.  With $full_stream, keeps reading until $len bytes
+# have arrived.  Returns the request size in bytes, or nothing when no
+# data was received.
+sub _recv_and_run {
+	my ($self, $s2, $len, $full_stream) = @_;
+	my @fds = $recv_cmd->($s2, my $buf, $len);
+	my $n = length($buf // '') or return;
+	my $nfd = 0;
+	for my $fd (@fds) {
+		open(my $cmdfh, '+<&=', $fd) or
+			die "$$ open(+<&=$fd) (FD:$nfd): $!";
+		$self->{$nfd++} = $cmdfh;
+		$cmdfh->autoflush(1);
+	}
+	if ($full_stream) {
+		while ($n < $len) {
+			my $r = sysread($s2, $buf, $len - $n, $n) //
+				croak "read: $!";
+			croak "read EOF after $n/$len bytes" if $r == 0;
+			$n = length($buf);
+		}
+	}
+	# Sereal dies on truncated data, Storable returns undef
+	my $args = thaw($buf) // die "thaw error on buffer of size: $n";
+	undef $buf;
+	my $sub = shift @$args;
+	eval { $self->$sub(@$args) };
+	warn "$$ wq_worker: $@" if $@ && ref($@) ne 'PublicInbox::SIGPIPE';
+	# drop the handles opened above
+	delete @$self{0..($nfd-1)};
+	$n;
+}
+
+# Workqueue worker main loop: services requests arriving on
+# $self->{-wq_s2} until _recv_and_run returns false.
+sub wq_worker_loop ($) {
+	my ($self) = @_;
+	my $max_len = $self->{wq_req_len} // (4096 * 33);
+	my $s2 = $self->{-wq_s2} // die 'BUG: no -wq_s2';
+	while (_recv_and_run($self, $s2, $max_len)) {
+		# nothing else to do between requests
+	}
+}
+
+# Worker-side half of wq_do's fallback for big requests: takes the
+# stream socket passed as $self->{0} and reads the real $len-byte
+# request from it.
+sub do_sock_stream { # via wq_do, for big requests
+	my ($self, $len) = @_;
+	my $stream = delete $self->{0};
+	_recv_and_run($self, $stream, $len, 1);
+}
+
+# Runs $self->$sub(@args) on a workqueue worker, passing the handles in
+# @$ios along so the worker sees them as $self->{0}, $self->{1}, ...
+# Never waits for a result.  Without workers, the call runs in-process
+# with @$ios temporarily installed in those same slots.
+sub wq_do { # always async
+	my ($self, $sub, $ios, @args) = @_;
+	if (my $s1 = $self->{-wq_s1}) { # run in worker
+		my $fds = [ map { fileno($_) } @$ios ];
+		# serialize once; the streaming fallback below reuses it
+		my $buf = freeze([$sub, @args]);
+		my $n = $send_cmd->($s1, $fds, $buf, MSG_EOR);
+		return if defined($n);
+		croak "sendmsg error: $!" if $! != EMSGSIZE;
+		# Too big for a single message: ask a worker to run
+		# do_sock_stream on one end of a new stream socketpair,
+		# then write the real request to the other end.
+		socketpair(my $r, my $w, AF_UNIX, SOCK_STREAM, 0) or
+			croak "socketpair: $!";
+		$n = $send_cmd->($s1, [ fileno($r) ],
+				freeze(['do_sock_stream', length($buf)]),
+				MSG_EOR) // croak "sendmsg: $!";
+		undef $r;
+		$n = $send_cmd->($w, $fds, $buf, 0) // croak "sendmsg: $!";
+		while ($n < length($buf)) {
+			my $x = syswrite($w, $buf, length($buf) - $n, $n) //
+				croak "syswrite: $!";
+			croak "syswrite wrote 0 bytes" if $x == 0;
+			$n += $x;
+		}
+	} else {
+		@$self{0..$#$ios} = @$ios;
+		eval { $self->$sub(@args) };
+		warn "wq_do: $@" if $@ && ref($@) ne 'PublicInbox::SIGPIPE';
+		delete @$self{0..$#$ios}; # don't close
+	}
+}
+
+# Forks one workqueue worker.  The child installs its signal
+# dispositions, applies the signal mask $oldset, runs wq_worker_loop
+# and then exits; the parent records the new PID in
+# $self->{-wq_workers}.
+sub _wq_worker_start ($$) {
+	my ($self, $oldset) = @_;
+	# drawn before fork so the child can reseed its RNG below
+	my $seed = rand(0xffffffff);
+	my $pid = fork // die "fork: $!";
+	if ($pid == 0) {
+		srand($seed);
+		eval { PublicInbox::DS->Reset };
+		delete @$self{qw(-wq_s1 -wq_workers -wq_ppid)};
+		$SIG{$_} = 'IGNORE' for (qw(PIPE TTOU TTIN));
+		$SIG{$_} = 'DEFAULT' for (qw(TERM QUIT INT CHLD));
+		local $0 = $self->{-wq_ident};
+		PublicInbox::DS::sig_setmask($oldset);
+		# ensure we properly exit even if warn() dies:
+		my $end = PublicInbox::OnDestroy->new($$, sub { exit(!!$@) });
+		my $on_destroy = $self->ipc_atfork_child;
+		eval { wq_worker_loop($self) };
+		warn "worker $self->{-wq_ident} PID:$$ died: $@" if $@;
+		undef $on_destroy;
+		undef $end; # trigger exit
+	} else {
+		# only the key (PID) matters; \undef is a placeholder value
+		$self->{-wq_workers}->{$pid} = \undef;
+	}
+}
+
+# starts workqueue workers if Sereal or Storable is installed
+#
+# Creates the SOCK_SEQPACKET socketpair shared by all workers and
+# forks $nr_workers of them (default 4, capped at $WQ_MAX_WORKERS).
+# $ident becomes each worker's $0; $oldset, when given, replaces the
+# signal mask otherwise obtained from PublicInbox::DS::block_signals.
+# Returns nothing when a prerequisite is missing or workers were
+# already started.
+sub wq_workers_start {
+	my ($self, $ident, $nr_workers, $oldset) = @_;
+	($enc && $send_cmd && $recv_cmd && defined($SEQPACKET)) or return;
+	return if $self->{-wq_s1}; # idempotent
+	$self->{-wq_s1} = $self->{-wq_s2} = undef;
+	socketpair($self->{-wq_s1}, $self->{-wq_s2}, AF_UNIX, $SEQPACKET, 0) or
+		die "socketpair: $!";
+	$self->ipc_atfork_prepare;
+	$nr_workers //= 4;
+	$nr_workers = $WQ_MAX_WORKERS if $nr_workers > $WQ_MAX_WORKERS;
+	my $sigset = $oldset // PublicInbox::DS::block_signals();
+	$self->{-wq_workers} = {};
+	$self->{-wq_ident} = $ident;
+	_wq_worker_start($self, $sigset) for (1..$nr_workers);
+	PublicInbox::DS::sig_setmask($sigset) unless $oldset;
+	# remember which process is allowed to reap these workers
+	$self->{-wq_ppid} = $$;
+}
+
+# SIGTTIN handler: forks one more workqueue worker, unless the
+# workqueue was never started or already has $WQ_MAX_WORKERS workers.
+sub wq_worker_incr { # SIGTTIN handler
+	my ($self, $oldset) = @_;
+	return unless $self->{-wq_s2};
+	return unless wq_workers($self) < $WQ_MAX_WORKERS;
+	$self->ipc_atfork_prepare;
+	my $mask = $oldset // PublicInbox::DS::block_signals();
+	_wq_worker_start($self, $mask);
+	PublicInbox::DS::sig_setmask($mask) unless $oldset;
+}
+
+# Runs in a worker via wq_do: reports this worker's PID over -wq_s2
+# (waking up wq_worker_decr_wait), then exits.
+sub wq_exit { # wakes up wq_worker_decr_wait
+	my ($self) = @_;
+	my $sent = send($self->{-wq_s2}, $$, MSG_EOR);
+	defined($sent) or die "$$ send: $!";
+	exit;
+}
+
+# SIGTTOU handler: queues a wq_exit request, which makes whichever
+# worker picks it up exit.  Does nothing when there are no workers.
+sub wq_worker_decr { # SIGTTOU handler, kills first idle worker
+	my ($self) = @_;
+	wq_workers($self) or return;
+	my $s2 = $self->{-wq_s2} // die 'BUG: no wq_s2';
+	my @ios = ($s2) x 3;
+	$self->wq_do('wq_exit', \@ios);
+	# caller must call wq_worker_decr_wait in main loop
+}
+
+# Waits up to $timeout seconds for the PID sent by wq_exit, removes
+# that worker from $self->{-wq_workers} and hands it to dwaitpid.
+sub wq_worker_decr_wait {
+	my ($self, $timeout) = @_;
+	return if $self->{-wq_ppid} != $$; # can't reap siblings or parents
+	my $s1 = $self->{-wq_s1} // croak 'BUG: no wq_s1';
+	my $rin = '';
+	vec($rin, fileno($s1), 1) = 1;
+	my $nready = select(my $rout = $rin, undef, undef, $timeout);
+	$nready or croak 'timed out waiting for wq_exit';
+	defined(recv($s1, my $pid, 64, 0)) or croak "recv: $!";
+	my $workers = $self->{-wq_workers};
+	defined($workers) or croak 'BUG: no wq_workers';
+	defined(delete $workers->{$pid}) or croak "BUG: PID:$pid invalid";
+	dwaitpid($pid, \&ipc_worker_reap, $self);
+}
+
+# set or retrieve number of workers
+#
+# With $nr, workers are stopped or started until exactly $nr remain;
+# $nr is clamped to 0..$WQ_MAX_WORKERS.  Returns the resulting worker
+# count, or nothing if the workqueue was never started.
+sub wq_workers {
+	my ($self, $nr) = @_;
+	my $cur = $self->{-wq_workers} or return;
+	if (defined $nr) {
+		# wq_worker_incr refuses to go past $WQ_MAX_WORKERS and
+		# wq_worker_decr does nothing without workers, so an
+		# out-of-range target would never be reached below
+		$nr = $WQ_MAX_WORKERS if $nr > $WQ_MAX_WORKERS;
+		$nr = 0 if $nr < 0;
+		while (scalar(keys(%$cur)) > $nr) {
+			$self->wq_worker_decr;
+			$self->wq_worker_decr_wait;
+		}
+		$self->wq_worker_incr while scalar(keys(%$cur)) < $nr;
+	}
+	scalar(keys(%$cur));
+}
+
+# Drops both workqueue sockets and all worker bookkeeping.  In the
+# process that started the workers, their PIDs are then either handed
+# to dwaitpid right away, or (with $nohang) set aside for wq_wait_old
+# and wq_kill_old.
+sub wq_close {
+	my ($self, $nohang) = @_;
+	delete @$self{qw(-wq_s1 -wq_s2)} or return;
+	my $ppid = delete $self->{-wq_ppid} or return;
+	my $workers = delete $self->{-wq_workers} // die 'BUG: no wq_workers';
+	return if $ppid != $$; # can't reap siblings or parents
+	my @pids = map { $_ + 0 } keys %$workers;
+	if (!$nohang) {
+		for my $pid (@pids) {
+			dwaitpid($pid, \&ipc_worker_reap, $self);
+		}
+	} else {
+		push @{$self->{"-wq_old_pids.$$"}}, @pids;
+	}
+}
+
+# Sends SIGTERM to the PIDs that wq_close($self, $nohang) set aside
+# for this process; the list itself is kept.
+sub wq_kill_old {
+	my ($self) = @_;
+	my $old = $self->{"-wq_old_pids.$$"};
+	return unless $old;
+	kill('TERM', @$old);
+}
+
+# Sends $sig (default: TERM) to every current workqueue worker.
+sub wq_kill {
+	my ($self, $sig) = @_;
+	my $workers = $self->{-wq_workers};
+	return unless $workers;
+	my @pids = keys %$workers;
+	kill($sig // 'TERM', @pids);
+}
+
+# accessor for the file-scoped cap on the number of workqueue workers
+sub WQ_MAX_WORKERS {
+	return $WQ_MAX_WORKERS;
+}
+
+# Tears down everything this object started: signals the workqueue
+# workers (only from the process that started them), closes the
+# workqueue, waits on PIDs set aside earlier and stops the IPC worker.
+sub DESTROY {
+	my ($self) = @_;
+	if (($self->{-wq_ppid} // 0) == $$) {
+		wq_kill($self);
+	}
+	wq_close($self);
+	wq_wait_old($self);
+	ipc_worker_stop($self);
+}
+
+# Sereal doesn't have dclone
+# Copies the last argument by round-tripping it through freeze/thaw,
+# so this works as a function or as a method.
+sub deep_clone {
+	my $frozen = freeze($_[-1]);
+	thaw($frozen);
+}
+
+1;
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# temporary stack for public-inbox-index
use v5.10.1;
use strict;
use Fcntl qw(:seek);
-use constant FMT => eval { pack('Q', 1) } ? 'A1QQH*' : 'A1IIH*';
+use constant PACK_FMT => eval { pack('Q', 1) } ? 'A1QQH*H*' : 'A1IIH*H*';
# start off in write-only mode
sub new {
open(my $io, '+>', undef) or die "open: $!";
+ # latest_cmt is still useful when the newest revision is a `d'(elete),
+ # otherwise we favor $sync->{latest_cmt} for checkpoints and {quit}
bless { wr => $io, latest_cmt => $_[1] }, __PACKAGE__
}
# file_char = [d|m]
sub push_rec {
- my ($self, $file_char, $at, $ct, $blob_oid) = @_;
- my $rec = pack(FMT, $file_char, $at, $ct, $blob_oid);
- $self->{rec_size} //= length($rec);
+ my ($self, $file_char, $at, $ct, $blob_oid, $cmt_oid) = @_;
+ my $rec = pack(PACK_FMT, $file_char, $at, $ct, $blob_oid, $cmt_oid);
+ $self->{unpack_fmt} //= do {
+ my $len = length($cmt_oid);
+ my $fmt = PACK_FMT;
+ $fmt =~ s/H\*/H$len/g;
+ $self->{rec_size} = length($rec);
+ $fmt;
+ };
print { $self->{wr} } $rec or die "print: $!";
$self->{tot_size} += length($rec);
}
my $r = read($io, my $buf, $sz);
defined($r) or die "read: $!";
$r == $sz or die "read($r != $sz)";
- unpack(FMT, $buf);
+ unpack($self->{unpack_fmt}, $buf);
}
1;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# git fast-import-based ssoma-mda MDA replacement
use strict;
use parent qw(PublicInbox::Lock);
use v5.10.1;
-use PublicInbox::Spawn qw(spawn popen_rd);
+use PublicInbox::Spawn qw(run_die popen_rd);
use PublicInbox::MID qw(mids mid2path);
use PublicInbox::Address;
use PublicInbox::Smsg;
use PublicInbox::Eml;
use POSIX qw(strftime);
+# Returns the fully-qualified ref (e.g. "refs/heads/main") to use when
+# an inbox does not configure one.  The answer comes from the user's
+# global init.defaultBranch setting, falling back to refs/heads/master,
+# and is computed once per process.
+sub default_branch () {
+	state $default_branch = do {
+		delete local $ENV{GIT_CONFIG};
+		my $r = popen_rd([qw(git config --global init.defaultBranch)]);
+		chomp(my $h = <$r> // '');
+		# init.defaultBranch holds a bare branch name, but callers
+		# use our result as a full ref (and write it into HEAD)
+		$h eq '' ? 'refs/heads/master' : "refs/heads/$h";
+	}
+}
+
sub new {
# we can't change arg order, this is documented in POD
# and external projects may rely on it:
my ($class, $git, $name, $email, $ibx) = @_;
- my $ref = 'refs/heads/master';
+ my $ref;
if ($ibx) {
- $ref = $ibx->{ref_head} // 'refs/heads/master';
+ $ref = $ibx->{ref_head};
$name //= $ibx->{name};
$email //= $ibx->{-primary_address};
$git //= $ibx->git;
git => $git,
ident => "$name <$email>",
mark => 1,
- ref => $ref,
+ ref => $ref // default_branch,
ibx => $ibx,
path_type => '2/38', # or 'v2'
lock_path => "$git->{git_dir}/ssoma.lock", # v2 changes this
sub gfi_start {
my ($self) = @_;
- return ($self->{in}, $self->{out}) if $self->{pid};
+ return ($self->{in}, $self->{out}) if $self->{in};
- my (@ret, $out_r, $out_w);
+ my ($in_r, $out_r, $out_w);
pipe($out_r, $out_w) or die "pipe failed: $!";
$self->lock_acquire;
my ($git, $ref) = @$self{qw(git ref)};
local $/ = "\n";
chomp($self->{tip} = $git->qx(qw(rev-parse --revs-only), $ref));
+ die "fatal: rev-parse --revs-only $ref: \$?=$?" if $?;
if ($self->{path_type} ne '2/38' && $self->{tip}) {
local $/ = "\0";
my @t = $git->qx(qw(ls-tree -r -z --name-only), $ref);
+ die "fatal: ls-tree -r -z --name-only $ref: \$?=$?" if $?;
chomp @t;
$self->{-tree} = { map { $_ => 1 } @t };
}
- my @cmd = ('git', "--git-dir=$git->{git_dir}",
- qw(fast-import --quiet --done --date-format=raw));
- my ($in_r, $pid) = popen_rd(\@cmd, undef, { 0 => $out_r });
+ $in_r = $self->{in} = $git->popen(qw(fast-import
+ --quiet --done --date-format=raw),
+ undef, { 0 => $out_r });
$out_w->autoflush(1);
- $self->{in} = $in_r;
$self->{out} = $out_w;
- $self->{pid} = $pid;
$self->{nchg} = 0;
- @ret = ($in_r, $out_w);
};
if ($@) {
$self->lock_release;
die $@;
}
- @ret;
+ ($in_r, $out_w);
}
sub wfail () { die "write to fast-import failed: $!" }
sub checkpoint {
my ($self) = @_;
- return unless $self->{pid};
+ return unless $self->{in};
print { $self->{out} } "checkpoint\n" or wfail;
undef;
}
sub progress {
my ($self, $msg) = @_;
- return unless $self->{pid};
+ return unless $self->{in};
print { $self->{out} } "progress $msg\n" or wfail;
readline($self->{in}) eq "progress $msg\n" or die
"progress $msg not received\n";
# used for v2
sub get_mark {
my ($self, $mark) = @_;
- die "not active\n" unless $self->{pid};
+ die "not active\n" unless $self->{in};
my ($r, $w) = $self->gfi_start;
print $w "get-mark $mark\n" or wfail;
defined(my $oid = <$r>) or die "get-mark failed, need git 2.6.0+\n";
# v2: we need this for Xapian
if ($smsg) {
$smsg->{blob} = $self->get_mark(":$blob");
- $smsg->{raw_bytes} = $n;
- $smsg->{-raw_email} = \$raw_email;
+ $smsg->set_bytes($raw_email, $n);
+ if (my $oidx = delete $smsg->{-oidx}) { # used by LeiStore
+ return if $oidx->blob_exists($smsg->{blob});
+ }
}
my $ref = $self->{ref};
my $commit = $self->{mark}++;
$self->{tip} = ":$commit";
}
-sub run_die ($;$$) {
- my ($cmd, $env, $rdr) = @_;
- my $pid = spawn($cmd, $env, $rdr);
- waitpid($pid, 0) == $pid or die join(' ', @$cmd) .' did not finish';
- $? == 0 or die join(' ', @$cmd) . " failed: $?\n";
-}
-
-my @INIT_FILES = ('HEAD' => "ref: refs/heads/master\n",
+my @INIT_FILES = ('HEAD' => undef, # filled in at runtime
'description' => <<EOD,
Unnamed repository; edit this file 'description' to name the repository.
EOD
EOC
sub init_bare {
- my ($dir) = @_; # or self
+ my ($dir, $head) = @_; # or self
$dir = $dir->{git}->{git_dir} if ref($dir);
require File::Path;
File::Path::mkpath([ map { "$dir/$_" } qw(objects/info refs/heads) ]);
- for (my $i = 0; $i < @INIT_FILES; $i++) {
- my $f = $dir.'/'.$INIT_FILES[$i++];
+ $INIT_FILES[1] //= 'ref: '.default_branch."\n";
+ my @fn_contents = @INIT_FILES;
+ $fn_contents[1] = "ref: refs/heads/$head\n" if defined $head;
+ while (my ($fn, $contents) = splice(@fn_contents, 0, 2)) {
+ my $f = $dir.'/'.$fn;
next if -f $f;
open my $fh, '>', $f or die "open $f: $!";
- print $fh $INIT_FILES[$i] or die "print $f: $!";
+ print $fh $contents or die "print $f: $!";
close $fh or die "close $f: $!";
}
}
eval {
my $r = delete $self->{in} or die 'BUG: missing {in} when done';
print $w "done\n" or wfail;
- my $pid = delete $self->{pid} or
- die 'BUG: missing {pid} when done';
- waitpid($pid, 0) == $pid or die 'fast-import did not finish';
- $? == 0 or die "fast-import failed: $?";
+ close $r or die "fast-import failed: $?"; # ProcessPipe::CLOSE
};
my $wait_err = $@;
my $nchg = delete $self->{nchg};
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# used to ensure PublicInbox::DS can call fileno() as a function
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Represents a public-inbox (which may have multiple mailing addresses)
package PublicInbox::Inbox;
use strict;
-use warnings;
use PublicInbox::Git;
use PublicInbox::MID qw(mid2path);
use PublicInbox::Eml;
+use List::Util qw(max);
# Long-running "git-cat-file --batch" processes won't notice
# unlinked packs, so we need to restart those processes occasionally.
$CLEANUP->{"$self"} = $self;
}
-sub _set_uint ($$$) {
- my ($opts, $field, $default) = @_;
- my $val = $opts->{$field};
- if (defined $val) {
- $val = $val->[-1] if ref($val) eq 'ARRAY';
- $val = undef if $val !~ /\A[0-9]+\z/;
- }
- $opts->{$field} = $val || $default;
-}
-
sub _set_limiter ($$$) {
- my ($self, $pi_config, $pfx) = @_;
+ my ($self, $pi_cfg, $pfx) = @_;
my $lkey = "-${pfx}_limiter";
$self->{$lkey} ||= do {
# full key is: publicinbox.$NAME.httpbackendmax
require PublicInbox::Qspawn;
$lim = PublicInbox::Qspawn::Limiter->new($val);
} elsif ($val =~ /\A[a-z][a-z0-9]*\z/) {
- $lim = $pi_config->limiter($val);
+ $lim = $pi_cfg->limiter($val);
warn "$mkey limiter=$val not found\n" if !$lim;
} else {
warn "$mkey limiter=$val not understood\n";
my $v = $opts->{address} ||= [ 'public-inbox@example.com' ];
my $p = $opts->{-primary_address} = ref($v) eq 'ARRAY' ? $v->[0] : $v;
$opts->{domain} = ($p =~ /\@(\S+)\z/) ? $1 : 'localhost';
- my $pi_config = delete $opts->{-pi_config};
- _set_limiter($opts, $pi_config, 'httpbackend');
- _set_uint($opts, 'feedmax', 25);
- $opts->{nntpserver} ||= $pi_config->{'publicinbox.nntpserver'};
- my $dir = $opts->{inboxdir};
- if (defined $dir && -f "$dir/inbox.lock") {
- $opts->{version} = 2;
+ my $pi_cfg = delete $opts->{-pi_cfg};
+ _set_limiter($opts, $pi_cfg, 'httpbackend');
+ my $fmax = $opts->{feedmax};
+ if (defined($fmax) && $fmax =~ /\A[0-9]+\z/) {
+ $opts->{feedmax} += 0;
+ } else {
+ delete $opts->{feedmax};
}
+ $opts->{nntpserver} ||= $pi_cfg->{'publicinbox.nntpserver'};
# allow any combination of multi-line or comma-delimited hide entries
my $hide = {};
bless $opts, $class;
}
-sub version { $_[0]->{version} // 1 }
+sub version {
+ $_[0]->{version} //= -f "$_[0]->{inboxdir}/inbox.lock" ? 2 : 1
+}
sub git_epoch {
- my ($self, $epoch) = @_;
- $self->version == 2 or return;
+ my ($self, $epoch) = @_; # v2-only, callers always supply $epoch
$self->{"$epoch.git"} ||= do {
my $git_dir = "$self->{inboxdir}/git/$epoch.git";
+ return unless -d $git_dir;
my $g = PublicInbox::Git->new($git_dir);
$g->{-httpbackend_limiter} = $self->{-httpbackend_limiter};
- # no cleanup needed, we never cat-file off this, only clone
+ # caller must manually cleanup when done
$g;
};
}
my ($self) = @_;
return if $self->version < 2;
my $cur = $self->{-max_git_epoch};
- my $changed = git($self)->alternates_changed;
- if (!defined($cur) || $changed) {
+ my $changed;
+ if (!defined($cur) || ($changed = git($self)->alternates_changed)) {
git_cleanup($self) if $changed;
my $gits = "$self->{inboxdir}/git";
if (opendir my $dh, $gits) {
- my $max = -1;
- while (defined(my $git_dir = readdir($dh))) {
- $git_dir =~ m!\A([0-9]+)\.git\z! or next;
- $max = $1 if $1 > $max;
- }
- $cur = $self->{-max_git_epoch} = $max if $max >= 0;
- } else {
- warn "opendir $gits failed: $!\n";
+ my $max = max(map {
+ substr($_, 0, -4) + 0; # drop ".git" suffix
+ } grep(/\A[0-9]+\.git\z/, readdir($dh))) // return;
+ $cur = $self->{-max_git_epoch} = $max;
}
}
$cur;
};
}
-sub search ($;$$) {
- my ($self, $over_only, $ctx) = @_;
- my $srch = $self->{search} ||= eval {
+sub search {
+ my ($self) = @_;
+ my $srch = $self->{search} //= eval {
_cleanup_later($self);
require PublicInbox::Search;
PublicInbox::Search->new($self);
};
- ($over_only || eval { $srch->xdb }) ? $srch : do {
- $ctx and $ctx->{env}->{'psgi.errors'}->print(<<EOF);
-`$self->{name}' search went away unexpectedly
-EOF
- undef;
- };
+ (eval { $srch->xdb }) ? $srch : undef;
}
+# isrch is preferred for read-only interfaces if available since it
+# reduces kernel cache and FD overhead
+# Returns {isrch} when it is defined, otherwise the result of search().
+sub isrch { $_[0]->{isrch} // search($_[0]) }
+
sub over {
$_[0]->{over} //= eval {
- my $srch = search($_[0], 1) or return;
+ my $srch = $_[0]->{search} //= eval {
+ _cleanup_later($_[0]);
+ require PublicInbox::Search;
+ PublicInbox::Search->new($_[0]);
+ };
my $over = PublicInbox::Over->new("$srch->{xpfx}/over.sqlite3");
$over->dbh; # may fail
$over;
};
}
+
sub try_cat {
my ($path) = @_;
- my $rv = '';
- if (open(my $fh, '<', $path)) {
- local $/;
- $rv = <$fh>;
- }
- $rv;
+ open(my $fh, '<', $path) or return '';
+ local $/;
+ <$fh> // '';
+}
+
+sub cat_desc ($) {
+ my $desc = try_cat($_[0]);
+ local $/ = "\n";
+ chomp $desc;
+ utf8::decode($desc);
+ $desc =~ s/\s+/ /smg;
+ $desc eq '' ? undef : $desc;
}
sub description {
my ($self) = @_;
- ($self->{description} //= do {
- my $desc = try_cat("$self->{inboxdir}/description");
- local $/ = "\n";
- chomp $desc;
- utf8::decode($desc);
- $desc =~ s/\s+/ /smg;
- $desc eq '' ? undef : $desc;
- }) // '($INBOX_DIR/description missing)';
+ ($self->{description} //= cat_desc("$self->{inboxdir}/description")) //
+ '($INBOX_DIR/description missing)';
}
sub cloneurl {
return unless defined $smsg;
defined(my $blob = $smsg->{blob}) or return;
- git($self)->cat_file($blob);
+ $self->git->cat_file($blob);
}
sub smsg_eml {
$eml;
}
-sub mid2num($$) {
- my ($self, $mid) = @_;
- my $mm = mm($self) or return;
- $mm->num_for($mid);
-}
-
sub smsg_by_mid ($$) {
my ($self, $mid) = @_;
- my $over = over($self) or return;
- # favor the Message-ID we used for the NNTP article number:
- defined(my $num = mid2num($self, $mid)) or return;
- my $smsg = $over->get_art($num) or return;
- PublicInbox::Smsg::psgi_cull($smsg);
+ my $over = $self->over or return;
+ my $smsg;
+ if (my $mm = $self->mm) {
+ # favor the Message-ID we used for the NNTP article number:
+ defined(my $num = $mm->num_for($mid)) or return;
+ $smsg = $over->get_art($num);
+ } else {
+ my ($id, $prev);
+ $smsg = $over->next_by_mid($mid, \$id, \$prev);
+ }
+ $smsg ? PublicInbox::Smsg::psgi_cull($smsg) : undef;
}
sub msg_by_mid ($$) {
my ($self, $mid) = @_;
-
- over($self) or
- return msg_by_path($self, mid2path($mid));
-
my $smsg = smsg_by_mid($self, $mid);
- $smsg ? msg_by_smsg($self, $smsg) : undef;
+ $smsg ? msg_by_smsg($self, $smsg) : msg_by_path($self, mid2path($mid));
}
sub recent {
my ($self, $opts, $after, $before) = @_;
- over($self)->recent($opts, $after, $before);
+ $self->over->recent($opts, $after, $before);
}
sub modified {
my ($self) = @_;
- if (my $over = over($self)) {
+ if (my $over = $self->over) {
my $msgs = $over->recent({limit => 1});
if (my $smsg = $msgs->[0]) {
return $smsg->{ts};
}
}
+# Memoizes ->mm->created_at in {uidvalidity}.  The eval swallows any
+# exception, so a failed lookup yields undef and is retried on the
+# next call (//= does not cache undef).
+sub uidvalidity { $_[0]->{uidvalidity} //= eval { $_[0]->mm->created_at } }
+
+# Key identifying this inbox: {newsgroup} when defined, else {inboxdir}.
+sub eidx_key { $_[0]->{newsgroup} // $_[0]->{inboxdir} }
+
1;
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# fields:
-# pi_config: PublicInbox::Config ref
# inot: Linux::Inotify2-like object
# pathmap => { inboxdir => [ ibx, watch1, watch2, watch3... ] } mapping
package PublicInbox::InboxIdle;
use strict;
use parent qw(PublicInbox::DS);
-use Cwd qw(abs_path);
use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
my $IN_MODIFY = 0x02; # match Linux inotify
my $ino_cls;
sub in2_arm ($$) { # PublicInbox::Config::each_inbox callback
my ($ibx, $self) = @_;
- my $dir = abs_path($ibx->{inboxdir});
- if (!defined($dir)) {
- warn "W: $ibx->{inboxdir} not watched: $!\n";
- return;
- }
+ my $dir = $ibx->{inboxdir};
my $inot = $self->{inot};
my $cur = $self->{pathmap}->{$dir} //= [];
my $lock = "$dir/".($ibx->version >= 2 ? 'inbox.lock' : 'ssoma.lock');
}
sub refresh {
- my ($self, $pi_config) = @_;
- $pi_config->each_inbox(\&in2_arm, $self);
+ my ($self, $pi_cfg) = @_;
+ $pi_cfg->each_inbox(\&in2_arm, $self);
}
+# internal API for ease-of-use
+# Method-style wrapper ($self, $ibx) around in2_arm, which takes its
+# arguments in the opposite order ($ibx, $self).
+sub watch_inbox { in2_arm($_[1], $_[0]) };
+
sub new {
- my ($class, $pi_config) = @_;
+ my ($class, $pi_cfg) = @_;
my $self = bless {}, $class;
my $inot;
if ($ino_cls) {
$self->{inot} = $inot;
$self->{pathmap} = {}; # inboxdir => [ ibx, watch1, watch2, watch3...]
$self->{on_unlock} = {}; # lock path => ibx
- refresh($self, $pi_config);
+ refresh($self, $pi_cfg) if $pi_cfg;
PublicInbox::FakeInotify::poll_once($self) if !$ino_cls;
$self;
}
my @events = $self->{inot}->read; # Linux::Inotify2::read
my $on_unlock = $self->{on_unlock};
for my $ev (@events) {
- if (my $ibx = $on_unlock->{$ev->fullname}) {
+ my $fn = $ev->fullname // next; # cancelled
+ if (my $ibx = $on_unlock->{$fn}) {
$ibx->on_unlock;
}
}
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Extends read-only Inbox for writing
require PublicInbox::Msgmap;
my $sidx = PublicInbox::SearchIdx->new($self, 1); # just create
$sidx->begin_txn_lazy;
+ my $mm = PublicInbox::Msgmap->new($self->{inboxdir}, 1);
if (defined $skip_artnum) {
- my $mm = PublicInbox::Msgmap->new($self->{inboxdir}, 1);
$mm->{dbh}->begin_work;
$mm->skip_artnum($skip_artnum);
$mm->{dbh}->commit;
}
+ undef $mm; # ->created_at set
$sidx->commit_txn_lazy;
} else {
open my $fh, '>>', "$self->{inboxdir}/ssoma.lock" or
if ($self->version == 1) {
my $dir = assert_usable_dir($self);
PublicInbox::Import::init_bare($dir);
- $self->umask_prepare;
$self->with_umask(\&_init_v1, $self, $skip_artnum);
} else {
my $v2w = importer($self);
$im->done;
}
- my @args = (-inbox => $self);
+ my @args = (ibx => $self);
# basic line splitting, only
# Perhaps we can have proper quote splitting one day...
($f, @args) = split(/\s+/, $f) if $f =~ /\s+/;
sub with_umask {
my ($self, $cb, @arg) = @_;
- my $old = umask $self->{umask};
+ my $old = umask($self->{umask} //= umask_prepare($self));
my $rv = eval { $cb->(@arg) };
my $err = $@;
umask $old;
sub umask_prepare {
my ($self) = @_;
my $perm = _git_config_perm($self);
- my $umask = _umask_for($perm);
- $self->{umask} = $umask;
+ _umask_for($perm);
}
sub cleanup ($) {
# PublicInbox::MsgTime
|| $s =~ /^bogus TZ offset: .+?, ignoring and assuming \+0000/
|| $s =~ /^bad Date: .+? in /
+ # Encode::Unicode::UTF7
+ || $s =~ /^Bad UTF7 data escape at /
}
# this expects to be RHS in this assignment: "local $SIG{__WARN__} = ..."
sub warn_ignore_cb {
- my $cb = $SIG{__WARN__} // sub { print STDERR @_ };
+ my $cb = $SIG{__WARN__} // \&CORE::warn;
sub {
return if warn_ignore(@_);
$cb->(@_);
}
}
+# v2+ only, XXX: maybe we can just rely on ->max_git_epoch and remove
+# $max is a scalar reference; it receives the value of ->max_git_epoch
+# (possibly undef).  Returns the path of that epoch's git directory
+# under {inboxdir}/git, or undef when no epoch is known.
+sub git_dir_latest {
+ my ($self, $max) = @_;
+ defined($$max = $self->max_git_epoch) ?
+ "$self->{inboxdir}/git/$$max.git" : undef;
+}
+
1;
--- /dev/null
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Provides everything the PublicInbox::Search object does;
+# but uses global ExtSearch (->ALL) with an eidx_key query to
+# emulate per-Inbox search using ->ALL.
+package PublicInbox::Isearch;
+use strict;
+use v5.10.1;
+use PublicInbox::ExtSearch;
+use PublicInbox::Search;
+
+# Constructor.  The invocant is ignored; the object is always blessed
+# into this package.  $es is stored as {es} and $ibx is only consulted
+# for its ->eidx_key, which is stored as {eidx_key}.
+sub new {
+ my (undef, $ibx, $es) = @_;
+ bless { es => $es, eidx_key => $ibx->eidx_key }, __PACKAGE__;
+}
+
+# Looks up the ibx_id of the `inboxes' row whose eidx_key equals
+# $self->{eidx_key}, using the over DB handle of {es}.
+# Dies (naming the eidx_key and {es}->{topdir}) when the fetch yields
+# undef, i.e. presumably when no such row exists.
+sub _ibx_id ($) {
+ my ($self) = @_;
+ my $sth = $self->{es}->over->dbh->prepare_cached(<<'', undef, 1);
+SELECT ibx_id FROM inboxes WHERE eidx_key = ? LIMIT 1
+
+ $sth->execute($self->{eidx_key});
+ $sth->fetchrow_array //
+ die "E: `$self->{eidx_key}' not in $self->{es}->{topdir}\n";
+}
+
+
+# Runs query $str through {es}->mset with a private copy of the
+# caller's options ($opt is never modified), adding eidx_key so the
+# search is restricted to this inbox.
+#
+# When $opt->{uid_range} ([$beg, $end] in this inbox's xnum space) is
+# given, it is translated to a [min_docid, max_docid] pair via the
+# xref3 table before being handed to {es}->mset.  If both bounds are
+# found, `limit' is set to the size of the docid span; otherwise the
+# missing bounds default to 0 and 0xffffffff.
+sub mset {
+ my ($self, $str, $opt) = @_;
+ my %opt = $opt ? %$opt : ();
+ $opt{eidx_key} = $self->{eidx_key};
+ if (my $uid_range = $opt{uid_range}) {
+ my ($beg, $end) = @$uid_range;
+ my $ibx_id = $self->{-ibx_id} //= _ibx_id($self);
+ # go through the ->over accessor like _ibx_id and mset_to_artnums
+ # do; {es}->{over} may be unset when -ibx_id is already cached
+ my $dbh = $self->{es}->over->dbh;
+ my $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT MIN(docid) FROM xref3 WHERE ibx_id = ? AND xnum >= ? AND xnum <= ?
+
+ $sth->execute($ibx_id, $beg, $end);
+ my @r = ($sth->fetchrow_array);
+
+ $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT MAX(docid) FROM xref3 WHERE ibx_id = ? AND xnum >= ? AND xnum <= ?
+
+ $sth->execute($ibx_id, $beg, $end);
+ $r[1] = $sth->fetchrow_array;
+ if (defined($r[1]) && defined($r[0])) {
+ $opt{limit} = $r[1] - $r[0] + 1;
+ } else {
+ $r[1] //= 0xffffffff;
+ $r[0] //= 0;
+ }
+ $opt{uid_range} = \@r;
+ }
+ $self->{es}->mset($str, \%opt);
+}
+
+# Converts the docids of $mset (as returned for {es} by
+# PublicInbox::Search::mset_to_artnums) into this inbox's xnum values
+# using the xref3 table.  Returns an array ref of xnums.
+#
+# With $opt->{relevance} == -1 the xnums come back sorted ascending
+# (optionally bounded by $opt->{uid_range}); otherwise they follow the
+# order of the docids, and docids without an xref3 row for this inbox
+# are warned about and dropped.
+sub mset_to_artnums {
+ my ($self, $mset, $opt) = @_;
+ my $docids = PublicInbox::Search::mset_to_artnums($self->{es}, $mset);
+ my $ibx_id = $self->{-ibx_id} //= _ibx_id($self);
+ # one placeholder per docid for the IN (...) list
+ my $qmarks = join(',', map { '?' } @$docids);
+ if ($opt && ($opt->{relevance} // 0) == -1) { # -1 => ENQ_ASCENDING
+ my $range = '';
+ my @r;
+ if (my $r = $opt->{uid_range}) {
+ $range = 'AND xnum >= ? AND xnum <= ?';
+ @r = @$r;
+ }
+ my $rows = $self->{es}->over->dbh->
+ selectall_arrayref(<<"", undef, $ibx_id, @$docids, @r);
+SELECT xnum FROM xref3 WHERE ibx_id = ? AND docid IN ($qmarks) $range
+ORDER BY xnum ASC
+
+ return [ map { $_->[0] } @$rows ];
+ }
+
+ my $rows = $self->{es}->over->dbh->
+ selectall_arrayref(<<"", undef, $ibx_id, @$docids);
+SELECT docid,xnum FROM xref3 WHERE ibx_id = ? AND docid IN ($qmarks)
+
+ # %order maps docid => position in @$docids so the SQL rows can be
+ # put back into the order of the docids
+ my $i = -1;
+ my %order = map { $_ => ++$i } @$docids;
+ my @xnums;
+ for my $row (@$rows) { # @row = ($docid, $xnum)
+ my $idx = delete($order{$row->[0]}) // next;
+ $xnums[$idx] = $row->[1];
+ }
+ # anything left in %order had no matching row; drop the holes
+ if (scalar keys %order) {
+ warn "W: $self->{es}->{topdir} #",
+ join(', ', sort { $a <=> $b } keys %order),
+ " not mapped to `$self->{eidx_key}'\n";
+ warn "W: $self->{es}->{topdir} may need to be reindexed\n";
+ @xnums = grep { defined } @xnums;
+ }
+ \@xnums;
+}
+
+# Resolves $mset to message objects fetched from $ibx->over, kept in
+# the order given by mset_to_artnums.
+# In list context returns ($mset->get_matches_estimated, \@msgs);
+# in scalar context returns \@msgs only.
+# Article numbers which $ibx->over no longer knows are warned about
+# and omitted from the result.
+sub mset_to_smsg {
+ my ($self, $ibx, $mset) = @_; # $ibx is a real inbox, not eidx
+ my $xnums = mset_to_artnums($self, $mset);
+ # %order maps xnum => position so results can be put back in order
+ my $i = -1;
+ my %order = map { $_ => ++$i } @$xnums;
+ my $unordered = $ibx->over->get_all(@$xnums);
+ my @msgs;
+ for my $smsg (@$unordered) {
+ my $idx = delete($order{$smsg->{num}}) // do {
+ warn "W: $ibx->{inboxdir} #$smsg->{num}\n";
+ next;
+ };
+ $msgs[$idx] = $smsg;
+ }
+ if (scalar keys %order) {
+ warn "W: $ibx->{inboxdir} #",
+ join(', ', sort { $a <=> $b } keys %order),
+ " no longer valid\n";
+ warn "W: $self->{es}->{topdir} may need to be reindexed\n";
+ # drop the undef holes left by the missing articles, as
+ # mset_to_artnums does, so callers never see undef elements
+ @msgs = grep { defined } @msgs;
+ }
+ wantarray ? ($mset->get_matches_estimated, \@msgs) : \@msgs;
+}
+
+# Always true for this class.
+sub has_threadid { 1 }
+
+# Delegates to the ->help method of {es}.
+sub help { $_[0]->{es}->help }
+
+1;
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# implements the small subset of Linux::Inotify2 functionality we use
--- /dev/null
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Backend for `lei' (local email interface). Unlike the C10K-oriented
+# PublicInbox::Daemon, this is designed exclusively to handle trusted
+# local clients with read/write access to the FS and use as many
+# system resources as the local user has access to.
+package PublicInbox::LEI;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::DS PublicInbox::LeiExternal
+ PublicInbox::LeiQuery);
+use Getopt::Long ();
+use Socket qw(AF_UNIX SOCK_SEQPACKET MSG_EOR pack_sockaddr_un);
+use Errno qw(EAGAIN EINTR ECONNREFUSED ENOENT ECONNRESET);
+use POSIX ();
+use IO::Handle ();
+use Fcntl qw(SEEK_SET);
+use Sys::Syslog qw(syslog openlog);
+use PublicInbox::Config;
+use PublicInbox::Syscall qw(SFD_NONBLOCK EPOLLIN EPOLLET);
+use PublicInbox::Sigfd;
+use PublicInbox::DS qw(now dwaitpid);
+use PublicInbox::Spawn qw(spawn popen_rd);
+use PublicInbox::OnDestroy;
+use Text::Wrap qw(wrap);
+use File::Path qw(mkpath);
+use File::Spec;
+# $quit is what x_it calls to terminate in the oneshot (no socket) case
+our $quit = \&CORE::exit;
+our ($current_lei, $errors_log, $listener);
+my ($recv_cmd, $send_cmd);
+# $GLP: option parser used for ordinary subcommands
+my $GLP = Getopt::Long::Parser->new;
+$GLP->configure(qw(gnu_getopt no_ignore_case auto_abbrev));
+# $GLP_PASS: same configuration plus pass_through
+my $GLP_PASS = Getopt::Long::Parser->new;
+$GLP_PASS->configure(qw(gnu_getopt no_ignore_case auto_abbrev pass_through));
+
+our %PATH2CFG; # persistent for socket daemon
+
+# TBD: this is a documentation mechanism to show a subcommand
+# (may) pass options through to another command:
+# The argument (e.g. 'git show') is ignored; the shared pass-through
+# parser is always returned.
+sub pass_through { $GLP_PASS }
+
+# options hash of the command being parsed; optparse points this at
+# $self->{opt} before parsing, and the opt_dash callbacks write into it
+my $OPT;
+
+# Builds the tail of a %CMD entry for commands accepting a bare
+# `-VALUE' argument (e.g. `-10').  $spec is a Getopt::Long spec and
+# $re_str a regexp source matching the accepted VALUE.
+# Returns ($spec, '<>' => $callback, $GLP_PASS).
+sub opt_dash ($$) {
+ my ($spec, $re_str) = @_; # 'limit|n=i', '([0-9]+)'
+ # leading lower-case run of the spec is the key stored in $OPT
+ my ($key) = ($spec =~ m/\A([a-z]+)/g);
+ my $cb = sub { # Getopt::Long "<>" catch-all handler
+ my ($arg) = @_;
+ if ($arg =~ /\A-($re_str)\z/) {
+ $OPT->{$key} = $1;
+ } elsif ($arg eq '--') { # "--" arg separator, ignore first
+ push @{$OPT->{-argv}}, $arg if $OPT->{'--'}++;
+ # lone (single) dash is handled elsewhere
+ } elsif (substr($arg, 0, 1) eq '-') {
+ # unknown switches are only tolerated after a "--"
+ if ($OPT->{'--'}) {
+ push @{$OPT->{-argv}}, $arg;
+ } else {
+ die "bad argument: $arg\n";
+ }
+ } else {
+ # non-option args are collected; optparse appends them to argv
+ push @{$OPT->{-argv}}, $arg;
+ }
+ };
+ ($spec, '<>' => $cb, $GLP_PASS) # for Getopt::Long
+}
+
+# Default store location: $XDG_DATA_HOME/lei/store, falling back to
+# $HOME/.local/share/lei/store (with `/nonexistent' standing in for an
+# unset HOME).  $env is a hash ref of environment variables; relative
+# results are made absolute against $env->{PWD}.
+sub _store_path ($) {
+ my ($env) = @_;
+ File::Spec->rel2abs(($env->{XDG_DATA_HOME} //
+ ($env->{HOME} // '/nonexistent').'/.local/share')
+ .'/lei/store', $env->{PWD});
+}
+
+# Config file location: $XDG_CONFIG_HOME/lei/config, falling back to
+# $HOME/.config/lei/config (with `/nonexistent' standing in for an
+# unset HOME).  $env is a hash ref of environment variables; relative
+# results are made absolute against $env->{PWD}.
+sub _config_path ($) {
+ my ($env) = @_;
+ File::Spec->rel2abs(($env->{XDG_CONFIG_HOME} //
+ ($env->{HOME} // '/nonexistent').'/.config')
+ .'/lei/config', $env->{PWD});
+}
+
+# TODO: generate shell completion + help using %CMD and %OPTDESC
+# command => [ positional_args, 1-line description, Getopt::Long option spec ]
+# - positional_args is ONE space-separated string (optparse splits it)
+# - the description may be a CODE ref, which _help calls with the env hash
+# - a spec ending in "|" (e.g. "stdin|") must come first in its list;
+#   optparse then also accepts a lone "-" as an alias for it
+# - a trailing Getopt::Long::Parser object selects the parser to use
+our %CMD = ( # sorted in order of importance/use:
+'q' => [ 'SEARCH_TERMS...', 'search for messages matching terms', qw(
+ save-as=s output|mfolder|o=s format|f=s dedupe|d=s thread|t augment|a
+ sort|s=s reverse|r offset=i remote! local! external! pretty mua-cmd=s
+ torsocks=s no-torsocks verbose|v since|after=s until|before=s),
+ PublicInbox::LeiQuery::curl_opt(), opt_dash('limit|n=i', '[0-9]+') ],
+
+'show' => [ 'MID|OID', 'show a given object (Message-ID or object ID)',
+ qw(type=s solve! format|f=s dedupe|d=s thread|t remote local!),
+ pass_through('git show') ],
+
+'add-external' => [ 'URL_OR_PATHNAME',
+ 'add/set priority of a publicinbox|extindex for extra matches',
+ qw(boost=i quiet|q) ],
+'ls-external' => [ '[FILTER...]', 'list publicinbox|extindex locations',
+ qw(format|f=s z|0 local remote quiet|q) ],
+'forget-external' => [ 'URL_OR_PATHNAME...|--prune',
+ 'exclude further results from a publicinbox|extindex',
+ qw(prune quiet|q) ],
+
+'ls-query' => [ '[FILTER...]', 'list saved search queries',
+ qw(name-only format|f=s z) ],
+'rm-query' => [ 'QUERY_NAME', 'remove a saved search' ],
+'mv-query' => [ 'OLD_NAME NEW_NAME', 'rename a saved search' ],
+
+'plonk' => [ '--thread|--from=IDENT',
+ 'exclude mail matching From: or thread from non-Message-ID searches',
+ qw(stdin| thread|t from|f=s mid=s oid=s) ],
+'mark' => [ 'MESSAGE_FLAGS...',
+ 'set/unset flags on message(s) from stdin',
+ qw(stdin| oid=s exact by-mid|mid:s) ],
+'forget' => [ '[--stdin|--oid=OID|--by-mid=MID]',
+ "exclude message(s) on stdin from `q' search results",
+ qw(stdin| oid=s exact by-mid|mid:s quiet|q) ],
+
+'purge-mailsource' => [ 'URL_OR_PATHNAME|--all',
+ 'remove imported messages from IMAP, Maildirs, and MH',
+ qw(exact! all jobs:i indexed) ],
+
+# code repos are used for `show' to solve blobs from patch mails
+'add-coderepo' => [ 'PATHNAME', 'add or set priority of a git code repo',
+ qw(boost=i) ],
+'ls-coderepo' => [ '[FILTER_TERMS...]',
+ 'list known code repos', qw(format|f=s z) ],
+'forget-coderepo' => [ 'PATHNAME',
+ 'stop using repo to solve blobs from patches',
+ qw(prune) ],
+
+'add-watch' => [ '[URL_OR_PATHNAME]',
+ 'watch for new messages and flag changes',
+ qw(import! flags! interval=s recursive|r exclude=s include=s) ],
+'ls-watch' => [ '[FILTER...]', 'list active watches with numbers and status',
+ qw(format|f=s z) ],
+'pause-watch' => [ '[WATCH_NUMBER_OR_FILTER]', 'pause a watch',
+ qw(all local remote) ],
+'resume-watch' => [ '[WATCH_NUMBER_OR_FILTER]', 'resume a watch',
+ qw(all local remote) ],
+'forget-watch' => [ '{WATCH_NUMBER|--prune}', 'stop and forget a watch',
+ qw(prune) ],
+
+'import' => [ 'URL_OR_PATHNAME|--stdin',
+ 'one-shot import/update from URL or filesystem',
+ qw(stdin| offset=i recursive|r exclude=s include=s flags!),
+ ],
+
+'config' => [ '[...]', sub {
+ 'git-config(1) wrapper for '._config_path($_[0]);
+ }, qw(config-file|system|global|file|f=s), # for conflict detection
+ pass_through('git config') ],
+'init' => [ '[PATHNAME]', sub {
+ 'initialize storage, default: '._store_path($_[0]);
+ }, qw(quiet|q) ],
+'daemon-kill' => [ '[-SIGNAL]', 'signal the lei-daemon',
+ opt_dash('signal|s=s', '[0-9]+|(?:[A-Z][A-Z0-9]+)') ],
+'daemon-pid' => [ '', 'show the PID of the lei-daemon' ],
+'help' => [ '[SUBCOMMAND]', 'show help' ],
+
+# XXX do we need this?
+# 'git' => [ '[ANYTHING...]', 'git(1) wrapper', pass_through('git') ],
+
+'reorder-local-store-and-break-history' => [ '[REFNAME]',
+ 'rewrite git history in an attempt to improve compression',
+ 'gc!' ],
+
+# internal commands are prefixed with '_'
+'_complete' => [ '[...]', 'internal shell completion helper',
+ pass_through('everything') ],
+); # @CMD
+
+# switch descriptions, try to keep consistent across commands
+# $spec: Getopt::Long option specification
+# $spec => [@ALLOWED_VALUES (default is first), $description],
+# $spec => $description
+# "$SUB_COMMAND TAB $spec" => as above
+# NOTE: _help looks entries up by the exact spec string used in %CMD,
+# so every key here must match its %CMD spelling character for character
+my $stdin_formats = [ 'IN|auto|raw|mboxrd|mboxcl2|mboxcl|mboxo',
+ 'specify message input format' ];
+my $ls_format = [ 'OUT|plain|json|null', 'listing output format' ];
+
+my %OPTDESC = (
+'help|h' => 'show this built-in help',
+'quiet|q' => 'be quiet',
+'solve!' => 'do not attempt to reconstruct blobs from emails',
+'save-as=s' => ['NAME', 'save search terms by given name'],
+
+'type=s' => [ 'any|mid|git', 'disambiguate type' ],
+
+'dedupe|d=s' => ['STRAT|content|oid|mid|none',
+ 'deduplication strategy'],
+'show thread|t' => 'display entire thread a message belongs to',
+'q thread|t' =>
+ 'return all messages in the same thread as the actual match(es)',
+'augment|a' => 'augment --output destination instead of clobbering',
+
+'output|mfolder|o=s' => [ 'DEST',
+ "destination (e.g. `/path/to/Maildir', or `-' for stdout)" ],
+'mua-cmd=s' => [ 'COMMAND',
+ "MUA to run on --output Maildir or mbox (e.g. `mutt -f %f')" ],
+
+'show format|f=s' => [ 'OUT|plain|raw|html|mboxrd|mboxcl2|mboxcl',
+ 'message/object output format' ],
+'mark format|f=s' => $stdin_formats,
+'forget format|f=s' => $stdin_formats,
+'q format|f=s' => [ 'OUT|maildir|mboxrd|mboxcl2|mboxcl|html|oid|json',
+ 'specify output format, default depends on --output'],
+'ls-query format|f=s' => $ls_format,
+'ls-external format|f=s' => $ls_format,
+
+'limit|n=i' => ['NUM', 'limit on number of matches (default: 10000)' ],
+'offset=i' => ['OFF', 'search result offset (default: 0)'],
+
+'sort|s=s' => [ 'VAL|received,relevance,docid',
+ "order of results `--output'-dependent"],
+'reverse|r' => 'reverse search results', # like sort(1)
+
+'boost=i' => 'increase/decrease priority of results (default: 0)',
+
+'local' => 'limit operations to the local filesystem',
+'local!' => 'exclude results from the local filesystem',
+'remote' => 'limit operations to those requiring network access',
+'remote!' => 'prevent operations requiring network access',
+
+'mid=s' => 'specify the Message-ID of a message',
+'oid=s' => 'specify the git object ID of a message',
+
+'recursive|r' => 'scan directories/mailboxes/newsgroups recursively',
+'exclude=s' => 'exclude mailboxes/newsgroups based on pattern',
+'include=s' => 'include mailboxes/newsgroups based on pattern',
+
+'exact' => 'operate on exact header matches only',
+'exact!' => 'rely on content match instead of exact header matches',
+
+'by-mid|mid:s' => [ 'MID', 'match only by Message-ID, ignoring contents' ],
+'jobs:i' => 'set parallelism level',
+
+# xargs, env, use "-0", git(1) uses "-z". We support z|0 everywhere
+'z|0' => 'use NUL \\0 instead of newline (LF) to delimit lines',
+
+'signal|s=s' => [ 'SIG', 'signal to send lei-daemon (default: TERM)' ],
+); # %OPTDESC
+
+# config key => one-line description
+my %CONFIG_KEYS = (
+ 'leistore.dir' => 'top-level storage location',
+);
+
+# pronounced "exit": x_it(1 << 8) => exit(1); x_it(13) => SIGPIPE
+# $code is laid out like $?: the exit status is in the bits above 8
+# and the low 7 bits hold a signal number.
+# With a client socket ({sock}), the code is sent to the client as an
+# "x_it $code" message.  Without one, $quit is only invoked when no
+# signal bits are set; otherwise this returns to the caller.
+sub x_it ($$) {
+ my ($self, $code) = @_;
+ # make sure client sees stdout before exit
+ $self->{1}->autoflush(1) if $self->{1};
+ dump_and_clear_log();
+ if (my $sock = $self->{sock}) {
+ send($sock, "x_it $code", MSG_EOR);
+ } elsif (!($code & 127)) { # oneshot, ignore signals
+ # don't want to end up using $? from child processes
+ for my $f (qw(lxs l2m)) {
+ my $wq = delete $self->{$f} or next;
+ $wq->DESTROY;
+ }
+ $quit->($code >> 8);
+ }
+}
+
+# Prints each argument followed by a newline to the {1} handle.
+sub puts ($;@) { print { shift->{1} } map { "$_\n" } @_ }
+
+# Prints the arguments unmodified to the {1} handle.
+sub out ($;@) { print { shift->{1} } @_ }
+
+# Prints the arguments to the first available of: the {2} handle,
+# element 2 of {pgr}, or STDERR.  A trailing newline is added unless
+# the last argument already ends with one.
+sub err ($;@) {
+ my $self = shift;
+ my $err = $self->{2} // ($self->{pgr} // [])->[2] // *STDERR{IO};
+ print $err @_, (substr($_[-1], -1, 1) eq "\n" ? () : "\n");
+}
+
+# Like err(), but does nothing when the `quiet' option is set.
+sub qerr ($;@) { $_[0]->{opt}->{quiet} or err(shift, @_) }
+
+# Reports $buf via err() and then calls x_it() with $exit_code
+# (default 1) shifted into the exit-status bits.  Returns undef so
+# callers can `return fail(...)'.
+sub fail ($$;$) {
+ my ($self, $buf, $exit_code) = @_;
+ err($self, $buf);
+ x_it($self, ($exit_code // 1) << 8);
+ undef;
+}
+
+# With a client socket the value is sent as a "child_error $?" message;
+# otherwise it is remembered in {child_error}.  Always returns undef.
+sub child_error { # passes non-fatal curl exit codes to user
+ my ($self, $child_error) = @_; # child_error is $?
+ if (my $sock = $self->{sock}) { # send to lei(1) client
+ send($sock, "child_error $child_error", MSG_EOR);
+ } else { # oneshot
+ $self->{child_error} = $child_error;
+ }
+ undef;
+}
+
+# Appends handles held by $self to $wq->{-ipc_atfork_child_close},
+# creating that list (seeded with $listener, if set) on first use.
+# NOTE(review): going by the key name, $wq presumably closes these in
+# its forked children -- confirm against the work queue implementation.
+sub atfork_prepare_wq {
+ my ($self, $wq) = @_;
+ my $tcafc = $wq->{-ipc_atfork_child_close} //= [ $listener // () ];
+ # client connection: stdio handles plus the socket itself
+ if (my $sock = $self->{sock}) {
+ push @$tcafc, @$self{qw(0 1 2)}, $sock;
+ }
+ if (my $pgr = $self->{pgr}) {
+ push @$tcafc, @$pgr[1,2];
+ }
+ if (my $old_1 = $self->{old_1}) {
+ push @$tcafc, $old_1;
+ }
+ # sockets/pipes of the lxs and l2m objects, where present
+ for my $f (qw(lxs l2m)) {
+ my $ipc = $self->{$f} or next;
+ push @$tcafc, grep { defined }
+ @$ipc{qw(-wq_s1 -wq_s2 -ipc_req -ipc_res)};
+ }
+}
+
+# usage: my %sig = $lei->atfork_child_wq($wq);
+# local @SIG{keys %sig} = values %sig;
+# Moves slots 0..4 of $wq into $self (handles 0, 1, 2, the socket and
+# the l2m -wq_s1), resets %PATH2CFG, $errors_log and $quit, and returns
+# a list of __WARN__ and PIPE signal handlers for the caller to install.
+sub atfork_child_wq {
+ my ($self, $wq) = @_;
+ my ($sock, $l2m_wq_s1);
+ (@$self{qw(0 1 2)}, $sock, $l2m_wq_s1) = delete(@$wq{0..4});
+ # slot 3 is only kept as {sock} when it really is a socket
+ $self->{sock} = $sock if -S $sock;
+ $self->{l2m}->{-wq_s1} = $l2m_wq_s1 if $l2m_wq_s1 && -S $l2m_wq_s1;
+ %PATH2CFG = ();
+ undef $errors_log;
+ $quit = \&CORE::exit;
+ (__WARN__ => sub { err($self, @_) },
+ PIPE => sub {
+ $self->x_it(13); # SIGPIPE = 13
+ # we need to close explicitly to avoid Perl warning on SIGPIPE
+ for my $i (1, 2) {
+ next unless $self->{$i} && (-p $self->{$i} || -S _);
+ close(delete $self->{$i});
+ }
+ # trigger the LeiXSearch $done OpPipe:
+ syswrite($self->{0}, '!') if $self->{0} && -p $self->{0};
+ $SIG{PIPE} = 'DEFAULT';
+ die bless(\"$_[0]", 'PublicInbox::SIGPIPE'),
+ });
+}
+
+# usage: ($lei, @io) = $lei->atfork_parent_wq($wq);
+# Returns a shallow copy of $self (without env, -lei_store, cfg, old_1,
+# pgr, lxs, handles 0..2 and sock) followed by the I/O handles:
+# @io[0..2] are handles 0..2, $io[3] is the socket (or handle 2 when
+# there is none) and $io[4], if set, is the l2m -wq_s1.
+sub atfork_parent_wq {
+ my ($self, $wq) = @_;
+ # temporarily remove {env} so it is not part of the copy
+ my $env = delete $self->{env}; # env is inherited at fork
+ my $ret = bless { %$self }, ref($self);
+ if (my $dedupe = delete $ret->{dedupe}) {
+ $ret->{dedupe} = $wq->deep_clone($dedupe);
+ }
+ $self->{env} = $env;
+ delete @$ret{qw(-lei_store cfg old_1 pgr lxs)}; # keep l2m
+ my @io = delete @$ret{0..2};
+ $io[3] = delete($ret->{sock}) // $io[2];
+ my $l2m = $ret->{l2m};
+ if ($l2m && $l2m != $wq) { # $wq == lxs
+ $io[4] = $l2m->{-wq_s1} if $l2m->{-wq_s1};
+ $l2m->wq_close(1);
+ }
+ ($ret, @io);
+}
+
+# Builds and prints the usage text for $self->{cmd} from %CMD and
+# %OPTDESC, then calls x_it().  With $errmsg, the message is prefixed
+# by "E: $errmsg", written to the {2} handle and the exit status is 1;
+# without it, output goes to the {1} handle and the status is 0.
+# Always returns undef.
+sub _help ($;$) {
+ my ($self, $errmsg) = @_;
+ my $cmd = $self->{cmd} // 'COMMAND';
+ my @info = @{$CMD{$cmd} // [ '...', '...' ]};
+ my @top = ($cmd, shift(@info) // ());
+ my $cmd_desc = shift(@info);
+ $cmd_desc = $cmd_desc->($self->{env}) if ref($cmd_desc) eq 'CODE';
+ my @opt_desc; # flat list of (switch text, description) pairs
+ my $lpad = 2; # width of the widest switch text seen so far
+ for my $sw (grep { !ref } @info) { # ("prio=s", "z", $GLP_PASS)
+ # command-specific description wins; undescribed switches are skipped
+ my $desc = $OPTDESC{"$cmd\t$sw"} // $OPTDESC{$sw} // next;
+ my $arg_vals = '';
+ ($arg_vals, $desc) = @$desc if ref($desc) eq 'ARRAY';
+
+ # lower-case is a keyword (e.g. `content', `oid'),
+ # ALL_CAPS is a string description (e.g. `PATH')
+ if ($desc !~ /default/ && $arg_vals =~ /\b([a-z]+)[,\|]/) {
+ $desc .= "\ndefault: `$1'";
+ }
+ # @vals: argument placeholder parts, @s: short and @l: long switches
+ my (@vals, @s, @l);
+ my $x = $sw;
+ if ($x =~ s/!\z//) { # solve! => --no-solve
+ $x = "no-$x";
+ } elsif ($x =~ s/:.+//) { # optional args: $x = "mid:s"
+ @vals = (' [', undef, ']');
+ } elsif ($x =~ s/=.+//) { # required arg: $x = "type=s"
+ @vals = (' ', undef);
+ } # else: no args $x = 'thread|t'
+ for (split(/\|/, $x)) { # help|h
+ length($_) > 1 ? push(@l, "--$_") : push(@s, "-$_");
+ }
+ if (!scalar(@vals)) { # no args 'thread|t'
+ } elsif ($arg_vals =~ s/\A([A-Z_]+)\b//) { # "NAME"
+ $vals[1] = $1;
+ } else {
+ $vals[1] = uc(substr($l[0], 2)); # "--type" => "TYPE"
+ }
+ # list the allowed values, split on the first separator found
+ if ($arg_vals =~ /([,\|])/) {
+ my $sep = $1;
+ my @allow = split(/\Q$sep\E/, $arg_vals);
+ my $must = $sep eq '|' ? 'Must' : 'Can';
+ @allow = map { "`$_'" } @allow;
+ my $last = pop @allow;
+ $desc .= "\n$must be one of: " .
+ join(', ', @allow) . " or $last";
+ }
+ my $lhs = join(', ', @s, @l) . join('', @vals);
+ if ($x =~ /\|\z/) { # "stdin|" or "clear|"
+ $lhs =~ s/\A--/- , --/;
+ } else {
+ $lhs =~ s/\A--/ --/; # pad if no short options
+ }
+ $lpad = length($lhs) if length($lhs) > $lpad;
+ push @opt_desc, $lhs, $desc;
+ }
+ my $msg = $errmsg ? "E: $errmsg\n" : '';
+ $msg .= <<EOF;
+usage: lei @top
+ $cmd_desc
+
+EOF
+ $lpad += 2;
+ local $Text::Wrap::columns = 78 - $lpad;
+ my $padding = ' ' x ($lpad + 2);
+ while (my ($lhs, $rhs) = splice(@opt_desc, 0, 2)) {
+ $msg .= ' '.pack("A$lpad", $lhs);
+ $rhs = wrap('', '', $rhs);
+ $rhs =~ s/\n/\n$padding/sg; # LHS pad continuation lines
+ $msg .= $rhs;
+ $msg .= "\n";
+ }
+ print { $self->{$errmsg ? 2 : 1} } $msg;
+ x_it($self, $errmsg ? 1 << 8 : 0); # stderr => failure
+ undef;
+}
+
+# Parses the switches in @$argv for $cmd according to its %CMD entry,
+# storing results in a fresh $self->{opt} (also reachable via $OPT),
+# then checks the remaining positional arguments against the command's
+# prototype string.
+# Returns 1 on success.  On bad switches or --help the usage text is
+# shown through _help(); on bad positional arguments fail() is called.
+# Both of those paths return undef.
+sub optparse ($$$) {
+ my ($self, $cmd, $argv) = @_;
+ $self->{cmd} = $cmd;
+ $OPT = $self->{opt} = {};
+ my $info = $CMD{$cmd} // [ '[...]' ];
+ my ($proto, undef, @spec) = @$info;
+ # a trailing parser object in the spec list overrides the default
+ my $glp = ref($spec[-1]) eq ref($GLP) ? pop(@spec) : $GLP;
+ push @spec, qw(help|h);
+ my $lone_dash;
+ if ($spec[0] =~ s/\|\z//s) { # "stdin|" or "clear|" allows "-" alias
+ $lone_dash = $spec[0];
+ # both the named switch and the '' (lone dash) spec write to $var
+ $OPT->{$spec[0]} = \(my $var);
+ push @spec, '' => \$var;
+ }
+ $glp->getoptionsfromarray($argv, $OPT, @spec) or
+ return _help($self, "bad arguments or options for $cmd");
+ return _help($self) if $OPT->{help};
+
+ # args collected by the opt_dash "<>" callback
+ push @$argv, @{$OPT->{-argv}} if defined($OPT->{-argv});
+
+ # "-" aliases "stdin" or "clear"
+ $OPT->{$lone_dash} = ${$OPT->{$lone_dash}} if defined $lone_dash;
+
+ my $i = 0; # index of the next positional argument to check
+ my $POS_ARG = '[A-Z][A-Z0-9_]+';
+ my ($err, $inf); # $inf: any number of further arguments is allowed
+ my @args = split(/ /, $proto);
+ for my $var (@args) {
+ if ($var =~ /\A$POS_ARG\.\.\.\z/o) { # >= 1 args;
+ # parses as: ($inf = defined(...)) and last
+ $inf = defined($argv->[$i]) and last;
+ $var =~ s/\.\.\.\z//;
+ $err = "$var not supplied";
+ } elsif ($var =~ /\A$POS_ARG\z/o) { # required arg at $i
+ $argv->[$i++] // ($err = "$var not supplied");
+ } elsif ($var =~ /\.\.\.\]\z/) { # optional args start
+ $inf = 1;
+ last;
+ } elsif ($var =~ /\A\[-?$POS_ARG\]\z/) { # one optional arg
+ $i++;
+ } elsif ($var =~ /\A.+?\|/) { # required FOO|--stdin
+ my @or = split(/\|/, $var);
+ my $ok;
+ for my $o (@or) {
+ if ($o =~ /\A--([a-z0-9\-]+)/) {
+ $ok = defined($OPT->{$1});
+ last;
+ } elsif (defined($argv->[$i])) {
+ $ok = 1;
+ $i++;
+ last;
+ } # else continue looping
+ }
+ last if $ok;
+ my $last = pop @or;
+ $err = join(', ', @or) . " or $last must be set";
+ } else {
+ warn "BUG: can't parse `$var' in $proto";
+ }
+ last if $err;
+ }
+ if (!$inf && scalar(@$argv) > scalar(@args)) {
+ $err //= 'too many arguments';
+ }
+ $err ? fail($self, "usage: lei $cmd $proto\nE: $err") : 1;
+}
+
+# Entry point for running a subcommand: maps $cmd to the lei_$cmd
+# function of this package (dashes become underscores), parses @argv
+# with optparse() and invokes the function with the remaining args.
+# A missing command or switches without a known command go through
+# _help(); an unknown command name goes through fail().
+sub dispatch {
+ my ($self, $cmd, @argv) = @_;
+ local $current_lei = $self; # for __WARN__
+ dump_and_clear_log("from previous run\n");
+ return _help($self, 'no command given') unless defined($cmd);
+ my $func = "lei_$cmd";
+ $func =~ tr/-/_/;
+ if (my $cb = __PACKAGE__->can($func)) {
+ optparse($self, $cmd, \@argv) or return;
+ $cb->($self, @argv);
+ } elsif (grep(/\A-/, $cmd, @argv)) { # --help or -h only
+ my $opt = {};
+ $GLP->getoptionsfromarray([$cmd, @argv], $opt, qw(help|h)) or
+ return _help($self, 'bad arguments or options');
+ _help($self);
+ } else {
+ fail($self, "`$cmd' is not an lei command");
+ }
+}
+
+# Load the lei config file into $self->{cfg} and return it.
+# A parsed config cached in %PATH2CFG is reused for as long as the
+# ctime+size fingerprint of the file is unchanged.  When the file does
+# not exist: with a false $creat, $self->{cfg} is deleted and nothing
+# is returned; with a true $creat, the directory and file are created.
+sub _lei_cfg ($;$) {
+ my ($self, $creat) = @_;
+ my $f = _config_path($self->{env});
+ my @st = stat($f);
+ my $cur_st = @st ? pack('dd', $st[10], $st[7]) : ''; # 10:ctime, 7:size
+ if (my $cfg = $PATH2CFG{$f}) { # reuse existing object in common case
+ return ($self->{cfg} = $cfg) if $cur_st eq $cfg->{-st};
+ }
+ if (!@st) {
+ unless ($creat) {
+ delete $self->{cfg};
+ return;
+ }
+ my (undef, $cfg_dir, undef) = File::Spec->splitpath($f);
+ -d $cfg_dir or mkpath($cfg_dir) or die "mkpath($cfg_dir): $!\n";
+ # '>>' creates the file if missing and never truncates it
+ open my $fh, '>>', $f or die "open($f): $!\n";
+ @st = stat($fh) or die "fstat($f): $!\n";
+ $cur_st = pack('dd', $st[10], $st[7]);
+ qerr($self, "I: $f created") if $self->{cmd} ne 'config';
+ }
+ my $cfg = PublicInbox::Config::git_config_dump($f);
+ # remember the fingerprint and path for the reuse check above
+ $cfg->{-st} = $cur_st;
+ $cfg->{'-f'} = $f;
+ $self->{cfg} = $PATH2CFG{$f} = $cfg;
+}
+
+# Returns the PublicInbox::LeiStore object memoized in the config under
+# {-lei_store}, instantiating it on first use.  The directory comes from
+# "leistore.dir", falling back to _store_path() only when $creat is true.
+# Returns nothing (without caching) when no directory is known.
+sub _lei_store ($;$) {
+ my ($self, $creat) = @_;
+ my $cfg = _lei_cfg($self, $creat);
+ $cfg->{-lei_store} //= do {
+ require PublicInbox::LeiStore;
+ my $dir = $cfg->{'leistore.dir'};
+ $dir //= _store_path($self->{env}) if $creat;
+ # n.b. this returns from _lei_store itself, not just the do-block
+ return unless $dir;
+ PublicInbox::LeiStore->new($dir, { creat => $creat });
+ };
+}
+
+# "lei show" placeholder: unpacks its arguments and does nothing else
+sub lei_show {
+ my ($self, @argv) = @_;
+}
+
+# "lei mark" placeholder: unpacks its arguments and does nothing else
+sub lei_mark {
+ my ($self, @argv) = @_;
+}
+
+# Runs "git config -f $LEI_CONFIG_FILE @argv" with the client's stdio
+# (FDs 0..2) and waits for it to finish; the child status is left in $?
+# for the caller to inspect.  The config file is created if missing.
+sub _config {
+ my ($self, @argv) = @_;
+ my $env = $self->{env};
+ # GIT_CONFIG is removed for the duration of this sub only
+ delete local $env->{GIT_CONFIG};
+ delete local $ENV{GIT_CONFIG};
+ my $cfg = _lei_cfg($self, 1);
+ my $cmd = [ qw(git config -f), $cfg->{'-f'}, @argv ];
+ my %rdr = map { $_ => $self->{$_} } (0..2);
+ waitpid(spawn($cmd, $env, \%rdr), 0);
+}
+
+# "lei config": thin wrapper around _config() which rejects the
+# --config-file switch and propagates a non-zero child status
+# to the client via x_it().
+sub lei_config {
+	my ($self, @argv) = @_;
+	if ($self->{opt}->{'config-file'}) {
+		return fail($self,
+			"config file switches not supported by `lei config'");
+	}
+	_config(@_);
+	$? and x_it($self, $?);
+}
+
+# "lei init [DIR]": points "leistore.dir" at $dir (default: _store_path)
+# and creates the store.  Re-running with the same directory, or with a
+# different path to the same dev+inode, is accepted; pointing at a
+# different directory once one is initialized is an error.
+sub lei_init {
+ my ($self, $dir) = @_;
+ my $cfg = _lei_cfg($self, 1);
+ my $cur = $cfg->{'leistore.dir'};
+ my $env = $self->{env};
+ $dir //= _store_path($env);
+ $dir = File::Spec->rel2abs($dir, $env->{PWD}); # PWD is symlink-aware
+ # @cur stays empty when leistore.dir is unset or cannot be stat-ed
+ my @cur = stat($cur) if defined($cur);
+ $cur = File::Spec->canonpath($cur // $dir);
+ my @dir = stat($dir);
+ my $exists = "I: leistore.dir=$cur already initialized" if @dir;
+ if (@cur) {
+ if ($cur eq $dir) {
+ _lei_store($self, 1)->done;
+ return qerr($self, $exists);
+ }
+
+ # some folks like symlinks and bind mounts :P
+ # (same device and inode means same directory)
+ if (@dir && "$cur[0] $cur[1]" eq "$dir[0] $dir[1]") {
+ lei_config($self, 'leistore.dir', $dir);
+ _lei_store($self, 1)->done;
+ return qerr($self, "$exists (as $cur)");
+ }
+ return fail($self, <<"");
+E: leistore.dir=$cur already initialized and it is not $dir
+
+ }
+ lei_config($self, 'leistore.dir', $dir);
+ _lei_store($self, 1)->done;
+ $exists //= "I: leistore.dir=$dir newly initialized";
+ return qerr($self, $exists);
+}
+
+# "lei daemon-pid": writes the PID of the current process via puts()
+sub lei_daemon_pid { puts shift, $$ }
+
+# "lei daemon-kill": sends --signal (SIGTERM by default) to the
+# current process and reports a failure to deliver it to the client.
+sub lei_daemon_kill {
+	my ($self) = @_;
+	my $signame = $self->{opt}->{signal} // 'TERM';
+	my $delivered = kill($signame, $$);
+	$delivered || fail($self, "kill($signame, $$): $!");
+}
+
+# "lei help": shows the help text with no error message attached
+sub lei_help { _help($_[0]) }
+
+# Shell completion helper. Used by lei-completion.bash and hopefully
+# other shells. Try to do as much here as possible to avoid redundancy
+# and improve maintainability.
+# @argv is the full command line being completed, starting with "lei".
+# Candidates are emitted via puts(): command names when no command was
+# typed yet, matching command names for an unknown/partial command,
+# switches when the current word starts with '-' (or is absent), config
+# keys for "lei config", plus whatever a "_complete_$cmd" method returns.
+sub lei__complete {
+ my ($self, @argv) = @_; # argv = qw(lei and any other args...)
+ shift @argv; # ignore "lei", the entire command is sent
+ @argv or return puts $self, grep(!/^_/, keys %CMD), qw(--help -h);
+ my $cmd = shift @argv;
+ my $info = $CMD{$cmd} // do { # filter matching commands
+ @argv or puts $self, grep(/\A\Q$cmd\E/, keys %CMD);
+ return;
+ };
+ my ($proto, undef, @spec) = @$info;
+ # the word being completed; undef when nothing was typed after $cmd
+ my $cur = pop @argv;
+ my $re = defined($cur) ? qr/\A\Q$cur\E/ : qr/./;
+ if (substr($cur // '-', 0, 1) eq '-') { # --switches
+ # gross special case since the only git-config options
+ # we use Getopt::Long for are the ones we reject, so these
+ # are the ones we don't reject:
+ # Consider moving to a table if we need more special cases
+ if ($cmd eq 'config') {
+ puts $self, grep(/$re/, keys %CONFIG_KEYS);
+ @spec = qw(add z|null get get-all unset unset-all
+ replace-all get-urlmatch
+ remove-section rename-section
+ name-only list|l edit|e
+ get-color-name get-colorbool);
+ # fall-through
+ }
+ # TODO: arg support
+ puts $self, grep(/$re/, map { # generate short/long names
+ my $eq = '';
+ if (s/=.+\z//) { # required arg, e.g. output|o=i
+ $eq = '=';
+ } elsif (s/:.+\z//) { # optional arg, e.g. mid:s
+ } else { # negation: solve! => no-solve|solve
+ s/\A(.+)!\z/no-$1|$1/;
+ }
+ map {
+ length > 1 ? "--$_$eq" : "-$_"
+ } split(/\|/, $_, -1) # help|h
+ } grep { $OPTDESC{"$cmd\t$_"} || $OPTDESC{$_} } @spec);
+ } elsif ($cmd eq 'config' && !@argv && !$CONFIG_KEYS{$cur}) {
+ puts $self, grep(/$re/, keys %CONFIG_KEYS);
+ }
+ # per-command helpers are named after the command with '-' => '_'
+ $cmd =~ tr/-/_/;
+ if (my $sub = $self->can("_complete_$cmd")) {
+ puts $self, $sub->($self, @argv, $cur);
+ }
+ # TODO: URLs, pathnames, OIDs, MIDs, etc... See optparse() for
+ # proto parsing.
+}
+
+# Forwards the child status in $? to x_it(); $pid itself is unused.
+sub reap_exec { # dwaitpid callback
+ my ($self, $pid) = @_;
+ x_it($self, $?);
+}
+
+# "lei git ARGS...": runs git with the client's stdio and environment;
+# its exit status is relayed asynchronously through reap_exec().
+sub lei_git { # support passing through random git commands
+	my ($self, @argv) = @_;
+	my $rdr = { 0 => $self->{0}, 1 => $self->{1}, 2 => $self->{2} };
+	my @cmd = ('git', @argv);
+	dwaitpid(spawn(\@cmd, $self->{env}, $rdr), \&reap_exec, $self);
+}
+
+# Serializes an "exec" request for the lei(1) client process:
+#   "exec $ARGC\0$ARGV0\0$ARGV1...\0$KEY=$VAL..."
+# $argv is an array ref of the command, $env a hash ref of environment
+# entries to add.  Returns the request as a single string.
+sub exec_buf ($$) {
+	my ($argv, $env) = @_;
+	my $buf = 'exec '.join("\0", scalar(@$argv), @$argv);
+	# keys() resets the hash iterator, so (unlike a bare each() loop)
+	# no entry is skipped if a caller left the iterator mid-way
+	$buf .= "\0$_=$env->{$_}" for keys %$env;
+	$buf;
+}
+
+# Starts the mail user agent given by --mua-cmd on the output folder
+# ($self->{ovv}->{dst}); does nothing when --mua-cmd is unset.
+# Well-known MUAs get "-f $FOLDER"; any other value is split with
+# shell-like quoting rules and every '%f' word is replaced by the
+# folder (the folder is appended when no '%f' is present).
+# With a client socket, the command is sent to the lei(1) client to
+# exec; otherwise it is spawned here and reaped in DESTROY.
+sub start_mua {
+	my ($self) = @_;
+	my $mua = $self->{opt}->{'mua-cmd'} // return;
+	my $mfolder = $self->{ovv}->{dst};
+	my (@cmd, $replaced);
+	if ($mua =~ /\A(?:mutt|mailx|mail|neomutt)\z/) {
+		@cmd = ($mua, '-f');
+	# TODO: help wanted: other common FOSS MUAs
+	} else {
+		require Text::ParseWords;
+		# assign to the outer @cmd: a new "my @cmd" here would be
+		# thrown away at the end of this block
+		@cmd = Text::ParseWords::shellwords($mua);
+		# mutt uses '%f' for open-hook with compressed mbox, we follow
+		@cmd = map { $_ eq '%f' ? ($replaced = $mfolder) : $_ } @cmd;
+	}
+	push @cmd, $mfolder unless defined($replaced);
+	if (my $sock = $self->{sock}) { # lei(1) client process runs it
+		send($sock, exec_buf(\@cmd, {}), MSG_EOR);
+	} else { # oneshot
+		$self->{"mua.pid.$self.$$"} = spawn(\@cmd);
+	}
+}
+
+# caller needs to "-t $self->{1}" to check if tty
+# Asks "git var GIT_PAGER" which pager to use and, unless it is empty
+# or "cat", starts it with its stdin connected to a new pipe.
+# $self->{1} (and $self->{2} if it is a tty) is then replaced by the
+# write end of that pipe.  $self->{pgr} records
+# [ $pager_pid_or_undef, $orig_stdout, $orig_stderr, $our_pid ]
+# for stop_pager().
+sub start_pager {
+ my ($self) = @_;
+ my $env = $self->{env};
+ my $fh = popen_rd([qw(git var GIT_PAGER)], $env);
+ chomp(my $pager = <$fh> // '');
+ close($fh) or warn "`git var PAGER' error: \$?=$?";
+ return if $pager eq 'cat' || $pager eq '';
+ # TODO TIOCGWINSZ
+ my $new_env = { LESS => 'FRX', LV => '-c', COLUMNS => 80 };
+ $new_env->{MORE} = 'FRX' if $^O eq 'freebsd';
+ pipe(my ($r, $wpager)) or return warn "pipe: $!";
+ my $rdr = { 0 => $r, 1 => $self->{1}, 2 => $self->{2} };
+ my $pgr = [ undef, @$rdr{1, 2}, $$ ];
+ if (my $sock = $self->{sock}) { # lei(1) process runs it
+ delete @$new_env{keys %$env}; # only set iff unset
+ my $fds = [ map { fileno($_) } @$rdr{0..2} ];
+ $send_cmd->($sock, $fds, exec_buf([$pager], $new_env), MSG_EOR);
+ } else {
+ # no client socket: we are the parent of the pager
+ $pgr->[0] = spawn([$pager], $new_env, $rdr);
+ }
+ $self->{1} = $wpager;
+ $self->{2} = $wpager if -t $self->{2};
+ $env->{GIT_PAGER_IN_USE} = 'true'; # we may spawn git
+ $self->{pgr} = $pgr;
+}
+
+# Undoes start_pager(): restores the saved stderr, closes our end of
+# the pager pipe and, if this process spawned the pager itself,
+# schedules the pager PID to be reaped.  No-op without $self->{pgr}.
+sub stop_pager {
+ my ($self) = @_;
+ my $pgr = delete($self->{pgr}) or return;
+ $self->{2} = $pgr->[2];
+ # do not restore original stdout, just close it so we error out
+ close(delete($self->{1})) if $self->{1};
+ my $pid = $pgr->[0];
+ # $pgr->[3] is the PID of the process which called start_pager()
+ dwaitpid($pid, undef, $self->{sock}) if $pid && $pgr->[3] == $$;
+}
+
+# Handles one freshly-accepted client connection: waits up to one
+# second for the request, receives exactly four file descriptors along
+# with a "\0"-delimited buffer of argc, argv and KEY=VALUE environment
+# entries, changes into the directory given by FD 3, and runs
+# dispatch() with %ENV replaced by the client's environment.
+# Errors are reported to the client as messages on $sock.
+sub accept_dispatch { # Listener {post_accept} callback
+ my ($sock) = @_; # ignore other
+ $sock->autoflush(1);
+ my $self = bless { sock => $sock }, __PACKAGE__;
+ vec(my $rvec = '', fileno($sock), 1) = 1;
+ select($rvec, undef, undef, 1) or
+ return send($sock, 'timed out waiting to recv FDs', MSG_EOR);
+ my @fds = $recv_cmd->($sock, my $buf, 4096 * 33); # >MAX_ARG_STRLEN
+ if (scalar(@fds) == 4) {
+ for my $i (0..3) {
+ my $fd = shift(@fds);
+ open($self->{$i}, '+<&=', $fd) and next;
+ # NOTE(review): the loop continues after a failed open
+ send($sock, "open(+<&=$fd) (FD=$i): $!", MSG_EOR);
+ }
+ } else {
+ return send($sock, "recv_cmd failed: $!", MSG_EOR);
+ }
+ $self->{2}->autoflush(1); # keep stdout buffered until x_it|DESTROY
+ # $ENV_STR = join('', map { "\0$_=$ENV{$_}" } keys %ENV);
+ # $buf = "$$\0$argc\0".join("\0", @ARGV).$ENV_STR."\0\0";
+ substr($buf, -2, 2, '') eq "\0\0" or # s/\0\0\z//
+ return send($sock, 'request command truncated', MSG_EOR);
+ my ($argc, @argv) = split(/\0/, $buf, -1);
+ undef $buf;
+ # everything past the first $argc elements is KEY=VALUE environment
+ my %env = map { split(/=/, $_, 2) } splice(@argv, $argc);
+ if (chdir(delete($self->{3}))) {
+ local %ENV = %env;
+ $self->{env} = \%env;
+ eval { dispatch($self, @argv) };
+ send($sock, $@, MSG_EOR) if $@;
+ } else {
+ send($sock, "fchdir: $!", MSG_EOR); # implicit close
+ }
+}
+
+# Tears down a client: detaches and shuts down the {lxs} and {l2m}
+# work queues (if any), closes our stdout and finally the client socket.
+sub dclose {
+	my ($self) = @_;
+	for my $f (qw(lxs l2m)) {
+		my $wq = delete $self->{$f} or next;
+		if ($wq->wq_kill) {
+			# close the work queue itself, not the lei object
+			$wq->wq_close;
+		} elsif ($wq->wq_kill_old) {
+			$wq->wq_wait_old;
+		}
+	}
+	close(delete $self->{1}) if $self->{1}; # may reap_compress
+	$self->close if $self->{sock}; # PublicInbox::DS::close
+}
+
+# for long-running results
+# Event loop callback for the client socket while a long-running
+# command is in progress.  No message is expected from the client at
+# this point: any received data is treated as an error, and the client
+# is torn down via dclose() when the receive loop ends (empty result or
+# ECONNRESET) or after an error.  EAGAIN returns to the event loop.
+sub event_step {
+ my ($self) = @_;
+ local %ENV = %{$self->{env}};
+ my $sock = $self->{sock};
+ local $current_lei = $self;
+ eval {
+ while (my @fds = $recv_cmd->($sock, my $buf, 4096)) {
+ # a lone undef signals an error from $recv_cmd, see $!
+ if (scalar(@fds) == 1 && !defined($fds[0])) {
+ return if $! == EAGAIN;
+ next if $! == EINTR;
+ last if $! == ECONNRESET;
+ die "recvmsg: $!";
+ }
+ # wrap unexpected FDs in handles which go out of scope at
+ # once, presumably so they get closed -- TODO confirm
+ for my $fd (@fds) {
+ open my $rfh, '+<&=', $fd;
+ }
+ die "unrecognized client signal: $buf";
+ }
+ dclose($self);
+ };
+ if (my $err = $@) {
+ eval { $self->fail($err) };
+ dclose($self);
+ }
+}
+
+# Registers the client socket (if any) with the event loop in
+# non-blocking, edge-triggered read mode so event_step() gets called.
+sub event_step_init {
+ my ($self) = @_;
+ if (my $sock = $self->{sock}) { # using DS->EventLoop
+ $sock->blocking(0);
+ $self->SUPER::new($sock, EPOLLIN|EPOLLET);
+ }
+}
+
+# does nothing; used as a callback where only the wakeup matters
+sub noop {}
+
+# accessor for $oldset, which lazy_start() sets to the value returned
+# by PublicInbox::DS::block_signals()
+our $oldset; sub oldset { $oldset }
+
+# If $errors_log is set and the file behind STDIN is non-empty, emits
+# its entire contents via warn (prefixed by the log path and any
+# arguments given) and then truncates it.
+sub dump_and_clear_log {
+ if (defined($errors_log) && -s STDIN && seek(STDIN, 0, SEEK_SET)) {
+ my @pfx = @_;
+ unshift(@pfx, "$errors_log ") if @pfx;
+ warn @pfx, do { local $/; <STDIN> };
+ truncate(STDIN, 0) or warn "ftruncate ($errors_log): $!";
+ }
+}
+
+# lei(1) calls this when it can't connect
+# Starts the lei daemon listening on the Unix socket at $path.
+# $errno is the error the client got from connect(): ECONNREFUSED
+# (stale socket, unlinked here) or ENOENT; anything else is fatal.
+# $narg selects the FD-passing implementation (only 5 is handled here).
+# The parent returns after fork; the child runs the event loop until
+# told to quit and never returns.
+sub lazy_start {
+ my ($path, $errno, $narg) = @_;
+ if ($errno == ECONNREFUSED) {
+ unlink($path) or die "unlink($path): $!";
+ } elsif ($errno != ENOENT) {
+ $! = $errno; # allow interpolation to stringify in die
+ die "connect($path): $!";
+ }
+ umask(077) // die("umask(077): $!");
+ local $listener;
+ socket($listener, AF_UNIX, SOCK_SEQPACKET, 0) or die "socket: $!";
+ bind($listener, pack_sockaddr_un($path)) or die "bind($path): $!";
+ listen($listener, 1024) or die "listen: $!";
+ my @st = stat($path) or die "stat($path): $!";
+ # remembered to detect the socket path being replaced or removed
+ my $dev_ino_expect = pack('dd', $st[0], $st[1]); # dev+ino
+ local $oldset = PublicInbox::DS::block_signals();
+ if ($narg == 5) {
+ # prefer PublicInbox::Spawn, fall back to PublicInbox::CmdIPC4
+ $send_cmd = PublicInbox::Spawn->can('send_cmd4');
+ $recv_cmd = PublicInbox::Spawn->can('recv_cmd4') // do {
+ require PublicInbox::CmdIPC4;
+ $send_cmd = PublicInbox::CmdIPC4->can('send_cmd4');
+ PublicInbox::CmdIPC4->can('recv_cmd4');
+ };
+ }
+ $recv_cmd or die <<"";
+(Socket::MsgHdr || Inline::C) missing/unconfigured (narg=$narg);
+
+ require PublicInbox::Listener;
+ require PublicInbox::EOFpipe;
+ (-p STDOUT) or die "E: stdout must be a pipe\n";
+ local $errors_log;
+ # errors.log lives in the same directory as the socket
+ ($errors_log) = ($path =~ m!\A(.+?/)[^/]+\z!);
+ $errors_log .= 'errors.log';
+ open(STDIN, '+>>', $errors_log) or die "open($errors_log): $!";
+ STDIN->autoflush(1);
+ dump_and_clear_log("from previous daemon process:\n");
+ POSIX::setsid() > 0 or die "setsid: $!";
+ my $pid = fork // die "fork: $!";
+ return if $pid;
+ # only the child (the actual daemon) continues below
+ $0 = "lei-daemon $path";
+ local %PATH2CFG;
+ $listener->blocking(0);
+ my $exit_code;
+ my $pil = PublicInbox::Listener->new($listener, \&accept_dispatch);
+ # $quit: the first call stops accepting new clients and lets the
+ # loop drain; a later call (once $pil is gone) exits immediately
+ local $quit = do {
+ pipe(my ($eof_r, $eof_w)) or die "pipe: $!";
+ PublicInbox::EOFpipe->new($eof_r, \&noop, undef);
+ sub {
+ $exit_code //= shift;
+ my $lis = $pil or exit($exit_code);
+ # closing eof_w triggers \&noop wakeup
+ $listener = $eof_w = $pil = $path = undef;
+ $lis->close; # DS::close
+ PublicInbox::DS->SetLoopTimeout(1000);
+ };
+ };
+ my $sig = {
+ CHLD => \&PublicInbox::DS::enqueue_reap,
+ QUIT => $quit,
+ INT => $quit,
+ TERM => $quit,
+ HUP => \&noop,
+ USR1 => \&noop,
+ USR2 => \&noop,
+ };
+ my $sigfd = PublicInbox::Sigfd->new($sig, SFD_NONBLOCK);
+ # plain %SIG handlers are only installed when Sigfd is unavailable
+ local @SIG{keys %$sig} = values(%$sig) unless $sigfd;
+ undef $sig;
+ local $SIG{PIPE} = 'IGNORE';
+ if ($sigfd) { # TODO: use inotify/kqueue to detect unlinked sockets
+ undef $sigfd;
+ PublicInbox::DS->SetLoopTimeout(5000);
+ } else {
+ # wake up every second to accept signals if we don't
+ # have signalfd or IO::KQueue:
+ PublicInbox::DS::sig_setmask($oldset);
+ PublicInbox::DS->SetLoopTimeout(1000);
+ }
+ # Checked after every loop iteration: keep running while the socket
+ # path is still ours; once $path is undef, keep running only while
+ # busy clients remain.
+ PublicInbox::DS->SetPostLoopCallback(sub {
+ my ($dmap, undef) = @_;
+ if (@st = defined($path) ? stat($path) : ()) {
+ if ($dev_ino_expect ne pack('dd', $st[0], $st[1])) {
+ warn "$path dev/ino changed, quitting\n";
+ $path = undef;
+ }
+ } elsif (defined($path)) {
+ warn "stat($path): $!, quitting ...\n";
+ undef $path; # don't unlink
+ $quit->();
+ }
+ return 1 if defined($path);
+ my $now = now();
+ my $n = 0;
+ for my $s (values %$dmap) {
+ $s->can('busy') or next;
+ if ($s->busy($now)) {
+ ++$n;
+ } else {
+ $s->close;
+ }
+ }
+ $n; # true: continue, false: stop
+ });
+
+ # STDIN was reopened on $errors_log above; pointing STDERR and
+ # STDOUT at it below releases the pipe to the calling `lei' client
+ # process, which lets it finish reading the <$daemon> pipe.
+ openlog($path, 'pid', 'user');
+ local $SIG{__WARN__} = sub {
+ $current_lei ? err($current_lei, @_) : syslog('warning', "@_");
+ };
+ my $on_destroy = PublicInbox::OnDestroy->new($$, sub {
+ syslog('crit', "$@") if $@;
+ });
+ open STDERR, '>&STDIN' or die "redirect stderr failed: $!";
+ open STDOUT, '>&STDIN' or die "redirect stdout failed: $!";
+ # $daemon pipe to `lei' closed, main loop begins:
+ PublicInbox::DS->EventLoop;
+ @$on_destroy = (); # cancel on_destroy if we get here
+ exit($exit_code // 0);
+}
+
+# for users w/o Socket::Msghdr installed or Inline::C enabled
+# Runs a single lei command from @ARGV in the current process, using
+# the process's own stdio and %ENV instead of a client socket.
+# $main_pkg may provide an exit() sub, which then stands in for $quit.
+sub oneshot {
+ my ($main_pkg) = @_;
+ my $exit = $main_pkg->can('exit'); # caller may override exit()
+ local $quit = $exit if $exit;
+ local %PATH2CFG;
+ umask(077) // die("umask(077): $!");
+ my $self = bless {
+ 0 => *STDIN{GLOB},
+ 1 => *STDOUT{GLOB},
+ 2 => *STDERR{GLOB},
+ env => \%ENV
+ }, __PACKAGE__;
+ dispatch($self, @ARGV);
+ # propagate a failure status recorded in {child_error}, if any
+ x_it($self, $self->{child_error}) if $self->{child_error};
+}
+
+# ensures stdout hits the FS before sock disconnects so a client
+# can immediately reread it
+# Also stops the pager and waits for an MUA spawned by this very
+# object in this very process (see the key set in start_mua).
+sub DESTROY {
+ my ($self) = @_;
+ $self->{1}->autoflush(1) if $self->{1};
+ stop_pager($self);
+ if (my $mua_pid = delete $self->{"mua.pid.$self.$$"}) {
+ waitpid($mua_pid, 0);
+ }
+}
+
+1;
--- /dev/null
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+package PublicInbox::LeiDedupe;
+use strict;
+use v5.10.1;
+use PublicInbox::SharedKV;
+use PublicInbox::ContentHash qw(content_hash);
+
+# n.b. mutt sets most of these headers; not sure about Bytes
+our @OID_IGNORE = qw(Status X-Status Content-Length Lines Bytes);
+
+# best-effort regeneration of OID when augmenting existing results
+# Computes the binary SHA-1 git blob ID ("blob $LENGTH\0$CONTENT") of
+# $eml as it would be without the headers listed in @OID_IGNORE.
+# Those headers are removed temporarily and put back before returning,
+# so $eml is left as it was found.
+sub _regen_oid ($) {
+	my ($eml) = @_;
+	my @stash; # stash away headers we shouldn't have in git
+	for my $k (@OID_IGNORE) {
+		my @v = $eml->header_raw($k) or next;
+		# flat [ name, values... ] to match the unpacking below
+		push @stash, [ $k, @v ];
+		$eml->header_set($k); # restore below
+	}
+	my $dig = Digest::SHA->new(1); # XXX SHA256 later
+	my $buf = $eml->as_string;
+	$dig->add('blob '.length($buf)."\0");
+	$dig->add($buf);
+	undef $buf;
+
+	for my $kv (@stash) { # restore stashed headers
+		my ($k, @v) = @$kv;
+		$eml->header_set($k, @v);
+	}
+	$dig->digest;
+}
+
+# hex object ID => packed binary form; an undefined ID stays undef
+sub _oidbin ($) {
+	my ($hex) = @_;
+	# explicit undef: a one-element list in list context, as before
+	return undef unless defined($hex);
+	pack('H*', $hex);
+}
+
+# Binary SHA-256 over the overview fields of $smsg, joined by "\0"
+# and encoded as UTF-8.
+sub smsg_hash ($) {
+	my ($smsg) = @_;
+	my @fields = qw(from to cc ds subject references mid);
+	my $joined = join("\0", map { $smsg->{$_} } @fields);
+	utf8::encode($joined);
+	Digest::SHA->new(256)->add($joined)->digest;
+}
+
+# the paranoid option
+# Strategy keyed on the git blob ID.  Returns ($eml_cb, $smsg_cb);
+# each callback records its key in $skv via set_maybe and returns
+# whatever set_maybe returns.
+sub dedupe_oid ($) {
+	my ($skv) = @_;
+	my $eml_cb = sub { # may be called in a child process
+		my ($eml, $oid) = @_;
+		# no OID given: regenerate it from the message itself
+		my $key = _oidbin($oid) // _regen_oid($eml);
+		$skv->set_maybe($key, '');
+	};
+	my $smsg_cb = sub {
+		my ($smsg) = @_;
+		$skv->set_maybe(_oidbin($smsg->{blob}), '');
+	};
+	($eml_cb, $smsg_cb);
+}
+
+# dangerous if there's duplicate messages with different Message-IDs
+# Strategy keyed on the Message-ID.  Returns ($eml_cb, $smsg_cb); each
+# callback records its key in $skv via set_maybe.  Messages lacking a
+# Message-ID fall back to the blob ID or a content/overview hash.
+sub dedupe_mid ($) {
+ my ($skv) = @_;
+ (sub { # may be called in a child process
+ my ($eml, $oid) = @_;
+ # TODO: lei will support non-public messages w/o Message-ID
+ my $mid = $eml->header_raw('Message-ID') // _oidbin($oid) //
+ content_hash($eml);
+ $skv->set_maybe($mid, '');
+ }, sub {
+ my ($smsg) = @_;
+ my $mid = $smsg->{mid};
+ # treat an empty Message-ID like a missing one
+ $mid = undef if $mid eq '';
+ # NOTE(review): smsg_hash() appears to always return a digest,
+ # so the _oidbin fallback looks unreachable -- confirm
+ $mid //= smsg_hash($smsg) // _oidbin($smsg->{blob});
+ $skv->set_maybe($mid, '');
+ });
+}
+
+# our default deduplication strategy (used by v2, also)
+# Strategy keyed on a hash of the message content (full messages) or
+# of the overview fields (smsg).  Returns ($eml_cb, $smsg_cb).
+sub dedupe_content ($) {
+	my ($skv) = @_;
+	my $eml_cb = sub { # may be called in a child process
+		my ($eml) = @_; # oid = $_[1], ignored
+		my $key = content_hash($eml);
+		$skv->set_maybe($key, '');
+	};
+	my $smsg_cb = sub {
+		my ($smsg) = @_;
+		my $key = smsg_hash($smsg);
+		$skv->set_maybe($key, '');
+	};
+	($eml_cb, $smsg_cb);
+}
+
+# no deduplication at all: both callbacks unconditionally return true,
+# so is_dup/is_smsg_dup (which negate the result) are always false
+sub true { 1 }
+sub dedupe_none ($) { (\&true, \&true) }
+
+# Builds a dedupe object for the --dedupe strategy in $lei->{opt}
+# ("content" by default).  Returns nothing for "none" when the
+# destination ends with '/'; dies on an unknown strategy.
+# The callbacks are filled in lazily by prepare_dedupe().
+sub new {
+ my ($cls, $lei) = @_;
+ my $dd = $lei->{opt}->{dedupe} // 'content';
+ my $dst = $lei->{ovv}->{dst};
+
+ # allow "none" to bypass Eml->new if writing to directory:
+ return if ($dd eq 'none' && substr($dst // '', -1) eq '/');
+ my $m = "dedupe_$dd";
+ $cls->can($m) or die "unsupported dedupe strategy: $dd\n";
+ # "none" needs no key-value store
+ my $skv = $dd eq 'none' ? undef : PublicInbox::SharedKV->new;
+
+ # [ $skv, $eml_cb, $smsg_cb, "dedupe_$dd" ]
+ bless [ $skv, undef, undef, $m ], $cls;
+}
+
+# Returns the negation of the strategy's $eml_cb result, i.e. true
+# when the callback reports the message as already recorded.
+# (presumably set_maybe is true only for a newly-inserted key, which
+# makes this true for duplicates and false for unseen messages --
+# verify against PublicInbox::SharedKV)
+# prepare_dedupe() must have been called to install the callbacks.
+sub is_dup {
+ my ($self, $eml, $oid) = @_;
+ !$self->[1]->($eml, $oid);
+}
+
+# Same as is_dup, but for an smsg: negates the strategy's $smsg_cb.
+sub is_smsg_dup {
+ my ($self, $smsg) = @_;
+ !$self->[2]->($smsg);
+}
+
+# Installs the ($eml_cb, $smsg_cb) pair of the chosen strategy on
+# first use and returns the store's dbh, or undef without a store.
+sub prepare_dedupe {
+ my ($self) = @_;
+ my $skv = $self->[0];
+ # $self->[3] holds the "dedupe_$strategy" sub name from new()
+ $self->[1] or @$self[1,2] = $self->can($self->[3])->($skv);
+ $skv ? $skv->dbh : undef;
+}
+
+# Drops the store's cached {dbh} entry, if there is a store.
+sub pause_dedupe {
+ my ($self) = @_;
+ my $skv = $self->[0];
+ delete($skv->{dbh}) if $skv;
+}
+
+1;
--- /dev/null
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# *-external commands of lei
+package PublicInbox::LeiExternal;
+use strict;
+use v5.10.1;
+use parent qw(Exporter);
+our @EXPORT = qw(lei_ls_external lei_add_external lei_forget_external);
+use PublicInbox::Config;
+
+# Iterates over the "external.$LOCATION" sections of the lei config.
+# Without $cb: returns a { location => boost } hash ref in scalar
+# context, or the ordered list of locations in list context.
+# With $cb: calls $cb->(@arg, $location, $boost) for each location in
+# order and returns the locations (their count in scalar context).
+sub _externals_each {
+ my ($self, $cb, @arg) = @_;
+ my $cfg = $self->_lei_cfg(0);
+ my %boost;
+ for my $sec (grep(/\Aexternal\./, @{$cfg->{-section_order}})) {
+ my $loc = substr($sec, length('external.'));
+ $boost{$loc} = $cfg->{"$sec.boost"};
+ }
+ return \%boost if !wantarray && !$cb;
+
+ # highest boost first, but stable for alphabetic tie break
+ use sort 'stable';
+ my @order = sort { $boost{$b} <=> $boost{$a} } sort keys %boost;
+ return @order if !$cb;
+ for my $loc (@order) {
+ $cb->(@arg, $loc, $boost{$loc});
+ }
+ @order; # scalar or array
+}
+
+# "lei ls-external": prints one "$LOCATION boost=$N" record per
+# configured external to the client's stdout.  With -z, fields are
+# separated by NUL and records by two NULs.
+sub lei_ls_external {
+	my ($self, @argv) = @_;
+	my $out = $self->{1};
+	my ($OFS, $ORS) = (" ", "\n");
+	($OFS, $ORS) = ("\0", "\0\0") if $self->{opt}->{z};
+	my $show = sub {
+		my ($loc, $boost_val) = @_;
+		print $out $loc, $OFS, 'boost=', $boost_val, $ORS;
+	};
+	$self->_externals_each($show);
+}
+
+# Normalizes an external location: local paths are made absolute and
+# collapsed; http(s) URLs are canonicalized with exactly one trailing
+# slash and no runs of '/' in the path.
+sub _canonicalize {
+	my ($location) = @_;
+	$location =~ m!\Ahttps?://! or
+		return PublicInbox::Config::rel2abs_collapsed($location);
+	require URI;
+	my $uri = URI->new($location)->canonical;
+	(my $path = $uri->path . '/') =~ tr!/!/!s; # squeeze redundant '/'
+	$uri->path($path);
+	$uri->as_string;
+}
+
+# "lei add-external LOCATION": records "external.$LOCATION.boost" in
+# the lei config (--boost, default 0).  Non-URL locations must be
+# existing directories.  Nothing is written when the boost is unchanged.
+sub lei_add_external {
+ my ($self, $location) = @_;
+ my $cfg = $self->_lei_cfg(1);
+ my $new_boost = $self->{opt}->{boost} // 0;
+ $location = _canonicalize($location);
+ if ($location !~ m!\Ahttps?://! && !-d $location) {
+ return $self->fail("$location not a directory");
+ }
+ my $key = "external.$location.boost";
+ my $cur_boost = $cfg->{$key};
+ return if defined($cur_boost) && $cur_boost == $new_boost; # idempotent
+ $self->lei_config($key, $new_boost);
+ $self->_lei_store(1)->done; # just create the store
+}
+
+# "lei forget-external LOCATION...": unsets "external.$LOCATION.boost"
+# for each location, trying both the spelling given and its canonical
+# form.  Reports what was removed (unless --quiet) or not found on the
+# client's stderr; any other git-config failure ends the command.
+sub lei_forget_external {
+ my ($self, @locations) = @_;
+ my $cfg = $self->_lei_cfg(1);
+ my $quiet = $self->{opt}->{quiet};
+ my %seen;
+ for my $loc (@locations) {
+ my (@unset, @not_found);
+ for my $l ($loc, _canonicalize($loc)) {
+ next if $seen{$l}++;
+ my $key = "external.$l.boost";
+ delete($cfg->{$key});
+ $self->_config('--unset', $key);
+ if ($? == 0) {
+ push @unset, $l;
+ # exit status 5: presumably git-config's "key not found"
+ } elsif (($? >> 8) == 5) {
+ push @not_found, $l;
+ } else {
+ $self->err("# --unset $key error");
+ return $self->x_it($?);
+ }
+ }
+ if (@unset) {
+ next if $quiet;
+ $self->err("# $_ gone") for @unset;
+ } elsif (@not_found) {
+ $self->err("# $_ not found") for @not_found;
+ } # else { already exited
+ }
+}
+
+# shell completion helper called by lei__complete
+# Returns completion candidates for "lei forget-external": for every
+# configured external matching what was typed so far, the part of the
+# location starting at the current word.  The last element of @argv is
+# the word being completed; earlier elements are the preceding words.
+sub _complete_forget_external {
+ my ($self, @argv) = @_;
+ my $cfg = $self->_lei_cfg(0);
+ my $cur = pop @argv;
+ # Workaround bash word-splitting URLs to ['https', ':', '//' ...]
+ # Maybe there's a better way to go about this in
+ # contrib/completion/lei-completion.bash
+ my $re = '';
+ if (@argv) {
+ my @x = @argv;
+ if ($cur eq ':' && @x) {
+ push @x, $cur;
+ $cur = '';
+ }
+ # drop leading words until a "http(s)" ":" pair starts the list
+ while (@x > 2 && $x[0] !~ /\Ahttps?\z/ && $x[1] ne ':') {
+ shift @x;
+ }
+ if (@x >= 2) { # qw(https : hostname : 443) or qw(http :)
+ $re = join('', @x);
+ } else { # just filter out the flags and hope for the best
+ $re = join('', grep(!/^-/, @argv));
+ }
+ $re = quotemeta($re);
+ }
+ # FIXME: bash completion off "http:" or "https:" when the last
+ # character is a colon doesn't work properly even if we're
+ # returning "//$HTTP_HOST/$PATH_INFO/", not sure why, could
+ # be a bash issue.
+ map {
+ my $x = substr($_, length('external.'));
+ # only return the part specified on the CLI
+ if ($x =~ /\A$re(\Q$cur\E.*)/) {
+ # don't duplicate if already 100% completed
+ $cur eq $1 ? () : $1;
+ } else {
+ ();
+ }
+ } grep(/\Aexternal\.$re\Q$cur/, @{$cfg->{-section_order}});
+}
+
+1;
--- /dev/null
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# per-mitem/smsg iterators for search results
+# "ovv" => "Overview viewer"
+package PublicInbox::LeiOverview;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::Lock);
+use POSIX qw(strftime);
+use Fcntl qw(F_GETFL O_APPEND);
+use File::Spec;
+use File::Temp ();
+use PublicInbox::MID qw($MID_EXTRACT);
+use PublicInbox::Address qw(pairs);
+use PublicInbox::Config;
+use PublicInbox::Search qw(get_pct);
+use PublicInbox::LeiDedupe;
+use PublicInbox::LeiToMail;
+
+# cf. https://en.wikipedia.org/wiki/JSON_streaming
+my $JSONL = 'ldjson|ndjson|jsonl'; # 3 names for the same thing
+
+# epoch seconds => "YYYY-MM-DDTHH:MM:SSZ" (always UTC)
+sub _iso8601 ($) {
+	my ($epoch) = @_;
+	my @utc = gmtime($epoch);
+	strftime('%Y-%m-%dT%H:%M:%SZ', @utc);
+}
+
+# we open this in the parent process before ->wq_do handoff
+# Creates a lock file in the temporary directory and stores its path
+# in {lock_path}.  {tmp_lk_id} records which object and process
+# created it, so that only the creator removes it (ovv_out_lk_cancel).
+sub ovv_out_lk_init ($) {
+ my ($self) = @_;
+ $self->{tmp_lk_id} = "$self.$$";
+ # UNLINK => 0: the file must outlive the File::Temp object
+ my $tmp = File::Temp->new("lei-ovv.dst.$$.lock-XXXXXX",
+ TMPDIR => 1, UNLINK => 0);
+ $self->{lock_path} = $tmp->filename;
+}
+
+# Removes the lock file, but only when called on the object and in the
+# process which created it; everywhere else this is a no-op.
+sub ovv_out_lk_cancel ($) {
+ my ($self) = @_;
+ ($self->{tmp_lk_id}//'') eq "$self.$$" and
+ unlink(delete($self->{lock_path}));
+}
+
+# Guesses the output format for destination $dst when neither --format
+# nor a "FORMAT:" prefix was given.  Returns 'maildir' for a directory
+# or a path which does not exist yet; everything else is reported via
+# $lei->fail, whose return value is passed through.
+sub detect_fmt ($$) {
+	my ($lei, $dst) = @_;
+	# "scheme://": a run of non-':' non-'/' characters before "://"
+	if ($dst =~ m!\A([^:/]+://)!) {
+		$lei->fail("$1 support not implemented, yet\n");
+	} elsif (!-e $dst || -d _) {
+		'maildir'; # the default TODO: MH?
+	} elsif (-f _ || -p _) {
+		$lei->fail("unable to determine mbox family of $dst\n");
+	} else {
+		$lei->fail("unable to determine format of $dst\n");
+	}
+}
+
+# Sets up the overview (result output) object for a query and stores
+# it in $lei->{ovv}.  The destination comes from --output ('-' means
+# stdout); the format comes from --format, a "FORMAT:" prefix on the
+# destination, "json" for stdout, or detect_fmt().
+# Also sets up $lei->{l2m} for non-JSON formats and $lei->{dedupe}.
+# Returns the new object, or the result of $lei->fail on error.
+sub new {
+ my ($class, $lei) = @_;
+ my $opt = $lei->{opt};
+ my $dst = $opt->{output} // '-';
+ $dst = '/dev/stdout' if $dst eq '-';
+
+ my $fmt = $opt->{'format'};
+ $fmt = lc($fmt) if defined $fmt;
+ if ($dst =~ s/\A([a-z0-9]+)://is) { # e.g. Maildir:/home/user/Mail/
+ my $ofmt = lc $1;
+ $fmt //= $ofmt;
+ return $lei->fail(<<"") if $fmt ne $ofmt;
+--format=$fmt and --output=$ofmt conflict
+
+ }
+ $fmt //= 'json' if $dst eq '/dev/stdout';
+ $fmt //= detect_fmt($lei, $dst) or return;
+
+ if (index($dst, '://') < 0) { # not a URL, so assume path
+ $dst = File::Spec->canonpath($dst);
+ } # else URL
+
+ my $self = bless { fmt => $fmt, dst => $dst }, $class;
+ $lei->{ovv} = $self;
+ my $json;
+ if ($fmt =~ /\A($JSONL|(?:concat)?json)\z/) {
+ # only the class name is kept; encoder objects are created
+ # from it in ovv_each_smsg_cb
+ $json = $self->{json} = ref(PublicInbox::Config->json);
+ }
+ my ($isatty, $seekable);
+ if ($dst eq '/dev/stdout') {
+ $isatty = -t $lei->{1};
+ $lei->start_pager if $isatty;
+ $opt->{pretty} //= $isatty;
+ # a regular file opened with O_APPEND is the one case where
+ # no lock file is set up
+ if (!$isatty && -f _) {
+ my $fl = fcntl($lei->{1}, F_GETFL, 0) //
+ return $lei->fail("fcntl(stdout): $!");
+ ovv_out_lk_init($self) unless ($fl & O_APPEND);
+ } else {
+ ovv_out_lk_init($self);
+ }
+ }
+ if (!$json) {
+ # default to the cheapest sort since MUA usually resorts
+ $lei->{opt}->{'sort'} //= 'docid' if $dst ne '/dev/stdout';
+ $lei->{l2m} = eval { PublicInbox::LeiToMail->new($lei) };
+ return $lei->fail($@) if $@;
+ }
+ $lei->{dedupe} //= PublicInbox::LeiDedupe->new($lei);
+ $self;
+}
+
+# called once by parent
+# Emits the opening '[' of the result array on the client's stdout;
+# only the "json" format needs a preamble.
+sub ovv_begin {
+	my ($self, $lei) = @_;
+	# TODO HTML/Atom/...
+	$self->{fmt} eq 'json' and print { $lei->{1} } '[';
+}
+
+# called once by parent (via PublicInbox::EOFpipe)
+# Writes the trailer matching ovv_begin to the client's stdout, if it
+# is still there: "null]\n" for "json", a newline for "concatjson".
+sub ovv_end {
+ my ($self, $lei) = @_;
+ my $out = $lei->{1} or return;
+ if ($self->{fmt} eq 'json') {
+ # JSON doesn't allow trailing commas, and preventing
+ # trailing commas is a PITA when parallelizing outputs
+ print $out "null]\n";
+ } elsif ($self->{fmt} eq 'concatjson') {
+ print $out "\n";
+ }
+}
+
+# placeholder: currently does nothing beyond unpacking $self
+sub ovv_atfork_child {
+ my ($self) = @_;
+ # reopen dedupe here
+}
+
+# prepares an smsg for JSON
+# Converts $smsg IN PLACE to the short-keyed form used for JSON output
+# and returns an unblessed shallow copy of it:
+#   ts => rt, ds => dt (ISO 8601), references => refs (array),
+#   mid => m, from/to/cc => f/t/c, subject => s, plus pct if $mitem
+# is given; lines, bytes, num and tid are dropped.
+sub _unbless_smsg {
+ my ($smsg, $mitem) = @_;
+
+ delete @$smsg{qw(lines bytes num tid)};
+ $smsg->{rt} = _iso8601(delete $smsg->{ts}); # JMAP receivedAt
+ $smsg->{dt} = _iso8601(delete $smsg->{ds}); # JMAP UTCDate
+ $smsg->{pct} = get_pct($mitem) if $mitem;
+ if (my $r = delete $smsg->{references}) {
+ $smsg->{refs} = [ map { "<$_>" } ($r =~ m/$MID_EXTRACT/go) ];
+ }
+ if (my $m = delete($smsg->{mid})) {
+ $smsg->{'m'} = "<$m>";
+ }
+ for my $f (qw(from to cc)) {
+ my $v = delete $smsg->{$f} or next;
+ # the key is the first letter of the field name
+ $smsg->{substr($f, 0, 1)} = pairs($v);
+ }
+ $smsg->{'s'} = delete $smsg->{subject};
+ # can we be bothered to parse From/To/Cc into arrays?
+ scalar { %$smsg }; # unbless
+}
+
+# Worker exit hook: waits for outstanding mail writes and git reads,
+# then flushes whatever is left in the JSON buffer under the lock.
+sub ovv_atexit_child {
+ my ($self, $lei) = @_;
+ if (my $l2m = delete $lei->{l2m}) {
+ # gracefully stop lei2mail processes after all
+ # ->write_mail work is complete
+ delete $l2m->{-wq_s1};
+ if (my $rd = delete $l2m->{each_smsg_done}) {
+ read($rd, my $buf, 1); # wait for EOF
+ }
+ }
+ # order matters, git->{-tmp}->DESTROY must not fire until
+ # {each_smsg_done} hits EOF above
+ if (my $git = delete $self->{git}) {
+ $git->async_wait_all;
+ }
+ if (my $bref = delete $lei->{ovv_buf}) {
+ my $out = $lei->{1} or return;
+ # $lk is held until the end of this block
+ my $lk = $self->lock_for_scope;
+ print $out $$bref;
+ }
+}
+
+# JSON module ->pretty output wastes too much vertical white space,
+# this (IMHO) provides better use of screen real-estate while not
+# being excessively compact:
+# Formats one '"key": value' member.  $json is the encoder object, $k
+# the key and $v either an array ref or an already-encoded string.
+# Array elements are put on separate lines, aligned via the length
+# of the key.
+sub _json_pretty {
+ my ($json, $k, $v) = @_;
+ if (ref $v eq 'ARRAY') {
+ if (@$v) {
+ my $sep = ",\n" . (' ' x (length($k) + 7));
+ if (ref($v->[0])) { # f/t/c
+ $v = '[' . join($sep, map {
+ my $pair = $json->encode($_);
+ # add a space after the comma between the two elements
+ $pair =~ s/(null|"),"/$1, "/g;
+ $pair;
+ } @$v) . ']';
+ } else { # references
+ $v = '[' . join($sep, map {
+ # encode as a one-element array and strip the brackets
+ substr($json->encode([$_]), 1, -1);
+ } @$v) . ']';
+ }
+ } else {
+ $v = '[]';
+ }
+ }
+ qq{ "$k": }.$v;
+}
+
+# Returns the callback to invoke for every search result; it is called
+# as $cb->($smsg, $mitem) (or $cb->($smsg, undef, $eml) for remote
+# results).  Which callback is built depends on the output:
+#  - mail output of remote results: write the given $eml directly
+#  - mail output via lei2mail workers: hand each smsg to ->wq_do
+#  - mail output in-process: read the blob from git asynchronously
+#  - pretty JSON / compact JSON(L): append to a buffer which is
+#    written out under the lock once it exceeds 64K
+#  - "oid": currently a no-op
+# $ibxish is anything with a ->git method; false for remote results.
+sub ovv_each_smsg_cb { # runs in wq worker usually
+ my ($self, $lei, $ibxish) = @_;
+ my $json;
+ $lei->{1}->autoflush(1);
+ if (my $pkg = $self->{json}) {
+ $json = $pkg->new;
+ $json->utf8->canonical;
+ $json->ascii(1) if $lei->{opt}->{ascii};
+ }
+ my $l2m = $lei->{l2m};
+ if ($l2m && !$ibxish) { # remote https?:// mboxrd
+ delete $l2m->{-wq_s1};
+ my $g2m = $l2m->can('git_to_mail');
+ my $wcb = $l2m->write_cb($lei);
+ sub {
+ my ($smsg, undef, $eml) = @_; # no mitem in $_[1]
+ $wcb->(undef, $smsg, $eml);
+ };
+ } elsif ($l2m && $l2m->{-wq_s1}) {
+ my ($lei_ipc, @io) = $lei->atfork_parent_wq($l2m);
+ # n.b. $io[0] = qry_status_wr, $io[1] = mbox|stdout,
+ # $io[4] becomes a notification pipe that triggers EOF
+ # in this wq worker when all outstanding ->write_mail
+ # calls are complete
+ die "BUG: \$io[4] $io[4] unexpected" if $io[4];
+ pipe($l2m->{each_smsg_done}, $io[4]) or die "pipe: $!";
+ # 1031: presumably F_SETPIPE_SZ on Linux -- TODO confirm
+ fcntl($io[4], 1031, 4096) if $^O eq 'linux';
+ delete @$lei_ipc{qw(l2m opt mset_opt cmd)};
+ my $git = $ibxish->git; # (LeiXSearch|Inbox|ExtSearch)->git
+ $self->{git} = $git;
+ my $git_dir = $git->{git_dir};
+ sub {
+ my ($smsg, $mitem) = @_;
+ $smsg->{pct} = get_pct($mitem) if $mitem;
+ $l2m->wq_do('write_mail', \@io, $git_dir, $smsg,
+ $lei_ipc);
+ }
+ } elsif ($l2m) {
+ my $wcb = $l2m->write_cb($lei);
+ my $git = $ibxish->git; # (LeiXSearch|Inbox|ExtSearch)->git
+ $self->{git} = $git; # for ovv_atexit_child
+ my $g2m = $l2m->can('git_to_mail');
+ sub {
+ my ($smsg, $mitem) = @_;
+ $smsg->{pct} = get_pct($mitem) if $mitem;
+ $git->cat_async($smsg->{blob}, $g2m, [ $wcb, $smsg ]);
+ };
+ } elsif ($self->{fmt} =~ /\A(concat)?json\z/ && $lei->{opt}->{pretty}) {
+ # "json" records end with a comma, "concatjson" records don't
+ my $EOR = ($1//'') eq 'concat' ? "\n}" : "\n},";
+ # the remainder of $buf is flushed by ovv_atexit_child
+ $lei->{ovv_buf} = \(my $buf = '');
+ sub { # DIY prettiness :P
+ my ($smsg, $mitem) = @_;
+ $smsg = _unbless_smsg($smsg, $mitem);
+ $buf .= "{\n";
+ $buf .= join(",\n", map {
+ my $v = $smsg->{$_};
+ if (ref($v)) {
+ _json_pretty($json, $_, $v);
+ } else {
+ $v = $json->encode([$v]);
+ qq{ "$_": }.substr($v, 1, -1);
+ }
+ } sort keys %$smsg);
+ $buf .= $EOR;
+ if (length($buf) > 65536) {
+ my $lk = $self->lock_for_scope;
+ print { $lei->{1} } $buf;
+ $buf = '';
+ }
+ }
+ } elsif ($json) {
+ my $ORS = $self->{fmt} eq 'json' ? ",\n" : "\n"; # JSONL
+ # the remainder of $buf is flushed by ovv_atexit_child
+ $lei->{ovv_buf} = \(my $buf = '');
+ sub {
+ my ($smsg, $mitem) = @_;
+ $buf .= $json->encode(_unbless_smsg(@_)) . $ORS;
+ if (length($buf) > 65536) {
+ my $lk = $self->lock_for_scope;
+ print { $lei->{1} } $buf;
+ $buf = '';
+ }
+ }
+ } elsif ($self->{fmt} eq 'oid') {
+ sub {
+ my ($smsg, $mitem) = @_;
+ }
+ } # else { ...
+}
+
+no warnings 'once';
+*DESTROY = \&ovv_out_lk_cancel;
+
+1;
--- /dev/null
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# handles lei <q|ls-query|rm-query|mv-query> commands
+package PublicInbox::LeiQuery;
+use strict;
+use v5.10.1;
+use PublicInbox::DS qw(dwaitpid);
+
+# the main "lei q SEARCH_TERMS" method
+# Sets up the searcher ({lxs}) with the local store and/or configured
+# externals, prepares the output (LeiOverview), starts the search (and
+# lei2mail) worker processes, translates the --sort/--reverse/--thread/
+# --limit/--offset options into {mset_opt} and hands the query
+# string built from @argv to ->do_query.
+sub lei_q {
+ my ($self, @argv) = @_;
+ require PublicInbox::LeiXSearch;
+ require PublicInbox::LeiOverview;
+ PublicInbox::Config->json; # preload before forking
+ my $opt = $self->{opt};
+ my $lxs = $self->{lxs} = PublicInbox::LeiXSearch->new;
+ # any number of LeiXSearch || LeiSearch || Inbox
+ if ($opt->{'local'} //= 1) { # --local is enabled by default
+ my $sto = $self->_lei_store(1);
+ $lxs->prepare_external($sto->search);
+ }
+
+ # --external is enabled by default, but allow --no-external
+ if ($opt->{external} //= 1) {
+ my $cb = $lxs->can('prepare_external');
+ my $ne = $self->_externals_each($cb, $lxs);
+ # --remote defaults to on only if every external is remote
+ $opt->{remote} //= $ne == $lxs->remotes;
+ if ($opt->{'local'}) {
+ delete($lxs->{remotes}) if !$opt->{remote};
+ } else {
+ delete($lxs->{locals});
+ }
+ }
+ unless ($lxs->locals || $lxs->remotes) {
+ return $self->fail('no local or remote inboxes to search');
+ }
+ my $xj = $lxs->concurrency($opt);
+ my $ovv = PublicInbox::LeiOverview->new($self) or return;
+ $self->atfork_prepare_wq($lxs);
+ $lxs->wq_workers_start('lei_xsearch', $xj, $self->oldset);
+ delete $lxs->{-ipc_atfork_child_close};
+ if (my $l2m = $self->{l2m}) {
+ my $mj = 4; # TODO: configurable
+ $self->atfork_prepare_wq($l2m);
+ $l2m->wq_workers_start('lei2mail', $mj, $self->oldset);
+ delete $l2m->{-ipc_atfork_child_close};
+ }
+
+ # no forking workers after this
+
+ my %mset_opt = map { $_ => $opt->{$_} } qw(thread limit offset);
+ $mset_opt{asc} = $opt->{'reverse'} ? 1 : 0;
+ $mset_opt{qstr} = join(' ', map {;
+ # Consider spaces in argv to be for phrase search in Xapian.
+ # In other words, the users should need only care about
+ # normal shell quotes and not have to learn Xapian quoting.
+ /\s/ ? (s/\A(\w+:)// ? qq{$1"$_"} : qq{"$_"}) : $_
+ } @argv);
+ if (defined(my $sort = $opt->{'sort'})) {
+ if ($sort eq 'relevance') {
+ $mset_opt{relevance} = 1;
+ } elsif ($sort eq 'docid') {
+ $mset_opt{relevance} = $mset_opt{asc} ? -1 : -2;
+ } elsif ($sort =~ /\Areceived(?:-?[aA]t)?\z/) {
+ # the default
+ } else {
+ die "unrecognized --sort=$sort\n";
+ }
+ }
+ # descending docid order
+ $mset_opt{relevance} //= -2 if $opt->{thread};
+ $self->{mset_opt} = \%mset_opt;
+ $ovv->ovv_begin($self);
+ $lxs->do_query($self);
+}
+
+# Options we may pass through to curl (as of 7.64.0); see the curl manpage
+# for details. This covers most options which make sense for HTTP/HTTPS
+# (including proxy support for Tor and other methods of getting past weird
+# networks).
+# Most of these are untested by us, some may not make sense for our use case
+# and typos below are likely.
+# n.b. some short options (-$NUMBER) are not supported since they conflict
+# with other "lei q" switches.
+# FIXME: Getopt::Long doesn't easily let us support options with
+# '.' in them (e.g. --http1.1)
+#
+# Returns the list of option specs: "name", "name|X" (single-character
+# alias), optionally followed by "=s"/"=i" (takes a value) and "@"
+# (may be repeated).
+sub curl_opt { qw(
+ abstract-unix-socket=s anyauth basic cacert=s capath=s
+ cert-status cert-type cert|E=s ciphers=s config|K=s@
+ connect-timeout=s connect-to=s cookie-jar|c=s cookie|b=s crlfile=s
+ digest disable dns-interface=s dns-ipv4-addr=s dns-ipv6-addr=s
+ dns-servers=s doh-url=s egd-file=s engine=s false-start
+ happy-eyeballs-timeout-ms=s haproxy-protocol header|H=s@
+ http2-prior-knowledge http2 insecure|k
+ interface=s ipv4 ipv6 junk-session-cookies
+ key-type=s key=s limit-rate=s local-port=s location-trusted location|L
+ max-redirs=i max-time=s negotiate netrc-file=s netrc-optional netrc
+ no-alpn no-buffer|N no-npn no-sessionid noproxy=s ntlm-wb ntlm
+ pass=s pinnedpubkey=s post301 post302 post303 preproxy=s
+ proxy-anyauth proxy-basic proxy-cacert=s proxy-capath=s
+ proxy-cert-type=s proxy-cert=s proxy-ciphers=s proxy-crlfile=s
+ proxy-digest proxy-header=s@ proxy-insecure
+ proxy-key-type=s proxy-key proxy-negotiate proxy-ntlm proxy-pass=s
+ proxy-pinnedpubkey=s proxy-service-name=s proxy-ssl-allow-beast
+ proxy-tls13-ciphers=s proxy-tlsauthtype=s proxy-tlspassword=s
+ proxy-tlsuser=s proxy-tlsv1 proxy-user|U=s proxy=s
+ proxytunnel=s pubkey=s random-file=s referer=s resolve=s
+ retry-connrefused retry-delay=s retry-max-time=s retry=i
+ sasl-ir service-name=s socks4=s socks4a=s socks5-basic
+ socks5-gssapi-service-name=s socks5-gssapi socks5-hostname=s socks5=s
+ speed-limit|Y speed-type|y ssl-allow-beast sslv2 sslv3
+ suppress-connect-headers tcp-fastopen tls-max=s
+ tls13-ciphers=s tlsauthtype=s tlspassword=s tlsuser=s
+ tlsv1 trace-ascii=s trace-time trace=s
+ unix-socket=s user-agent|A=s user|u=s
+)
+}
+
+1;
--- /dev/null
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+package PublicInbox::LeiSearch;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::ExtSearch);
+use PublicInbox::Search qw(xap_terms);
+
+# Map an over.num article number to the docid of the combined
+# (multi-shard) database.
+# (not generic Xapian, only works with our sharding scheme)
+sub num2docid ($$) {
+	my ($self, $num) = @_;
+	my $nshard = $self->{nshard};
+	my $shard = $num % $nshard; # shard index derived from the number
+	return ($num - 1) * $nshard + $shard + 1;
+}
+
+# Look up the keyword terms (Xapian prefix 'K') of one message.
+# $num is either an article number or an object providing ->get_docid
+# (any reference is treated as the latter).
+# In list context returns the sorted keyword names, otherwise the hash
+# reference produced by xap_terms.
+sub msg_keywords {
+ my ($self, $num) = @_; # num_or_mitem
+ my $xdb = $self->xdb; # set {nshard};
+ my $docid = ref($num) ? $num->get_docid : num2docid($self, $num);
+ my $kw = xap_terms('K', $xdb, $docid);
+ # NOTE(review): relies on xap_terms leaving its error in $@ -- confirm
+ warn "E: #$docid ($num): $@\n" if $@;
+ wantarray ? sort(keys(%$kw)) : $kw;
+}
+
+1;
--- /dev/null
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# Local storage (cache/memo) for lei(1), suitable for personal/private
+# mail iff on encrypted device/FS. Based on v2, but only deduplicates
+# based on git OID.
+#
+# for xref3, the following are constant: $eidx_key = '.', $xnum = -1
+package PublicInbox::LeiStore;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::Lock PublicInbox::IPC);
+use PublicInbox::ExtSearchIdx;
+use PublicInbox::Import;
+use PublicInbox::InboxWritable;
+use PublicInbox::V2Writable;
+use PublicInbox::ContentHash qw(content_hash content_digest);
+use PublicInbox::MID qw(mids mids_in);
+use PublicInbox::LeiSearch;
+use List::Util qw(max);
+
+# Constructor: wraps a PublicInbox::ExtSearchIdx for $dir.  The class
+# argument is ignored (objects are always blessed into this package).
+# With $opt->{creat}, the index is initialized and committed immediately.
+sub new {
+	my (undef, $dir, $opt) = @_;
+	my $self = bless {
+		priv_eidx => PublicInbox::ExtSearchIdx->new($dir, $opt),
+	}, __PACKAGE__;
+	if ($opt->{creat}) {
+		my $eidx = eidx_init($self);
+		$eidx->done;
+	}
+	$self;
+}
+
+# git object of the underlying ExtSearchIdx (read-only)
+sub git {
+	my ($self) = @_;
+	$self->{priv_eidx}->git;
+}
+
+# packing factor shared with V2Writable
+sub packing_factor { return $PublicInbox::V2Writable::PACKING_FACTOR }
+
+# per-epoch size limit: {rotate_bytes} if set, otherwise
+# 1GiB divided by the packing factor
+sub rotate_bytes {
+	my ($self) = @_;
+	my $limit = $self->{rotate_bytes};
+	return $limit if defined $limit;
+	(1024 * 1024 * 1024) / $self->packing_factor;
+}
+
+# directory holding the N.git epoch repositories
+sub git_pfx {
+	my ($self) = @_;
+	$self->{priv_eidx}->{topdir}.'/local';
+}
+
+# Returns the highest N among the "N.git" entries under git_pfx, or 0
+# when there are none or the directory does not exist.
+# Dies on any other opendir failure.
+sub git_epoch_max {
+	my ($self) = @_;
+	my $pfx = $self->git_pfx;
+	opendir(my $dh, $pfx) or do {
+		return 0 if $!{ENOENT};
+		die("opendir $pfx: $!\n");
+	};
+	my @epochs = grep(/\A[0-9]+\.git\z/, readdir($dh));
+	# drop ".git" suffix and numify
+	my @nums = map { substr($_, 0, -4) + 0 } @epochs;
+	max(@nums) // 0;
+}
+
+# Ask git for the committer identity of $git and return (name, email).
+# Warns if the git command failed and falls back to a placeholder
+# identity when the output cannot be parsed.
+sub git_ident ($) {
+	my ($git) = @_;
+	my $ident = $git->qx(qw(var GIT_COMMITTER_IDENT));
+	chomp($ident);
+	warn "$git->{git_dir} GIT_COMMITTER_IDENT failed\n" if $?;
+	if ($ident =~ /\A(.+) <([^>]+)> [0-9]+ [-\+]?[0-9]+$/) {
+		return ($1, $2);
+	}
+	return ('lei user', 'x@example.com');
+}
+
+# Returns the PublicInbox::Import object used for writing, creating it
+# on demand and caching it in $self->{im}.
+# Once the current importer has added rotate_bytes or more, it is
+# finished and a new epoch ("$max.git" under git_pfx) is started.
+sub importer {
+ my ($self) = @_;
+ my $max;
+ my $im = $self->{im};
+ if ($im) {
+ return $im if $im->{bytes_added} < $self->rotate_bytes;
+
+ # current epoch is full: finish it and move on to the next one
+ delete $self->{im};
+ $im->done;
+ undef $im;
+ $self->checkpoint;
+ $max = $self->git_epoch_max + 1;
+ }
+ my $pfx = $self->git_pfx;
+ $max //= $self->git_epoch_max;
+ # find the first epoch whose estimated unpacked size is below the limit
+ while (1) {
+ my $latest = "$pfx/$max.git";
+ my $old = -e $latest;
+ PublicInbox::Import::init_bare($latest);
+ my $git = PublicInbox::Git->new($latest);
+ # only configure repositories which did not exist before
+ $git->qx(qw(config core.sharedRepository 0600)) if !$old;
+ my $packed_bytes = $git->packed_bytes;
+ my $unpacked_bytes = $packed_bytes / $self->packing_factor;
+ if ($unpacked_bytes >= $self->rotate_bytes) {
+ $max++;
+ next;
+ }
+ my ($n, $e) = git_ident($git);
+ $self->{im} = $im = PublicInbox::Import->new($git, $n, $e);
+ # seed the counter so an existing epoch keeps counting towards the limit
+ $im->{bytes_added} = int($packed_bytes / $self->packing_factor);
+ # NOTE(review): presumably disables Import's own lock file -- confirm
+ $im->{lock_path} = undef;
+ $im->{path_type} = 'v2';
+ return $im;
+ }
+}
+
+# Returns a new PublicInbox::LeiSearch for the top-level directory of
+# the underlying index.
+sub search {
+	my ($self) = @_;
+	my $topdir = $self->{priv_eidx}->{topdir};
+	PublicInbox::LeiSearch->new($topdir);
+}
+
+# Initialize the underlying ExtSearchIdx for private use ({-private => 1})
+# and return it.
+sub eidx_init {
+	my ($self) = @_;
+	my $priv = $self->{priv_eidx};
+	$priv->idx_init({ -private => 1 });
+	return $priv;
+}
+
+# Needed when a message has no Message-IDs at all (unsent Draft
+# messages, at least): picks the first X-Alt-Message-ID or
+# Resent-Message-ID, or else a Message-ID derived from the content
+# digest.  The result is stored in $eml->{-lei_fake_mid} and returned.
+sub _fake_mid_for ($$) {
+	my ($eml, $dig) = @_;
+	my $alt_mids = mids_in($eml, qw(X-Alt-Message-ID Resent-Message-ID));
+	my $fake = $alt_mids->[0];
+	$fake //= PublicInbox::Import::digest2mid($dig, $eml);
+	$eml->{-lei_fake_mid} = $fake;
+}
+
+# Returns the numerically sorted docids of all stored messages whose
+# content hash equals that of $eml.  Candidates are found through every
+# Message-ID of $eml (or a fake one when it has none) and each candidate
+# blob is re-read and hashed for comparison.
+sub _docids_for ($$) {
+ my ($self, $eml) = @_;
+ my %docids;
+ my $dig = content_digest($eml);
+ # clone since ->digest is taken here while $dig may still be needed
+ # by _fake_mid_for below
+ my $chash = $dig->clone->digest;
+ my $eidx = eidx_init($self);
+ my $oidx = $eidx->{oidx};
+ my $im = $self->{im};
+ my $mids = mids($eml);
+ $mids->[0] //= _fake_mid_for($eml, $dig);
+ for my $mid (@$mids) {
+ my ($id, $prev);
+ while (my $cur = $oidx->next_by_mid($mid, \$id, \$prev)) {
+ my $oid = $cur->{blob};
+ my $docid = $cur->{num};
+ # try the active importer first, then fall back to the git repo(s)
+ my $bref = $im ? $im->cat_blob($oid) : undef;
+ $bref //= $eidx->git->cat_file($oid) // do {
+ warn "W: $oid (#$docid) <$mid> not found\n";
+ next;
+ };
+ local $self->{current_info} = $oid;
+ my $x = PublicInbox::Eml->new($bref);
+ $docids{$docid} = $docid if content_hash($x) eq $chash;
+ }
+ }
+ sort { $a <=> $b } values %docids;
+}
+
+# Shared implementation of {set,add,remove}_eml_keywords: invokes the
+# shard method $op ('set_keywords', 'add_keywords' or 'remove_keywords')
+# with @kw for every stored document matching $eml.
+# Returns an array reference of the affected docids.
+sub _eml_keywords_do {
+	my ($self, $op, $eml, @kw) = @_;
+	my $eidx = eidx_init($self);
+	my @docids = _docids_for($self, $eml);
+	for my $docid (@docids) {
+		$eidx->idx_shard($docid)->ipc_do($op, $docid, @kw);
+	}
+	\@docids;
+}
+
+# Replace the keywords of every stored copy of $eml with @kw.
+# Returns an array reference of the affected docids.
+sub set_eml_keywords {
+	my ($self, $eml, @kw) = @_;
+	_eml_keywords_do($self, 'set_keywords', $eml, @kw);
+}
+
+# Add @kw to the keywords of every stored copy of $eml.
+# Returns an array reference of the affected docids.
+sub add_eml_keywords {
+	my ($self, $eml, @kw) = @_;
+	_eml_keywords_do($self, 'add_keywords', $eml, @kw);
+}
+
+# Remove @kw from the keywords of every stored copy of $eml.
+# Returns an array reference of the affected docids.
+sub remove_eml_keywords {
+	my ($self, $eml, @kw) = @_;
+	_eml_keywords_do($self, 'remove_keywords', $eml, @kw);
+}
+
+# cf: https://doc.dovecot.org/configuration_manual/mail_location/mbox/
+# O (old/non-recent), and D (deleted) aren't in JMAP,
+# so probably won't be supported by us.
+my %status2kw = (F => 'flagged', A => 'answered', R => 'seen', T => 'draft');
+
+# Returns the sorted keywords found in the X-Status and Status headers
+# of the message given as the last argument.
+sub mbox_keywords {
+	my $eml = $_[-1];
+	my @vals = ($eml->header_raw('X-Status'), $eml->header_raw('Status'));
+	my %kw;
+	for my $val (@vals) {
+		$kw{$status2kw{$_}} = 1 for ($val =~ /[FART]/g);
+	}
+	sort(keys %kw);
+}
+
+# cf: https://cr.yp.to/proto/maildir.html
+my %c2kw = ('D' => 'draft', F => 'flagged', R => 'answered', S => 'seen');
+
+# Returns the sorted keywords for the ":2,FLAGS" suffix of the file name
+# given as the last argument; flags without a keyword are ignored.
+sub maildir_keywords {
+	my ($flags) = ($_[-1] =~ /:2,([A-Z]+)\z/i) or return ();
+	my @kw = map { $c2kw{$_} // () } split(//, $flags);
+	sort(@kw);
+}
+
+# Import $eml into the store and index it, optionally with keywords @kw.
+# Returns nothing when the importer rejects the message (duplicate),
+# an array reference of docids when messages with identical content are
+# already indexed, or the new Smsg object for a newly indexed message.
+sub add_eml {
+ my ($self, $eml, @kw) = @_;
+ my $eidx = eidx_init($self);
+ my $oidx = $eidx->{oidx};
+ my $smsg = bless { -oidx => $oidx }, 'PublicInbox::Smsg';
+ my $im = $self->importer;
+ $im->add($eml, undef, $smsg) or return; # duplicate returns undef
+
+ local $self->{current_info} = $smsg->{blob};
+ if (my @docids = _docids_for($self, $eml)) {
+ # same content is already indexed: only record the new blob and
+ # any keywords against the existing documents
+ for my $docid (@docids) {
+ my $idx = $eidx->idx_shard($docid);
+ $oidx->add_xref3($docid, -1, $smsg->{blob}, '.');
+ # add_eidx_info for List-Id
+ $idx->ipc_do('add_eidx_info', $docid, '.', $eml);
+ $idx->ipc_do('add_keywords', $docid, @kw) if @kw;
+ }
+ \@docids;
+ } else {
+ # new content: allocate a docid and index the message in full
+ $smsg->{num} = $oidx->adj_counter('eidx_docid', '+');
+ $oidx->add_overview($eml, $smsg);
+ $oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.');
+ my $idx = $eidx->idx_shard($smsg->{num});
+ $idx->index_eml($eml, $smsg);
+ $idx->ipc_do('add_keywords', $smsg->{num}, @kw) if @kw;
+ $smsg;
+ }
+}
+
+# Add $eml with keywords @kw; when add_eml yields nothing (duplicate),
+# set the keywords on the copies already stored instead.
+sub set_eml {
+	my ($self, $eml, @kw) = @_;
+	my $added = add_eml($self, $eml, @kw);
+	return $added if defined $added;
+	set_eml_keywords($self, $eml, @kw);
+}
+
+# Finish the active importer (if any) and the underlying index.
+# An importer failure is warned about immediately, but only re-thrown
+# after the index itself has been finished.
+sub done {
+	my ($self) = @_;
+	my $err = '';
+	my $im = delete($self->{im});
+	if ($im) {
+		eval { $im->done };
+		if ($@) {
+			$err = "import done: $@\n";
+			warn $err;
+		}
+	}
+	$self->{priv_eidx}->done;
+	die $err if $err;
+}
+
+1;
--- /dev/null
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Writes PublicInbox::Eml objects atomically to a mbox variant or Maildir
+package PublicInbox::LeiToMail;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::IPC);
+use PublicInbox::Eml;
+use PublicInbox::Lock;
+use PublicInbox::ProcessPipe;
+use PublicInbox::Spawn qw(which spawn popen_rd);
+use PublicInbox::LeiDedupe;
+use PublicInbox::OnDestroy;
+use Symbol qw(gensym);
+use IO::Handle; # ->autoflush
+use Fcntl qw(SEEK_SET SEEK_END O_CREAT O_EXCL O_WRONLY);
+use Errno qw(EEXIST ESPIPE ENOENT);
+use PublicInbox::Git;
+
+# keyword => flag character used in the ":2," suffix of Maildir file names
+my %kw2char = ( # Maildir characters
+ draft => 'D',
+ flagged => 'F',
+ answered => 'R',
+ seen => 'S'
+);
+
+# keyword => [ mbox header name, character appended to that header ]
+my %kw2status = (
+ flagged => [ 'X-Status' => 'F' ],
+ answered => [ 'X-Status' => 'A' ],
+ seen => [ 'Status' => 'R' ],
+ draft => [ 'X-Status' => 'T' ],
+);
+
+# Build the header portion of an mbox entry for $eml.
+# $type is the mbox variant name (it ends up in the "From " line) and
+# $smsg supplies {kw}, {blob} and {pct}.
+# Takes $eml->{hdr} away from $eml and returns that scalar reference
+# with the "From " line prepended.
+sub _mbox_hdr_buf ($$$) {
+ my ($eml, $type, $smsg) = @_;
+ # header_set without values: these headers are rewritten or dropped
+ $eml->header_set($_) for (qw(Lines Bytes Content-Length));
+
+ # Messages are always 'O' (non-\Recent in IMAP), it saves
+ # MUAs the trouble of rewriting the mbox if no other
+ # changes are made
+ my %hdr = (Status => [ 'O' ]); # set Status, X-Status
+ for my $k (@{$smsg->{kw} // []}) {
+ if (my $ent = $kw2status{$k}) {
+ push @{$hdr{$ent->[0]}}, $ent->[1];
+ } else { # X-Label?
+ warn "TODO: keyword `$k' not supported for mbox\n";
+ }
+ }
+ # one header per name, its flag characters sorted and concatenated
+ while (my ($name, $chars) = each %hdr) {
+ $eml->header_set($name, join('', sort @$chars));
+ }
+ my $buf = delete $eml->{hdr};
+
+ # fixup old bug from import (pre-a0c07cba0e5d8b6a)
+ $$buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
+ # "From " line identity: blob OID (or 'lei'), plus "=$pct" if set
+ my $ident = $smsg->{blob} // 'lei';
+ if (defined(my $pct = $smsg->{pct})) { $ident .= "=$pct" }
+
+ substr($$buf, 0, 0, # prepend From line
+ "From $ident\@$type Thu Jan 1 00:00:00 1970$eml->{crlf}");
+ $buf;
+}
+
+# Write $$buf to $fh with a single syswrite, for on-disk destinations
+# (O_APPEND, or O_EXCL).  Dies on error or on a short write.
+sub atomic_append {
+	my ($fh, $buf) = @_;
+	my $w = syswrite($fh, $$buf);
+	defined($w) or die "write: $!";
+	my $want = length($$buf);
+	$w == $want or die "short write: $w != ".$want;
+}
+
+# Buffered counterpart of atomic_append: print $$buf to $fh or die.
+sub _print_full {
+	my ($fh, $buf) = @_;
+	print {$fh} $$buf or die "print: $!";
+}
+
+# Convert $eml to an mboxrd entry: body lines matching /^>*From / gain
+# one more leading '>'.  Consumes the header and body of $eml and
+# returns a scalar reference to the entry.
+sub eml2mboxrd ($;$) {
+	my ($eml, $smsg) = @_;
+	my $out = _mbox_hdr_buf($eml, 'mboxrd', $smsg);
+	my $body = delete $eml->{bdy};
+	if ($body) {
+		$$body =~ s/^(>*From )/>$1/gm;
+		$$out .= $eml->{crlf};
+		substr($$body, 0, 0, $$out); # prepend header
+		$out = $body;
+	}
+	$$out .= $eml->{crlf};
+	return $out;
+}
+
+# Convert $eml to an mboxo entry: only body lines starting with "From "
+# are escaped.  Consumes the header and body of $eml and returns a
+# scalar reference to the entry.
+sub eml2mboxo {
+	my ($eml, $smsg) = @_;
+	my $out = _mbox_hdr_buf($eml, 'mboxo', $smsg);
+	my $body = delete $eml->{bdy};
+	if ($body) {
+		$$body =~ s/^From />From /gm;
+		$$out .= $eml->{crlf};
+		substr($$body, 0, 0, $$out); # prepend header
+		$out = $body;
+	}
+	$$out .= $eml->{crlf};
+	return $out;
+}
+
+# Shared by the mboxcl variants: appends Content-Length: and Lines:
+# (so mutt won't have to add it on MUA close) plus the blank separator
+# line to the header, prepends the header to the body, and replaces the
+# caller's first argument with the body reference.
+sub _mboxcl_common ($$$) {
+	my ($hdr, $body, $eol) = @_;
+	my $nr_lines = ($$body =~ tr/\n//);
+	my $len = length($$body);
+	$$hdr .= 'Content-Length: '.$len.$eol.'Lines: '.$nr_lines.$eol.$eol;
+	substr($$body, 0, 0, $$hdr); # prepend header
+	$_[0] = $body;
+}
+
+# mboxcl still escapes "From " lines, and adds Content-Length/Lines
+# headers.  Consumes the header and body of $eml and returns a scalar
+# reference to the entry.
+sub eml2mboxcl {
+	my ($eml, $smsg) = @_;
+	my $out = _mbox_hdr_buf($eml, 'mboxcl', $smsg);
+	my $eol = $eml->{crlf};
+	my $body = delete $eml->{bdy};
+	if ($body) {
+		$$body =~ s/^From />From /gm;
+		_mboxcl_common($out, $body, $eol);
+	}
+	$$out .= $eol;
+	return $out;
+}
+
+# mboxcl2 has no "From " escaping, only the Content-Length/Lines
+# headers.  Consumes the header and body of $eml and returns a scalar
+# reference to the entry.
+sub eml2mboxcl2 {
+	my ($eml, $smsg) = @_;
+	my $out = _mbox_hdr_buf($eml, 'mboxcl2', $smsg);
+	my $eol = $eml->{crlf};
+	my $body = delete $eml->{bdy};
+	_mboxcl_common($out, $body, $eol) if $body;
+	$$out .= $eol;
+	return $out;
+}
+
+# git->cat_async callback
+# $bref: reference to the object contents, $oid/$type/$size: what git
+# reported for the object, $arg: [ $write_cb, $smsg, ... ].
+# Warns and skips the object when it is missing or not a blob; dies if
+# the OID does not match the one requested through $smsg.  Non-empty
+# blobs are handed to $write_cb->($bref, $smsg).
+sub git_to_mail {
+	my ($bref, $oid, $type, $size, $arg) = @_;
+	if ($type ne 'blob') {
+		if ($type eq 'missing') {
+			warn "missing $oid\n";
+		} else {
+			warn "unexpected type=$type for $oid\n";
+		}
+		# nothing sensible to write; also avoids comparing an
+		# undefined $size below
+		return;
+	}
+	my ($write_cb, $smsg) = @$arg;
+	if ($smsg->{blob} ne $oid) {
+		die "BUG: expected=$smsg->{blob} got=$oid";
+	}
+	$write_cb->($bref, $smsg) if $size > 0;
+}
+
+# dwaitpid callback: forgets the command recorded under "pid.$pid" and
+# reports a failure through $lei->fail if it exited with non-zero status.
+sub reap_compress {
+	my ($lei, $pid) = @_;
+	my $cmd = delete $lei->{"pid.$pid"};
+	if ($? != 0) {
+		return $lei->fail("@$cmd failed", $? >> 8);
+	}
+	return;
+}
+
+# all of these support -c for stdout and -d for decompression,
+# mutt is commonly distributed with hooks for gz, bz2 and xz, at least
+# { foo => '' } means "--foo" is passed to the command-line,
+# otherwise { foo => '--bar' } passes "--bar"
+#
+# Layout: file suffix => [ candidate executables..., \%switches ].
+# An ALL-CAPS candidate is the name of an environment variable that
+# supplies the executable (see zsfx2cmd); candidates are tried in order.
+our %zsfx2cmd = (
+ gz => [ qw(GZIP pigz gzip), { rsyncable => '', threads => '-p' } ],
+ bz2 => [ 'bzip2', {} ],
+ xz => [ 'xz', { threads => '-T' } ],
+ # XXX does anybody care for these? I prefer zstd on entire FSes,
+ # so it's probably not necessary on a per-file basis
+ # zst => [ 'zstd', { -default => [ qw(-q) ], # it's noisy by default
+ # rsyncable => '', threads => '-T' } ],
+ # zz => [ 'pigz', { -default => [ '--zlib' ],
+ # rsyncable => '', threads => '-p' }],
+ # lzo => [ 'lzop', {} ],
+ # lzma => [ 'lzma', {} ],
+);
+
+# Build the (de)compressor command line for the file suffix $zsfx.
+# $decompress selects "-dc" instead of "-c"; $lei supplies {env} (for
+# executable overrides such as GZIP) and {opt} (for --threads).
+# Returns an array reference suitable for spawn; dies when the suffix
+# is unsupported or no candidate executable can be found.
+sub zsfx2cmd ($$$) {
+	my ($zsfx, $decompress, $lei) = @_;
+	my $x = $zsfx2cmd{$zsfx} // die "no support for suffix=.$zsfx";
+	my @info = @$x;
+	my $cmd_opt = pop @info;
+	my @cmd = (undef, $decompress ? qw(-dc) : qw(-c));
+	for my $exe (@info) {
+		# I think respecting client's ENV{GZIP} is OK, not sure
+		# about ENV overrides for other, less-common compressors
+		if ($exe eq uc($exe)) {
+			$exe = $lei->{env}->{$exe} or next;
+		}
+		$cmd[0] = which($exe) and last;
+	}
+	$cmd[0] // die join(' or ', @info)." missing for .$zsfx";
+	# push @cmd, @{$cmd_opt->{-default}} if $cmd_opt->{-default};
+	for my $bool (qw(rsyncable)) {
+		# look up the switch for *this* option, not a fixed key
+		my $switch = $cmd_opt->{$bool} // next;
+		# '' means "--$bool", anything else is the literal switch
+		push @cmd, $switch eq '' ? "--$bool" : $switch;
+	}
+	for my $key (qw(threads)) { # support compression level?
+		my $switch = $cmd_opt->{$key} // next;
+		my $val = $lei->{opt}->{$key} // next;
+		push @cmd, $switch, $val;
+	}
+	\@cmd;
+}
+
+# open a compressor process
+# No-op unless $self->{zsfx} is set.  $zpipe holds the [ read, write ]
+# pipe ends; the compressor reads from the pipe and writes to the current
+# $lei->{1}, which is then replaced by a tied handle writing into the pipe.
+sub _post_augment_mbox {
+ my ($self, $lei, $zpipe) = @_;
+ my $zsfx = $self->{zsfx} or return;
+ my $cmd = zsfx2cmd($zsfx, undef, $lei);
+ my ($r, $w) = splice(@$zpipe, 0, 2);
+ my $rdr = { 0 => $r, 1 => $lei->{1}, 2 => $lei->{2} };
+ my $pid = spawn($cmd, $lei->{env}, $rdr);
+ my $pp = gensym;
+ # minimal stand-in for $lei handed to reap_compress: it carries the
+ # command (for the error message) plus the {2} and {sock} entries
+ my $dup = bless { "pid.$pid" => $cmd }, ref($lei);
+ $dup->{$_} = $lei->{$_} for qw(2 sock);
+ tie *$pp, 'PublicInbox::ProcessPipe', $pid, $w, \&reap_compress, $dup;
+ $lei->{1} = $pp;
+ die 'BUG: unexpected {ovv}->{lock_path}' if $lei->{ovv}->{lock_path};
+ $lei->{ovv}->ovv_out_lk_init;
+}
+
+# Spawn the decompressor for $zsfx with $in as its stdin and return the
+# handle for reading the decompressed output.
+sub decompress_src ($$$) {
+	my ($in, $zsfx, $lei) = @_;
+	my $cmd = zsfx2cmd($zsfx, 1, $lei);
+	my $rdr = { 0 => $in, 2 => $lei->{2} };
+	popen_rd($cmd, $lei->{env}, $rdr);
+}
+
+# Returns a duplicate of the handle $in (opened read/append), or dies.
+sub dup_src ($) {
+	my ($in) = @_;
+	open(my $dup, '+>>&', $in) or die "dup: $!";
+	return $dup;
+}
+
+# --augment existing output destination, with deduplication
+# MboxReader eml_cb: feeds each existing message to the deduplicator.
+sub _augment {
+	my ($eml, $lei) = @_;
+	my $dedupe = $lei->{dedupe};
+	# ignore return value, just populate the skv
+	$dedupe->is_dup($eml);
+}
+
+# Returns the callback which writes one message to the mbox destination.
+# The callback takes ($buf, $smsg, $eml), skips duplicates, converts the
+# message with the eml2<fmt> routine and writes it while holding the
+# overview's lock.  Once a write fails with a PublicInbox::SIGPIPE
+# exception, all further messages are dropped silently.
+sub _mbox_write_cb ($$) {
+ my ($self, $lei) = @_;
+ my $ovv = $lei->{ovv};
+ my $m = 'eml2'.$ovv->{fmt};
+ my $eml2mbox = $self->can($m) or die "$self->$m missing";
+ my $out = $lei->{1} // die "no stdout ($m, $ovv->{dst})"; # redirected earlier
+ $out->autoflush(1);
+ # with a lock path, buffered print is used; otherwise a single
+ # syswrite per message (atomic_append)
+ my $write = $ovv->{lock_path} ? \&_print_full : \&atomic_append;
+ my $dedupe = $lei->{dedupe};
+ $dedupe->prepare_dedupe;
+ sub { # for git_to_mail
+ my ($buf, $smsg, $eml) = @_;
+ return unless $out;
+ $eml //= PublicInbox::Eml->new($buf);
+ if (!$dedupe->is_dup($eml, $smsg->{blob})) {
+ $buf = $eml2mbox->($eml, $smsg);
+ my $lk = $ovv->lock_for_scope;
+ eval { $write->($out, $buf) };
+ if ($@) {
+ die $@ if ref($@) ne 'PublicInbox::SIGPIPE';
+ undef $out
+ }
+ }
+ }
+}
+
+# Invoke $cb->($path, @arg) for every file in the new/ and cur/
+# subdirectories of $dir (which must end with a slash) whose name ends
+# with a ":2,FLAGS" suffix.  Unreadable subdirectories are skipped.
+sub _maildir_each_file ($$;@) {
+	my ($dir, $cb, @arg) = @_;
+	for my $sub (qw(new/ cur/)) {
+		my $pfx = $dir.$sub;
+		opendir(my $dh, $pfx) or next;
+		while (defined(my $fn = readdir($dh))) {
+			next unless $fn =~ /:2,[A-Za-z]*\z/;
+			$cb->($pfx.$fn, @arg);
+		}
+	}
+}
+
+# _maildir_each_file cb: load the message at $f and feed it to the
+# deduplicator; files which cannot be loaded are skipped.
+sub _augment_file {
+	my ($f, $lei) = @_;
+	my $eml = PublicInbox::InboxWritable::eml_from_path($f);
+	return unless $eml;
+	_augment($eml, $lei);
+}
+
+# _maildir_each_file callback, \&CORE::unlink doesn't work with it
+sub _unlink {
+	my ($path) = @_;
+	unlink($path);
+}
+
+# Returns a string of four comma-separated hex fields (random number,
+# time, PID, per-process sequence number) for making file names unique.
+sub _rand () {
+	state $seq = 0;
+	my @parts = (rand(0xffffffff), time, $$, ++$seq);
+	sprintf('%x,%x,%x,%x', @parts);
+}
+
+# Write the message $$buf into the Maildir at $dst (which must end with
+# a slash): it is written to a unique file under tmp/ and then linked
+# into cur/ with a ":2,FLAGS" suffix derived from $smsg->{kw}.
+# Dies if the temporary file cannot be created or written, or if it
+# cannot be linked into place; the message is never dropped silently.
+sub _buf2maildir {
+	my ($dst, $buf, $smsg) = @_;
+	my $kw = $smsg->{kw} // [];
+	my $sfx = join('', sort(map { $kw2char{$_} // () } @$kw));
+	my $rand = ''; # chosen by die roll :P
+	my ($tmp, $fh, $final, $ok);
+	my $common = $smsg->{blob} // _rand;
+	if (defined(my $pct = $smsg->{pct})) { $common .= "=$pct" }
+	# retry with a random prefix for as long as the name is taken
+	do {
+		$tmp = $dst.'tmp/'.$rand.$common;
+	} while (!($ok = sysopen($fh, $tmp, O_CREAT|O_EXCL|O_WRONLY)) &&
+		$! == EEXIST && ($rand = _rand.','));
+	# any failure other than EEXIST leaves us without a file handle;
+	# $tmp is not ours in that case, so do not unlink it
+	$ok or die "sysopen($tmp): $!\n";
+	if (print $fh $$buf and close($fh)) {
+		# ignore new/ and write only to cur/, otherwise MUAs
+		# with R/W access to the Maildir will end up doing
+		# a mass rename which can take a while with thousands
+		# of messages.
+		$dst .= 'cur/';
+		$rand = '';
+		do {
+			$final = $dst.$rand.$common.':2,'.$sfx;
+		} while (!($ok = link($tmp, $final)) && $! == EEXIST &&
+			($rand = _rand.','));
+		unless ($ok) { # e.g. ENOSPC, EPERM, EXDEV
+			my $err = $!;
+			unlink($tmp);
+			die "link($tmp, $final): $err\n";
+		}
+		unlink($tmp) or warn "W: failed to unlink $tmp: $!\n";
+	} else {
+		my $err = $!;
+		unlink($tmp);
+		my $what = $smsg->{blob} // $common;
+		die "Error writing $what to $dst: $err";
+	}
+}
+
+# Returns the callback which writes one message into the Maildir at
+# $lei->{ovv}->{dst}.  The callback takes ($buf, $smsg, $eml); at least
+# one of $buf and $eml must be given.  Duplicates are skipped when a
+# deduplicator is configured.
+sub _maildir_write_cb ($$) {
+ my ($self, $lei) = @_;
+ my $dedupe = $lei->{dedupe};
+ $dedupe->prepare_dedupe;
+ my $dst = $lei->{ovv}->{dst};
+ sub { # for git_to_mail
+ my ($buf, $smsg, $eml) = @_;
+ # no raw buffer given: serialize the Eml object instead
+ $buf //= \($eml->as_string);
+ return _buf2maildir($dst, $buf, $smsg) if !$dedupe;
+ # $$buf is passed by value so $buf itself remains usable below
+ $eml //= PublicInbox::Eml->new($$buf); # copy buf
+ return if $dedupe->is_dup($eml, $smsg->{blob});
+ undef $eml;
+ _buf2maildir($dst, $buf, $smsg);
+ }
+}
+
+# returns a callback for git_to_mail by dispatching on {base_type}
+# to _mbox_write_cb or _maildir_write_cb
+sub write_cb {
+	my ($self, $lei) = @_;
+	my $method = '_'.$self->{base_type}.'_write_cb';
+	$self->$method($lei);
+}
+
+# Constructor: validates the output format and destination found in
+# $lei->{ovv}, sets {base_type} to 'maildir' or 'mbox' and installs a
+# deduplicator in $lei->{dedupe}.  Dies on an unknown format or an
+# unusable destination.
+sub new {
+ my ($cls, $lei) = @_;
+ my $fmt = $lei->{ovv}->{fmt};
+ my $dst = $lei->{ovv}->{dst};
+ my $self = bless {}, $cls;
+ if ($fmt eq 'maildir') {
+ $self->{base_type} = 'maildir';
+ # "_" reuses the stat result of the preceding -e
+ -e $dst && !-d _ and die
+ "$dst exists and is not a directory\n";
+ # the Maildir code appends "tmp/", "new/" and "cur/" directly
+ $lei->{ovv}->{dst} = $dst .= '/' if substr($dst, -1) ne '/';
+ } elsif (substr($fmt, 0, 4) eq 'mbox') {
+ (-d $dst || (-e _ && !-w _)) and die
+ "$dst exists and is not a writable file\n";
+ # every supported mbox variant has an eml2<fmt> routine
+ $self->can("eml2$fmt") or die "bad mbox --format=$fmt\n";
+ $self->{base_type} = 'mbox';
+ } else {
+ die "bad mail --format=$fmt\n";
+ }
+ $lei->{dedupe} = PublicInbox::LeiDedupe->new($lei);
+ $self;
+}
+
+# Maildir counterpart of _pre_augment_mbox (dispatched to by pre_augment);
+# intentionally does nothing.
+sub _pre_augment_maildir {} # noop
+
+# Prepare an existing Maildir destination: with --augment, every
+# existing message is fed to the deduplicator; otherwise all existing
+# messages in new/ and cur/ are deleted.
+sub _do_augment_maildir {
+ my ($self, $lei) = @_;
+ my $dst = $lei->{ovv}->{dst};
+ if ($lei->{opt}->{augment}) {
+ my $dedupe = $lei->{dedupe};
+ if ($dedupe && $dedupe->prepare_dedupe) {
+ require PublicInbox::InboxWritable; # eml_from_path
+ _maildir_each_file($dst, \&_augment_file, $lei);
+ $dedupe->pause_dedupe;
+ }
+ } else { # clobber existing Maildir
+ _maildir_each_file($dst, \&_unlink);
+ }
+}
+
+# Make sure the tmp/, new/ and cur/ subdirectories of the destination
+# Maildir exist, creating them (and any parents) as needed.
+sub _post_augment_maildir {
+	my ($self, $lei) = @_;
+	my $dst = $lei->{ovv}->{dst};
+	for my $x (qw(tmp new cur)) {
+		my $d = $dst.$x;
+		unless (-d $d) {
+			require File::Path;
+			File::Path::mkpath($d);
+			-d $d or die "$d is not a directory";
+		}
+	}
+}
+
+# Open the mbox destination and install it as $lei->{1} (the previous
+# handle is kept in $lei->{old_1}); nothing is opened for /dev/stdout.
+# Records in {seekable} whether the output can seek and in {zsfx} the
+# compression suffix of the destination, if any.
+# Returns a [ read, write ] pipe pair when the destination has a
+# supported compression suffix, otherwise an empty return.
+sub _pre_augment_mbox {
+ my ($self, $lei) = @_;
+ my $dst = $lei->{ovv}->{dst};
+ if ($dst ne '/dev/stdout') {
+ # FIFOs are opened write-only, everything else read/append
+ my $mode = -p $dst ? '>' : '+>>';
+ # without --augment, an existing regular file is replaced
+ if (-f _ && !$lei->{opt}->{augment} and !unlink($dst)) {
+ $! == ENOENT or die "unlink($dst): $!";
+ }
+ open my $out, $mode, $dst or die "open($dst): $!";
+ $lei->{old_1} = $lei->{1};
+ $lei->{1} = $out;
+ }
+ # Perl does SEEK_END even with O_APPEND :<
+ $self->{seekable} = seek($lei->{1}, 0, SEEK_SET);
+ if (!$self->{seekable} && $! != ESPIPE && $dst ne '/dev/stdout') {
+ die "seek($dst): $!\n";
+ }
+ state $zsfx_allow = join('|', keys %zsfx2cmd);
+ ($self->{zsfx}) = ($dst =~ /\.($zsfx_allow)\z/) or return;
+ pipe(my ($r, $w)) or die "pipe: $!";
+ [ $r, $w ];
+}
+
+# With --augment, read the messages already present in the mbox
+# destination (decompressing if needed) into the deduplicator, then
+# position the output at its end.  No-op without --augment; dies if the
+# destination is not seekable.
+sub _do_augment_mbox {
+ my ($self, $lei) = @_;
+ return if !$lei->{opt}->{augment};
+ my $dedupe = $lei->{dedupe};
+ my $dst = $lei->{ovv}->{dst};
+ die "cannot augment $dst, not seekable\n" if !$self->{seekable};
+ my $out = $lei->{1};
+ if (-s $out && $dedupe && $dedupe->prepare_dedupe) {
+ my $zsfx = $self->{zsfx};
+ my $rd = $zsfx ? decompress_src($out, $zsfx, $lei) :
+ dup_src($out);
+ # the MboxReader method has the same name as the output format
+ my $fmt = $lei->{ovv}->{fmt};
+ require PublicInbox::MboxReader;
+ PublicInbox::MboxReader->$fmt($rd, \&_augment, $lei);
+ }
+ # maybe some systems don't honor O_APPEND, Perl does this:
+ seek($out, 0, SEEK_END) or die "seek $dst: $!";
+ $dedupe->pause_dedupe if $dedupe;
+}
+
+# fast (1 disk seek), runs in main daemon
+# dispatches to _pre_augment_maildir or _pre_augment_mbox
+sub pre_augment {
+	my ($self, $lei) = @_;
+	my $method = '_pre_augment_'.$self->{base_type};
+	$self->$method($lei);
+}
+
+# slow, runs in wq worker
+# dispatches to _do_augment_maildir or _do_augment_mbox
+sub do_augment {
+	my ($self, $lei) = @_;
+	my $method = '_do_augment_'.$self->{base_type};
+	$self->$method($lei);
+}
+
+# fast (spawn compressor or mkdir), runs in main daemon
+# dispatches to _post_augment_maildir or _post_augment_mbox, passing
+# any extra arguments through
+sub post_augment {
+	my ($self, $lei, @args) = @_;
+	my $method = '_post_augment_'.$self->{base_type};
+	$self->$method($lei, @args);
+}
+
+# via ->wq_do
+# Queue an asynchronous read of $smsg->{blob} from the git repository at
+# $git_dir; git_to_mail passes the result on to the write callback.
+sub write_mail {
+ my ($self, $git_dir, $smsg, $lei) = @_;
+ my $not_done = delete $self->{4}; # write end of {each_smsg_done}
+ # the write callback is created once and reused for later messages
+ my $wcb = $self->{wcb} //= do { # first message
+ my %sig = $lei->atfork_child_wq($self);
+ @SIG{keys %sig} = values %sig; # not local
+ $lei->{dedupe}->prepare_dedupe;
+ $self->write_cb($lei);
+ };
+ # Git objects are cached per process and per git_dir;
+ # reap_gits finds them again through the "$$\0" key prefix
+ my $git = $self->{"$$\0$git_dir"} //= PublicInbox::Git->new($git_dir);
+ # $not_done rides along in the callback argument
+ $git->cat_async($smsg->{blob}, \&git_to_mail, [$wcb, $smsg, $not_done]);
+}
+
+# Pre-fork hook; only delegates to the parent class implementation.
+sub ipc_atfork_prepare {
+ my ($self) = @_;
+ # FDs: (done_wr, stdout|mbox, stderr, 3: sock, 4: each_smsg_done_wr)
+ $self->SUPER::ipc_atfork_prepare; # PublicInbox::IPC
+}
+
+# We rely on OnDestroy to run this before ->DESTROY, since ->DESTROY
+# ordering is unstable at worker exit and may cause segfaults
+#
+# Drops the cached write callback, then removes every Git object cached
+# by this process (keys starting with "$$\0") and waits for its
+# outstanding async requests.
+sub reap_gits {
+ my ($self) = @_;
+ delete $self->{wcb};
+ for my $git (delete @$self{grep(/\A$$\0/, keys %$self)}) {
+ $git->async_wait_all;
+ }
+}
+
+# Destructor: drop the cached write callback.
+sub DESTROY { delete $_[0]->{wcb} }
+
+# runs after IPC::wq_worker_loop
+# Calls the parent class hook, then returns an OnDestroy guard which
+# invokes reap_gits($self) when it is destroyed.
+sub ipc_atfork_child {
+ my ($self) = @_;
+ $self->SUPER::ipc_atfork_child;
+ # reap_gits needs to run before $self->DESTROY,
+ # IPC.pm will ensure that.
+ PublicInbox::OnDestroy->new($$, \&reap_gits, $self);
+}
+
+1;
--- /dev/null
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Combine any combination of PublicInbox::Search,
+# PublicInbox::ExtSearch, and PublicInbox::LeiSearch objects
+# into one Xapian DB
+package PublicInbox::LeiXSearch;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::LeiSearch PublicInbox::IPC);
+use PublicInbox::DS qw(dwaitpid);
+use PublicInbox::OpPipe;
+use PublicInbox::Import;
+use File::Temp 0.19 (); # 0.19 for ->newdir
+use File::Spec ();
+use PublicInbox::Search qw(xap_terms);
+use PublicInbox::Spawn qw(popen_rd);
+use PublicInbox::MID qw(mids);
+
+# Constructor: loads Xapian and creates an empty combined search object
+# whose query parser flags additionally allow pure NOT queries.
+sub new {
+	my ($class) = @_;
+	PublicInbox::Search::load_xapian();
+	my $flags = $PublicInbox::Search::QP_FLAGS |
+			PublicInbox::Search::FLAG_PURE_NOT();
+	bless { qp_flags => $flags }, $class;
+}
+
+# Attach all Xapian shards of $ibxish (an ExtSearch or Inbox) to this
+# combined search object: the shards are appended to {shards_flat} and
+# {shard2ibx} gets one entry per shard pointing back at $ibxish.
+# Warns and returns early when $ibxish has no search object or shards.
+sub attach_external {
+	my ($self, $ibxish) = @_; # ibxish = ExtSearch or Inbox
+	my $desc = $ibxish->{inboxdir} // $ibxish->{topdir};
+	my $srch = $ibxish->search or
+		return warn("$desc not indexed for Xapian\n");
+	my @shards = $srch->xdb_shards_flat or
+		return warn("$desc has no Xapian shards\n");
+
+	if (delete $self->{xdb}) { # XXX: do we need this?
+		# clobber existing {xdb} if amending
+		my $expect = delete $self->{nshard};
+		my $shards = delete $self->{shards_flat};
+		scalar(@$shards) == $expect or die
+			"BUG: {nshard}$expect != shards=".scalar(@$shards);
+
+		# {shard2ibx} holds one entry per shard, so consecutive
+		# duplicates belong to the same inbox: reload each inbox once
+		my $prev = {};
+		for my $old_ibxish (@{$self->{shard2ibx}}) {
+			next if $prev == $old_ibxish;
+			$prev = $old_ibxish;
+			my @shards = $old_ibxish->search->xdb_shards_flat;
+			push @{$self->{shards_flat}}, @shards;
+		}
+		my $nr = scalar(@{$self->{shards_flat}});
+		$nr == $expect or die
+			"BUG: reloaded $nr shards, expected $expect"
+	}
+	push @{$self->{shards_flat}}, @shards;
+	push(@{$self->{shard2ibx}}, $ibxish) for (@shards);
+}
+
+# returns a list of local inboxes (or count in scalar context)
+sub locals {
+	my ($self) = @_;
+	@{$self->{locals} // []};
+}
+
+# returns a list of remotes (or count in scalar context)
+sub remotes {
+	my ($self) = @_;
+	@{$self->{remotes} // []};
+}
+
+# called by PublicInbox::Search::xdb
+sub xdb_shards_flat {
+	my ($self) = @_;
+	@{$self->{shards_flat} // []};
+}
+
+# like over->get_art
+# Maps a match item of the combined database back to the inbox owning
+# its shard and returns that inbox's overview entry, with {docid} set
+# and, for inboxes supporting msg_keywords, {kw} set to the sorted
+# keyword list.  Returns nothing when the overview has no such entry.
+sub smsg_for {
+	my ($self, $mitem) = @_;
+	# cf. https://trac.xapian.org/wiki/FAQ/MultiDatabaseDocumentID
+	my $nshard = $self->{nshard};
+	my $docid = $mitem->get_docid;
+	my $shard = ($docid - 1) % $nshard;
+	my $num = int(($docid - 1) / $nshard) + 1;
+	my $ibx = $self->{shard2ibx}->[$shard];
+	# without this check, the assignments below would autovivify a
+	# bogus hash and callers relying on "or next" would never skip
+	my $smsg = $ibx->over->get_art($num) or return;
+	if (ref($ibx->can('msg_keywords'))) {
+		my $kw = xap_terms('K', $mitem->get_document);
+		$smsg->{kw} = [ sort keys %$kw ];
+	}
+	$smsg->{docid} = $docid;
+	$smsg;
+}
+
+# Run $qstr (default: 'bytes:1..') through ->mset with {relevance}
+# defaulting to -2 and return its result.
+sub recent {
+	my ($self, $qstr, $opt) = @_;
+	$opt //= {};
+	$opt->{relevance} //= -2;
+	$qstr //= 'bytes:1..';
+	$self->mset($qstr, $opt);
+}
+
+# Stub: always returns nothing (empty list, or undef in scalar context).
+# NOTE(review): presumably overrides an inherited ->over because a
+# combined search has no single overview DB -- confirm.
+sub over {}
+
+# Advance $mo->{offset} by the size of $mset and report whether another
+# batch should be fetched: false once a batch comes back empty or the
+# offset reaches $mo->{limit} (default: 10000).
+sub _mset_more ($$) {
+	my ($mset, $mo) = @_;
+	my $n = $mset->size;
+	return $n unless $n;
+	$mo->{offset} += $n;
+	$mo->{offset} < ($mo->{limit} // 10000);
+}
+
+# $startq will EOF when query_prepare is done augmenting and allow
+# query_mset and query_thread_mset to proceed.
+# Reads (up to) one byte from $startq.  The caller's variable is cleared
+# through the @_ alias first, so a caller testing it waits only once.
+sub wait_startq ($) {
+ my ($startq) = @_;
+ $_[0] = undef;
+ read($startq, my $query_prepare_done, 1);
+}
+
+# for --thread
+# Worker entry point: queries one inbox-like object ($ibxish), expands
+# every match to its whole thread and passes each message of those
+# threads to the overview's per-message callback.
+sub query_thread_mset {
+ my ($self, $lei, $ibxish) = @_;
+ local $0 = "$0 query_thread_mset";
+ my $startq = delete $self->{5};
+ my %sig = $lei->atfork_child_wq($self);
+ local @SIG{keys %sig} = values %sig;
+
+ my ($srch, $over) = ($ibxish->search, $ibxish->over);
+ unless ($srch && $over) {
+ my $desc = $ibxish->{inboxdir} // $ibxish->{topdir};
+ warn "$desc not indexed by Xapian\n";
+ return;
+ }
+ # private copy: _mset_more advances {offset} in it
+ my $mo = { %{$lei->{mset_opt}} };
+ my $mset;
+ my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei, $ibxish);
+ do {
+ $mset = $srch->mset($mo->{qstr}, $mo);
+ my $ids = $srch->mset_to_artnums($mset, $mo);
+ my $ctx = { ids => $ids };
+ my $i = 0;
+ # article number => match item, pairing @$ids and the mset
+ # items by position
+ my %n2item = map { ($ids->[$i++], $_) } $mset->items;
+ while ($over->expand_thread($ctx)) {
+ for my $n (@{$ctx->{xids}}) {
+ my $smsg = $over->get_art($n) or next;
+ wait_startq($startq) if $startq;
+ # undef for thread members which did not match themselves
+ my $mitem = delete $n2item{$smsg->{num}};
+ $each_smsg->($smsg, $mitem);
+ }
+ @{$ctx->{xids}} = ();
+ }
+ } while (_mset_more($mset, $mo));
+ undef $each_smsg; # drops @io for l2m->{each_smsg_done}
+ $lei->{ovv}->ovv_atexit_child($lei);
+}
+
+# non-parallel for non-"--thread" users
+# Worker entry point: attaches all local inboxes to this object, runs
+# the query across them and passes every match to the overview's
+# per-message callback.
+sub query_mset {
+ my ($self, $lei) = @_;
+ local $0 = "$0 query_mset";
+ my $startq = delete $self->{5};
+ my %sig = $lei->atfork_child_wq($self);
+ local @SIG{keys %sig} = values %sig;
+ # private copy: _mset_more advances {offset} in it
+ my $mo = { %{$lei->{mset_opt}} };
+ my $mset;
+ for my $loc (locals($self)) {
+ attach_external($self, $loc);
+ }
+ my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei, $self);
+ do {
+ $mset = $self->mset($mo->{qstr}, $mo);
+ for my $mitem ($mset->items) {
+ my $smsg = smsg_for($self, $mitem) or next;
+ wait_startq($startq) if $startq;
+ $each_smsg->($smsg, $mitem);
+ }
+ } while (_mset_more($mset, $mo));
+ undef $each_smsg; # drops @io for l2m->{each_smsg_done}
+ $lei->{ovv}->ovv_atexit_child($lei);
+}
+
+# callback for MboxReader->mboxrd
+# Builds an Smsg from a message read from a remote mbox and passes it,
+# together with the message itself, to $each_smsg.
+sub each_eml {
+ my ($eml, $self, $lei, $each_smsg) = @_;
+ my $smsg = bless {}, 'PublicInbox::Smsg';
+ $smsg->populate($eml);
+ $smsg->parse_references($eml, mids($eml));
+ # make sure these fields are at least empty strings
+ $smsg->{$_} //= '' for qw(from to cc ds subject references mid);
+ delete @$smsg{qw(From Subject -ds -ts)};
+ # only the first message waits: {5} is removed on first use
+ if (my $startq = delete($self->{5})) { wait_startq($startq) }
+ # remote results have no match item, hence the undef
+ $each_smsg->($smsg, undef, $eml);
+}
+
+# Worker entry point: query each remote in @$uris through curl(1),
+# read the gunzipped mboxrd response and pass every message to the
+# overview's per-message callback via each_eml.
+# NOTE(review): IO::Uncompress::Gunzip, PublicInbox::MboxReader and
+# PublicInbox::Config are not loaded in this block -- confirm they are
+# loaded elsewhere before this runs.
+sub query_remote_mboxrd {
+ my ($self, $lei, $uris) = @_;
+ local $0 = "$0 query_remote_mboxrd";
+ my %sig = $lei->atfork_child_wq($self); # keep $self->{5} startq
+ local @SIG{keys %sig} = values %sig;
+ my ($opt, $env) = @$lei{qw(opt env)};
+ # query parameters: q= is the search, x=m, and t=1 with --thread
+ my @qform = (q => $lei->{mset_opt}->{qstr}, x => 'm');
+ push(@qform, t => 1) if $opt->{thread};
+ my @cmd = (qw(curl -sSf -d), '');
+ my $verbose = $opt->{verbose};
+ push @cmd, '-v' if $verbose;
+ # pass through any curl options which were given to lei
+ for my $o ($lei->curl_opt) {
+ $o =~ s/\|[a-z0-9]\b//i; # remove single char short option
+ if ($o =~ s/=[is]@\z//) { # repeatable option with a value
+ my $ary = $opt->{$o} or next;
+ push @cmd, map { ("--$o", $_) } @$ary;
+ } elsif ($o =~ s/=[is]\z//) { # option with a single value
+ my $val = $opt->{$o} // next;
+ push @cmd, "--$o", $val;
+ } elsif ($opt->{$o}) { # boolean switch
+ push @cmd, "--$o";
+ }
+ }
+ $opt->{torsocks} = 'false' if $opt->{'no-torsocks'};
+ my $tor = $opt->{torsocks} //= 'auto';
+ my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei);
+ for my $uri (@$uris) {
+ $uri->query_form(@qform);
+ my $cmd = [ @cmd, $uri->as_string ];
+ # 'auto': wrap with torsocks for .onion hosts unless torsocks
+ # already appears in LD_PRELOAD; otherwise --torsocks is
+ # interpreted as a git-style boolean
+ if ($tor eq 'auto' && substr($uri->host, -6) eq '.onion' &&
+ (($env->{LD_PRELOAD}//'') !~ /torsocks/)) {
+ unshift @$cmd, 'torsocks';
+ } elsif (PublicInbox::Config::git_bool($tor)) {
+ unshift @$cmd, 'torsocks';
+ }
+ $lei->err("# @$cmd") if $verbose;
+ $? = 0;
+ my $fh = popen_rd($cmd, $env, { 2 => $lei->{2} });
+ $fh = IO::Uncompress::Gunzip->new($fh);
+ eval {
+ PublicInbox::MboxReader->mboxrd($fh, \&each_eml, $self,
+ $lei, $each_smsg);
+ };
+ return $lei->fail("E: @$cmd: $@") if $@;
+ # error messages show the URI with only the q= parameter
+ if (($? >> 8) == 22) { # HTTP 404 from curl(1)
+ $uri->query_form(q => $lei->{mset_opt}->{qstr});
+ $lei->err('# no results from '.$uri->as_string);
+ } elsif ($?) {
+ $uri->query_form(q => $lei->{mset_opt}->{qstr});
+ $lei->err('E: '.$uri->as_string);
+ $lei->child_error($?);
+ }
+ }
+ undef $each_smsg;
+ $lei->{ovv}->ovv_atexit_child($lei);
+}
+
+# Returns a PublicInbox::Git for a temporary bare repository whose
+# objects/info/alternates lists the object directory of every inbox in
+# {shard2ibx} (each canonical path once).  The File::Temp directory
+# object is stored in {-tmp} of the returned object.
+sub git {
+	my ($self) = @_;
+	my $tmp = File::Temp->newdir('lei_xsrch_git-XXXXXXXX', TMPDIR => 1);
+	my %seen;
+	my @alternates; # one "$git_dir/objects\n" line per unique repo
+	for my $ibx (@{$self->{shard2ibx} // []}) {
+		my $dir = File::Spec->canonpath($ibx->git->{git_dir});
+		next if $seen{$dir}++;
+		push @alternates, "$dir/objects\n";
+	}
+	my $git_dir = $tmp->dirname;
+	PublicInbox::Import::init_bare($git_dir);
+	my $f = "$git_dir/objects/info/alternates";
+	open(my $alt, '>', $f) or die "open($f): $!";
+	print $alt @alternates or die "print $f: $!";
+	close($alt) or die "close $f: $!";
+	my $git = PublicInbox::Git->new($git_dir);
+	$git->{-tmp} = $tmp;
+	return $git;
+}
+
+sub query_done { # EOF callback (the '' op registered in do_query)
+ my ($lei) = @_;
+ my $has_l2m = exists $lei->{l2m}; # remember this before {l2m} is deleted below
+ for my $f (qw(lxs l2m)) {
+ my $wq = delete $lei->{$f} or next;
+ $wq->wq_wait_old;
+ }
+ $lei->{ovv}->ovv_end($lei);
+ if ($has_l2m) { # close() calls LeiToMail reap_compress
+ if (my $out = delete $lei->{old_1}) {
+ if (my $mbout = $lei->{1}) {
+ close($mbout) or return $lei->fail(<<"");
+Error closing $lei->{ovv}->{dst}: $!
+
+ }
+ $lei->{1} = $out; # put the saved {old_1} handle back as {1}
+ }
+ $lei->start_mua;
+ }
+ $lei->dclose;
+}
+
+# '.' op handler registered by do_query: runs $l2m->post_augment, then
+# closes $au_done.  If post_augment dies, the {lxs} workers are killed
+# and the error is reported through $lei->fail first.
+sub do_post_augment {
+	my ($lei, $zpipe, $au_done) = @_;
+	my $l2m = $lei->{l2m} or die 'BUG: no {l2m}';
+	eval { $l2m->post_augment($lei, $zpipe) };
+	my $err = $@;
+	if ($err) {
+		my $lxs = delete $lei->{lxs};
+		if ($lxs) {
+			$lxs->wq_kill;
+			$lxs->wq_close;
+		}
+		$lei->fail("$err");
+	}
+	close $au_done; # triggers wait_startq
+}
+
+my $MAX_PER_HOST = 4; # cap on remote query jobs; used by concurrency() and start_query()
+sub MAX_PER_HOST { $MAX_PER_HOST } # read accessor for the file-scoped value
+
+# Returns the number of jobs a query needs: the number of locals when
+# $opt->{thread} is set (otherwise 1), plus the number of remotes capped
+# at $MAX_PER_HOST.
+sub concurrency {
+	my ($self, $opt) = @_;
+	my $nr_local = 1;
+	# locals()/remotes() are called in scalar context here, as before
+	$nr_local = locals($self) if $opt->{thread};
+	my $nr_remote = remotes($self);
+	if ($nr_remote > $MAX_PER_HOST) {
+		$nr_remote = $MAX_PER_HOST;
+	}
+	return $nr_local + $nr_remote;
+}
+
+# Always runs in the main (lei-daemon) process.  Queues the local and
+# remote search jobs via wq_do, then empties @$io.
+sub start_query {
+	my ($self, $io, $lei) = @_;
+	if ($lei->{opt}->{thread}) {
+		# one job per local
+		for my $loc (locals($self)) {
+			$self->wq_do('query_thread_mset', $io, $lei, $loc);
+		}
+	} elsif (locals($self)) {
+		# a single job covers all locals
+		$self->wq_do('query_mset', $io, $lei);
+	}
+	# deal remote URIs round-robin into at most $MAX_PER_HOST batches
+	my @batches;
+	my $n = 0;
+	for my $uri (remotes($self)) {
+		my $slot = $n++ % $MAX_PER_HOST;
+		push @{$batches[$slot]}, $uri;
+	}
+	for my $uris (@batches) {
+		$self->wq_do('query_remote_mboxrd', $io, $lei, $uris);
+	}
+	@$io = ();
+}
+
+sub query_prepare { # called by wq_do
+ my ($self, $lei) = @_;
+ local $0 = "$0 query_prepare"; # tag the process name for the duration of this job
+ my %sig = $lei->atfork_child_wq($self);
+ -p $lei->{0} or die "BUG: \$done pipe expected"; # {0} must be the pipe written to below
+ local @SIG{keys %sig} = values %sig;
+ eval { $lei->{l2m}->do_augment($lei) };
+ $lei->fail($@) if $@;
+ syswrite($lei->{0}, '.') == 1 or die "do_post_augment trigger: $!"; # '.' is the op do_query maps to do_post_augment
+}
+
+# '!' op handler registered by do_query; handles SIGPIPE from l2m/lxs
+# workers.  Detaches {lxs}; when wq_kill_old reports true, also sends
+# SIGPIPE to this process and waits for the workers.  Finally closes
+# and removes {1} if it is set.
+sub sigpipe_handler {
+	my ($lei) = @_;
+	my $lxs = delete $lei->{lxs};
+	if ($lxs) {
+		if ($lxs->wq_kill_old) {
+			kill('PIPE', $$);
+			$lxs->wq_wait_old;
+		}
+	}
+	if ($lei->{1}) {
+		close(delete $lei->{1});
+	}
+}
+
+sub do_query { # sets up the $done pipe and its op handlers, then dispatches the query jobs to workers
+ my ($self, $lei_orig) = @_;
+ my ($lei, @io) = $lei_orig->atfork_parent_wq($self);
+ $io[0] = undef;
+ pipe(my $done, $io[0]) or die "pipe $!"; # $io[0] is the write end handed to workers
+ $lei_orig->{1}->autoflush(1);
+
+ $lei_orig->event_step_init; # wait for shutdowns
+ my $done_op = { # ops read from $done, keyed by the byte written ('' is EOF, see query_done)
+ '' => [ \&query_done, $lei_orig ],
+ '!' => [ \&sigpipe_handler, $lei_orig ]
+ };
+ my $in_loop = exists $lei_orig->{sock};
+ $done = PublicInbox::OpPipe->new($done, $done_op, $in_loop);
+ my $l2m = $lei->{l2m};
+ if ($l2m) {
+ # may redirect $lei->{1} for mbox
+ my $zpipe = $l2m->pre_augment($lei_orig);
+ $io[1] = $lei_orig->{1};
+ pipe(my ($startq, $au_done)) or die "pipe: $!";
+ $done_op->{'.'} = [ \&do_post_augment, $lei_orig,
+ $zpipe, $au_done ]; # query_prepare writes the '.'
+ local $io[4] = *STDERR{GLOB}; # don't send l2m->{-wq_s1}
+ die "BUG: unexpected \$io[5]: $io[5]" if $io[5];
+ $self->wq_do('query_prepare', \@io, $lei);
+ fcntl($startq, 1031, 4096) if $^O eq 'linux'; # F_SETPIPE_SZ
+ $io[5] = $startq; # slot 5 is $startq (see the FDS list in ipc_atfork_prepare)
+ $io[1] = $zpipe->[1] if $zpipe;
+ }
+ start_query($self, \@io, $lei);
+ $self->wq_close(1);
+ unless ($in_loop) { # no {sock}: step $done by hand until its {sock} goes away
+ # for the $lei->atfork_child_wq PIPE handler:
+ while ($done->{sock}) { $done->event_step }
+ }
+}
+
+# Loads the modules query_remote_mboxrd needs when {remotes} exists,
+# then defers to the parent class implementation.
+#
+# FDS: (0: done_wr, 1: stdout|mbox, 2: stderr,
+#	3: sock, 4: $l2m->{-wq_s1}, 5: $startq)
+sub ipc_atfork_prepare {
+	my ($self) = @_;
+	if (exists $self->{remotes}) {
+		require PublicInbox::MboxReader;
+		require IO::Uncompress::Gunzip;
+	}
+	return $self->SUPER::ipc_atfork_prepare; # PublicInbox::IPC
+}
+
+# Classifies one external ($loc) and appends it to {remotes} or
+# {locals}.  $loc may be an object (anything with a ->scheme method is
+# remote, any other object is local), an http(s) URL string, or a
+# directory path.  Unrecognized directories are skipped with a warning.
+sub prepare_external {
+	my ($self, $loc, $boost) = @_; # n.b. already ordered by boost
+	if (ref $loc) { # already a URI, or PublicInbox::Inbox-like object
+		return push(@{$self->{remotes}}, $loc) if $loc->can('scheme');
+		return push(@{$self->{locals}}, $loc);
+	}
+	if ($loc =~ m!\Ahttps?://!) {
+		require URI;
+		return push(@{$self->{remotes}}, URI->new($loc));
+	}
+	my $obj;
+	if (-f "$loc/ei.lock") {
+		require PublicInbox::ExtSearch;
+		$obj = PublicInbox::ExtSearch->new($loc);
+	} elsif (-f "$loc/inbox.lock" || -d "$loc/public-inbox") {
+		require PublicInbox::Inbox; # v2, v1
+		$obj = bless { inboxdir => $loc }, 'PublicInbox::Inbox';
+	} else {
+		warn "W: ignoring $loc, unable to determine type\n";
+		return;
+	}
+	push @{$self->{locals}}, $obj;
+}
+
+
+1;
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# two-step linkification.
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Used by -nntpd for listen sockets
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Base class for per-inbox locking
package PublicInbox::Lock;
use strict;
-use warnings;
+use v5.10.1;
use Fcntl qw(:flock :DEFAULT);
use Carp qw(croak);
+use PublicInbox::OnDestroy;
+use File::Temp ();
# we only acquire the flock if creating or reindexing;
# PublicInbox::Import already has the lock on its own.
close $lockfh or croak "close $lock_path failed: $!\n";
}
+# caller must use return value
+# Acquires the lock and returns a PublicInbox::OnDestroy object built
+# with \&lock_release and $self (plus any @single_pid arguments).
+# Returns nothing when lock_acquire returns false.
+sub lock_for_scope {
+	my ($self, @single_pid) = @_;
+	return unless lock_acquire($self); # lock_path not set
+	my @release = (\&lock_release, $self);
+	return PublicInbox::OnDestroy->new(@single_pid, @release);
+}
+
+# Constructor: creates a File::Temp file named "$ident.lock-XXXXXX" in
+# the temporary directory and uses its path as {lock_path}.  The
+# File::Temp object itself is kept in {tmp}.
+sub new_tmp {
+	my ($cls, $ident) = @_;
+	my $tmp = File::Temp->new("$ident.lock-XXXXXX", TMPDIR => 1);
+	my $self = { lock_path => $tmp->filename, tmp => $tmp };
+	return bless($self, $cls);
+}
+
1;
-# Copyright (C) 2013-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2013-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# For the -mda script (mail delivery agent)
}
sub inboxes_for_list_id ($$) {
- my ($klass, $config, $simple) = @_;
+ my ($klass, $pi_cfg, $simple) = @_;
# newer Email::Simple allows header_raw, as does Email::MIME:
my @list_ids = $simple->can('header_raw') ?
my @dests;
for my $list_id (@list_ids) {
$list_id =~ /<[ \t]*(.+)?[ \t]*>/ or next;
- if (my $ibx = $config->lookup_list_id($1)) {
+ if (my $ibx = $pi_cfg->lookup_list_id($1)) {
push @dests, $ibx;
}
}
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Various Message-ID-related functions.
use warnings;
use base qw/Exporter/;
our @EXPORT_OK = qw(mid_clean id_compress mid2path mid_escape MID_ESC
- mids references mids_for_index $MID_EXTRACT);
+ mids references mids_for_index mids_in $MID_EXTRACT);
use URI::Escape qw(uri_escape_utf8);
use Digest::SHA qw/sha1_hex/;
require PublicInbox::Address;
uniq_mids(extract_mids(@mids));
}
+# for Resent-Message-ID and maybe others
+# Collects the raw values of each header named in @headers from $eml,
+# in the order given, and returns the result of running them through
+# extract_mids and uniq_mids.
+sub mids_in ($@) {
+	my ($eml, @headers) = @_;
+	my @raw;
+	for my $name (@headers) {
+		push @raw, $eml->header_raw($name);
+	}
+	return uniq_mids(extract_mids(@raw));
+}
+
# we allow searching on X-Alt-Message-ID since PublicInbox::NNTP uses them
# to placate some clients, and we want to ensure NNTP-only clients can
# import and index without relying on HTTP endpoints
sub mids_for_index ($) {
- my ($hdr) = @_;
- my @mids = $hdr->header_raw('Message-ID');
- my @alts = $hdr->header_raw('X-Alt-Message-ID');
- uniq_mids(extract_mids(@mids, @alts));
+ mids_in($_[0], qw(Message-ID X-Alt-Message-ID));
}
# last References should be IRT, but some mail clients do things
warn "Message-ID: <$mid> too long, truncating\n";
$mid = substr($mid, 0, MAX_MID_SIZE);
}
- push(@ret, $mid) unless $seen->{$mid}++;
+ $seen->{$mid} //= push(@ret, $mid);
}
\@ret;
}
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# generates manifest.js.gz for grokmirror(1)
use strict;
use v5.10.1;
use parent qw(PublicInbox::WwwListing);
-use Digest::SHA ();
-use File::Spec ();
use bytes (); # length
-use PublicInbox::Inbox;
-use PublicInbox::Git;
+use PublicInbox::Config;
use IO::Compress::Gzip qw(gzip);
use HTTP::Date qw(time2str);
-*try_cat = \&PublicInbox::Inbox::try_cat;
-our $json;
-for my $mod (qw(JSON::MaybeXS JSON JSON::PP)) {
- eval "require $mod" or next;
- # ->ascii encodes non-ASCII to "\uXXXX"
- $json = $mod->new->ascii(1) and last;
-}
+my $json = PublicInbox::Config::json();
# called by WwwListing
sub url_regexp {
$ctx->SUPER::url_regexp('publicInbox.grokManifest', 'match=domain');
}
-sub fingerprint ($) {
- my ($git) = @_;
- # TODO: convert to qspawn for fairness when there's
- # thousands of repos
- my ($fh, $pid) = $git->popen('show-ref');
- my $dig = Digest::SHA->new(1);
- while (read($fh, my $buf, 65536)) {
- $dig->add($buf);
- }
- close $fh;
- waitpid($pid, 0);
- return if $?; # empty, uninitialized git repo
- $dig->hexdigest;
+sub inject_entry ($$$;$) { # stores manifest entry $ent under $url_path in $ctx
+ my ($ctx, $url_path, $ent, $git_dir) = @_;
+ $ctx->{-abs2urlpath}->{$git_dir // delete $ent->{git_dir}} = $url_path; # without $git_dir, take (and remove) git_dir from $ent
+ my $modified = $ent->{modified};
+ $ctx->{-mtime} = $modified if $modified > ($ctx->{-mtime} // 0); # {-mtime} tracks the largest "modified" seen
+ $ctx->{manifest}->{$url_path} = $ent;
}
sub manifest_add ($$;$$) {
my ($ctx, $ibx, $epoch, $default_desc) = @_;
my $url_path = "/$ibx->{name}";
- my $git_dir = $ibx->{inboxdir};
+ my $git;
if (defined $epoch) {
- $git_dir .= "/git/$epoch.git";
$url_path .= "/git/$epoch.git";
+ $git = $ibx->git_epoch($epoch) or return;
+ } else {
+ $git = $ibx->git;
}
- return unless -d $git_dir;
- my $git = PublicInbox::Git->new($git_dir);
- my $fingerprint = fingerprint($git) or return; # no empty repos
-
- chomp(my $owner = $git->qx('config', 'gitweb.owner'));
- chomp(my $desc = try_cat("$git_dir/description"));
- utf8::decode($owner);
- utf8::decode($desc);
- $owner = undef if $owner eq '';
- $desc = 'Unnamed repository' if $desc eq '';
-
- # templates/hooks--update.sample and git-multimail in git.git
- # only match "Unnamed repository", not the full contents of
- # templates/this--description in git.git
- if ($desc =~ /\AUnnamed repository/) {
- $desc = "$default_desc [epoch $epoch]" if defined($epoch);
- }
-
- my $reference;
- chomp(my $alt = try_cat("$git_dir/objects/info/alternates"));
- if ($alt) {
- # n.b.: GitPython doesn't seem to handle comments or C-quoted
- # strings like native git does; and we don't for now, either.
- my @alt = split(/\n+/, $alt);
-
- # grokmirror only supports 1 alternate for "reference",
- if (scalar(@alt) == 1) {
- my $objdir = "$git_dir/objects";
- $reference = File::Spec->rel2abs($alt[0], $objdir);
- $reference =~ s!/[^/]+/?\z!!; # basename
- }
- }
- $ctx->{-abs2urlpath}->{$git_dir} = $url_path;
- my $modified = $git->modified;
- if ($modified > ($ctx->{-mtime} // 0)) {
- $ctx->{-mtime} = $modified;
- }
- $ctx->{manifest}->{$url_path} = {
- owner => $owner,
- reference => $reference,
- description => $desc,
- modified => $modified,
- fingerprint => $fingerprint,
- };
+ my $ent = $git->manifest_entry($epoch, $default_desc) or return;
+ inject_entry($ctx, $url_path, $ent, $git->{git_dir});
}
-sub ibx_entry {
+sub slow_manifest_add ($$) {
my ($ctx, $ibx) = @_;
eval {
if (defined(my $max = $ibx->max_git_epoch)) {
manifest_add($ctx, $ibx);
}
};
+}
+
+# Adds the manifest entries stored for $ibx in the misc index of $ALL
+# to $ctx.  The stored data is JSON mapping URL paths to entries; its
+# '' key is dropped before injection.  Warns when $ibx has no data.
+sub eidx_manifest_add ($$$) {
+	my ($ctx, $ALL, $ibx) = @_;
+	my $raw = $ALL->misc->inbox_data($ibx);
+	if (!$raw) {
+		warn "E: `${\$ibx->eidx_key}' not indexed by $ALL->{topdir}\n";
+	} else {
+		my $manifest = $json->decode($raw);
+		delete $manifest->{''}; # private
+		for my $url_path (keys %$manifest) {
+			inject_entry($ctx, $url_path, $manifest->{$url_path});
+		}
+	}
+}
+
+sub ibx_entry {
+ my ($ctx, $ibx) = @_;
+ my $ALL = $ctx->{www}->{pi_cfg}->ALL;
+ if ($ALL) {
+ eidx_manifest_add($ctx, $ALL, $ibx);
+ } else {
+ slow_manifest_add($ctx, $ibx);
+ }
warn "E: $@" if $@;
}
sub per_inbox {
my ($ctx) = @_;
- ibx_entry($ctx, $ctx->{-inbox});
+ # only one inbox, slow is probably OK
+ slow_manifest_add($ctx, $ctx->{ibx});
psgi_triple($ctx);
}
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Streaming interface for mboxrd HTTP responses
sub getline {
my ($ctx) = @_; # ctx
my $smsg = $ctx->{smsg} or return;
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
my $eml = $ibx->smsg_eml($smsg) or return;
my $n = $ctx->{smsg} = $ibx->over->next_by_mid(@{$ctx->{next_arg}});
- $ctx->zmore(msg_hdr($ctx, $eml, $smsg->{mid}));
+ $ctx->zmore(msg_hdr($ctx, $eml));
if ($n) {
$ctx->translate(msg_body($eml));
} else { # last message
my ($ctx, $eml) = @_;
my $smsg = delete $ctx->{smsg};
# next message
- $ctx->{smsg} = $ctx->{-inbox}->over->next_by_mid(@{$ctx->{next_arg}});
+ $ctx->{smsg} = $ctx->{ibx}->over->next_by_mid(@{$ctx->{next_arg}});
- $ctx->zmore(msg_hdr($ctx, $eml, $smsg->{mid}));
+ $ctx->zmore(msg_hdr($ctx, $eml));
$ctx->{http_out}->write($ctx->translate(msg_body($eml)));
}
$fn =~ s/^re:\s+//i;
$fn = to_filename($fn) // 'no-subject';
my @hdr = ('Content-Type');
- if ($ctx->{-inbox}->{obfuscate}) {
+ if ($ctx->{ibx}->{obfuscate}) {
# obfuscation is stupid, but maybe scrapers are, too...
push @hdr, 'application/mbox';
$fn .= '.mbox';
# for rare cases where v1 inboxes aren't indexed w/ ->over at all
sub no_over_raw ($) {
my ($ctx) = @_;
- my $mref = $ctx->{-inbox}->msg_by_mid($ctx->{mid}) or return;
+ my $mref = $ctx->{ibx}->msg_by_mid($ctx->{mid}) or return;
my $eml = PublicInbox::Eml->new($mref);
[ 200, res_hdr($ctx, $eml->header_str('Subject')),
- [ msg_hdr($ctx, $eml, $ctx->{mid}) . msg_body($eml) ] ]
+ [ msg_hdr($ctx, $eml) . msg_body($eml) ] ]
}
# /$INBOX/$MESSAGE_ID/raw
sub emit_raw {
my ($ctx) = @_;
- $ctx->{base_url} = $ctx->{-inbox}->base_url($ctx->{env});
- my $over = $ctx->{-inbox}->over or return no_over_raw($ctx);
+ $ctx->{base_url} = $ctx->{ibx}->base_url($ctx->{env});
+ my $over = $ctx->{ibx}->over or return no_over_raw($ctx);
my ($id, $prev);
my $mip = $ctx->{next_arg} = [ $ctx->{mid}, \$id, \$prev ];
my $smsg = $ctx->{smsg} = $over->next_by_mid(@$mip) or return;
$ctx->psgi_response(200, $res_hdr);
}
-sub msg_hdr ($$;$) {
- my ($ctx, $eml, $mid) = @_;
+sub msg_hdr ($$) {
+ my ($ctx, $eml) = @_;
my $header_obj = $eml->header_obj;
# drop potentially confusing headers, ssoma already should've dropped
foreach my $d (qw(Lines Bytes Content-Length Status)) {
$header_obj->header_set($d);
}
- my $ibx = $ctx->{-inbox};
- my $base = $ctx->{base_url};
- $mid = $ctx->{mid} unless defined $mid;
- $mid = mid_escape($mid);
- my @append = (
- 'Archived-At', "<$base$mid/>",
- 'List-Archive', "<$base>",
- 'List-Post', "<mailto:$ibx->{-primary_address}>",
- );
my $crlf = $header_obj->crlf;
my $buf = $header_obj->as_string;
# fixup old bug from import (pre-a0c07cba0e5d8b6a)
$buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
- $buf = "From mboxrd\@z Thu Jan 1 00:00:00 1970" . $crlf . $buf;
-
- for (my $i = 0; $i < @append; $i += 2) {
- my $k = $append[$i];
- my $v = $append[$i + 1];
- my @v = $header_obj->header_raw($k);
- foreach (@v) {
- if ($v eq $_) {
- $v = undef;
- last;
- }
- }
- $buf .= "$k: $v$crlf" if defined $v;
- }
- $buf .= $crlf;
+ "From mboxrd\@z Thu Jan 1 00:00:00 1970" . $crlf . $buf . $crlf;
}
sub msg_body ($) {
sub mbox_all_ids {
my ($ctx) = @_;
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
my $prev = 0;
my $mm = $ctx->{mm} = $ibx->mm;
my $ids = $mm->ids_after(\$prev) or return
PublicInbox::MboxGz::mbox_gz($ctx, \&all_ids_cb, 'all');
}
+# Warns that part of the inbox ($what, e.g. 'over' or 'search')
+# disappeared and returns undef, so callers can write
+# "... or return gone($ctx, $what)".
+sub gone ($$) {
+	my ($ctx, $what) = @_;
+	warn "W: `$ctx->{ibx}->{inboxdir}' $what went away unexpectedly\n";
+	return undef;
+}
+
sub results_cb {
my ($ctx) = @_;
- my $over = $ctx->{-inbox}->over or return;
+ my $over = $ctx->{ibx}->over or return gone($ctx, 'over');
while (1) {
while (defined(my $num = shift(@{$ctx->{ids}}))) {
my $smsg = $over->get_art($num) or next;
return $smsg;
}
# refill result set
- my $srch = $ctx->{-inbox}->search(undef, $ctx) or return;
+ my $srch = $ctx->{ibx}->isrch or return gone($ctx, 'search');
my $mset = $srch->mset($ctx->{query}, $ctx->{qopts});
my $size = $mset->size or return;
$ctx->{qopts}->{offset} += $size;
- $ctx->{ids} = $srch->mset_to_artnums($mset);
+ $ctx->{ids} = $srch->mset_to_artnums($mset, $ctx->{qopts});
}
}
sub results_thread_cb {
my ($ctx) = @_;
- my $over = $ctx->{-inbox}->over or return;
+ my $over = $ctx->{ibx}->over or return gone($ctx, 'over');
while (1) {
while (defined(my $num = shift(@{$ctx->{xids}}))) {
my $smsg = $over->get_art($num) or next;
next if $over->expand_thread($ctx);
# refill result set
- my $srch = $ctx->{-inbox}->search(undef, $ctx) or return;
+ my $srch = $ctx->{ibx}->isrch or return gone($ctx, 'search');
my $mset = $srch->mset($ctx->{query}, $ctx->{qopts});
my $size = $mset->size or return;
$ctx->{qopts}->{offset} += $size;
- $ctx->{ids} = $srch->mset_to_artnums($mset);
+ $ctx->{ids} = $srch->mset_to_artnums($mset, $ctx->{qopts});
}
}
my ($ctx, $q) = @_;
my $q_string = $q->{'q'};
return mbox_all_ids($ctx) if $q_string !~ /\S/;
- my $srch = $ctx->{-inbox}->search or
+ my $srch = $ctx->{ibx}->isrch or
return PublicInbox::WWW::need($ctx, 'Search');
- my $over = $ctx->{-inbox}->over or
+ my $over = $ctx->{ibx}->over or
return PublicInbox::WWW::need($ctx, 'Overview');
- my $qopts = $ctx->{qopts} = { mset => 2 }; # order by docid
+ my $qopts = $ctx->{qopts} = { relevance => -1 }; # ORDER BY docid ASC
$qopts->{thread} = 1 if $q->{t};
my $mset = $srch->mset($q_string, $qopts);
$qopts->{offset} = $mset->size or
return [404, [qw(Content-Type text/plain)],
["No results found\n"]];
$ctx->{query} = $q_string;
- $ctx->{ids} = $srch->mset_to_artnums($mset);
+ $ctx->{ids} = $srch->mset_to_artnums($mset, $qopts);
require PublicInbox::MboxGz;
my $fn;
if ($q->{t} && $srch->has_threadid) {
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
package PublicInbox::MboxGz;
use strict;
sub mbox_gz {
my ($self, $cb, $fn) = @_;
$self->{cb} = $cb;
- $self->{base_url} = $self->{-inbox}->base_url($self->{env});
+ $self->{base_url} = $self->{ibx}->base_url($self->{env});
$self->{gz} = PublicInbox::GzipFilter::gzip_or_die();
$fn = to_filename($fn // '') // 'no-subject';
# http://www.iana.org/assignments/media-types/application/gzip
my ($self) = @_;
my $cb = $self->{cb} or return;
while (my $smsg = $cb->($self)) {
- my $eml = $self->{-inbox}->smsg_eml($smsg) or next;
- $self->zmore(msg_hdr($self, $eml, $smsg->{mid}));
+ my $eml = $self->{ibx}->smsg_eml($smsg) or next;
+ $self->zmore(msg_hdr($self, $eml));
return $self->translate(msg_body($eml));
}
# signal that we're done and can return undef next call:
--- /dev/null
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# reader for mbox variants we support
+package PublicInbox::MboxReader;
+use strict;
+use v5.10.1;
+use Data::Dumper;
+$Data::Dumper::Useqq = 1; # should've been the default, for bad data
+
+my $from_strict =
+ qr/^From \S+ +\S+ \S+ +\S+ [^\n:]+:[^\n:]+:[^\n:]+ [^\n:]+\n/sm; # a whole "From " separator line; the split() delimiter in _mbox_from
+
+sub _mbox_from { # shared mboxrd/mboxo reader: split on "From " lines, unescape via $from_re, call $eml_cb->($eml, @arg) per message
+ my ($mbfh, $from_re, $eml_cb, @arg) = @_;
+ my $buf = '';
+ my @raw;
+ while (defined(my $r = read($mbfh, $buf, 65536, length($buf)))) { # appends to $buf
+ if ($r == 0) { # close here to check for "curl --fail"
+ close($mbfh) or die "error closing mbox: \$?=$? $!";
+ @raw = ($buf); # at EOF the remainder is the final message
+ } else {
+ @raw = split(/$from_strict/mos, $buf, -1); # limit -1 keeps trailing empty fields
+ next if scalar(@raw) == 0;
+ $buf = pop(@raw); # last bit may be incomplete
+ }
+ @raw = grep /[^ \t\r\n]/s, @raw; # skip empty messages
+ while (defined(my $raw = shift @raw)) {
+ $raw =~ s/\r?\n\z//s; # drop one trailing line ending
+ $raw =~ s/$from_re/$1/gms; # strip one ">" of "From " escaping
+ my $eml = PublicInbox::Eml->new(\$raw);
+ $eml_cb->($eml, @arg);
+ }
+ return if $r == 0; # EOF
+ }
+ die "error reading mboxo/mboxrd handle: $!"; # reached only when read() returned undef
+}
+
+# Reads mboxrd from $mbfh and calls $eml_cb->($eml, @arg) for each
+# message.  A body line of one or more ">" followed by "From " has a
+# single ">" removed.  The first argument (class or object) is ignored.
+sub mboxrd {
+	my ($mbfh, $eml_cb, @arg) = @_[1..$#_];
+	my $unescape = qr/^>(>*From )/ms;
+	_mbox_from($mbfh, $unescape, $eml_cb, @arg);
+}
+
+# Reads mboxo from $mbfh and calls $eml_cb->($eml, @arg) for each
+# message.  Only body lines starting with exactly ">From " are
+# unescaped.  The first argument (class or object) is ignored.
+sub mboxo {
+	my ($mbfh, $eml_cb, @arg) = @_[1..$#_];
+	my $unescape = qr/^>(From )/ms;
+	_mbox_from($mbfh, $unescape, $eml_cb, @arg);
+}
+
+# Returns a scalar ref to a message body of $cl bytes.  Bytes are taken
+# (and removed) from the front of $$bref first; any shortfall is read
+# from $mbfh.  Dies when $mbfh is false while more bytes are needed, on
+# a read error, or when fewer bytes than needed could be read.
+sub _cl_body {
+	my ($mbfh, $bref, $cl) = @_;
+	my $body = substr($$bref, 0, $cl, ''); # 4-arg substr consumes $$bref
+	my $need = $cl - length($body);
+	return \$body if $need <= 0;
+	die "E: needed $need bytes after EOF" unless $mbfh;
+	my $r = read($mbfh, $body, $need, length($body));
+	die "E: read error: $!\n" unless defined $r;
+	die "E: read $r of $need bytes\n" if $r != $need;
+	return \$body;
+}
+
+# Removes the header block (up to and including the first blank line)
+# from the front of $$ref and returns a reference to the removed text.
+# Returns undef, leaving $$ref untouched, when no blank line is present.
+sub _extract_hdr {
+	my ($ref) = @_;
+	if (index($$ref, "\r\n") < 0) { # no CRLF at all: likely on *nix
+		my $pos = index($$ref, "\n\n");
+		# sv_chop on $$ref
+		return \substr($$ref, 0, $pos + 2, '') if $pos >= 0;
+	}
+	if ($$ref =~ /\r?\n\r?\n/s) {
+		return \substr($$ref, 0, $+[0], ''); # sv_chop on $$ref
+	}
+	return undef;
+}
+
+sub _mbox_cl ($$$;@) { # shared mboxcl/mboxcl2 reader: body length comes from Content-Length; calls $eml_cb->($eml, @arg) per message
+ my ($mbfh, $uxs_from, $eml_cb, @arg) = @_;
+ my $buf = '';
+ while (defined(my $r = read($mbfh, $buf, 65536, length($buf)))) { # appends to $buf
+ if ($r == 0) { # detect "curl --fail"
+ close($mbfh) or
+ die "error closing mboxcl/mboxcl2: \$?=$? $!";
+ undef $mbfh; # _cl_body dies if it still needs bytes after this
+ }
+ while (my $hdr = _extract_hdr(\$buf)) { # one iteration per complete header block in $buf
+ $$hdr =~ s/\A[\r\n]*From [^\n]*\n//s or
+ die "E: no 'From ' line in:\n", Dumper($hdr);
+ my $eml = PublicInbox::Eml->new($hdr);
+ my @cl = $eml->header_raw('Content-Length');
+ my $n = scalar(@cl); # exactly one Content-Length header is required
+ $n == 0 and die "E: Content-Length missing in:\n",
+ Dumper($eml->as_string);
+ $n == 1 or die "E: multiple ($n) Content-Length in:\n",
+ Dumper($eml->as_string);
+ $cl[0] =~ /\A[0-9]+\z/ or die
+ "E: Content-Length `$cl[0]' invalid\n",
+ Dumper($eml->as_string);
+ if (($eml->{bdy} = _cl_body($mbfh, \$buf, $cl[0]))) {
+ $uxs_from and # true for mboxcl only: undo ">From " escaping
+ ${$eml->{bdy}} =~ s/^>From /From /sgm;
+ }
+ $eml_cb->($eml, @arg);
+ }
+ if ($r == 0) { # EOF: anything non-blank still in $buf is reported, not parsed
+ $buf =~ /[^ \r\n\t]/ and
+ warn "W: leftover at end of mboxcl/mboxcl2:\n",
+ Dumper(\$buf);
+ return;
+ }
+ }
+ die "error reading mboxcl/mboxcl2 handle: $!"; # reached only when read() returned undef
+}
+
+# Reads mboxcl from $mbfh and calls $eml_cb->($eml, @arg) for each
+# message; ">From " escaping in bodies is undone.  The first argument
+# (class or object) is ignored.
+sub mboxcl {
+	my ($mbfh, $eml_cb, @arg) = @_[1..$#_];
+	my $unescape_from = 1;
+	_mbox_cl($mbfh, $unescape_from, $eml_cb, @arg);
+}
+
+# Reads mboxcl2 from $mbfh and calls $eml_cb->($eml, @arg) for each
+# message; bodies are passed through without any "From " unescaping.
+# The first argument (class or object) is ignored.
+sub mboxcl2 {
+	my ($mbfh, $eml_cb, @arg) = @_[1..$#_];
+	my $unescape_from = undef;
+	_mbox_cl($mbfh, $unescape_from, $eml_cb, @arg);
+}
+
+# Constructor: returns a reference to a fresh undefined scalar, blessed
+# into this package.  Arguments (including the class) are ignored.
+sub new {
+	my $x;
+	return bless(\$x, __PACKAGE__);
+}
+
+1;
--- /dev/null
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# like PublicInbox::SearchIdx, but for searching for non-mail messages.
+# Things indexed include:
+# * inboxes themselves
+# * epoch information
+# * (maybe) git code repository information
+# Expect ~100K-1M documents with no parallelism opportunities,
+# so no sharding, here.
+#
+# See MiscSearch for read-only counterpart
+package PublicInbox::MiscIdx;
+use strict;
+use v5.10.1;
+use PublicInbox::InboxWritable;
+use PublicInbox::Search; # for SWIG Xapian and Search::Xapian compat
+use PublicInbox::SearchIdx qw(index_text term_generator add_val);
+use PublicInbox::Spawn qw(nodatacow_dir);
+use Carp qw(croak);
+use File::Path ();
+use PublicInbox::MiscSearch;
+use PublicInbox::Config;
+my $json;
+
+# Constructor.  $eidx supplies {xpfx} (the index is kept in
+# "$eidx->{xpfx}/misc", created here if needed) and {-no_fsync}.
+# The database itself is not opened until begin_txn.
+sub new {
+	my ($class, $eidx) = @_;
+	PublicInbox::SearchIdx::load_xapian_writable();
+	my $mi_dir = "$eidx->{xpfx}/misc";
+	File::Path::mkpath($mi_dir);
+	nodatacow_dir($mi_dir);
+	my $flags = $PublicInbox::SearchIdx::DB_CREATE_OR_OPEN;
+	if ($eidx->{-no_fsync}) {
+		$flags |= $PublicInbox::SearchIdx::DB_NO_SYNC;
+	}
+	$json //= PublicInbox::Config::json();
+	my $self = {
+		mi_dir => $mi_dir,
+		flags => $flags,
+		indexlevel => 'full', # small DB, no point in medium?
+	};
+	return bless($self, $class);
+}
+
+# Opens the writable database in {mi_dir}, stores it in {xdb} and
+# begins a transaction on it.  Croaks if {xdb} is already set or if
+# the database cannot be opened.
+sub begin_txn {
+	my ($self) = @_;
+	croak 'BUG: already in txn' if $self->{xdb}; # XXX make lazy?
+	my $wdb_class = $PublicInbox::Search::X{WritableDatabase};
+	my ($mi_dir, $flags) = @{$self}{qw(mi_dir flags)};
+	my $xdb = eval { $wdb_class->new($mi_dir, $flags) };
+	croak "Failed opening $mi_dir: $@" if $@;
+	$self->{xdb} = $xdb;
+	$xdb->begin_transaction;
+}
+
+# Commits the transaction started by begin_txn and removes {xdb} from
+# $self.  Croaks when {xdb} is not set.
+sub commit_txn {
+	my ($self) = @_;
+	croak 'BUG: not in txn' unless $self->{xdb}; # XXX make lazy?
+	my $xdb = delete $self->{xdb};
+	$xdb->commit_transaction;
+}
+
+# Deletes every document carrying the unique term 'Q'.$eidx_key from
+# {xdb}, emitting one informational warning per deleted docid.
+sub remove_eidx_key {
+	my ($self, $eidx_key) = @_;
+	my $xdb = $self->{xdb};
+	my $term = 'Q'.$eidx_key;
+	my $cur = $xdb->postlist_begin($term);
+	my $end = $xdb->postlist_end($term);
+	# gather the docids first, delete afterwards
+	my @docids; # only one, unless we had bugs
+	while ($cur != $end) {
+		push @docids, $cur->get_docid;
+		$cur++;
+	}
+	for my $docid (@docids) {
+		$xdb->delete_document($docid);
+		warn "I: remove inbox docid #$docid ($eidx_key)\n";
+	}
+}
+
+# adds or updates according to $eidx_key
+sub index_ibx { # (re)builds the document for $ibx in {xdb}; {xdb} is set by begin_txn
+ my ($self, $ibx) = @_;
+ my $eidx_key = $ibx->eidx_key;
+ my $xdb = $self->{xdb};
+ # Q = uniQue in Xapian terminology
+ my $head = $xdb->postlist_begin('Q'.$eidx_key);
+ my $tail = $xdb->postlist_end('Q'.$eidx_key);
+ my ($docid, @drop);
+ for (; $head != $tail; $head++) {
+ if (defined $docid) { # any hit after the first is a duplicate
+ my $i = $head->get_docid;
+ push @drop, $i;
+ warn <<EOF;
+W: multiple inboxes keyed to `$eidx_key', deleting #$i
+EOF
+ } else {
+ $docid = $head->get_docid; # first hit: the docid to replace below
+ }
+ }
+ $xdb->delete_document($_) for @drop; # just in case
+
+ my $doc = $PublicInbox::Search::X{Document}->new; # always built from scratch
+ term_generator($self)->set_document($doc);
+
+ # allow sorting by modified and uidvalidity (created at)
+ add_val($doc, $PublicInbox::MiscSearch::MODIFIED, $ibx->modified);
+ add_val($doc, $PublicInbox::MiscSearch::UIDVALIDITY, $ibx->uidvalidity);
+
+ $doc->add_boolean_term('Q'.$eidx_key); # uniQue id
+ $doc->add_boolean_term('T'.'inbox'); # Type
+
+ if (defined($ibx->{newsgroup}) && $ibx->nntp_usable) {
+ $doc->add_boolean_term('T'.'newsgroup'); # additional Type
+ }
+
+ # force reread from disk, {description} could be loaded from {misc}
+ delete $ibx->{description};
+ my $desc = $ibx->description;
+
+ # description = S/Subject (or title)
+ # address = A/Author
+ index_text($self, $desc, 1, 'S');
+ index_text($self, $ibx->{name}, 1, 'XNAME');
+ my %map = ( # multi-valued $ibx field => term prefix
+ address => 'A',
+ listid => 'XLISTID',
+ infourl => 'XINFOURL',
+ url => 'XURL'
+ );
+ while (my ($f, $pfx) = each %map) {
+ for my $v (@{$ibx->{$f} // []}) {
+ index_text($self, $v, 1, $pfx);
+ }
+ }
+ my $data = {}; # URL path => manifest entry, stored as the document data
+ if (defined(my $max = $ibx->max_git_epoch)) { # v2
+ my $pfx = "/$ibx->{name}/git/";
+ for my $epoch (0..$max) {
+ my $git = $ibx->git_epoch($epoch) or return; # NOTE(review): a missing epoch returns before the document is added/replaced - confirm intended
+ if (my $ent = $git->manifest_entry($epoch, $desc)) {
+ $data->{"$pfx$epoch.git"} = $ent;
+ $ent->{git_dir} = $git->{git_dir};
+ }
+ $git->cleanup; # ->modified starts cat-file --batch
+ }
+ } elsif (my $ent = $ibx->git->manifest_entry) { # v1
+ $ent->{git_dir} = $ibx->{inboxdir};
+ $data->{"/$ibx->{name}"} = $ent;
+ }
+ $doc->set_data($json->encode($data));
+ if (defined $docid) {
+ $xdb->replace_document($docid, $doc); # keep the existing docid
+ } else {
+ $xdb->add_document($doc);
+ }
+}
+
+1;
--- /dev/null
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# read-only counterpart to MiscIdx
+package PublicInbox::MiscSearch;
+use strict;
+use v5.10.1;
+use PublicInbox::Search qw(retry_reopen int_val);
+my $json;
+
+# Xapian value columns:
+our $MODIFIED = 0;
+our $UIDVALIDITY = 1; # (created time)
+
+# avoid conflicting with message Search::prob_prefix for UI/UX reasons
+my %PROB_PREFIX = ( # query field name => term prefix(es); registered by mi_qp_new
+ description => 'S', # $INBOX_DIR/description
+ address => 'A',
+ listid => 'XLISTID',
+ url => 'XURL',
+ infourl => 'XINFOURL',
+ name => 'XNAME',
+ '' => 'S A XLISTID XNAME XURL XINFOURL' # empty field name: space-separated list, one add_prefix call each
+);
+
+# Constructor: opens the Xapian database in $dir and keeps it in {xdb}.
+sub new {
+	my ($class, $dir) = @_;
+	PublicInbox::Search::load_xapian();
+	$json //= PublicInbox::Config::json();
+	my $xdb = $PublicInbox::Search::X{Database}->new($dir);
+	return bless({ xdb => $xdb }, $class);
+}
+
+# read-only
+# Builds the Xapian QueryParser for the misc index: AND as the default
+# operator, stemming (STEM_SOME), a wildcard expansion cap of 100, the
+# prefixes from %PROB_PREFIX and the boolean "type:" prefix.
+# Returns the QueryParser object.
+sub mi_qp_new ($) {
+	my ($self) = @_;
+	my $xdb = $self->{xdb};
+	my $qp = $PublicInbox::Search::X{QueryParser}->new;
+	$qp->set_default_op(PublicInbox::Search::OP_AND());
+	$qp->set_database($xdb);
+	$qp->set_stemmer(PublicInbox::Search::stemmer($self));
+	$qp->set_stemming_strategy(PublicInbox::Search::STEM_SOME());
+	my $set_max = $qp->can('set_max_wildcard_expansion') //
+		$qp->can('set_max_expansion'); # Xapian 1.5.0+
+	$set_max->($qp, 100);
+	# No range processor is registered for this index, so the former
+	# (unused) add_valuerangeprocessor/add_rangeprocessor lookup is gone.
+
+	# keys() instead of each(): %PROB_PREFIX is shared by every call and
+	# an each() iterator left half-way by an exception would make the
+	# next call skip prefixes.
+	for my $name (keys %PROB_PREFIX) {
+		$qp->add_prefix($name, $_) for split(/ /, $PROB_PREFIX{$name});
+	}
+	$qp->add_boolean_prefix('type', 'T');
+	$qp;
+}
+
+# retry_reopen callback: runs query $qr against {xdb} and returns the
+# resulting MSet.  Recognized %$opt keys:
+#   relevance  -1: docid order with boolean weighting; true: relevance
+#              first, then the MODIFIED value; false/unset: MODIFIED
+#              value first, then relevance
+#   asc        passes a false "reverse" flag to the value sorts
+#   offset     first result to return (default 0)
+#   limit      maximum number of results (default 200)
+sub misc_enquire_once {
+	my ($self, $qr, $opt) = @_;
+	my $enq = $PublicInbox::Search::X{Enquire}->new($self->{xdb});
+	$enq->set_query($qr);
+	my $reverse = !$opt->{asc};
+	my $rel = $opt->{relevance} // 0;
+	if ($rel == -1) { # ORDER BY docid/UID
+		$enq->set_docid_order($PublicInbox::Search::ENQ_ASCENDING);
+		my $bool_weight = $PublicInbox::Search::X{BoolWeight}->new;
+		$enq->set_weighting_scheme($bool_weight);
+	} elsif ($rel) {
+		$enq->set_sort_by_relevance_then_value($MODIFIED, $reverse);
+	} else {
+		$enq->set_sort_by_value_then_relevance($MODIFIED, $reverse);
+	}
+	my $offset = $opt->{offset} || 0;
+	my $limit = $opt->{limit} || 200;
+	return $enq->get_mset($offset, $limit);
+}
+
+# Parses query string $qs and returns the MSet from misc_enquire_once
+# (run through retry_reopen).  An empty $qs means 'type:inbox'.
+# $opt is optional; when given, its {relevance} is set to 1 in place
+# unless the key already exists.
+sub mset {
+	my ($self, $qs, $opt) = @_;
+	$opt = {} unless $opt;
+	reopen($self);
+	$self->{qp} //= mi_qp_new($self);
+	if ($qs eq '') {
+		$qs = 'type:inbox';
+	}
+	my $flags = $PublicInbox::Search::QP_FLAGS;
+	my $query = $self->{qp}->parse_query($qs, $flags);
+	exists $opt->{relevance} or $opt->{relevance} = 1;
+	return retry_reopen($self, \&misc_enquire_once, $query, $opt);
+}
+
+# retry_reopen callback for newsgroup_matches.  Pages through every
+# document matching $qr in docid order and returns a hash ref of
+# newsgroup => $ibx for each hit whose `Q' term (eidx_key) is a key of
+# %$by_newsgroup.  Documents without a `Q' term are reported via warn.
+sub ibx_matches_once {
+	my ($self, $qr, $by_newsgroup) = @_;
+	# double in case no newsgroups are configured:
+	my $limit = scalar(keys %$by_newsgroup) * 2;
+	my $opt = { limit => $limit, offset => 0, relevance => -1 };
+	my $ret = {}; # newsgroup => $ibx of matches
+	while (1) {
+		my $mset = misc_enquire_once($self, $qr, $opt);
+		for my $mi ($mset->items) {
+			my $doc = $mi->get_document;
+			my $end = $doc->termlist_end;
+			my $cur = $doc->termlist_begin;
+			$cur->skip_to('Q');
+			if ($cur != $end) {
+				my $ng = $cur->get_termname; # eidx_key
+				$ng =~ s/\AQ// or warn "BUG: no `Q': $ng";
+				if (my $ibx = $by_newsgroup->{$ng}) {
+					$ret->{$ng} = $ibx;
+				}
+			} else {
+				warn <<EOF;
+W: docid=${\$mi->get_docid} has no `Q' (eidx_key) term
+EOF
+			}
+		}
+		my $nr = $mset->size;
+		# A short page is the last one.  An empty page must end the
+		# loop as well: with no newsgroups configured $limit is 0
+		# (misc_enquire_once then falls back to 200 per page), so
+		# "$nr < $limit" alone could never become true and the same
+		# empty page would be requested forever.
+		return $ret if $nr == 0 || $nr < $limit;
+		$opt->{offset} += $nr;
+	}
+}
+
+# returns a newsgroup => PublicInbox::Inbox mapping
+# Restricts query string $qs to inbox documents and returns the
+# newsgroup => inbox hash ref produced by ibx_matches_once, looking
+# names up in $pi_cfg->{-by_newsgroup}.
+sub newsgroup_matches {
+	my ($self, $qs, $pi_cfg) = @_;
+	$self->{qp} //= mi_qp_new($self);
+	$qs .= ' type:inbox';
+	my $flags = $PublicInbox::Search::QP_FLAGS;
+	my $query = $self->{qp}->parse_query($qs, $flags);
+	my $by_newsgroup = $pi_cfg->{-by_newsgroup};
+	return retry_reopen($self, \&ibx_matches_once, $query, $by_newsgroup);
+}
+
+# retry_reopen callback for inbox_data.  Looks up the first document
+# with the term 'Q'.$ibx->eidx_key and returns its data string, or
+# undef if there is none.  On a hit, $ibx->{-modified} is overwritten
+# and $ibx->{uidvalidity} is filled in if not already defined.
+sub ibx_data_once {
+	my ($self, $ibx) = @_;
+	my $xdb = $self->{xdb};
+	my $term = 'Q'.$ibx->eidx_key; # may be {inboxdir}, so private
+	my $cur = $xdb->postlist_begin($term);
+	my $end = $xdb->postlist_end($term);
+	return undef unless $cur != $end;
+	my $doc = $xdb->get_document($cur->get_docid);
+	$ibx->{uidvalidity} //= int_val($doc, $UIDVALIDITY);
+	$ibx->{-modified} = int_val($doc, $MODIFIED);
+	return $doc->get_data;
+}
+
+# Returns the data string stored for $ibx (see ibx_data_once), or
+# undef when there is none; the lookup runs through retry_reopen.
+sub inbox_data {
+	my ($self, $ibx) = @_;
+	return retry_reopen($self, \&ibx_data_once, $ibx);
+}
+
+# Fills $cache->{$eidx_key} with {uidvalidity}, {-modified} and
+# {description} taken from $doc.  Does nothing when $doc has no `Q'
+# term.
+sub ibx_cache_load {
+	my ($doc, $cache) = @_;
+	my $end = $doc->termlist_end;
+	my $cur = $doc->termlist_begin;
+	$cur->skip_to('Q');
+	return if $cur == $end;
+	my $eidx_key = $cur->get_termname;
+	$eidx_key =~ s/\AQ// or return; # expired
+	my $ce = $cache->{$eidx_key} = {};
+	$ce->{uidvalidity} = int_val($doc, $UIDVALIDITY);
+	$ce->{-modified} = int_val($doc, $MODIFIED);
+
+	# extract description from manifest.js.gz epoch description
+	my $data = $json->decode($doc->get_data);
+	my $desc;
+	for my $ent (values %$data) {
+		$desc = $ent->{description} // next;
+		# stop at the first one carrying an " [epoch N]" suffix,
+		# with the suffix removed
+		last if $desc =~ s/ \[epoch [0-9]+\]\z//;
+	}
+	$ce->{description} = $desc;
+}
+
+# retry_reopen callback for nntpd_cache_load: queries documents that
+# are both type:newsgroup and type:inbox in docid order and returns a
+# hash ref of the cache entries built by ibx_cache_load.
+sub _nntpd_cache_load {
+	my ($self) = @_;
+	my %opt = (
+		limit => $self->{xdb}->get_doccount * 10,
+		relevance => -1,
+	);
+	my $mset = mset($self, 'type:newsgroup type:inbox', \%opt);
+	my %cache;
+	for my $item ($mset->items) {
+		ibx_cache_load($item->get_document, \%cache);
+	}
+	return \%cache;
+}
+
+# returns { newsgroup => $cache_entry } mapping, $cache_entry contains
+# anything which may trigger seeks at startup, currently: description,
+# -modified, and uidvalidity.
+# Public entry point: runs _nntpd_cache_load through retry_reopen and
+# returns its cache hash ref.
+sub nntpd_cache_load {
+	my ($self) = @_;
+	return retry_reopen($self, \&_nntpd_cache_load);
+}
+
+no warnings 'once'; # the glob below is assigned only once
+*reopen = \&PublicInbox::Search::reopen; # alias so reopen($self) in mset() calls PublicInbox::Search::reopen
+
+1;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# read-only utilities for Email::MIME
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Various date/time-related functions
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# bidirectional Message-ID <-> Article Number mapping for the NNTP
create_tables($dbh);
$self->created_at(time) unless $self->created_at;
- my $max = $self->max // 0;
- $self->num_highwater($max);
+ $self->num_highwater(max($self));
$dbh->commit;
}
$self;
my $sth = $_[0]->{dbh}->prepare_cached('SELECT MAX(num) FROM msgmap',
undef, 1);
$sth->execute;
- $sth->fetchrow_array;
+ $sth->fetchrow_array // 0;
}
sub minmax {
my $sth = $_[0]->{dbh}->prepare_cached('SELECT MIN(num) FROM msgmap',
undef, 1);
$sth->execute;
- ($sth->fetchrow_array, max($_[0]));
+ ($sth->fetchrow_array // 0, max($_[0]));
}
sub mid_delete {
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Pure-perl class for Linux non-Inline::C users to disable COW for btrfs
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Each instance of this represents a NNTP client socket
# fields:
# nntpd: PublicInbox::NNTPD ref
# article: per-session current article number
-# ng: PublicInbox::Inbox ref
+# ibx: PublicInbox::Inbox ref
# long_cb: long_response private data
package PublicInbox::NNTP;
use strict;
use Digest::SHA qw(sha1_hex);
use Time::Local qw(timegm timelocal);
use PublicInbox::GitAsyncCat;
+use PublicInbox::Address;
+
use constant {
LINE_MAX => 512, # RFC 977 section 2.3
r501 => '501 command syntax error',
my $ONE_MSGID = qr/\A$MID_EXTRACT\z/;
my @OVERVIEW = qw(Subject From Date Message-ID References);
my $OVERVIEW_FMT = join(":\r\n", @OVERVIEW, qw(Bytes Lines), '') .
- "Xref:full\r\n";
+ "Xref:full\r\n.";
my $LIST_HEADERS = join("\r\n", @OVERVIEW,
- qw(:bytes :lines Xref To Cc)) . "\r\n";
+ qw(:bytes :lines Xref To Cc)) . "\r\n.";
my $CAPABILITIES = <<"";
101 Capability list:\r
VERSION 2\r
err($self, 'error from: %s (%s)', $l, $err);
$res = '503 program fault - command not performed';
}
- return 0 unless defined $res;
- res($self, $res);
+ defined($res) ? res($self, $res) : 0;
}
# The keyword argument is not used (rfc3977 5.2.2)
sub cmd_mode ($$) {
my ($self, $arg) = @_;
- $arg = uc $arg;
- return r501 unless $arg eq 'READER';
- '201 Posting prohibited';
+ uc($arg) eq 'READER' ? '201 Posting prohibited' : r501;
}
sub cmd_slave ($) { '202 slave status noted' }
my ($self, $wildmat) = @_;
more($self, '282 list of groups and descriptions follows');
list_newsgroups($self, $wildmat);
- '.'
}
-sub list_overview_fmt ($) {
- my ($self) = @_;
- $self->msg_more($OVERVIEW_FMT);
-}
+sub list_overview_fmt ($) { $OVERVIEW_FMT }
-sub list_headers ($;$) {
- my ($self) = @_;
- $self->msg_more($LIST_HEADERS);
+sub list_headers ($;$) { $LIST_HEADERS }
+
+sub list_active_i { # "LIST ACTIVE" and also just "LIST" (no args)
+ my ($self, $groupnames) = @_;
+ my @window = splice(@$groupnames, 0, 100) or return 0;
+ my $ibx;
+ my $groups = $self->{nntpd}->{pi_cfg}->{-by_newsgroup};
+ for my $ngname (@window) {
+ $ibx = $groups->{$ngname} and group_line($self, $ibx);
+ }
+ scalar(@$groupnames); # continue if there's more
}
-sub list_active ($;$) {
+sub list_active ($;$) { # called by cmd_list
my ($self, $wildmat) = @_;
wildmat2re($wildmat);
- foreach my $ng (@{$self->{nntpd}->{grouplist}}) {
- $ng->{newsgroup} =~ $wildmat or next;
- group_line($self, $ng);
+ long_response($self, \&list_active_i, [
+ grep(/$wildmat/, @{$self->{nntpd}->{groupnames}}) ]);
+}
+
+sub list_active_times_i {
+ my ($self, $groupnames) = @_;
+ my @window = splice(@$groupnames, 0, 100) or return 0;
+ my $groups = $self->{nntpd}->{pi_cfg}->{-by_newsgroup};
+ for my $ngname (@window) {
+ my $ibx = $groups->{$ngname} or next;
+ my $c = eval { $ibx->uidvalidity } // time;
+ more($self, "$ngname $c <$ibx->{-primary_address}>");
}
+ scalar(@$groupnames); # continue if there's more
}
-sub list_active_times ($;$) {
+sub list_active_times ($;$) { # called by cmd_list
my ($self, $wildmat) = @_;
wildmat2re($wildmat);
- foreach my $ng (@{$self->{nntpd}->{grouplist}}) {
- $ng->{newsgroup} =~ $wildmat or next;
- my $c = eval { $ng->mm->created_at } || time;
- more($self, "$ng->{newsgroup} $c $ng->{-primary_address}");
+ long_response($self, \&list_active_times_i, [
+ grep(/$wildmat/, @{$self->{nntpd}->{groupnames}}) ]);
+}
+
+sub list_newsgroups_i {
+ my ($self, $groupnames) = @_;
+ my @window = splice(@$groupnames, 0, 100) or return 0;
+ my $groups = $self->{nntpd}->{pi_cfg}->{-by_newsgroup};
+ my $ibx;
+ for my $ngname (@window) {
+ $ibx = $groups->{$ngname} and
+ more($self, "$ngname ".$ibx->description);
}
+ scalar(@$groupnames); # continue if there's more
}
-sub list_newsgroups ($;$) {
+sub list_newsgroups ($;$) { # called by cmd_list
my ($self, $wildmat) = @_;
wildmat2re($wildmat);
- foreach my $ng (@{$self->{nntpd}->{grouplist}}) {
- $ng->{newsgroup} =~ $wildmat or next;
- my $d = $ng->description;
- more($self, "$ng->{newsgroup} $d");
- }
+ long_response($self, \&list_newsgroups_i, [
+ grep(/$wildmat/, @{$self->{nntpd}->{groupnames}}) ]);
}
# LIST SUBSCRIPTIONS, DISTRIB.PATS are not supported
if (scalar @args) {
my $arg = shift @args;
$arg =~ tr/A-Z./a-z_/;
+ my $ret = $arg eq 'active';
$arg = "list_$arg";
$arg = $self->can($arg);
return r501 unless $arg && args_ok($arg, scalar @args);
$arg->($self, @args);
} else {
more($self, '215 list of newsgroups follows');
- foreach my $ng (@{$self->{nntpd}->{grouplist}}) {
- group_line($self, $ng);
- }
+ long_response($self, \&list_active_i, [ # copy array
+ @{$self->{nntpd}->{groupnames}} ]);
}
- '.'
}
sub listgroup_range_i {
my ($self, $beg, $end) = @_;
- my $r = $self->{ng}->mm->msg_range($beg, $end, 'num');
+ my $r = $self->{ibx}->mm->msg_range($beg, $end, 'num');
scalar(@$r) or return;
- more($self, join("\r\n", map { $_->[0] } @$r));
+ $self->msg_more(join('', map { "$_->[0]\r\n" } @$r));
1;
}
sub listgroup_all_i {
my ($self, $num) = @_;
- my $ary = $self->{ng}->mm->ids_after($num);
+ my $ary = $self->{ibx}->mm->ids_after($num);
scalar(@$ary) or return;
more($self, join("\r\n", @$ary));
1;
return $res if ($res !~ /\A211 /);
more($self, $res);
}
- $self->{ng} or return '412 no newsgroup selected';
+ $self->{ibx} or return '412 no newsgroup selected';
if (defined $range) {
my $r = get_range($self, $range);
return $r unless ref $r;
}
sub group_line ($$) {
- my ($self, $ng) = @_;
- my ($min, $max) = $ng->mm->minmax;
- more($self, "$ng->{newsgroup} $max $min n") if defined $min && defined $max;
+ my ($self, $ibx) = @_;
+ my ($min, $max) = $ibx->mm->minmax;
+ more($self, "$ibx->{newsgroup} $max $min n");
+}
+
+sub newgroups_i {
+ my ($self, $ts, $i, $groupnames) = @_;
+ my $end = $$i + 100;
+ my $groups = $self->{nntpd}->{pi_cfg}->{-by_newsgroup};
+ while ($$i < $end) {
+ my $ngname = $groupnames->[$$i++] // return;
+ my $ibx = $groups->{$ngname} or next; # expired on reload
+ next unless (eval { $ibx->uidvalidity } // 0) > $ts;
+ group_line($self, $ibx);
+ }
+ 1;
}
sub cmd_newgroups ($$$;$$) {
# TODO dists
more($self, '231 list of new newsgroups follows');
- foreach my $ng (@{$self->{nntpd}->{grouplist}}) {
- my $c = eval { $ng->mm->created_at } || 0;
- next unless $c > $ts;
- group_line($self, $ng);
- }
- '.'
+ long_response($self, \&newgroups_i, $ts, \(my $i = 0),
+ $self->{nntpd}->{groupnames});
}
sub wildmat2re (;$) {
}
sub newnews_i {
- my ($self, $overs, $ts, $prev) = @_;
- my $over = $overs->[0];
- my $msgs = $over->query_ts($ts, $$prev);
- if (scalar @$msgs) {
- more($self, '<' .
- join(">\r\n<", map { $_->{mid} } @$msgs ).
- '>');
- $$prev = $msgs->[-1]->{num};
- } else {
- shift @$overs;
- if (@$overs) { # continue onto next newsgroup
- $$prev = 0;
- return 1;
- } else { # break out of the long response.
- return;
+ my ($self, $names, $ts, $prev) = @_;
+ my $ngname = $names->[0];
+ if (my $ibx = $self->{nntpd}->{pi_cfg}->{-by_newsgroup}->{$ngname}) {
+ if (my $over = $ibx->over) {
+ my $msgs = $over->query_ts($ts, $$prev);
+ if (scalar @$msgs) {
+ $self->msg_more(join('', map {
+ "<$_->{mid}>\r\n";
+ } @$msgs));
+ $$prev = $msgs->[-1]->{num};
+ return 1; # continue on current group
+ }
}
}
+ shift @$names;
+ if (@$names) { # continue onto next newsgroup
+ $$prev = 0;
+ 1;
+ } else { # all done, break out of the long_response
+ undef;
+ }
}
sub cmd_newnews ($$$$;$$) {
my ($keep, $skip) = split('!', $newsgroups, 2);
ngpat2re($keep);
ngpat2re($skip);
- my @overs;
- foreach my $ng (@{$self->{nntpd}->{grouplist}}) {
- $ng->{newsgroup} =~ $keep or next;
- $ng->{newsgroup} =~ $skip and next;
- my $over = $ng->over or next;
- push @overs, $over;
- };
- return '.' unless @overs;
-
+ my @names = grep(!/$skip/, grep(/$keep/,
+ @{$self->{nntpd}->{groupnames}}));
+ return '.' unless scalar(@names);
my $prev = 0;
- long_response($self, \&newnews_i, \@overs, $ts, \$prev);
+ long_response($self, \&newnews_i, \@names, $ts, \$prev);
}
sub cmd_group ($$) {
my ($self, $group) = @_;
- my $no_such = '411 no such news group';
my $nntpd = $self->{nntpd};
- my $ng = $nntpd->{groups}->{$group} or return $no_such;
+ my $ibx = $nntpd->{pi_cfg}->{-by_newsgroup}->{$group} or
+ return '411 no such news group';
$nntpd->idler_start;
- $self->{ng} = $ng;
- my ($min, $max) = $ng->mm->minmax;
- $min ||= 0;
- $max ||= 0;
+ $self->{ibx} = $ibx;
+ my ($min, $max) = $ibx->mm->minmax;
$self->{article} = $min;
my $est_size = $max - $min;
"211 $est_size $min $max $group";
sub article_adj ($$) {
my ($self, $off) = @_;
- my $ng = $self->{ng} or return '412 no newsgroup selected';
+ my $ibx = $self->{ibx} or return '412 no newsgroup selected';
my $n = $self->{article};
defined $n or return '420 no current article has been selected';
$n += $off;
- my $mid = $ng->mm->mid_for($n);
+ my $mid = $ibx->mm->mid_for($n);
unless ($mid) {
$n = $off > 0 ? 'next' : 'previous';
return "421 no $n article in this group";
# the single-point-of-failure a single server provides.
sub cmd_post ($) {
my ($self) = @_;
- my $ng = $self->{ng};
- $ng ? "440 mailto:$ng->{-primary_address} to post"
+ my $ibx = $self->{ibx};
+ $ibx ? "440 mailto:$ibx->{-primary_address} to post"
: '440 posting not allowed'
}
$hdr->header_set($k, @v, $v);
}
-sub xref ($$$$) {
- my ($self, $ng, $n, $mid) = @_;
- my $ret = $self->{nntpd}->{servername} . " $ng->{newsgroup}:$n";
+# Add cross-references for inboxes addressed in the To: and Cc: of $smsg.
+#   $xref   - hashref of newsgroup name => article number, updated in place
+#   $pi_cfg - config object, its {-by_addr} is keyed by lowercased address
+#   $smsg   - message providing {mid} and (optionally) {to} and {cc}
+# Newsgroups which already have a defined article number are left alone.
+sub xref_by_tc ($$$) {
+	my ($xref, $pi_cfg, $smsg) = @_;
+	my $by_addr = $pi_cfg->{-by_addr};
+	my $mid = $smsg->{mid};
+	for my $f (qw(to cc)) {
+		my @ibxs = map {
+			$by_addr->{lc($_)} // ()
+		} (PublicInbox::Address::emails($smsg->{$f} // ''));
+		for my $ibx (@ibxs) {
+			my $ngname = $ibx->{newsgroup} // next;
+			next if defined $xref->{$ngname};
+			# A message can be addressed to an inbox without being
+			# stored in it (or the lookup may fail).  Never leave
+			# an undef entry behind: anything iterating over the
+			# keys of %$xref would emit an empty article number.
+			my $num = eval { $ibx->mm->num_for($mid) } // next;
+			$xref->{$ngname} = $num;
+		}
+	}
+}
- # num_for is pretty cheap and sometimes we'll lookup the existence
- # of an article without getting even the OVER info. In other words,
- # I'm not sure if its worth optimizing by scanning To:/Cc: and
- # PublicInbox::ExtMsg on the PSGI end is just as expensive
- foreach my $other (@{$self->{nntpd}->{grouplist}}) {
- next if $ng eq $other;
- my $num = eval { $other->mm->num_for($mid) } or next;
- $ret .= " $other->{newsgroup}:$num";
+sub xref ($$$) {
+ my ($self, $cur_ibx, $smsg) = @_;
+ my $nntpd = $self->{nntpd};
+ my $cur_ng = $cur_ibx->{newsgroup};
+ my $xref;
+ if (my $ALL = $nntpd->{pi_cfg}->ALL) {
+ $xref = $ALL->nntp_xref_for($cur_ibx, $smsg);
+ xref_by_tc($xref, $nntpd->{pi_cfg}, $smsg);
+ } else { # slow path
+ $xref = { $cur_ng => $smsg->{num} };
+ my $mid = $smsg->{mid};
+ for my $ibx (values %{$nntpd->{pi_cfg}->{-by_newsgroup}}) {
+ next if defined($xref->{$ibx->{newsgroup}});
+ my $num = eval { $ibx->mm->num_for($mid) } // next;
+ $xref->{$ibx->{newsgroup}} = $num;
+ }
}
+ my $ret = "$nntpd->{servername} $cur_ng:".delete($xref->{$cur_ng});
+ $ret .= " $_:$xref->{$_}" for (sort keys %$xref);
$ret;
}
# clobber some existing headers
my $ibx = $smsg->{-ibx};
- my $xref = xref($smsg->{nntp}, $ibx, $smsg->{num}, $mid);
+ my $xref = xref($smsg->{nntp}, $ibx, $smsg);
$hdr->header_set('Xref', $xref);
# RFC 5536 3.1.4
# *something* here is required for leafnode, try to follow
# RFC 5536 3.1.5...
$hdr->header_set('Path', $server_name . '!not-for-mail');
-
- header_append($hdr, 'List-Post', "<mailto:$ibx->{-primary_address}>");
- if (my $url = $ibx->base_url) {
- $mid = mid_escape($mid);
- header_append($hdr, 'Archived-At', "<$url$mid/>");
- header_append($hdr, 'List-Archive', "<$url>");
- }
}
sub art_lookup ($$$) {
my ($self, $art, $code) = @_;
- my $ng = $self->{ng};
- my ($n, $mid);
+ my ($ibx, $n);
my $err;
if (defined $art) {
if ($art =~ /\A[0-9]+\z/) {
$err = '423 no such article number in this group';
$n = int($art);
- goto find_mid;
+ goto find_ibx;
} elsif ($art =~ $ONE_MSGID) {
- $mid = $1;
- $err = r430;
- $n = $ng->mm->num_for($mid) if $ng;
- goto found if defined $n;
- foreach my $g (values %{$self->{nntpd}->{groups}}) {
- $n = $g->mm->num_for($mid);
- if (defined $n) {
- $ng = $g;
- goto found;
- }
- }
- return $err;
+ ($ibx, $n) = mid_lookup($self, $1);
+ goto found if $ibx;
+ return r430;
} else {
return r501;
}
} else {
$err = '420 no current article has been selected';
- $n = $self->{article};
- defined $n or return $err;
-find_mid:
- $ng or return '412 no newsgroup has been selected';
- $mid = $ng->mm->mid_for($n);
- defined $mid or return $err;
+ $n = $self->{article} // return $err;
+find_ibx:
+ $ibx = $self->{ibx} or
+ return '412 no newsgroup has been selected';
}
found:
- my $smsg = $ng->over->get_art($n) or return $err;
- $smsg->{-ibx} = $ng;
+ my $smsg = $ibx->over->get_art($n) or return $err;
+ $smsg->{-ibx} = $ibx;
if ($code == 223) { # STAT
set_art($self, $n);
"223 $n <$smsg->{mid}> article retrieved - " .
$smsg->{nntp_code} = $code;
set_art($self, $art);
# this dereferences to `undef'
- ${git_async_cat($ng->git, $smsg->{blob}, \&blob_cb, $smsg)};
+ ${git_async_cat($ibx->git, $smsg->{blob}, \&blob_cb, $smsg)};
}
}
sub get_range ($$) {
my ($self, $range) = @_;
- my $ng = $self->{ng} or return '412 no news group has been selected';
+ my $ibx = $self->{ibx} or return '412 no news group has been selected';
defined $range or return '420 No article(s) selected';
my ($beg, $end);
- my ($min, $max) = $ng->mm->minmax;
+ my ($min, $max) = $ibx->mm->minmax;
if ($range =~ /\A([0-9]+)\z/) {
$beg = $end = $1;
} elsif ($range =~ /\A([0-9]+)-\z/) {
sub hdr_msgid_range_i {
my ($self, $beg, $end) = @_;
- my $r = $self->{ng}->mm->msg_range($beg, $end);
+ my $r = $self->{ibx}->mm->msg_range($beg, $end);
@$r or return;
- more($self, join("\r\n", map { "$_->[0] <$_->[1]>" } @$r));
+ $self->msg_more(join('', map { "$_->[0] <$_->[1]>\r\n" } @$r));
1;
}
my ($self, $xhdr, $range) = @_;
if (defined $range && $range =~ $ONE_MSGID) {
- my ($ng, $n) = mid_lookup($self, $1);
+ my ($ibx, $n) = mid_lookup($self, $1);
return r430 unless $n;
- hdr_mid_response($self, $xhdr, $ng, $n, $range, $range);
+ hdr_mid_response($self, $xhdr, $ibx, $n, $range, $range);
} else { # numeric range
$range = $self->{article} unless defined $range;
my $r = get_range($self, $range);
sub mid_lookup ($$) {
my ($self, $mid) = @_;
- my $self_ng = $self->{ng};
- if ($self_ng) {
- my $n = $self_ng->mm->num_for($mid);
- return ($self_ng, $n) if defined $n;
+ my $cur_ibx = $self->{ibx};
+ if ($cur_ibx) {
+ my $n = $cur_ibx->mm->num_for($mid);
+ return ($cur_ibx, $n) if defined $n;
}
- foreach my $ng (values %{$self->{nntpd}->{groups}}) {
- next if defined $self_ng && $ng eq $self_ng;
- my $n = $ng->mm->num_for($mid);
- return ($ng, $n) if defined $n;
+ my $pi_cfg = $self->{nntpd}->{pi_cfg};
+ if (my $ALL = $pi_cfg->ALL) {
+ my ($id, $prev);
+ while (my $smsg = $ALL->over->next_by_mid($mid, \$id, \$prev)) {
+ my $xr3 = $ALL->over->get_xref3($smsg->{num});
+ if (my @x = grep(/:$smsg->{blob}\z/, @$xr3)) {
+ my ($ngname, $xnum) = split(/:/, $x[0]);
+ my $ibx = $pi_cfg->{-by_newsgroup}->{$ngname};
+ return ($ibx, $xnum) if $ibx;
+ # fall through to trying all xref3s
+ } else {
+ warn <<EOF;
+W: xref3 missing for <$mid> ($smsg->{blob}) in $ALL->{topdir}, -extindex bug?
+EOF
+ }
+ # try all xref3s
+ for my $x (@$xr3) {
+ my ($ngname, $xnum) = split(/:/, $x);
+ my $ibx = $pi_cfg->{-by_newsgroup}->{$ngname};
+ return ($ibx, $xnum) if $ibx;
+ warn "W: `$ngname' does not exist for #$xnum\n";
+ }
+ }
+ # no warning here, $mid is just invalid
+ } else { # slow path for non-ALL users
+ for my $ibx (values %{$pi_cfg->{-by_newsgroup}}) {
+ next if defined $cur_ibx && $ibx eq $cur_ibx;
+ my $n = $ibx->mm->num_for($mid);
+ return ($ibx, $n) if defined $n;
+ }
}
(undef, undef);
}
sub xref_range_i {
my ($self, $beg, $end) = @_;
- my $ng = $self->{ng};
- my $r = $ng->mm->msg_range($beg, $end);
- @$r or return;
- more($self, join("\r\n", map {
- my $num = $_->[0];
- "$num ".xref($self, $ng, $num, $_->[1]);
- } @$r));
+ my $ibx = $self->{ibx};
+ my $msgs = $ibx->over->query_xover($$beg, $end);
+ scalar(@$msgs) or return;
+ $$beg = $msgs->[-1]->{num} + 1;
+ $self->msg_more(join('', map {
+ "$_->{num} ".xref($self, $ibx, $_) . "\r\n";
+ } @$msgs));
1;
}
if (defined $range && $range =~ $ONE_MSGID) {
my $mid = $1;
- my ($ng, $n) = mid_lookup($self, $mid);
+ my ($ibx, $n) = mid_lookup($self, $mid);
return r430 unless $n;
- hdr_mid_response($self, $xhdr, $ng, $n, $range,
- xref($self, $ng, $n, $mid));
+ my $smsg = $ibx->over->get_art($n) or return;
+ hdr_mid_response($self, $xhdr, $ibx, $n, $range,
+ xref($self, $ibx, $smsg));
} else { # numeric range
$range = $self->{article} unless defined $range;
my $r = get_range($self, $range);
sub smsg_range_i {
my ($self, $beg, $end, $field) = @_;
- my $over = $self->{ng}->over;
+ my $over = $self->{ibx}->over;
my $msgs = $over->query_xover($$beg, $end);
scalar(@$msgs) or return;
my $tmp = '';
sub hdr_smsg ($$$$) {
my ($self, $xhdr, $field, $range) = @_;
if (defined $range && $range =~ $ONE_MSGID) {
- my ($ng, $n) = mid_lookup($self, $1);
+ my ($ibx, $n) = mid_lookup($self, $1);
return r430 unless defined $n;
- my $v = over_header_for($ng->over, $n, $field);
- hdr_mid_response($self, $xhdr, $ng, $n, $range, $v);
+ my $v = over_header_for($ibx->over, $n, $field);
+ hdr_mid_response($self, $xhdr, $ibx, $n, $range, $v);
} else { # numeric range
$range = $self->{article} unless defined $range;
my $r = get_range($self, $range);
}
sub hdr_mid_prefix ($$$$$) {
- my ($self, $xhdr, $ng, $n, $mid) = @_;
+ my ($self, $xhdr, $ibx, $n, $mid) = @_;
return $mid if $xhdr;
# HDR for RFC 3977 users
- if (my $self_ng = $self->{ng}) {
- ($self_ng eq $ng) ? $n : '0';
+ if (my $cur_ibx = $self->{ibx}) {
+ ($cur_ibx eq $ibx) ? $n : '0';
} else {
'0';
}
}
sub hdr_mid_response ($$$$$$) {
- my ($self, $xhdr, $ng, $n, $mid, $v) = @_;
+ my ($self, $xhdr, $ibx, $n, $mid, $v) = @_;
my $res = '';
if ($xhdr) {
$res .= r221 . "\r\n";
$res .= "$mid $v\r\n";
} else {
$res .= r225 . "\r\n";
- my $pfx = hdr_mid_prefix($self, $xhdr, $ng, $n, $mid);
+ my $pfx = hdr_mid_prefix($self, $xhdr, $ibx, $n, $mid);
$res .= "$pfx $v\r\n";
}
res($self, $res .= '.');
sub xrover_i {
my ($self, $beg, $end) = @_;
- my $h = over_header_for($self->{ng}->over, $$beg, 'references');
+ my $h = over_header_for($self->{ibx}->over, $$beg, 'references');
more($self, "$$beg $h") if defined($h);
$$beg++ < $end;
}
sub cmd_xrover ($;$) {
my ($self, $range) = @_;
- my $ng = $self->{ng} or return '412 no newsgroup selected';
+ my $ibx = $self->{ibx} or return '412 no newsgroup selected';
(defined $range && $range =~ /[<>]/) and
return '420 No article(s) selected'; # no message IDs
long_response($self, \&xrover_i, @$r);
}
-sub over_line ($$$$) {
- my ($self, $ng, $num, $smsg) = @_;
+sub over_line ($$$) {
+ my ($self, $ibx, $smsg) = @_;
# n.b. field access and procedural calls can be
# 10%-15% faster than OO method calls:
- my $s = join("\t", $num,
+ my $s = join("\t", $smsg->{num},
$smsg->{subject},
$smsg->{from},
PublicInbox::Smsg::date($smsg),
$smsg->{references},
$smsg->{bytes},
$smsg->{lines},
- "Xref: " . xref($self, $ng, $num, $smsg->{mid}));
+ "Xref: " . xref($self, $ibx, $smsg));
utf8::encode($s);
- $s
+ $s .= "\r\n";
}
sub cmd_over ($;$) {
my ($self, $range) = @_;
if ($range && $range =~ $ONE_MSGID) {
- my ($ng, $n) = mid_lookup($self, $1);
+ my ($ibx, $n) = mid_lookup($self, $1);
defined $n or return r430;
- my $smsg = $ng->over->get_art($n) or return r430;
+ my $smsg = $ibx->over->get_art($n) or return r430;
more($self, '224 Overview information follows (multi-line)');
# Only set article number column if it's the current group
- my $self_ng = $self->{ng};
- $n = 0 if (!$self_ng || $self_ng ne $ng);
- more($self, over_line($self, $ng, $n, $smsg));
+ # (RFC 3977 8.3.2)
+ my $cur_ibx = $self->{ibx};
+ if (!$cur_ibx || $cur_ibx ne $ibx) {
+ # set {-orig_num} for nntp_xref_for
+ $smsg->{-orig_num} = $smsg->{num};
+ $smsg->{num} = 0;
+ }
+ $self->msg_more(over_line($self, $ibx, $smsg));
'.';
} else {
cmd_xover($self, $range);
sub xover_i {
my ($self, $beg, $end) = @_;
- my $ng = $self->{ng};
- my $msgs = $ng->over->query_xover($$beg, $end);
+ my $ibx = $self->{ibx};
+ my $msgs = $ibx->over->query_xover($$beg, $end);
my $nr = scalar @$msgs or return;
# OVERVIEW.FMT
- more($self, join("\r\n", map {
- over_line($self, $ng, $_->{num}, $_);
+ $self->msg_more(join('', map {
+ over_line($self, $ibx, $_);
} @$msgs));
$$beg = $msgs->[-1]->{num} + 1;
}
return r501 unless $mid =~ $ONE_MSGID;
$mid = $1;
my @paths;
- foreach my $ng (values %{$self->{nntpd}->{groups}}) {
- my $n = $ng->mm->num_for($mid);
- push @paths, "$ng->{newsgroup}/$n" if defined $n;
+ my $pi_cfg = $self->{nntpd}->{pi_cfg};
+ my $groups = $pi_cfg->{-by_newsgroup};
+ if (my $ALL = $pi_cfg->ALL) {
+ my ($id, $prev, %seen);
+ while (my $smsg = $ALL->over->next_by_mid($mid, \$id, \$prev)) {
+ my $xr3 = $ALL->over->get_xref3($smsg->{num});
+ for my $x (@$xr3) {
+ my ($ngname, $n) = split(/:/, $x);
+ $x = "$ngname/$n";
+ if ($groups->{$ngname} && !$seen{$x}++) {
+ push(@paths, $x);
+ }
+ }
+ }
+ } else { # slow path, no point in using long_response
+ for my $ibx (values %$groups) {
+ my $n = $ibx->mm->num_for($mid) // next;
+ push @paths, "$ibx->{newsgroup}/$n";
+ }
}
return '430 no such article on server' unless @paths;
- '223 '.join(' ', @paths);
+ '223 '.join(' ', sort(@paths));
}
sub res ($$) { do_write($_[0], $_[1] . "\r\n") }
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# represents an NNTPD (currently a singleton),
sub new {
my ($class) = @_;
- my $pi_config = PublicInbox::Config->new;
- my $name = $pi_config->{'publicinbox.nntpserver'};
+ my $pi_cfg = PublicInbox::Config->new;
+ my $name = $pi_cfg->{'publicinbox.nntpserver'};
if (!defined($name) or $name eq '') {
$name = hostname;
} elsif (ref($name) eq 'ARRAY') {
groups => {},
err => \*STDERR,
out => \*STDOUT,
- grouplist => [],
- pi_config => $pi_config,
+ pi_cfg => $pi_cfg,
servername => $name,
greet => \"201 $name ready - post via email\r\n",
# accept_tls => { SSL_server => 1, ..., SSL_reuse_ctx => ... }
sub refresh_groups {
my ($self, $sig) = @_;
- my $pi_config = $sig ? PublicInbox::Config->new : $self->{pi_config};
- my $new = {};
- my @list;
- $pi_config->each_inbox(sub {
- my ($ng) = @_;
- my $ngname = $ng->{newsgroup} or return;
- if (ref $ngname) {
- warn 'multiple newsgroups not supported: '.
- join(', ', @$ngname). "\n";
- # Newsgroup name needs to be compatible with RFC 3977
- # wildmat-exact and RFC 3501 (IMAP) ATOM-CHAR.
- # Leave out a few chars likely to cause problems or conflicts:
- # '|', '<', '>', ';', '#', '$', '&',
- } elsif ($ngname =~ m![^A-Za-z0-9/_\.\-\~\@\+\=:]!) {
- warn "newsgroup name invalid: `$ngname'\n";
- } elsif ($ng->nntp_usable) {
- # Only valid if msgmap and search works
- $new->{$ngname} = $ng;
- push @list, $ng;
-
+ my $pi_cfg = $sig ? PublicInbox::Config->new : $self->{pi_cfg};
+ my $groups = $pi_cfg->{-by_newsgroup}; # filled during each_inbox
+ my $cache = eval { $pi_cfg->ALL->misc->nntpd_cache_load } // {};
+ $pi_cfg->each_inbox(sub {
+ my ($ibx) = @_;
+ my $ngname = $ibx->{newsgroup} // return;
+ my $ce = $cache->{$ngname};
+ if (($ce and (%$ibx = (%$ibx, %$ce))) || $ibx->nntp_usable) {
+ # only valid if msgmap and over works
# preload to avoid fragmentation:
- $ng->description;
- $ng->base_url;
+ $ibx->description;
+ $ibx->base_url;
+ } else {
+ delete $groups->{$ngname};
+ delete $ibx->{newsgroup};
+ # Note: don't be tempted to delete more for memory
+ # savings just yet: NNTP, IMAP, and WWW may all
+ # run in the same process someday.
}
});
- @list = sort { $a->{newsgroup} cmp $b->{newsgroup} } @list;
- $self->{grouplist} = \@list;
- $self->{pi_config} = $pi_config;
+ $self->{groupnames} = [ sort(keys %$groups) ];
# this will destroy old groups that got deleted
- %{$self->{groups}} = %$new;
+ $self->{pi_cfg} = $pi_cfg;
}
sub idler_start {
- $_[0]->{idler} //= PublicInbox::InboxIdle->new($_[0]->{pi_config});
+ $_[0]->{idler} //= PublicInbox::InboxIdle->new($_[0]->{pi_cfg});
}
1;
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# RFC 8054 NNTP COMPRESS DEFLATE implementation
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Plack app redirector for mapping /$NEWSGROUP requests to
use PublicInbox::Hval qw(prurl);
sub new {
- my ($class, $pi_config) = @_;
- $pi_config ||= PublicInbox::Config->new;
- bless { pi_config => $pi_config }, $class;
+ my ($class, $pi_cfg) = @_;
+ bless { pi_cfg => $pi_cfg // PublicInbox::Config->new }, $class;
}
sub redirect ($$) {
# /inbox.foo.bar/123456
my (undef, @parts) = split(m!/!, $env->{PATH_INFO});
my ($ng, $article) = @parts;
- my $pi_config = $self->{pi_config};
- if (my $ibx = $pi_config->lookup_newsgroup($ng)) {
+ my $pi_cfg = $self->{pi_cfg};
+ if (my $ibx = $pi_cfg->lookup_newsgroup($ng)) {
my $url = prurl($env, $ibx->{url});
my $code = 301;
if (defined $article && $article =~ /\A[0-9]+\z/) {
return redirect($code, $url);
}
- my $res;
my @try = (join('/', @parts));
# trailing slash is in the rest of our WWW, so maybe some users
pop @parts;
push @try, join('/', @parts);
}
-
- foreach my $mid (@try) {
- my $arg = [ $mid ];
- $pi_config->each_inbox(\&try_inbox, $arg);
- defined($res = $arg->[1]) and last;
+ my $ALL = $pi_cfg->ALL;
+ if (my $over = $ALL ? $ALL->over : undef) {
+ my $by_eidx_key = $pi_cfg->{-by_eidx_key};
+ for my $mid (@try) {
+ my ($id, $prev);
+ while (my $x = $over->next_by_mid($mid, \$id, \$prev)) {
+ my $xr3 = $over->get_xref3($x->{num});
+ for (@$xr3) {
+ s/:[0-9]+:$x->{blob}\z// or next;
+ my $ibx = $by_eidx_key->{$_} // next;
+ my $url = $ibx->base_url or next;
+ $url .= mid_escape($mid) . '/';
+ return redirect(302, $url);
+ }
+ }
+ }
+ } else { # slow path, scan every inbox
+ for my $mid (@try) {
+ my $arg = [ $mid ]; # [1] => result
+ $pi_cfg->each_inbox(\&try_inbox, $arg);
+ return $arg->[1] if $arg->[1];
+ }
}
- $res || [ 404, [qw(Content-Type text/plain)], ["404 Not Found\n"] ];
+ [ 404, [qw(Content-Type text/plain)], ["404 Not Found\n"] ];
}
1;
--- /dev/null
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+package PublicInbox::OnDestroy;
+use strict;
+
+# Scope guard: the callback given to ->new runs when the object is destroyed.
+# Usage: PublicInbox::OnDestroy->new($cb, @args)
+#    or: PublicInbox::OnDestroy->new($pid, $cb, @args)
+# In the second form, the callback only runs if the destroying process
+# has the given PID.
+# The class argument is discarded, objects are always blessed into
+# this package.
+sub new {
+	shift; # ($class, $cb, @args)
+	bless [ @_ ], __PACKAGE__;
+}
+
+# Runs $cb->(@args) unless a leading PID does not match the current process
+sub DESTROY {
+	my ($cb, @args) = @{$_[0]};
+	# a defined, non-reference first element is the owner PID;
+	# an undefined one means there is nothing to run
+	if (defined($cb) && !ref($cb)) {
+		my $pid = $cb;
+		return if $pid != $$;
+		$cb = shift @args;
+	}
+	$cb->(@args) if $cb;
+}
+
+1;
--- /dev/null
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# bytecode dispatch pipe, reads a byte, runs a sub
+# byte => [ sub, @operands ]
+package PublicInbox::OpPipe;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::DS);
+use PublicInbox::Syscall qw(EPOLLIN);
+
+# $rd      - handle which op bytes are read from
+# $op_map  - { byte => [ sub, @operands ] }, the '' key is looked up on EOF
+# $in_loop - true to make $rd non-blocking and hand it to SUPER::new
+sub new {
+	my ($cls, $rd, $op_map, $in_loop) = @_;
+	# 1031: F_SETPIPE_SZ, 4096: page size
+	fcntl($rd, 1031, 4096) if $^O eq 'linux';
+	my $self = bless { sock => $rd, op_map => $op_map }, $cls;
+	return $self unless $in_loop;
+
+	# iff using DS->EventLoop
+	$rd->blocking(0);
+	$self->SUPER::new($rd, EPOLLIN);
+	$self;
+}
+
+# Reads a single byte and invokes the sub mapped to it with its operands.
+# Returns nothing on EAGAIN, retries on EINTR, dies on other read errors
+# and on bytes missing from op_map.
+sub event_step {
+	my ($self) = @_;
+	my $rd = $self->{sock};
+	my ($byte, $nr);
+	while (!defined($nr = sysread($rd, $byte, 1))) {
+		return if $!{EAGAIN};
+		next if $!{EINTR};
+		die "read \$rd: $!";
+	}
+	my $op = $self->{op_map}->{$byte};
+	die "BUG: unknown byte `$byte'" unless $op;
+	if ($byte eq '') { # close on EOF
+		if ($rd->blocking) {
+			delete($self->{sock});
+		} else {
+			$self->close;
+		}
+	}
+	my ($sub, @args) = @$op;
+	$sub->(@args);
+}
+
+1;
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# for XOVER, OVER in NNTP, and feeds/homepage/threads in PSGI
$smsg ? load_from_row($smsg) : undef;
}
+# Fetch the xref3 rows of docid $num, ordered by ibx_id and xnum.
+# With a true $raw, the [ ibx_id, xnum, oidbin ] rows are returned as-is.
+# Otherwise, returns an arrayref of "$eidx_key:$xnum:$oidhex" strings;
+# an ibx_id without an `inboxes' row gets a "missing://ibx_id=N" key.
+sub get_xref3 {
+	my ($self, $num, $raw) = @_;
+	my $dbh = dbh($self);
+	my $xref3_sth = $dbh->prepare_cached(<<'EOM', undef, 1);
+SELECT ibx_id,xnum,oidbin FROM xref3 WHERE docid = ? ORDER BY ibx_id,xnum ASC
+EOM
+	$xref3_sth->execute($num);
+	my $rows = $xref3_sth->fetchall_arrayref;
+	return $rows if $raw;
+
+	my $key_sth = $dbh->prepare_cached(<<'EOM', undef, 1);
+SELECT eidx_key FROM inboxes WHERE ibx_id = ?
+EOM
+	my @xr3;
+	for my $row (@$rows) {
+		my ($ibx_id, $xnum, $oidbin) = @$row;
+		$key_sth->execute($ibx_id);
+		# `//' forces scalar context: first column or undef
+		my $eidx_key = $key_sth->fetchrow_array //
+				"missing://ibx_id=$ibx_id";
+		push @xr3, "$eidx_key:$xnum:".unpack('H*', $oidbin);
+	}
+	\@xr3;
+}
+
sub next_by_mid {
my ($self, $mid, $id, $prev) = @_;
my $dbh = dbh($self);
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# for XOVER, OVER in NNTP, and feeds/homepage/threads in PSGI
}
}
+# maps $eidx_key to its ibx_id in the `inboxes' table by way of id_for
+sub ibx_id {
+	my ($self, $eidx_key) = @_;
+	return id_for($self, 'inboxes', 'ibx_id', 'eidx_key', $eidx_key);
+}
+
sub sid {
my ($self, $path) = @_;
return unless defined $path && $path ne '';
$tid;
}
-sub parse_references ($$$) {
- my ($smsg, $hdr, $mids) = @_;
- my $refs = references($hdr);
- push(@$refs, @$mids) if scalar(@$mids) > 1;
- return $refs if scalar(@$refs) == 0;
-
- # prevent circular references here:
- my %seen = ( $smsg->{mid} => 1 );
- my @keep;
- foreach my $ref (@$refs) {
- if (length($ref) > PublicInbox::MID::MAX_MID_SIZE) {
- warn "References: <$ref> too long, ignoring\n";
- next;
- }
- push(@keep, $ref) unless $seen{$ref}++;
- }
- $smsg->{references} = '<'.join('> <', @keep).'>' if @keep;
- \@keep;
-}
-
# normalize subjects so they are suitable as pathnames for URLs
# XXX: consider for removal
sub subject_path ($) {
lc($subj);
}
+# returns the doc data of $smsg, UTF-8 encoded and passed through compress()
+sub ddd_for ($) {
+	my ($smsg) = @_;
+	my $doc_data = $smsg->to_doc_data;
+	utf8::encode($doc_data);
+	return compress($doc_data);
+}
+
sub add_overview {
my ($self, $eml, $smsg) = @_;
$smsg->{lines} = $eml->body_raw =~ tr!\n!\n!;
my $mids = mids_for_index($eml);
- my $refs = parse_references($smsg, $eml, $mids);
+ my $refs = $smsg->parse_references($eml, $mids);
+ $mids->[0] //= $smsg->{mid} //= $eml->{-lei_fake_mid};
+ $smsg->{mid} //= '';
my $subj = $smsg->{subject};
my $xpath;
if ($subj ne '') {
$xpath = subject_path($subj);
$xpath = id_compress($xpath);
}
- my $dd = $smsg->to_doc_data;
- utf8::encode($dd);
- $dd = compress($dd);
- add_over($self, $smsg, $mids, $refs, $xpath, $dd);
+ add_over($self, $smsg, $mids, $refs, $xpath, ddd_for($smsg));
}
sub _add_over {
$dbh->do(<<'');
CREATE TABLE IF NOT EXISTS over (
- num INTEGER NOT NULL, /* NNTP article number == IMAP UID */
+ num INTEGER PRIMARY KEY NOT NULL, /* NNTP article number == IMAP UID */
tid INTEGER NOT NULL, /* THREADID (IMAP REFERENCES threading, JMAP) */
sid INTEGER, /* Subject ID (IMAP ORDEREDSUBJECT "threading") */
ts INTEGER, /* IMAP INTERNALDATE (Received: header, git commit time) */
ds INTEGER, /* RFC-2822 sent Date: header, git author time */
- ddd VARBINARY, /* doc-data-deflated (->to_doc_data, ->load_from_data) */
- UNIQUE (num)
+ ddd VARBINARY /* doc-data-deflated (->to_doc_data, ->load_from_data) */
)
$dbh->do('CREATE INDEX IF NOT EXISTS idx_tid ON over (tid)');
sub create {
my ($self) = @_;
- unless (-r $self->{filename}) {
+ my $fn = $self->{filename} // do {
+ Carp::confess('BUG: no {filename}') unless $self->{dbh};
+ return;
+ };
+ unless (-r $fn) {
require File::Path;
require File::Basename;
- File::Path::mkpath(File::Basename::dirname($self->{filename}));
+ File::Path::mkpath(File::Basename::dirname($fn));
}
# create the DB:
PublicInbox::Over::dbh($self);
$pr->("I: rethread culled $total ghosts\n") if $pr && $total;
}
+# used for cross-inbox search: creates the eidx_docid counter row and the
+# inboxes, xref3, eidx_meta and eidxq tables (plus the xref3 indices) once
+# per object, the result is remembered in $self->{-eidx_prep}
+sub eidx_prep ($) {
+	my ($self) = @_;
+	return $self->{-eidx_prep} if defined($self->{-eidx_prep});
+	my $dbh = $self->dbh;
+
+	my $counter_sql = <<'EOM';
+INSERT OR IGNORE INTO counter (key) VALUES ('eidx_docid')
+EOM
+	my $inboxes_sql = <<'EOM';
+CREATE TABLE IF NOT EXISTS inboxes (
+ ibx_id INTEGER PRIMARY KEY AUTOINCREMENT,
+ eidx_key VARCHAR(255) NOT NULL, /* {newsgroup} // {inboxdir} */
+ UNIQUE (eidx_key)
+)
+EOM
+	my $xref3_sql = <<'EOM';
+CREATE TABLE IF NOT EXISTS xref3 (
+ docid INTEGER NOT NULL, /* <=> over.num */
+ ibx_id INTEGER NOT NULL, /* <=> inboxes.ibx_id */
+ xnum INTEGER NOT NULL, /* NNTP article number in ibx */
+ oidbin VARBINARY NOT NULL, /* 20-byte SHA-1 or 32-byte SHA-256 */
+ UNIQUE (docid, ibx_id, xnum, oidbin)
+)
+EOM
+	my $idx_docid_sql =
+		'CREATE INDEX IF NOT EXISTS idx_docid ON xref3 (docid)';
+
+	# performance critical, this is not UNIQUE since we may need to
+	# tolerate some old bugs from indexing mirrors
+	my $idx_nntp_sql = 'CREATE INDEX IF NOT EXISTS idx_nntp ON '.
+				'xref3 (oidbin,xnum,ibx_id)';
+
+	my $eidx_meta_sql = <<'EOM';
+CREATE TABLE IF NOT EXISTS eidx_meta (
+ key VARCHAR(255) PRIMARY KEY,
+ val VARCHAR(255) NOT NULL
+)
+EOM
+	# A queue of current docids which need reindexing.
+	# eidxq persists across aborted -extindex invocations
+	# Currently used for "-extindex --reindex" for Xapian
+	# data, but may be used in more places down the line.
+	my $eidxq_sql = <<'EOM';
+CREATE TABLE IF NOT EXISTS eidxq (docid INTEGER PRIMARY KEY NOT NULL)
+EOM
+
+	# statement order is the same as the declaration order above
+	for my $sql ($counter_sql, $inboxes_sql, $xref3_sql, $idx_docid_sql,
+			$idx_nntp_sql, $eidx_meta_sql, $eidxq_sql) {
+		$dbh->do($sql);
+	}
+	$self->{-eidx_prep} = 1;
+}
+
+# Get or set an entry of the eidx_meta key/value table.
+# With an undefined $val this is a plain lookup of $key.  Otherwise
+# $val is stored under $key (UPDATE when a row is already there,
+# INSERT when not) and the previously stored value is returned
+# (undef if there was none).
+sub eidx_meta { # requires transaction
+	my ($self, $key, $val) = @_;
+	my $dbh = $self->{dbh};
+	my @lookup = ('SELECT val FROM eidx_meta WHERE key = ? LIMIT 1',
+			undef, $key);
+	return $dbh->selectrow_array(@lookup) unless defined($val);
+
+	my $old = $dbh->selectrow_array(@lookup);
+	my ($sql, @bind) = defined($old) ?
+		('UPDATE eidx_meta SET val = ? WHERE key = ?', $val, $key) :
+		('INSERT INTO eidx_meta (key,val) VALUES (?,?)', $key, $val);
+	$dbh->do($sql, undef, @bind);
+	$old;
+}
+
+# Returns whatever get_counter() reports for the 'eidx_docid' key
+# on this object's {dbh}.
+sub eidx_max {
+	get_counter($_[0]->{dbh}, 'eidx_docid');
+}
+
+# Record in xref3 that $docid corresponds to article $xnum with blob
+# $oidhex in the inbox identified by $eidx_key.  An identical row
+# that is already present is left alone (INSERT OR IGNORE).
+# Returns the result of ->execute.
+sub add_xref3 {
+	my ($self, $docid, $xnum, $oidhex, $eidx_key) = @_;
+	begin_lazy($self);
+	my @ints = ($docid, ibx_id($self, $eidx_key), $xnum);
+	my $oidbin = pack('H*', $oidhex); # hex => raw bytes
+	my $ins = $self->{dbh}->prepare_cached(<<'');
+INSERT OR IGNORE INTO xref3 (docid, ibx_id, xnum, oidbin) VALUES (?, ?, ?, ?)
+
+	my $i = 0;
+	$ins->bind_param(++$i, $_) for @ints;
+	$ins->bind_param(++$i, $oidbin, SQL_BLOB); # binary-safe binding
+	$ins->execute;
+}
+
+# returns remaining reference count to $docid
+# Deletes the xref3 rows of $docid matching blob $oidhex; the delete is
+# additionally restricted to one inbox when $eidx_key is defined.
+# If no xref3 row is left for $docid, delete_by_num() is called for it.
+# $rm_eidx_info, when true, must be a scalar reference: if rows for
+# other inboxes remain, it is set to whether this inbox has no rows
+# left for $docid.  It is only written when $eidx_key was given and
+# the remaining count is non-zero.
+sub remove_xref3 {
+ my ($self, $docid, $oidhex, $eidx_key, $rm_eidx_info) = @_;
+ begin_lazy($self);
+ my $oidbin = pack('H*', $oidhex);
+ my ($sth, $ibx_id);
+ if (defined $eidx_key) {
+ $ibx_id = ibx_id($self, $eidx_key);
+ $sth = $self->{dbh}->prepare_cached(<<'');
+DELETE FROM xref3 WHERE docid = ? AND ibx_id = ? AND oidbin = ?
+
+ $sth->bind_param(1, $docid);
+ $sth->bind_param(2, $ibx_id);
+ $sth->bind_param(3, $oidbin, SQL_BLOB);
+ } else {
+ # no inbox given: drop the blob from this docid for all inboxes
+ $sth = $self->{dbh}->prepare_cached(<<'');
+DELETE FROM xref3 WHERE docid = ? AND oidbin = ?
+
+ $sth->bind_param(1, $docid);
+ $sth->bind_param(2, $oidbin, SQL_BLOB);
+ }
+ $sth->execute;
+ # 3rd arg (if_active) = 1: per DBI, a still-active cached
+ # handle is finished without a warning before being reused
+ $sth = $self->{dbh}->prepare_cached(<<'', undef, 1);
+SELECT COUNT(*) FROM xref3 WHERE docid = ?
+
+ $sth->execute($docid);
+ my $nr = $sth->fetchrow_array;
+ if ($nr == 0) {
+ # nothing references this docid anymore
+ delete_by_num($self, $docid);
+ } elsif (defined($ibx_id) && $rm_eidx_info) {
+ # if deduplication rules in ContentHash change, it's
+ # possible a docid can have multiple rows with the
+ # same ibx_id. This governs whether or not we call
+ # ->shard_remove_eidx_info in ExtSearchIdx.
+ $sth = $self->{dbh}->prepare_cached(<<'', undef, 1);
+SELECT COUNT(*) FROM xref3 WHERE docid = ? AND ibx_id = ?
+
+ $sth->execute($docid, $ibx_id);
+ my $count = $sth->fetchrow_array;
+ $$rm_eidx_info = ($count == 0);
+ }
+ $nr;
+}
+
+# for when an xref3 goes missing, this does NOT update {ts}
+# Stores $oidhex in $smsg->{blob}, then overwrites only the `ddd'
+# column of the `over' row whose num equals $smsg->{num} with
+# ddd_for($smsg).  Returns the result of ->execute.
+sub update_blob {
+ my ($self, $smsg, $oidhex) = @_;
+ my $sth = $self->{dbh}->prepare(<<'');
+UPDATE over SET ddd = ? WHERE num = ?
+
+ # {blob} must be set before ddd_for() is evaluated below
+ $smsg->{blob} = $oidhex;
+ $sth->bind_param(1, ddd_for($smsg), SQL_BLOB);
+ $sth->bind_param(2, $smsg->{num});
+ $sth->execute;
+}
+
+# Queue $docid in the eidxq table; a docid which is already queued
+# is left as-is (INSERT OR IGNORE).  Returns the result of ->execute.
+sub eidxq_add {
+	my ($self, $docid) = @_;
+	my $ins = $self->dbh->prepare_cached(<<'');
+INSERT OR IGNORE INTO eidxq (docid) VALUES (?)
+
+	$ins->execute($docid);
+}
+
+# Remove $docid from the eidxq table.
+# Returns the result of ->execute.
+sub eidxq_del {
+	my ($self, $docid) = @_;
+	my $del = $self->dbh->prepare_cached(<<'');
+DELETE FROM eidxq WHERE docid = ?
+
+	$del->execute($docid);
+}
+
+# Counts the xref3 rows whose oidbin equals the blob $oidhex (given
+# in hex) and returns the fetched COUNT(*) row, so the result is
+# true when at least one row references the blob.
+sub blob_exists {
+	my ($self, $oidhex) = @_;
+	my $cnt = $self->dbh->prepare_cached(<<'', undef, 1);
+SELECT COUNT(*) FROM xref3 WHERE oidbin = ?
+
+	my $oidbin = pack('H*', $oidhex); # hex => raw bytes
+	$cnt->bind_param(1, $oidbin, SQL_BLOB);
+	$cnt->execute;
+	$cnt->fetchrow_array;
+}
+
1;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# a tied handle for auto reaping of children tied to a pipe, see perltie(1)
package PublicInbox::ProcessPipe;
use strict;
-use warnings;
+use v5.10.1;
+use Carp qw(carp);
sub TIEHANDLE {
- my ($class, $pid, $fh) = @_;
- bless { pid => $pid, fh => $fh }, $class;
+ my ($class, $pid, $fh, $cb, $arg) = @_;
+ bless { pid => $pid, fh => $fh, ppid => $$, cb => $cb, arg => $arg },
+ $class;
}
+# tied-handle binmode(), for IO::Uncompress::Gunzip
+sub BINMODE {
+	my ($self) = @_;
+	binmode($self->{fh});
+}
+
sub READ { read($_[0]->{fh}, $_[1], $_[2], $_[3] || 0) }
sub READLINE { readline($_[0]->{fh}) }
-sub CLOSE {
- my $fh = delete($_[0]->{fh});
- my $ret = defined $fh ? close($fh) : '';
- my $pid = delete $_[0]->{pid};
- if (defined $pid) {
- # PublicInbox::DS may not be loaded
- eval { PublicInbox::DS::dwaitpid($pid, undef, undef) };
+# tied-handle syswrite(): forwards to syswrite() on the wrapped handle.
+# A missing length defaults to the byte length of the buffer and a
+# missing offset to 0.
+sub WRITE {
+	use bytes qw(length);
+	my ($self, undef, $len, $off) = @_;
+	# $_[1] is used directly so the caller's buffer is not copied
+	syswrite($self->{fh}, $_[1], $len // length($_[1]), $off // 0);
+}
+
+# tied-handle print(): prints all arguments to the wrapped handle
+sub PRINT {
+	my ($self, @rest) = (shift, @_);
+	print { $self->{fh} } @rest;
+}
+
+# tied-handle fileno(): descriptor number of the wrapped handle
+sub FILENO {
+	my ($self) = @_;
+	fileno($self->{fh});
+}
- if ($@) { # ok, not in the event loop, work synchronously
- waitpid($pid, 0);
+# Shared implementation of CLOSE and DESTROY.
+# Closes the wrapped handle (if still present) and arranges for the
+# child to be reaped.  With a true $wait, waitpid() is called
+# synchronously and a non-zero $? turns the return value into '';
+# otherwise reaping is handed to PublicInbox::DS::dwaitpid.
+# Returns the result of close(), or '' if there was no handle.
+sub _close ($;$) {
+ my ($self, $wait) = @_;
+ my $fh = delete $self->{fh};
+ my $ret = defined($fh) ? close($fh) : '';
+ # fields are deleted so a later call finds no pid to wait on
+ my ($pid, $cb, $arg) = delete @$self{qw(pid cb arg)};
+ # {ppid} is the PID which created this object (see TIEHANDLE);
+ # any other process must not attempt to reap the child
+ return $ret unless defined($pid) && $self->{ppid} == $$;
+ if ($wait) { # caller cares about the exit status:
+ my $wp = waitpid($pid, 0);
+ if ($wp == $pid) {
$ret = '' if $?;
+ if ($cb) {
+ # a dying callback is reported, not propagated
+ eval { $cb->($arg, $pid) };
+ carp "E: cb(arg, $pid): $@" if $@;
+ }
+ } else {
+ carp "waitpid($pid, 0) = $wp, \$!=$!, \$?=$?";
}
+ } else { # caller just undef-ed it, let event loop deal with it
+ require PublicInbox::DS;
+ PublicInbox::DS::dwaitpid($pid, $cb, $arg);
}
$ret;
}
-sub FILENO { fileno($_[0]->{fh}) }
+# An explicit close() suggests the caller wants to inspect $? right
+# away, so the child is reaped synchronously via waitpid().
+# n.b. wantarray doesn't seem to propagate `undef' down to tied
+# methods, otherwise that would be used to decide instead.
+sub CLOSE {
+	my ($self) = @_;
+	_close($self, 1);
+}
+# if relying on DESTROY, assume the caller doesn't care about $? and
+# we can let the event loop call waitpid() whenever it gets SIGCHLD
sub DESTROY {
- CLOSE(@_);
+ _close($_[0]);
undef;
}
-sub pid { $_[0]->{pid} }
-
1;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Like most Perl modules in public-inbox, this is internal and
# operate in. This can be useful to ensure smaller inboxes can
# be cloned while cloning of large inboxes is maxed out.
#
-# This does not depend on PublicInbox::DS or any other external
-# scheduling mechanism, you just need to call start() and finish()
-# appropriately. However, public-inbox-httpd (which uses PublicInbox::DS)
-# will be able to schedule this based on readability of stdout from
-# the spawned process. See GitHTTPBackend.pm and SolverGit.pm for
-# usage examples. It does not depend on any form of threading.
+# This does not depend on the PublicInbox::DS->EventLoop or any
+# other external scheduling mechanism, you just need to call
+# start() and finish() appropriately. However, public-inbox-httpd
+# (which uses PublicInbox::DS) will be able to schedule this
+# based on readability of stdout from the spawned process.
+# See GitHTTPBackend.pm and SolverGit.pm for usage examples.
+# It does not depend on any form of threading.
#
# This is useful for scheduling CGI execution of both long-lived
# git-http-backend(1) process (for "git clone") as well as short-lived
$self->{cmd} = $o{quiet} ? undef : $cmd;
eval {
# popen_rd may die on EMFILE, ENFILE
- ($self->{rpipe}, $self->{pid}) = popen_rd($cmd, $cmd_env, \%o);
+ $self->{rpipe} = popen_rd($cmd, $cmd_env, \%o);
- die "E: $!" unless defined($self->{pid});
+ die "E: $!" unless defined($self->{rpipe});
$limiter->{running}++;
$start_cb->($self); # EPOLL_CTL_ADD may ENOSPC/ENOMEM
}
}
-# callback for dwaitpid
-sub waitpid_err ($$) {
- my ($self, $pid) = @_;
- my $xpid = delete $self->{pid};
- my $err;
- if (defined $pid) {
- if ($pid > 0) { # success!
- $err = child_err($?);
- } elsif ($pid < 0) { # ??? does this happen in our case?
- $err = "W: waitpid($xpid, 0) => $pid: $!";
- } # else should not be called with pid == 0
- }
- finalize($self, $err);
-}
-
-sub do_waitpid ($) {
- my ($self) = @_;
- my $pid = $self->{pid};
- # PublicInbox::DS may not be loaded
- eval { PublicInbox::DS::dwaitpid($pid, \&waitpid_err, $self) };
- # done if we're running in PublicInbox::DS::EventLoop
- if ($@) {
- # non public-inbox-{httpd,nntpd} callers may block:
- my $ret = waitpid($pid, 0);
- waitpid_err($self, $ret);
- }
-}
+# callback for dwaitpid or ProcessPipe: finalizes the Qspawn object
+# using the error (if any) derived from the child's exit status in $?
+sub waitpid_err {
+	my ($self) = @_;
+	finalize($self, child_err($?));
+}
sub finish ($;$) {
my ($self, $err) = @_;
- if (delete $self->{rpipe}) {
- do_waitpid($self);
- } else {
- finalize($self, $err);
- }
+ my $tied_pp = delete($self->{rpipe}) or return finalize($self, $err);
+ my PublicInbox::ProcessPipe $pp = tied *$tied_pp;
+ @$pp{qw(cb arg)} = (\&waitpid_err, $self); # for ->DESTROY
}
sub start ($$$) {
}
sub setup_rlimit {
- my ($self, $name, $config) = @_;
+ my ($self, $name, $cfg) = @_;
foreach my $rlim (@PublicInbox::Spawn::RLIMITS) {
my $k = lc($rlim);
$k =~ tr/_//d;
$k = "publicinboxlimiter.$name.$k";
- defined(my $v = $config->{$k}) or next;
+ defined(my $v = $cfg->{$k}) or next;
my @rlimit = split(/\s*,\s*/, $v);
if (scalar(@rlimit) == 1) {
push @rlimit, $rlimit[0];
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# For reply instructions and address generation in WWW UI
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# SpamAssassin rules useful for running a mailing list mirror. We want to:
=head1 COPYRIGHT
-Copyright (C) 2016-2020 all contributors L<mailto:meta@public-inbox.org>
+Copyright (C) 2016-2021 all contributors L<mailto:meta@public-inbox.org>
License: AGPL-3.0+ L<http://www.gnu.org/licenses/agpl-3.0.txt>
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# based on notmuch, but with no concept of folders, files or flags
#
package PublicInbox::Search;
use strict;
use parent qw(Exporter);
-our @EXPORT_OK = qw(mdocid);
+our @EXPORT_OK = qw(retry_reopen int_val get_pct xap_terms);
use List::Util qw(max);
# values for searching, changing the numeric value breaks
use PublicInbox::Smsg;
use PublicInbox::Over;
-my $QP_FLAGS;
-our %X = map { $_ => 0 } qw(BoolWeight Database Enquire QueryParser Stem);
+our $QP_FLAGS;
+our %X = map { $_ => 0 } qw(BoolWeight Database Enquire QueryParser Stem Query);
our $Xap; # 'Search::Xapian' or 'Xapian'
-my $NVRP; # '$Xap::'.('NumberValueRangeProcessor' or 'NumberRangeProcessor')
-my $ENQ_ASCENDING;
+our $NVRP; # '$Xap::'.('NumberValueRangeProcessor' or 'NumberRangeProcessor')
+
+# ENQ_DESCENDING and ENQ_ASCENDING weren't in SWIG Xapian.pm prior to 1.4.16,
+# let's hope the ABI is stable
+our $ENQ_DESCENDING = 0;
+our $ENQ_ASCENDING = 1;
sub load_xapian () {
return 1 if defined $Xap;
'NumberRangeProcessor' : 'NumberValueRangeProcessor');
$X{$_} = $Xap.'::'.$_ for (keys %X);
- # ENQ_ASCENDING doesn't seem exported by SWIG Xapian.pm,
- # so lets hope this part of the ABI is stable because it's
- # just an integer:
- $ENQ_ASCENDING = $x eq 'Xapian' ?
- 1 : Search::Xapian::ENQ_ASCENDING();
-
- # for Smsg:
- *PublicInbox::Smsg::sortable_unserialise =
- $Xap.'::sortable_unserialise';
+ *sortable_serialise = $x.'::sortable_serialise';
+ *sortable_unserialise = $x.'::sortable_unserialise';
# n.b. FLAG_PURE_NOT is expensive not suitable for a public
# website as it could become a denial-of-service vector
# FLAG_PHRASE also seems to cause performance problems chert
}
}
-sub _xdb ($) {
+# returns all shards as separate Xapian::Database objects w/o combining
+sub xdb_shards_flat ($) {
my ($self) = @_;
- my $dir = xdir($self, 1);
- my ($xdb, $slow_phrase);
- my $qpf = \($self->{qp_flags} ||= $QP_FLAGS);
- if ($self->{ibx_ver} >= 2) {
- my @xdb;
- opendir(my $dh, $dir) or return; # not initialized yet
-
+ my $xpfx = $self->{xpfx};
+ my (@xdb, $slow_phrase);
+ load_xapian();
+ $self->{qp_flags} //= $QP_FLAGS;
+ if ($xpfx =~ m/xapian${\SCHEMA_VERSION}\z/) {
+ @xdb = ($X{Database}->new($xpfx));
+ $self->{qp_flags} |= FLAG_PHRASE() if !-f "$xpfx/iamchert";
+ } else {
+ opendir(my $dh, $xpfx) or return (); # not initialized yet
# We need numeric sorting so shard[0] is first for reading
# Xapian metadata, if needed
- my $last = max(grep(/\A[0-9]+\z/, readdir($dh)));
- return if !defined($last);
+ my $last = max(grep(/\A[0-9]+\z/, readdir($dh))) // return ();
for (0..$last) {
- my $shard_dir = "$dir/$_";
- if (-d $shard_dir && -r _) {
- push @xdb, $X{Database}->new($shard_dir);
- $slow_phrase ||= -f "$shard_dir/iamchert";
- } else { # gaps from missing epochs throw off mdocid()
- warn "E: $shard_dir missing or unreadable\n";
- return;
- }
+ my $shard_dir = "$self->{xpfx}/$_";
+ push @xdb, $X{Database}->new($shard_dir);
+ $slow_phrase ||= -f "$shard_dir/iamchert";
}
- $self->{nshard} = scalar(@xdb);
- $xdb = shift @xdb;
- $xdb->add_database($_) for @xdb;
- } else {
- $slow_phrase = -f "$dir/iamchert";
- $xdb = $X{Database}->new($dir);
+ $self->{qp_flags} |= FLAG_PHRASE() if !$slow_phrase;
}
- $$qpf |= FLAG_PHRASE() unless $slow_phrase;
- $xdb;
+ @xdb;
}
# v2 Xapian docids don't conflict, so they're identical to
sub mset_to_artnums {
my ($self, $mset) = @_;
- my $nshard = $self->{nshard} // 1;
+ my $nshard = $self->{nshard};
[ map { mdocid($nshard, $_) } $mset->items ];
}
sub xdb ($) {
my ($self) = @_;
- $self->{xdb} ||= do {
- load_xapian();
- _xdb($self);
+ $self->{xdb} //= do {
+ my @xdb = $self->xdb_shards_flat or return;
+ $self->{nshard} = scalar(@xdb);
+ my $xdb = shift @xdb;
+ $xdb->add_database($_) for @xdb;
+ $xdb;
};
}
-sub xpfx_init ($) {
- my ($self) = @_;
- if ($self->{ibx_ver} == 1) {
- $self->{xpfx} .= '/public-inbox/xapian' . SCHEMA_VERSION;
- } else {
- $self->{xpfx} .= '/xap'.SCHEMA_VERSION;
- }
-}
-
sub new {
my ($class, $ibx) = @_;
ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx";
- my $self = bless {
- xpfx => $ibx->{inboxdir}, # for xpfx_init
+ my $xap = $ibx->version > 1 ? 'xap' : 'public-inbox/xapian';
+ bless {
+ xpfx => "$ibx->{inboxdir}/$xap" . SCHEMA_VERSION,
altid => $ibx->{altid},
- ibx_ver => $ibx->version,
}, $class;
- xpfx_init($self);
- $self;
}
sub reopen {
$opts ||= {};
my $qp = $self->{qp} //= qparse_new($self);
my $query = $qp->parse_query($query_string, $self->{qp_flags});
- $opts->{relevance} = 1 unless exists $opts->{relevance};
_do_enquire($self, $query, $opts);
}
sub retry_reopen {
- my ($self, $cb, $arg) = @_;
+ my ($self, $cb, @arg) = @_;
for my $i (1..10) {
if (wantarray) {
my @ret;
- eval { @ret = $cb->($arg) };
+ eval { @ret = $cb->($self, @arg) };
return @ret unless $@;
} else {
my $ret;
- eval { $ret = $cb->($arg) };
+ eval { $ret = $cb->($self, @arg) };
return $ret unless $@;
}
# Exception: The revision being read has been discarded -
sub _do_enquire {
my ($self, $query, $opts) = @_;
- retry_reopen($self, \&_enquire_once, [ $self, $query, $opts ]);
+ retry_reopen($self, \&_enquire_once, $query, $opts);
}
# returns true if all docs have the THREADID value
}
sub _enquire_once { # retry_reopen callback
- my ($self, $query, $opts) = @{$_[0]};
+ my ($self, $query, $opts) = @_;
my $xdb = xdb($self);
+ if (defined(my $eidx_key = $opts->{eidx_key})) {
+ $query = $X{Query}->new(OP_FILTER(), $query, 'O'.$eidx_key);
+ }
+ if (defined(my $uid_range = $opts->{uid_range})) {
+ my $range = $X{Query}->new(OP_VALUE_RANGE(), UID,
+ sortable_serialise($uid_range->[0]),
+ sortable_serialise($uid_range->[1]));
+ $query = $X{Query}->new(OP_FILTER(), $query, $range);
+ }
my $enquire = $X{Enquire}->new($xdb);
$enquire->set_query($query);
$opts ||= {};
my $desc = !$opts->{asc};
- if (($opts->{mset} || 0) == 2) { # mset == 2: ORDER BY docid/UID
+ my $rel = $opts->{relevance} // 0;
+ if ($rel == -1) { # ORDER BY docid/UID
+ $enquire->set_weighting_scheme($X{BoolWeight}->new);
$enquire->set_docid_order($ENQ_ASCENDING);
+ } elsif ($rel == 0) {
+ $enquire->set_sort_by_value_then_relevance(TS, $desc);
+ } elsif ($rel == -2) {
$enquire->set_weighting_scheme($X{BoolWeight}->new);
- } elsif ($opts->{relevance}) {
+ $enquire->set_docid_order($ENQ_DESCENDING);
+ } else { # rel > 0
$enquire->set_sort_by_relevance_then_value(TS, $desc);
- } else {
- $enquire->set_sort_by_value_then_relevance(TS, $desc);
}
# `mairix -t / --threads' or JMAP collapseThreads
sub mset_to_smsg {
my ($self, $ibx, $mset) = @_;
- my $nshard = $self->{nshard} // 1;
+ my $nshard = $self->{nshard};
my $i = 0;
my %order = map { mdocid($nshard, $_) => ++$i } $mset->items;
my @msgs = sort {
# for IMAP, undocumented for WWW and may be split off go away
$cb->($qp, $NVRP->new(BYTES, 'bytes:'));
- $cb->($qp, $NVRP->new(TS, 'ts:'));
+ $cb->($qp, $NVRP->new(TS, 'rt:'));
$cb->($qp, $NVRP->new(UID, 'uid:'));
while (my ($name, $prefix) = each %bool_pfx_external) {
\@ret;
}
+# Returns value slot $col of Xapian document $doc as a number.
+# Returns nothing (undef in scalar context) when the slot is unset
+# or otherwise false.
+sub int_val ($$) {
+	my ($doc, $col) = @_;
+	my $raw = $doc->get_value($col);
+	return unless $raw; # undefined is '' in Xapian
+	sortable_unserialise($raw) + 0; # PV => IV conversion
+}
+
+# Returns ->get_percent of an mset item, limited to a maximum of 99.
+sub get_pct ($) { # mset item
+	my ($item) = @_;
+	my $pct = $item->get_percent;
+	# Capped at "99%" since "100%" takes an extra column in the
+	# thread skeleton view.  <xapian/mset.h> says the value isn't
+	# very meaningful, anyways.
+	return 99 if $pct > 99;
+	$pct;
+}
+
+# Collects the terms starting with $pfx from a Xapian term list.
+# $xdb_or_doc is either a document (with @docid empty) or a database
+# (with @docid naming the document).  Returns a hashref whose keys are
+# the matching terms with $pfx stripped; all values are undef.
+# Errors raised by Xapian are swallowed (best-effort), in which case
+# the terms gathered so far are returned.
+sub xap_terms ($$;@) {
+	my ($pfx, $xdb_or_doc, @docid) = @_; # @docid may be empty ()
+	my %ret;
+	my $pfxlen = length($pfx);
+	eval {
+		my $end = $xdb_or_doc->termlist_end(@docid);
+		my $cur = $xdb_or_doc->termlist_begin(@docid);
+		# term lists are sorted, so seek to the first candidate
+		# once instead of on every iteration
+		$cur->skip_to($pfx) if $cur != $end;
+		for (; $cur != $end; $cur++) {
+			my $tn = $cur->get_termname;
+			# matching terms are contiguous: the first
+			# non-matching term ends the range
+			last if index($tn, $pfx) != 0;
+			$ret{substr($tn, $pfxlen)} = undef;
+		}
+	};
+	\%ret;
+}
+
1;
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-# based on notmuch, but with no concept of folders, files or flags
+# based on notmuch, but with no concept of folders or files
#
# Indexes mail with Xapian and our (SQLite-based) ::Msgmap for use
# with the web and NNTP interfaces. This index maintains thread
use PublicInbox::MID qw(mids_for_index mids);
use PublicInbox::MsgIter;
use PublicInbox::IdxStack;
-use Carp qw(croak);
+use Carp qw(croak carp);
use POSIX qw(strftime);
+use Time::Local qw(timegm);
use PublicInbox::OverIdx;
use PublicInbox::Spawn qw(spawn nodatacow_dir);
use PublicInbox::Git qw(git_unquote);
use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
-our @EXPORT_OK = qw(crlf_adjust log2stack is_ancestor check_size);
+our @EXPORT_OK = qw(log2stack is_ancestor check_size prepare_stack
+ index_text term_generator add_val is_bad_blob);
my $X = \%PublicInbox::Search::X;
-my ($DB_CREATE_OR_OPEN, $DB_OPEN);
+our ($DB_CREATE_OR_OPEN, $DB_OPEN);
our $DB_NO_SYNC = 0;
our $BATCH_BYTES = $ENV{XAPIAN_FLUSH_THRESHOLD} ? 0x7fffffff : 1_000_000;
use constant DEBUG => !!$ENV{DEBUG};
my $xapianlevels = qr/\A(?:full|medium)\z/;
my $hex = '[a-f0-9]';
my $OID = $hex .'{40,}';
+our $INDEXLEVELS = qr/\A(?:full|medium|basic)\z/;
sub new {
my ($class, $ibx, $creat, $shard) = @_;
ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx";
- my $levels = qr/\A(?:full|medium|basic)\z/;
my $inboxdir = $ibx->{inboxdir};
my $version = $ibx->version;
my $indexlevel = 'full';
$altid = [ map { PublicInbox::AltId->new($ibx, $_); } @$altid ];
}
if ($ibx->{indexlevel}) {
- if ($ibx->{indexlevel} =~ $levels) {
+ if ($ibx->{indexlevel} =~ $INDEXLEVELS) {
$indexlevel = $ibx->{indexlevel};
} else {
die("Invalid indexlevel $ibx->{indexlevel}\n");
}
}
$ibx = PublicInbox::InboxWritable->new($ibx);
- my $self = bless {
- ibx => $ibx,
- xpfx => $inboxdir, # for xpfx_init
- -altid => $altid,
- ibx_ver => $version,
- indexlevel => $indexlevel,
- }, $class;
- $self->xpfx_init;
+ my $self = PublicInbox::Search->new($ibx);
+ bless $self, $class;
+ $self->{ibx} = $ibx;
+ $self->{-altid} = $altid;
+ $self->{indexlevel} = $indexlevel;
$self->{-set_indexlevel_once} = 1 if $indexlevel eq 'medium';
if ($ibx->{-skip_docdata}) {
$self->{-set_skip_docdata_once} = 1;
$self->{-skip_docdata} = 1;
}
- $ibx->umask_prepare;
if ($version == 1) {
$self->{lock_path} = "$inboxdir/ssoma.lock";
my $dir = $self->xdir;
$DB_CREATE_OR_OPEN = eval($xap.'::DB_CREATE_OR_OPEN()');
$DB_OPEN = eval($xap.'::DB_OPEN()');
my $ver = (eval($xap.'::major_version()') << 16) |
- (eval($xap.'::minor_version()') << 8);
+ (eval($xap.'::minor_version()') << 8) |
+ eval($xap.'::revision()');
$DB_NO_SYNC = 0x4 if $ver >= 0x10400;
+ # Xapian v1.2.21..v1.2.24 were missing close-on-exec on OFD locks
+ $X->{CLOEXEC_UNSET} = 1 if $ver >= 0x010215 && $ver <= 0x010218;
1;
}
}
}
return unless defined $flag;
- $flag |= $DB_NO_SYNC if $self->{ibx}->{-no_fsync};
+ $flag |= $DB_NO_SYNC if ($self->{ibx} // $self->{eidx})->{-no_fsync};
my $xdb = eval { ($X->{WritableDatabase})->new($dir, $flag) };
croak "Failed opening $dir: $@" if $@;
$self->{xdb} = $xdb;
$self->{term_generator} //= do {
my $tg = $X->{TermGenerator}->new;
- $tg->set_stemmer($self->stemmer);
+ $tg->set_stemmer(PublicInbox::Search::stemmer($self));
$tg;
}
}
}
}
+# Indexes every List-Id header of $hdr into $doc: the lower-cased
+# text between the first <...> pair becomes a boolean 'G' term and is
+# also passed to index_text() with the 'XL' prefix.  Headers without
+# a <...> part are ignored.
+sub index_list_id ($$$) {
+	my ($self, $doc, $hdr) = @_;
+	my @lids = map {
+		/<([^>]+)>/ ? lc($1) : ();
+	} $hdr->header_raw('List-Id');
+	for my $lid (@lids) {
+		$doc->add_boolean_term('G' . $lid);
+		index_text($self, $lid, 1, 'XL'); # probabilistic
+	}
+}
+
sub index_ids ($$$$) {
my ($self, $doc, $hdr, $mids) = @_;
for my $mid (@$mids) {
}
}
$doc->add_boolean_term('Q' . $_) for @$mids;
- for my $l ($hdr->header_raw('List-Id')) {
- $l =~ /<([^>]+)>/ or next;
- my $lid = lc $1;
- $doc->add_boolean_term('G' . $lid);
- index_text($self, $lid, 1, 'XL'); # probabilistic
- }
+ index_list_id($self, $doc, $hdr);
}
-sub add_xapian ($$$$) {
+sub eml2doc ($$$;$) {
my ($self, $eml, $smsg, $mids) = @_;
+ $mids //= mids_for_index($eml);
my $doc = $X->{Document}->new;
add_val($doc, PublicInbox::Search::TS(), $smsg->{ts});
my @ds = gmtime($smsg->{ds});
$tg->set_document($doc);
index_headers($self, $smsg);
+ if (defined(my $eidx_key = $smsg->{eidx_key})) {
+ $doc->add_boolean_term('O'.$eidx_key) if $eidx_key ne '.';
+ }
msg_iter($eml, \&index_xapian, [ $self, $doc ]);
index_ids($self, $doc, $eml, $mids);
if (!$self->{-skip_docdata}) {
# WWW doesn't need {to} or {cc}, only NNTP
$smsg->{to} = $smsg->{cc} = '';
- PublicInbox::OverIdx::parse_references($smsg, $eml, $mids);
+ $smsg->parse_references($eml, $mids);
my $data = $smsg->to_doc_data;
$doc->set_data($data);
}
}
}
}
+ $doc;
+}
+
+# Builds a Xapian document for $eml via eml2doc() and stores it in
+# $self->{xdb} under docid $smsg->{num}, replacing any document
+# which already has that docid.
+sub add_xapian ($$$$) {
+ my ($self, $eml, $smsg, $mids) = @_;
+ begin_txn_lazy($self);
+ my $doc = eml2doc($self, $eml, $smsg, $mids);
$self->{xdb}->replace_document($smsg->{num}, $doc);
}
sub _msgmap_init ($) {
my ($self) = @_;
- die "BUG: _msgmap_init is only for v1\n" if $self->{ibx_ver} != 1;
+ die "BUG: _msgmap_init is only for v1\n" if $self->{ibx}->version != 1;
$self->{mm} //= eval {
require PublicInbox::Msgmap;
my $rw = $self->{ibx}->{-no_fsync} ? 2 : 1;
sub add_message {
# mime = PublicInbox::Eml or Email::MIME object
my ($self, $mime, $smsg, $sync) = @_;
+ begin_txn_lazy($self);
my $mids = mids_for_index($mime);
$smsg //= bless { blob => '' }, 'PublicInbox::Smsg'; # test-only compat
$smsg->{mid} //= $mids->[0]; # v1 compatibility
$smsg->{num};
}
-sub xdb_remove {
- my ($self, $oid, @removed) = @_;
- my $xdb = $self->{xdb} or return;
- for my $num (@removed) {
- my $doc = eval { $xdb->get_document($num) };
- unless ($doc) {
- warn "E: $@\n" if $@;
- warn "E: #$num $oid missing in Xapian\n";
- next;
- }
- my $smsg = bless {}, 'PublicInbox::Smsg';
- $smsg->load_expand($doc);
- my $blob = $smsg->{blob} // '(unset)';
- if ($blob eq $oid) {
- $xdb->delete_document($num);
- } else {
- warn "E: #$num $oid != $blob in Xapian\n";
- }
+# Loads Xapian document $docid from $self->{xdb}.
+# Returns the document; if it cannot be loaded, warns (including the
+# exception text, if any) and returns undef.
+sub _get_doc ($$) {
+	my ($self, $docid) = @_;
+	my $doc = eval { $self->{xdb}->get_document($docid) };
+	return $doc if defined($doc);
+	warn "E: $@\n" if $@;
+	warn "E: #$docid missing in Xapian\n";
+	undef;
+}
+
+# Adds inbox-specific terms to the existing document $docid: the
+# boolean 'O'.$eidx_key term (skipped when $eidx_key is '.') and the
+# List-Id terms of $eml, then writes the document back.
+# Does nothing further when the document cannot be loaded.
+sub add_eidx_info {
+	my ($self, $docid, $eidx_key, $eml) = @_;
+	begin_txn_lazy($self);
+	my $doc = _get_doc($self, $docid);
+	return unless $doc;
+	term_generator($self)->set_document($doc);
+	unless ($eidx_key eq '.') {
+		$doc->add_boolean_term('O'.$eidx_key);
+	}
+	index_list_id($self, $doc, $eml);
+	$self->{xdb}->replace_document($docid, $doc);
+}
+
+# Removes the boolean 'O'.$eidx_key term and, when $eml is given,
+# the boolean 'G' List-Id terms of $eml from the existing document
+# $docid, then writes the document back.  A failing ->remove_term
+# only produces a warning.
+sub remove_eidx_info {
+ my ($self, $docid, $eidx_key, $eml) = @_;
+ begin_txn_lazy($self);
+ my $doc = _get_doc($self, $docid) or return;
+ eval { $doc->remove_term('O'.$eidx_key) };
+ warn "W: ->remove_term O$eidx_key: $@\n" if $@;
+ for my $l ($eml ? $eml->header_raw('List-Id') : ()) {
+ $l =~ /<([^>]+)>/ or next;
+ my $lid = lc $1;
+ eval { $doc->remove_term('G' . $lid) };
+ warn "W: ->remove_term G$lid: $@\n" if $@;
+
+ # nb: we don't remove the XL probabilistic terms
+ # since terms may overlap if cross-posted.
+ #
+ # IOW, a message which has both <foo.example.com>
+ # and <bar.example.com> would have overlapping
+ # "XLexample" and "XLcom" as terms, and we
+ # wouldn't know if they're safe to remove if we just
+ # unindex <foo.example.com> while preserving
+ # <bar.example.com>.
+ #
+ # In any case, this entire sub will likely never
+ # be needed and users using the "l:" prefix are probably
+ # rarer.
+ }
+ $self->{xdb}->replace_document($docid, $doc);
+}
+
+# Makes the set of 'K'-prefixed terms of document $docid equal to
+# @kw: 'K' terms not listed in @kw are removed, missing ones are
+# added.  The document is only rewritten when something changed.
+sub set_keywords {
+ my ($self, $docid, @kw) = @_;
+ begin_txn_lazy($self);
+ my $doc = _get_doc($self, $docid) or return;
+ my %keep = map { $_ => 1 } @kw;
+ # starts as a copy of %keep; keywords already present get deleted
+ my %add = %keep;
+ my @rm;
+ my $end = $doc->termlist_end;
+ for (my $cur = $doc->termlist_begin; $cur != $end; $cur++) {
+ $cur->skip_to('K');
+ last if $cur == $end;
+ my $kw = $cur->get_termname;
+ # terms without the 'K' prefix are not keywords
+ $kw =~ s/\AK//s or next;
+ $keep{$kw} ? delete($add{$kw}) : push(@rm, $kw);
}
+ # nothing to remove and nothing to add: avoid a needless write
+ return unless (scalar(@rm) + scalar(keys %add));
+ $doc->remove_term('K'.$_) for @rm;
+ $doc->add_boolean_term('K'.$_) for (keys %add);
+ $self->{xdb}->replace_document($docid, $doc);
}
-sub remove_by_oid {
- my ($self, $oid, $num) = @_;
- die "BUG: remove_by_oid is v2-only\n" if $self->{oidx};
+# Adds a boolean 'K' term for each of @kw to the existing document
+# $docid and writes it back.  Does nothing when the document cannot
+# be loaded.
+sub add_keywords {
+	my ($self, $docid, @kw) = @_;
+	begin_txn_lazy($self);
+	my $doc = _get_doc($self, $docid);
+	return unless $doc;
+	for my $kw (@kw) {
+		$doc->add_boolean_term('K'.$kw);
+	}
+	$self->{xdb}->replace_document($docid, $doc);
+}
+
+# Removes the 'K' term of each of @kw from the existing document
+# $docid.  Failures of ->remove_term are ignored; the document is
+# only written back if at least one removal succeeded.
+sub remove_keywords {
+	my ($self, $docid, @kw) = @_;
+	begin_txn_lazy($self);
+	my $doc = _get_doc($self, $docid);
+	return unless $doc;
+	my $changed;
+	for my $kw (@kw) {
+		eval {
+			$doc->remove_term('K'.$kw);
+			$changed = 1;
+		};
+	}
+	$self->{xdb}->replace_document($docid, $doc) if $changed;
+}
+
+# Builds a PublicInbox::Smsg from Xapian document $doc.
+# Returns nothing when the document carries no data; otherwise {ts}
+# comes from the TS value slot, {ds} is computed from the DT value
+# slot, and the remaining fields are filled in by ->load_from_data.
+sub smsg_from_doc ($) {
+ my ($doc) = @_;
+ my $data = $doc->get_data or return;
+ my $smsg = bless {}, 'PublicInbox::Smsg';
+ # NOTE(review): int_val uses a bare `return' for unset slots, so
+ # keep these calls in scalar context (plain assignments)
+ $smsg->{ts} = int_val($doc, PublicInbox::Search::TS());
+ my $dt = int_val($doc, PublicInbox::Search::DT());
+ # the template splits $dt into 4+2+2+2+2+2 characters, i.e.
+ # year, month, day, hour, minute, second
+ my ($yyyy, $mon, $dd, $hh, $mm, $ss) = unpack('A4A2A2A2A2A2', $dt);
+ # timegm() expects a 0-based month
+ $smsg->{ds} = timegm($ss, $mm, $hh, $dd, $mon - 1, $yyyy);
+ $smsg->load_from_data($data);
+ $smsg;
+}
+
+# Deletes each of @docids from $self->{xdb}.  Does nothing when no
+# {xdb} is set; a failing ->delete_document only produces a warning
+# and the remaining docids are still processed.
+sub xdb_remove {
+ my ($self, @docids) = @_;
$self->begin_txn_lazy;
- xdb_remove($self, $oid, $num) if need_xapian($self);
+ my $xdb = $self->{xdb} or return;
+ for my $docid (@docids) {
+ eval { $xdb->delete_document($docid) };
+ warn "E: #$docid not in Xapian? $@\n" if $@;
+ }
}
sub index_git_blob_id {
$tmp{$_}++ for @removed;
}
if (!$nr) {
- $mids = join('> <', @$mids);
- warn "W: <$mids> missing for removal from overview\n";
+ my $m = join('> <', @$mids);
+ warn "W: <$m> missing for removal from overview\n";
}
while (my ($num, $nr) = each %tmp) {
warn "BUG: $num appears >1 times ($nr) for $oid\n" if $nr != 1;
} else { # just in case msgmap and over.sqlite3 become desynched:
$self->{mm}->mid_delete($mids->[0]);
}
- xdb_remove($self, $oid, keys %tmp) if need_xapian($self);
+ xdb_remove($self, keys %tmp) if need_xapian($self);
}
sub index_mm {
}
}
-# returns the number of bytes to add if given a non-CRLF arg
-sub crlf_adjust ($) {
- if (index($_[0], "\r\n") < 0) {
- # common case is LF-only, every \n needs an \r;
- # so favor a cheap tr// over an expensive m//g
- $_[0] =~ tr/\n/\n/;
- } else { # count number of '\n' w/o '\r', expensive:
- scalar(my @n = ($_[0] =~ m/(?<!\r)\n/g));
+# Decides whether an object returned for a blob request must be
+# skipped.  Returns 1 (after a carp) when $type is not 'blob', and 1
+# when $size is 0; returns 0 otherwise.  Croaks when $oid differs
+# from $expect_oid, as that is treated as a bug.
+sub is_bad_blob ($$$$) {
+ my ($oid, $type, $size, $expect_oid) = @_;
+ if ($type ne 'blob') {
+ carp "W: $expect_oid is not a blob (type=$type)";
+ return 1;
}
+ croak "BUG: $oid != $expect_oid" if $oid ne $expect_oid;
+ $size == 0 ? 1 : 0; # size == 0 means purged
}
sub index_both { # git->cat_async callback
my ($bref, $oid, $type, $size, $sync) = @_;
+ return if is_bad_blob($oid, $type, $size, $sync->{oid});
my ($nr, $max) = @$sync{qw(nr max)};
++$$nr;
$$max -= $size;
- $size += crlf_adjust($$bref);
- my $smsg = bless { bytes => $size, blob => $oid }, 'PublicInbox::Smsg';
+ my $smsg = bless { blob => $oid }, 'PublicInbox::Smsg';
+ $smsg->set_bytes($$bref, $size);
my $self = $sync->{sidx};
+ local $self->{current_info} = "$self->{current_info}: $oid";
my $eml = PublicInbox::Eml->new($bref);
$smsg->{num} = index_mm($self, $eml, $oid, $sync) or
die "E: could not generate NNTP article number for $oid";
add_message($self, $eml, $smsg, $sync);
+ ++$self->{nidx};
+ my $cur_cmt = $sync->{cur_cmt} // die 'BUG: {cur_cmt} missing';
+ ${$sync->{latest_cmt}} = $cur_cmt;
}
sub unindex_both { # git->cat_async callback
- my ($bref, $oid, $type, $size, $self) = @_;
+ my ($bref, $oid, $type, $size, $sync) = @_;
+ return if is_bad_blob($oid, $type, $size, $sync->{oid});
+ my $self = $sync->{sidx};
+ local $self->{current_info} = "$self->{current_info}: $oid";
unindex_eml($self, $oid, PublicInbox::Eml->new($bref));
+ # may be undef if leftover
+ if (defined(my $cur_cmt = $sync->{cur_cmt})) {
+ ${$sync->{latest_cmt}} = $cur_cmt;
+ }
+ ++$self->{nidx};
+}
+
+# Forwards all arguments to ->with_umask of $self->{ibx}, or of
+# $self->{eidx} when {ibx} is not defined.
+sub with_umask {
+	my $self = shift;
+	my $owner = $self->{ibx} // $self->{eidx};
+	$owner->with_umask(@_);
+}
# called by public-inbox-index
sub index_sync {
my ($self, $opt) = @_;
delete $self->{lock_path} if $opt->{-skip_lock};
- $self->{ibx}->with_umask(\&_index_sync, $self, $opt);
- if ($opt->{reindex}) {
+ $self->with_umask(\&_index_sync, $self, $opt);
+ if ($opt->{reindex} && !$opt->{quit}) {
my %again = %$opt;
delete @again{qw(rethread reindex)};
index_sync($self, \%again);
+ $opt->{quit} = $again{quit}; # propagate to caller
}
}
sub v1_checkpoint ($$;$) {
my ($self, $sync, $stk) = @_;
- $self->{ibx}->git->check_async_wait;
- $self->{ibx}->git->cat_async_wait;
+ $self->{ibx}->git->async_wait_all;
- # latest_cmt may be undef
- my $newest = $stk ? $stk->{latest_cmt} : undef;
- if ($newest) {
+ # $newest may be undef
+ my $newest = $stk ? $stk->{latest_cmt} : ${$sync->{latest_cmt}};
+ if (defined($newest)) {
my $cur = $self->{mm}->last_commit || '';
if (need_update($self, $cur, $newest)) {
$self->{mm}->last_commit($newest);
}
- } else {
- ${$sync->{max}} = $self->{batch_bytes};
}
+ ${$sync->{max}} = $self->{batch_bytes};
$self->{mm}->{dbh}->commit;
- if ($newest && need_xapian($self)) {
- my $xdb = $self->{xdb};
+ my $xdb = need_xapian($self) ? $self->{xdb} : undef;
+ if ($newest && $xdb) {
my $cur = $xdb->get_metadata('last_commit');
if (need_update($self, $cur, $newest)) {
$xdb->set_metadata('last_commit', $newest);
}
-
+ }
+ if ($stk) { # all done if $stk is passed
# let SearchView know a full --reindex was done so it can
# generate ->has_threadid-dependent links
- if ($sync->{reindex} && !ref($sync->{reindex})) {
+ if ($xdb && $sync->{reindex} && !ref($sync->{reindex})) {
my $n = $xdb->get_metadata('has_threadid');
$xdb->set_metadata('has_threadid', '1') if $n ne '1';
}
+ $self->{oidx}->rethread_done($sync->{-opt}); # all done
}
-
- $self->{oidx}->rethread_done($sync->{-opt}) if $newest; # all done
commit_txn_lazy($self);
- $self->{ibx}->git->cleanup;
+ $sync->{ibx}->git->cleanup;
my $nr = ${$sync->{nr}};
idx_release($self, $nr);
# let another process do some work...
if (my $pr = $sync->{-opt}->{-progress}) {
$pr->("indexed $nr/$sync->{ntodo}\n") if $nr;
}
- if (!$stk) { # more to come
+ if (!$stk && !$sync->{quit}) { # more to come
begin_txn_lazy($self);
$self->{mm}->{dbh}->begin_work;
}
# only for v1
sub process_stack {
my ($self, $sync, $stk) = @_;
- my $git = $self->{ibx}->git;
+ my $git = $sync->{ibx}->git;
my $max = $self->{batch_bytes};
my $nr = 0;
$sync->{nr} = \$nr;
$sync->{max} = \$max;
$sync->{sidx} = $self;
+ $sync->{latest_cmt} = \(my $latest_cmt);
$self->{mm}->{dbh}->begin_work;
if (my @leftovers = keys %{delete($sync->{D}) // {}}) {
warn('W: unindexing '.scalar(@leftovers)." leftovers\n");
for my $oid (@leftovers) {
+ last if $sync->{quit};
$oid = unpack('H*', $oid);
- $git->cat_async($oid, \&unindex_both, $self);
+ $git->cat_async($oid, \&unindex_both, $sync);
}
}
if ($sync->{max_size} = $sync->{-opt}->{max_size}) {
$sync->{index_oid} = \&index_both;
}
- while (my ($f, $at, $ct, $oid) = $stk->pop_rec) {
+ while (my ($f, $at, $ct, $oid, $cur_cmt) = $stk->pop_rec) {
+ my $arg = { %$sync, cur_cmt => $cur_cmt, oid => $oid };
+ last if $sync->{quit};
if ($f eq 'm') {
- my $arg = { %$sync, autime => $at, cotime => $ct };
+ $arg->{autime} = $at;
+ $arg->{cotime} = $ct;
if ($sync->{max_size}) {
$git->check_async($oid, \&check_size, $arg);
} else {
}
v1_checkpoint($self, $sync) if $max <= 0;
} elsif ($f eq 'd') {
- $git->cat_async($oid, \&unindex_both, $self);
+ $git->cat_async($oid, \&unindex_both, $arg);
}
}
- v1_checkpoint($self, $sync, $stk);
+ v1_checkpoint($self, $sync, $sync->{quit} ? undef : $stk);
}
-sub log2stack ($$$$) {
- my ($sync, $git, $range, $ibx) = @_;
+sub log2stack ($$$) {
+ my ($sync, $git, $range) = @_;
my $D = $sync->{D}; # OID_BIN => NR (if reindexing, undef otherwise)
my ($add, $del);
- if ($ibx->version == 1) {
+ if ($sync->{ibx}->version == 1) {
my $path = $hex.'{2}/'.$hex.'{38}';
$add = qr!\A:000000 100644 \S+ ($OID) A\t$path$!;
$del = qr!\A:100644 000000 ($OID) \S+ D\t$path$!;
my $fh = $git->popen(qw(log --raw -r --pretty=tformat:%at-%ct-%H
--no-notes --no-color --no-renames --no-abbrev),
$range);
- my ($at, $ct, $stk);
+ my ($at, $ct, $stk, $cmt);
while (<$fh>) {
+ return if $sync->{quit};
if (/\A([0-9]+)-([0-9]+)-($OID)$/o) {
- ($at, $ct) = ($1 + 0, $2 + 0);
- $stk //= PublicInbox::IdxStack->new($3);
+ ($at, $ct, $cmt) = ($1 + 0, $2 + 0, $3);
+ $stk //= PublicInbox::IdxStack->new($cmt);
} elsif (/$del/) {
my $oid = $1;
if ($D) { # reindex case
$D->{pack('H*', $oid)}++;
} else { # non-reindex case:
- $stk->push_rec('d', $at, $ct, $oid);
+ $stk->push_rec('d', $at, $ct, $oid, $cmt);
}
} elsif (/$add/) {
my $oid = $1;
my $oid_bin = pack('H*', $oid);
my $nr = --$D->{$oid_bin};
delete($D->{$oid_bin}) if $nr <= 0;
-
# nr < 0 (-1) means it never existed
- $stk->push_rec('m', $at, $ct, $oid) if $nr < 0;
- } else {
- $stk->push_rec('m', $at, $ct, $oid);
+ next if $nr >= 0;
}
+ $stk->push_rec('m', $at, $ct, $oid, $cmt);
}
}
close $fh or die "git log failed: \$?=$?";
$stk->read_prepare;
}
-sub prepare_stack ($$$) {
- my ($self, $sync, $range) = @_;
- my $git = $self->{ibx}->git;
+sub prepare_stack ($$) {
+ my ($sync, $range) = @_;
+ my $git = $sync->{ibx}->git;
if (index($range, '..') < 0) {
# don't show annoying git errors to users who run -index
return PublicInbox::IdxStack->new->read_prepare if $?;
}
$sync->{D} = $sync->{reindex} ? {} : undef; # OID_BIN => NR
- log2stack($sync, $git, $range, $self->{ibx});
+ log2stack($sync, $git, $range);
}
# --is-ancestor requires git 1.8.0+
ref($reindex) eq 'HASH' ? $reindex->{from} : '';
}
+# Returns a closure over $sync suitable for use as a signal handler
+# (_index_sync installs it for QUIT, INT and TERM).  When invoked, the
+# closure sets the {quit} flag on both $sync and $sync->{-opt} and emits
+# a warning; it does not exit or die by itself.
+sub quit_cb ($) {
+ my ($sync) = @_;
+ sub {
+ # we set {-opt}->{quit} too, so ->index_sync callers
+ # can abort multi-inbox loops this way
+ $sync->{quit} = $sync->{-opt}->{quit} = 1;
+ warn "gracefully quitting\n";
+ }
+}
+
# indexes all unindexed messages (v1 only)
sub _index_sync {
my ($self, $opt) = @_;
my $tip = $opt->{ref} || 'HEAD';
- my $git = $self->{ibx}->git;
+ my $ibx = $self->{ibx};
+ local $self->{current_info} = "$ibx->{inboxdir}";
$self->{batch_bytes} = $opt->{batch_size} // $BATCH_BYTES;
- $git->batch_prepare;
+ $ibx->git->batch_prepare;
my $pr = $opt->{-progress};
- my $sync = { reindex => $opt->{reindex}, -opt => $opt };
+ my $sync = { reindex => $opt->{reindex}, -opt => $opt, ibx => $ibx };
+ my $quit = quit_cb($sync);
+ local $SIG{QUIT} = $quit;
+ local $SIG{INT} = $quit;
+ local $SIG{TERM} = $quit;
my $xdb = $self->begin_txn_lazy;
$self->{oidx}->rethread_prepare($opt);
my $mm = _msgmap_init($self);
my $lx = reindex_from($sync->{reindex}, $last_commit);
my $range = $lx eq '' ? $tip : "$lx..$tip";
$pr->("counting changes\n\t$range ... ") if $pr;
- my $stk = prepare_stack($self, $sync, $range);
+ my $stk = prepare_stack($sync, $range);
$sync->{ntodo} = $stk ? $stk->num_records : 0;
$pr->("$sync->{ntodo}\n") if $pr; # continue previous line
- process_stack($self, $sync, $stk);
+ process_stack($self, $sync, $stk) if !$sync->{quit};
}
sub DESTROY {
sub begin_txn_lazy {
my ($self) = @_;
- $self->{ibx}->with_umask(\&_begin_txn, $self) if !$self->{txn};
+ $self->with_umask(\&_begin_txn, $self) if !$self->{txn};
}
# store 'indexlevel=medium' in v2 shard=0 and v1 (only one shard)
sub _commit_txn {
my ($self) = @_;
+ if (my $eidx = $self->{eidx}) {
+ $eidx->git->async_wait_all;
+ $eidx->{transact_bytes} = 0;
+ }
if (my $xdb = $self->{xdb}) {
set_metadata_once($self);
$xdb->commit_transaction;
sub commit_txn_lazy {
my ($self) = @_;
delete($self->{txn}) and
- $self->{ibx}->with_umask(\&_commit_txn, $self);
+ $self->with_umask(\&_commit_txn, $self);
}
-sub worker_done {
- my ($self) = @_;
- if (need_xapian($self)) {
- die "$$ $0 xdb not released\n" if $self->{xdb};
+# Alternate constructor taking an $eidx object instead of an inbox
+# (PublicInbox::SearchIdxShard->new falls back to this when its writer
+# has no {ibx}).
+# Arguments: ($class, $eidx, $shard)
+# {xpfx} and {indexlevel} are copied from $eidx; {-skip_docdata} and
+# {creat} are always enabled.
+# Returns the new blessed object.
+sub eidx_shard_new {
+ my ($class, $eidx, $shard) = @_;
+ my $self = bless {
+ eidx => $eidx,
+ xpfx => $eidx->{xpfx},
+ indexlevel => $eidx->{indexlevel},
+ -skip_docdata => 1,
+ shard => $shard,
+ creat => 1,
+ }, $class;
+ # only the 'medium' level requests the one-time indexlevel marker
+ $self->{-set_indexlevel_once} = 1 if $self->{indexlevel} eq 'medium';
+ $self;
+}
+
+# ensure there's no stale Xapian docs by treating $over as canonical
+sub over_check {
+ my ($self, $over) = @_;
+ begin_txn_lazy($self);
+ my $sth = $over->dbh->prepare(<<'');
+SELECT COUNT(*) FROM over WHERE num = ?
+
+ my $xdb = $self->{xdb};
+ my $cur = $xdb->postlist_begin('');
+ my $end = $xdb->postlist_end('');
+ my $xdir = $self->xdir;
+ for (; $cur != $end; $cur++) {
+ my $docid = $cur->get_docid;
+ $sth->execute($docid);
+ my $x = $sth->fetchrow_array;
+ next if $x > 0;
+ warn "I: removing $xdir #$docid, not in `over'\n";
+ $xdb->delete_document($docid);
}
- die "$$ $0 still in transaction\n" if $self->{txn};
}
1;
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Internal interface for a single Xapian shard in V2 inboxes.
package PublicInbox::SearchIdxShard;
use strict;
use v5.10.1;
-use parent qw(PublicInbox::SearchIdx);
-use IO::Handle (); # autoflush
-use PublicInbox::Eml;
+use parent qw(PublicInbox::SearchIdx PublicInbox::IPC);
+use PublicInbox::OnDestroy;
sub new {
- my ($class, $v2w, $shard) = @_;
+ my ($class, $v2w, $shard) = @_; # v2w may be ExtSearchIdx
my $ibx = $v2w->{ibx};
- my $self = $class->SUPER::new($ibx, 1, $shard);
+ my $self = $ibx ? $class->SUPER::new($ibx, 1, $shard)
+ : $class->eidx_shard_new($v2w, $shard);
# create the DB before forking:
$self->idx_acquire;
$self->set_metadata_once;
$self->idx_release;
- $self->spawn_worker($v2w, $shard) if $v2w->{parallel};
+ if ($v2w->{parallel}) {
+ local $self->{-v2w_afc} = $v2w;
+ $self->ipc_worker_spawn("shard[$shard]");
+ # F_SETPIPE_SZ = 1031 on Linux; increasing the pipe size for
+ # inputs speeds V2Writable batch imports across 8 cores by
+ # nearly 20%. Since any of our responses are small, make
+ # the response pipe as small as possible
+ if ($^O eq 'linux') {
+ fcntl($self->{-ipc_req}, 1031, 1048576);
+ fcntl($self->{-ipc_res}, 1031, 4096);
+ }
+ }
$self;
}
-sub spawn_worker {
- my ($self, $v2w, $shard) = @_;
- my ($r, $w);
- pipe($r, $w) or die "pipe failed: $!\n";
- $w->autoflush(1);
- my $pid = fork;
- defined $pid or die "fork failed: $!\n";
- if ($pid == 0) {
- my $bnote = $v2w->atfork_child;
- close $w or die "failed to close: $!";
-
- # F_SETPIPE_SZ = 1031 on Linux; increasing the pipe size here
- # speeds V2Writable batch imports across 8 cores by nearly 20%
- fcntl($r, 1031, 1048576) if $^O eq 'linux';
-
- eval { shard_worker_loop($self, $v2w, $r, $shard, $bnote) };
- die "worker $shard died: $@\n" if $@;
- die "unexpected MM $self->{mm}" if $self->{mm};
- exit;
+sub _worker_done {
+ my ($self) = @_;
+ if ($self->need_xapian) {
+ die "$$ $0 xdb not released\n" if $self->{xdb};
}
- $self->{pid} = $pid;
- $self->{w} = $w;
- close $r or die "failed to close: $!";
+ die "$$ $0 still in transaction\n" if $self->{txn};
}
-# this reads all the writes to $self->{w} from the parent process
-sub shard_worker_loop ($$$$$) {
- my ($self, $v2w, $r, $shard, $bnote) = @_;
- $0 = "pi-v2-shard[$shard]";
+sub ipc_atfork_child { # called automatically before ipc_worker_loop
+ my ($self) = @_;
+ my $v2w = delete $self->{-v2w_afc} or die 'BUG: {-v2w_afc} missing';
+ $v2w->atfork_child; # calls ipc_sibling_atfork_child on our siblings
+ $v2w->{current_info} = "[$self->{shard}]"; # for $SIG{__WARN__}
$self->begin_txn_lazy;
- while (my $line = readline($r)) {
- $v2w->{current_info} = "[$shard] $line";
- if ($line eq "commit\n") {
- $self->commit_txn_lazy;
- } elsif ($line eq "close\n") {
- $self->idx_release;
- } elsif ($line eq "barrier\n") {
- $self->commit_txn_lazy;
- # no need to lock < 512 bytes is atomic under POSIX
- print $bnote "barrier $shard\n" or
- die "write failed for barrier $!\n";
- } elsif ($line =~ /\AD ([a-f0-9]{40,}) ([0-9]+)\n\z/s) {
- $self->remove_by_oid($1, $2 + 0);
- } else {
- chomp $line;
- # n.b. $mid may contain spaces(!)
- my ($to_read, $bytes, $num, $blob, $ds, $ts, $tid, $mid)
- = split(/ /, $line, 8);
- $self->begin_txn_lazy;
- my $n = read($r, my $msg, $to_read) or die "read: $!\n";
- $n == $to_read or die "short read: $n != $to_read\n";
- my $mime = PublicInbox::Eml->new(\$msg);
- my $smsg = bless {
- bytes => $bytes,
- num => $num + 0,
- blob => $blob,
- mid => $mid,
- tid => $tid,
- ds => $ds,
- ts => $ts,
- }, 'PublicInbox::Smsg';
- $self->add_message($mime, $smsg);
- }
- }
- $self->worker_done;
+ # caller must capture this:
+ PublicInbox::OnDestroy->new($$, \&_worker_done, $self);
}
-sub index_raw {
- my ($self, $msgref, $eml, $smsg) = @_;
- if (my $w = $self->{w}) {
- # mid must be last, it can contain spaces (but not LF)
- print $w join(' ', @$smsg{qw(raw_bytes bytes
- num blob ds ts tid mid)}),
- "\n", $$msgref or die "failed to write shard $!\n";
- } else {
- if ($eml) {
- undef $$msgref;
- } else { # --xapian-only + --sequential-shard:
- $eml = PublicInbox::Eml->new($msgref);
- }
- $self->begin_txn_lazy;
- $self->add_message($eml, $smsg);
- }
-}
-
-sub atfork_child {
- close $_[0]->{w} or die "failed to close write pipe: $!\n";
+# Requests indexing of one message via ->ipc_do('add_xapian', ...).
+# Arguments: ($self, $eml, $smsg, $eidx_key)
+# $eidx_key is optional; when defined it is stored in $smsg->{eidx_key}
+# before dispatch so that it travels along with $smsg.
+# NOTE(review): whether ->ipc_do runs in a worker or in-process is decided
+# by PublicInbox::IPC, which is not visible here -- confirm.
+sub index_eml {
+ my ($self, $eml, $smsg, $eidx_key) = @_;
+ $smsg->{eidx_key} = $eidx_key if defined $eidx_key;
+ $self->ipc_do('add_xapian', $eml, $smsg);
}
-sub shard_barrier {
- my ($self) = @_;
- if (my $w = $self->{w}) {
- print $w "barrier\n" or die "failed to print: $!";
- } else {
- $self->commit_txn_lazy;
- }
+# wait for return to determine when ipc_do('commit_txn_lazy') is done
+# Drops the invocant and returns the remaining arguments interpolated
+# into a single string (joined by $", a space by default).
+sub echo {
+ shift;
+ "@_";
}
-sub shard_commit {
+sub idx_close {
my ($self) = @_;
- if (my $w = $self->{w}) {
- print $w "commit\n" or die "failed to write commit: $!";
- } else {
- $self->commit_txn_lazy;
- }
+ die "transaction in progress $self\n" if $self->{txn};
+ $self->idx_release if $self->{xdb};
}
sub shard_close {
my ($self) = @_;
- if (my $w = delete $self->{w}) {
- my $pid = delete $self->{pid} or die "no process to wait on\n";
- print $w "close\n" or die "failed to write to pid:$pid: $!\n";
- close $w or die "failed to close pipe for pid:$pid: $!\n";
- waitpid($pid, 0) == $pid or die "remote process did not finish";
- $? == 0 or die ref($self)." pid:$pid exited with: $?";
- } else {
- die "transaction in progress $self\n" if $self->{txn};
- $self->idx_release if $self->{xdb};
- }
+ $self->ipc_do('idx_close');
+ $self->ipc_worker_stop;
}
-sub shard_remove {
- my ($self, $oid, $num) = @_;
- if (my $w = $self->{w}) { # triggers remove_by_oid in a shard child
- print $w "D $oid $num\n" or die "failed to write remove $!";
- } else { # same process
- $self->remove_by_oid($oid, $num);
+# Dispatches 'over_check' with $over through ->ipc_do.
+# Arguments: ($self, $over)
+# When {-ipc_req} is set and $over already holds a {dbh}, $over is first
+# replaced by a fresh object of the same class, constructed from the
+# SQLite filename of that handle.
+sub shard_over_check {
+ my ($self, $over) = @_;
+ if ($self->{-ipc_req} && $over->{dbh}) {
+ # can't send DB handles over IPC
+ $over = ref($over)->new($over->{dbh}->sqlite_db_filename);
}
+ $self->ipc_do('over_check', $over);
}
1;
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# used by PublicInbox::SearchView
# We'll trust the client Date: header here instead of the Received:
# time since this is for display (and not retrieval)
_set_parent(\%id_table, $_) for sort { $a->{ds} <=> $b->{ds} } @$msgs;
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
my $rootset = [ grep {
!delete($_->{parent}) && $_->visible($ibx)
} values %id_table ];
my %seen = ($cur => 1); # self-referential loop prevention
my @q = ($cur);
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
while (defined($cur = shift @q)) {
my $c = $cur->{children}; # The hashref here...
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Displays search results for the web interface
use PublicInbox::WwwStream qw(html_oneshot);
use PublicInbox::SearchThread;
use PublicInbox::SearchQuery;
-use PublicInbox::Search qw(mdocid);
+use PublicInbox::Search qw(get_pct);
my %rmap_inc;
sub mbox_results {
sub sres_top_html {
my ($ctx) = @_;
- my $srch = $ctx->{-inbox}->search or
+ my $srch = $ctx->{ibx}->isrch or
return PublicInbox::WWW::need($ctx, 'Search');
my $q = PublicInbox::SearchQuery->new($ctx->{qp});
my $x = $q->{x};
my $pad = length("$total");
my $pfx = ' ' x $pad;
my $res = \($ctx->{-html_tip});
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef;
- my @nums = @{$ibx->search->mset_to_artnums($mset)};
+ my @nums = @{$ibx->isrch->mset_to_artnums($mset)};
my %num2msg = map { $_->{num} => $_ } @{$ibx->over->get_all(@nums)};
my ($min, $max);
sub err_txt {
my ($ctx, $err) = @_;
- my $u = $ctx->{-inbox}->base_url($ctx->{env}) . '_/text/help/';
+ my $u = $ctx->{ibx}->base_url($ctx->{env}) . '_/text/help/';
$err =~ s/^\s*Exception:\s*//; # bad word to show users :P
$err =~ s!(\S+)!path2inc($1)!sge;
$err = ascii_html($err);
}
my $A = $q->qs_html(x => 'A', r => undef);
$rv .= qq{|<a\nhref="?$A">Atom feed</a>]};
- if ($ctx->{-inbox}->search->has_threadid) {
+ if ($ctx->{ibx}->isrch->has_threadid) {
$rv .= qq{\n\t\t\tdownload mbox.gz: } .
# we set name=z w/o using it since it seems required for
# lynx (but works fine for w3m).
} @{$_[0]} ]
}
-sub get_pct ($) {
- # Capped at "99%" since "100%" takes an extra column in the
- # thread skeleton view. <xapian/mset.h> says the value isn't
- # very meaningful, anyways.
- my $n = $_[0]->get_percent;
- $n > 99 ? 99 : $n;
-}
-
sub mset_thread {
my ($ctx, $mset, $q) = @_;
- my $ibx = $ctx->{-inbox};
- my $nshard = $ibx->search->{nshard} // 1;
- my %pct = map { mdocid($nshard, $_) => get_pct($_) } $mset->items;
- my $msgs = $ibx->over->get_all(keys %pct);
- $_->{pct} = $pct{$_->{num}} for @$msgs;
+ my $ibx = $ctx->{ibx};
+ my @pct = map { get_pct($_) } $mset->items;
+ my $msgs = $ibx->isrch->mset_to_smsg($ibx, $mset);
+ my $i = 0;
+ $_->{pct} = $pct[$i++] for @$msgs;
my $r = $q->{r};
if ($r) { # for descriptions in search_nav_bot
- my @pct = values %pct;
$q->{-min_pct} = min(@pct);
$q->{-max_pct} = max(@pct);
}
sub adump {
my ($cb, $mset, $q, $ctx) = @_;
- $ctx->{ids} = $ctx->{-inbox}->search->mset_to_artnums($mset);
+ $ctx->{ids} = $ctx->{ibx}->isrch->mset_to_artnums($mset);
$ctx->{search_query} = $q; # used by WwwAtomStream::atom_header
PublicInbox::WwwAtomStream->response($ctx, 200, \&adump_i);
}
sub adump_i {
my ($ctx) = @_;
while (my $num = shift @{$ctx->{ids}}) {
- my $smsg = eval { $ctx->{-inbox}->over->get_art($num) } or next;
+ my $smsg = eval { $ctx->{ibx}->over->get_art($num) } or next;
return $smsg;
}
}
--- /dev/null
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# fork()-friendly key-value store.  Will be used to make
+# augmenting Maildirs and mboxes less expensive, maybe.
+# We use flock(2) to avoid SQLite lock problems (busy timeouts, backoff)
+package PublicInbox::SharedKV;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::Lock);
+use File::Temp qw(tempdir);
+use DBI ();
+use PublicInbox::Spawn;
+use File::Path qw(rmtree);
+
+# Returns the DBI handle for $self->{filename}, connecting on first use
+# and caching the handle in $self->{dbh} for later calls.
+# Arguments: ($self, $lock)
+# $lock is optional; a caller which already holds the lock may pass it in
+# so that first-time setup does not call ->lock_for_scope again.
+# First-time setup applies PRAGMAs driven by $self->{opt} (keys: fsync,
+# cache_size, journal_mode) and creates the `kv' table if it is missing.
+sub dbh {
+ my ($self, $lock) = @_;
+ $self->{dbh} //= do {
+ my $f = $self->{filename};
+ # only reached when no handle is cached yet
+ $lock //= $self->lock_for_scope;
+ my $dbh = DBI->connect("dbi:SQLite:dbname=$f", '', '', {
+ AutoCommit => 1,
+ RaiseError => 1,
+ PrintError => 0,
+ sqlite_use_immediate_transaction => 1,
+ # no sqlite_unicode here, this is for binary data
+ });
+ my $opt = $self->{opt} // {};
+ # synchronous writes are disabled unless {fsync} is requested
+ $dbh->do('PRAGMA synchronous = OFF') if !$opt->{fsync};
+ $dbh->do('PRAGMA cache_size = '.($opt->{cache_size} || 80000));
+ $dbh->do('PRAGMA journal_mode = '.
+ ($opt->{journal_mode} // 'WAL'));
+ $dbh->do(<<'');
+CREATE TABLE IF NOT EXISTS kv (
+ k VARBINARY PRIMARY KEY NOT NULL,
+ v VARBINARY NOT NULL,
+ UNIQUE (k)
+)
+
+ $dbh;
+ }
+}
+
+# Constructor.  Arguments: ($cls, $dir, $base, $opt)
+#   $dir  - directory for the store; when undefined, a temporary
+#           directory is created and remembered for removal in DESTROY
+#   $base - basename prefix for the files (defaults to '')
+#   $opt  - optional hashref; {lock_path} overrides the default lock file,
+#           other keys are consulted later by ->dbh
+# Sets {filename} to "$dir/$base.sqlite3" and {lock_path} to
+# "$dir/$base.flock" (unless overridden).  Dies if $dir cannot be created
+# or the database file cannot be opened.  No DBI connection is made here.
+sub new {
+ my ($cls, $dir, $base, $opt) = @_;
+ my $self = bless { opt => $opt }, $cls;
+ unless (defined $dir) {
+ $self->{tmpdir} = $dir = tempdir('skv-XXXXXX', TMPDIR => 1);
+ # pid + object identity; DESTROY compares against this value
+ $self->{tmpid} = "$$.$self";
+ }
+ -d $dir or mkdir($dir) or die "mkdir($dir): $!";
+ $base //= '';
+ my $f = $self->{filename} = "$dir/$base.sqlite3";
+ $self->{lock_path} = $opt->{lock_path} // "$dir/$base.flock";
+ unless (-f $f) {
+ # create the file ourselves so nodatacow_fd can be applied to it
+ open my $fh, '+>>', $f or die "failed to open $f: $!";
+ PublicInbox::Spawn::nodatacow_fd(fileno($fh));
+ }
+ $self;
+}
+
+# Creates the `idx_v' index on the value column of `kv' if it does not
+# exist yet.  Takes the lock itself and hands it to ->dbh so that a
+# first-time connect reuses it.
+sub index_values {
+ my ($self) = @_;
+ my $lock = $self->lock_for_scope;
+ $self->dbh($lock)->do('CREATE INDEX IF NOT EXISTS idx_v ON kv (v)');
+}
+
+# Inserts ($key, $val) only if $key is not present (INSERT OR IGNORE).
+# Arguments: ($self, $key, $val, $lock); $lock is optional and is
+# obtained via ->lock_for_scope when not supplied.
+# Returns undef when the result of ->execute is numerically zero,
+# otherwise returns that result.
+# NOTE(review): reads $self->{dbh} directly, so ->dbh must have been
+# called on this object beforehand -- confirm against callers.
+sub set_maybe {
+ my ($self, $key, $val, $lock) = @_;
+ $lock //= $self->lock_for_scope;
+ my $e = $self->{dbh}->prepare_cached(<<'')->execute($key, $val);
+INSERT OR IGNORE INTO kv (k,v) VALUES (?, ?)
+
+ $e == 0 ? undef : $e;
+}
+
+# caller calls sth->fetchrow_array
+# Executes `SELECT k,v FROM kv' and returns the statement handle; each
+# fetch on it yields a (key, value) pair.  No lock is taken here.
+# NOTE(review): reads $self->{dbh} directly; assumes ->dbh was already
+# called -- confirm against callers.
+sub each_kv_iter {
+ my ($self) = @_;
+ my $sth = $self->{dbh}->prepare_cached(<<'', undef, 1);
+SELECT k,v FROM kv
+
+ $sth->execute;
+ $sth
+}
+
+# Deletes every row whose value equals $val.
+# Arguments: ($self, $val, $lock); $lock is optional and is obtained via
+# ->lock_for_scope when not supplied.
+# Returns the result of ->execute forced to a number by `+ 0'.
+sub delete_by_val {
+ my ($self, $val, $lock) = @_;
+ $lock //= $self->lock_for_scope;
+ $self->{dbh}->prepare_cached(<<'')->execute($val) + 0;
+DELETE FROM kv WHERE v = ?
+
+}
+
+# Rewrites every row whose value equals $oldval so that it holds $newval.
+# Arguments: ($self, $oldval, $newval, $lock); $lock is optional and is
+# obtained via ->lock_for_scope when not supplied.
+# Note the bind order: the UPDATE takes the new value first.
+# Returns the result of ->execute forced to a number by `+ 0'.
+sub replace_values {
+ my ($self, $oldval, $newval, $lock) = @_;
+ $lock //= $self->lock_for_scope;
+ $self->{dbh}->prepare_cached(<<'')->execute($newval, $oldval) + 0;
+UPDATE kv SET v = ? WHERE v = ?
+
+}
+
+# Stores $val under $key, replacing any existing row; an undefined $val
+# deletes the row for $key instead.
+# Arguments: ($self, $key, $val)
+# For a defined $val, returns undef when the result of ->execute is
+# numerically zero, otherwise that result.  For the delete case the raw
+# result of ->execute is returned.
+# No lock is taken here (xchg takes one before calling this).
+sub set {
+ my ($self, $key, $val) = @_;
+ if (defined $val) {
+ my $e = $self->{dbh}->prepare_cached(<<'')->execute($key, $val);
+INSERT OR REPLACE INTO kv (k,v) VALUES (?,?)
+
+ $e == 0 ? undef : $e;
+ } else {
+ $self->{dbh}->prepare_cached(<<'')->execute($key);
+DELETE FROM kv WHERE k = ?
+
+ }
+}
+
+# Looks up $key and returns the result of fetching the first matching
+# row (the `v' column).  Arguments: ($self, $key)
+# No lock is taken here.
+# NOTE(review): the third prepare_cached argument (1) selects DBI's
+# $if_active behaviour for a still-active cached handle -- see the DBI
+# documentation for the exact semantics.
+sub get {
+ my ($self, $key) = @_;
+ my $sth = $self->{dbh}->prepare_cached(<<'', undef, 1);
+SELECT v FROM kv WHERE k = ?
+
+ $sth->execute($key);
+ $sth->fetchrow_array;
+}
+
+# Exchanges the value stored under $key: reads the current value, then
+# stores $newval (or deletes the row when $newval is undefined).
+# Arguments: ($self, $key, $newval, $lock); $lock is optional and is
+# obtained via ->lock_for_scope when not supplied, so the read and the
+# write happen while the same lock object is held.
+# Returns the previous value as produced by get().
+sub xchg {
+ my ($self, $key, $newval, $lock) = @_;
+ $lock //= $self->lock_for_scope;
+ my $oldval = get($self, $key);
+ if (defined $newval) {
+ set($self, $key, $newval);
+ } else {
+ $self->{dbh}->prepare_cached(<<'')->execute($key);
+DELETE FROM kv WHERE k = ?
+
+ }
+ $oldval;
+}
+
+# Runs `SELECT COUNT(k) FROM kv' and returns the fetched result, i.e.
+# the number of keys stored.  No lock is taken here.
+sub count {
+ my ($self) = @_;
+ my $sth = $self->{dbh}->prepare_cached(<<'');
+SELECT COUNT(k) FROM kv
+
+ $sth->execute;
+ $sth->fetchrow_array;
+}
+
+# Removes the temporary directory created by new(), but only when
+# {tmpid} still equals "$$.$self" as recorded at construction time.
+# Objects given an explicit directory have no {tmpid} and are left
+# alone; a process with a different pid will not match either.
+sub DESTROY {
+ my ($self) = @_;
+ rmtree($self->{tmpdir}) if ($self->{tmpid} // '') eq "$$.$self";
+}
+
+1;
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Wraps a signalfd (or similar) for PublicInbox::DS
package PublicInbox::Sigfd;
use strict;
use parent qw(PublicInbox::DS);
-use PublicInbox::Syscall qw(signalfd EPOLLIN EPOLLET $SFD_NONBLOCK);
-use POSIX qw(:signal_h);
+use PublicInbox::Syscall qw(signalfd EPOLLIN EPOLLET SFD_NONBLOCK);
+use POSIX ();
use IO::Handle ();
# returns a coderef to unblock signals if neither signalfd or kqueue
} else {
return; # wake up every second to check for signals
}
- if ($flags & $SFD_NONBLOCK) { # it can go into the event loop
+ if ($flags & SFD_NONBLOCK) { # it can go into the event loop
$self->SUPER::new($io, EPOLLIN | EPOLLET);
} else { # master main loop
$self->{sock} = $io;
while (wait_once($_[0])) {} # non-blocking
}
-sub sig_setmask { sigprocmask(SIG_SETMASK, @_) or die "sigprocmask: $!" }
-
-sub block_signals () {
- my $oldset = POSIX::SigSet->new;
- my $newset = POSIX::SigSet->new;
- $newset->fillset or die "fillset: $!";
- sig_setmask($newset, $oldset);
- $oldset;
-}
-
1;
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# A small/skeleton/slim representation of a message.
use warnings;
use base qw(Exporter);
our @EXPORT_OK = qw(subject_normalized);
-use PublicInbox::MID qw(mids);
+use PublicInbox::MID qw(mids references);
use PublicInbox::Address;
use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
-use Time::Local qw(timegm);
-
-sub get_val ($$) {
- my ($doc, $col) = @_;
- # sortable_unserialise is defined by PublicInbox::Search::load_xapian()
- sortable_unserialise($doc->get_value($col));
-}
sub to_doc_data {
my ($self) = @_;
) = split(/\n/, $_[1]);
}
-sub load_expand {
- my ($self, $doc) = @_;
- my $data = $doc->get_data or return;
- $self->{ts} = get_val($doc, PublicInbox::Search::TS());
- my $dt = get_val($doc, PublicInbox::Search::DT());
- my ($yyyy, $mon, $dd, $hh, $mm, $ss) = unpack('A4A2A2A2A2A2', $dt);
- $self->{ds} = timegm($ss, $mm, $hh, $dd, $mon - 1, $yyyy);
- load_from_data($self, $data);
- $self;
-}
-
sub psgi_cull ($) {
my ($self) = @_;
$self;
}
-# for Import and v1 non-SQLite WWW code paths
+# Collects the Message-IDs a message refers to.
+# Arguments: ($smsg, $hdr, $mids)
+#   $hdr  - passed to references() to obtain the initial list
+#   $mids - arrayref of the message's own Message-IDs; appended to the
+#           list only when it holds more than one entry
+# Returns an arrayref.  When the list is empty, the arrayref obtained
+# from references() is returned as-is.  Otherwise a new arrayref is
+# returned with duplicates, $smsg->{mid} itself and over-long entries
+# removed, and (if anything remains) $smsg->{references} is set to the
+# entries formatted as "<a> <b> ...".
+sub parse_references ($$$) {
+ my ($smsg, $hdr, $mids) = @_;
+ my $refs = references($hdr);
+ push(@$refs, @$mids) if scalar(@$mids) > 1;
+ return $refs if scalar(@$refs) == 0;
+
+ # prevent circular references here:
+ my %seen = ( $smsg->{mid} => 1 );
+ my @keep;
+ foreach my $ref (@$refs) {
+ if (length($ref) > PublicInbox::MID::MAX_MID_SIZE) {
+ warn "References: <$ref> too long, ignoring\n";
+ next;
+ }
+ # push() returns the new (non-zero) length, so each $ref is
+ # kept at most once and marked as seen in the same step
+ $seen{$ref} //= push(@keep, $ref);
+ }
+ $smsg->{references} = '<'.join('> <', @keep).'>' if @keep;
+ \@keep;
+}
+
+# used for v2, Import and v1 non-SQLite WWW code paths
sub populate {
my ($self, $hdr, $sync) = @_;
for my $f (qw(From To Cc Subject)) {
$self->{-ts} = [ my @ts = msg_timestamp($hdr, $sync->{cotime}) ];
$self->{ds} //= $ds[0]; # no zone
$self->{ts} //= $ts[0];
-
- # for v1 users w/o SQLite
- $self->{mid} //= eval { mids($hdr)->[0] } // '';
+ $self->{mid} //= mids($hdr)->[0];
}
# no strftime, that is locale-dependent and not for RFC822
$subj;
}
+# returns the number of bytes to add if given a non-CRLF arg
+# Takes one string argument (read via $_[0], never copied) and returns
+# how many "\n" in it are not preceded by "\r".
+sub crlf_adjust ($) {
+ if (index($_[0], "\r\n") < 0) {
+ # common case is LF-only, every \n needs an \r;
+ # so favor a cheap tr// over an expensive m//g
+ # (tr/\n/\n/ maps LF to itself: it only counts)
+ $_[0] =~ tr/\n/\n/;
+ } else { # count number of '\n' w/o '\r', expensive:
+ scalar(my @n = ($_[0] =~ m/(?<!\r)\n/g));
+ }
+}
+
+# Arguments: ($smsg, $str, $n).  Sets $smsg->{bytes} to $n plus
+# crlf_adjust($str) and returns the assigned value.
+sub set_bytes { $_[0]->{bytes} = $_[2] + crlf_adjust($_[1]) }
+
1;
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# "Solve" blobs which don't exist in git code repositories by
sub find_smsgs ($$$) {
my ($self, $ibx, $want) = @_;
- my $srch = $ibx->search or return;
+ my $srch = $ibx->isrch or return;
my $post = $want->{oid_b} or die 'BUG: no {oid_b}';
$post =~ /\A[a-f0-9]+\z/ or die "BUG: oid_b not hex: $post";
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Spamchecking used by -watch and -mda tools
use warnings;
sub get {
- my ($config, $key, $default) = @_;
- my $spamcheck = $config->{$key};
+ my ($cfg, $key, $default) = @_;
+ my $spamcheck = $cfg->{$key};
$spamcheck = $default unless $spamcheck;
return if !$spamcheck || $spamcheck eq 'none';
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Default spam filter class for wrapping spamc(1)
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# This allows vfork to be used for spawning subprocesses if
use parent qw(Exporter);
use Symbol qw(gensym);
use PublicInbox::ProcessPipe;
-our @EXPORT_OK = qw/which spawn popen_rd nodatacow_dir/;
+our @EXPORT_OK = qw(which spawn popen_rd run_die nodatacow_dir);
our @RLIMITS = qw(RLIMIT_CPU RLIMIT_CORE RLIMIT_DATA);
my $vfork_spawn = <<'VFORK_SPAWN';
}
SET_NODATACOW
+# last choice for script/lei, 1st choice for lei internals
+# compatible with PublicInbox::CmdIPC4
+my $fdpass = <<'FDPASS';
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/socket.h>
+
+#if defined(CMSG_SPACE) && defined(CMSG_LEN)
+#define SEND_FD_CAPA 6
+#define SEND_FD_SPACE (SEND_FD_CAPA * sizeof(int))
+union my_cmsg {
+ struct cmsghdr hdr;
+ char pad[sizeof(struct cmsghdr) + 16 + SEND_FD_SPACE];
+};
+
+SV *send_cmd4(PerlIO *s, SV *svfds, SV *data, int flags)
+{
+ struct msghdr msg = { 0 };
+ union my_cmsg cmsg = { 0 };
+ STRLEN dlen = 0;
+ struct iovec iov;
+ ssize_t sent;
+ AV *fds = (AV *)SvRV(svfds);
+ I32 i, nfds = av_len(fds) + 1;
+ int *fdp;
+
+ if (SvOK(data)) {
+ iov.iov_base = SvPV(data, dlen);
+ iov.iov_len = dlen;
+ }
+ if (!dlen) { /* must be non-zero */
+ iov.iov_base = &msg.msg_namelen; /* whatever */
+ iov.iov_len = 1;
+ }
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ if (nfds) {
+ if (nfds > SEND_FD_CAPA) {
+ fprintf(stderr, "FIXME: bump SEND_FD_CAPA=%d\n", nfds);
+ nfds = SEND_FD_CAPA;
+ }
+ msg.msg_control = &cmsg.hdr;
+ msg.msg_controllen = CMSG_SPACE(nfds * sizeof(int));
+ cmsg.hdr.cmsg_level = SOL_SOCKET;
+ cmsg.hdr.cmsg_type = SCM_RIGHTS;
+ cmsg.hdr.cmsg_len = CMSG_LEN(nfds * sizeof(int));
+ fdp = (int *)CMSG_DATA(&cmsg.hdr);
+ for (i = 0; i < nfds; i++) {
+ SV **fd = av_fetch(fds, i, 0);
+ *fdp++ = SvIV(*fd);
+ }
+ }
+ sent = sendmsg(PerlIO_fileno(s), &msg, flags);
+ return sent >= 0 ? newSViv(sent) : &PL_sv_undef;
+}
+
+void recv_cmd4(PerlIO *s, SV *buf, STRLEN n)
+{
+ union my_cmsg cmsg = { 0 };
+ struct msghdr msg = { 0 };
+ struct iovec iov;
+ ssize_t i;
+ Inline_Stack_Vars;
+ Inline_Stack_Reset;
+
+ if (!SvOK(buf))
+ sv_setpvn(buf, "", 0);
+ iov.iov_base = SvGROW(buf, n + 1);
+ iov.iov_len = n;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = &cmsg.hdr;
+ msg.msg_controllen = CMSG_SPACE(SEND_FD_SPACE);
+
+ i = recvmsg(PerlIO_fileno(s), &msg, 0);
+ if (i < 0)
+ Inline_Stack_Push(&PL_sv_undef);
+ else
+ SvCUR_set(buf, i);
+ if (i > 0 && cmsg.hdr.cmsg_level == SOL_SOCKET &&
+ cmsg.hdr.cmsg_type == SCM_RIGHTS) {
+ size_t len = cmsg.hdr.cmsg_len;
+ int *fdp = (int *)CMSG_DATA(&cmsg.hdr);
+ for (i = 0; CMSG_LEN((i + 1) * sizeof(int)) <= len; i++)
+ Inline_Stack_Push(sv_2mortal(newSViv(*fdp++)));
+ }
+ Inline_Stack_Done;
+}
+#endif /* defined(CMSG_SPACE) && defined(CMSG_LEN) */
+FDPASS
+
my $inline_dir = $ENV{PERL_INLINE_DIRECTORY} //= (
$ENV{XDG_CACHE_HOME} //
( ($ENV{HOME} // '/nonexistent').'/.cache' )
).'/public-inbox/inline-c';
-$set_nodatacow = $vfork_spawn = undef unless -d $inline_dir && -w _;
+$set_nodatacow = $vfork_spawn = $fdpass = undef unless -d $inline_dir && -w _;
if (defined $vfork_spawn) {
# Inline 0.64 or later has locking in multi-process env,
# but we support 0.5 on Debian wheezy
my $f = "$inline_dir/.public-inbox.lock";
open my $fh, '>', $f or die "failed to open $f: $!\n";
flock($fh, LOCK_EX) or die "LOCK_EX failed on $f: $!\n";
- eval 'use Inline C => $vfork_spawn . $set_nodatacow';
+ eval 'use Inline C => $vfork_spawn.$fdpass.$set_nodatacow';
+ # . ', BUILD_NOISY => 1';
my $err = $@;
my $ndc_err;
if ($err && $set_nodatacow) { # missing Linux kernel headers
$ndc_err = $err;
undef $set_nodatacow;
- eval 'use Inline C => $vfork_spawn';
+ eval 'use Inline C => $vfork_spawn . $fdpass';
}
flock($fh, LOCK_UN) or die "LOCK_UN failed on $f: $!\n";
die $err if $err;
};
if ($@) {
warn "Inline::C failed for vfork: $@\n";
- $set_nodatacow = $vfork_spawn = undef;
+ $set_nodatacow = $vfork_spawn = $fdpass = undef;
}
}
*nodatacow_fd = \&PublicInbox::NDC_PP::nodatacow_fd;
*nodatacow_dir = \&PublicInbox::NDC_PP::nodatacow_dir;
}
+
undef $set_nodatacow;
undef $vfork_spawn;
+undef $fdpass;
sub which ($) {
my ($file) = @_;
}
sub popen_rd {
- my ($cmd, $env, $opts) = @_;
+ my ($cmd, $env, $opt) = @_;
pipe(my ($r, $w)) or die "pipe: $!\n";
- $opts ||= {};
- $opts->{1} = fileno($w);
- my $pid = spawn($cmd, $env, $opts);
+ $opt ||= {};
+ $opt->{1} = fileno($w);
+ my $pid = spawn($cmd, $env, $opt);
return ($r, $pid) if wantarray;
my $ret = gensym;
- tie *$ret, 'PublicInbox::ProcessPipe', $pid, $r;
+ tie *$ret, 'PublicInbox::ProcessPipe', $pid, $r, @$opt{qw(cb arg)};
$ret;
}
+# Spawns @$cmd via spawn() and waits for it to finish.
+# Arguments: ($cmd, $env, $rdr) -- all handed to spawn() unchanged;
+# $env and $rdr are optional.
+# Dies if waitpid() does not return the spawned pid, or if the child's
+# wait status ($?) is non-zero.
+sub run_die ($;$$) {
+ my ($cmd, $env, $rdr) = @_;
+ my $pid = spawn($cmd, $env, $rdr);
+ waitpid($pid, 0) == $pid or die "@$cmd did not finish";
+ $? == 0 or die "@$cmd failed: \$?=$?\n";
+}
+
1;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Pure-Perl implementation of "spawn". This can't take advantage
# This license differs from the rest of public-inbox
#
# This module is Copyright (c) 2005 Six Apart, Ltd.
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
#
# All rights reserved.
#
EPOLLIN EPOLLOUT EPOLLET
EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD
EPOLLONESHOT EPOLLEXCLUSIVE
- signalfd $SFD_NONBLOCK);
+ signalfd SFD_NONBLOCK);
our %EXPORT_TAGS = (epoll => [qw(epoll_ctl epoll_create epoll_wait
EPOLLIN EPOLLOUT
EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD
);
my $SFD_CLOEXEC = 02000000; # Perl does not expose O_CLOEXEC
-our $SFD_NONBLOCK = O_NONBLOCK;
+sub SFD_NONBLOCK () { O_NONBLOCK }
our $no_deprecated = 0;
if ($^O eq "linux") {
# epoll_wait wrapper
# ARGS: (epfd, maxevents, timeout (milliseconds), arrayref)
# arrayref: values modified to be [$fd, $event]
-our $epoll_wait_events;
+our $epoll_wait_events = '';
our $epoll_wait_size = 0;
sub epoll_wait_mod4 {
- # resize our static buffer if requested size is bigger than we've ever done
- if ($_[1] > $epoll_wait_size) {
- $epoll_wait_size = $_[1];
- $epoll_wait_events = "\0" x 12 x $epoll_wait_size;
- }
- my $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0);
- for (0..$ct-1) {
- @{$_[3]->[$_]}[1,0] = unpack("LL", substr($epoll_wait_events, 12*$_, 8));
- }
- return $ct;
+ my ($epfd, $maxevents, $timeout_msec, $events) = @_;
+ # resize our static buffer if maxevents bigger than we've ever done
+ if ($maxevents > $epoll_wait_size) {
+ $epoll_wait_size = $maxevents;
+ vec($epoll_wait_events, $maxevents * 12 * 8 - 1, 1) = 0;
+ }
+ @$events = ();
+ my $ct = syscall($SYS_epoll_wait, $epfd, $epoll_wait_events,
+ $maxevents, $timeout_msec);
+ for (0..$ct - 1) {
+ # 12-byte struct epoll_event
+ # 4 bytes uint32_t events mask (skipped, useless to us)
+ # 8 bytes: epoll_data_t union (first 4 bytes are the fd)
+ # So we skip the first 4 bytes and take the middle 4:
+ $events->[$_] = unpack('L', substr($epoll_wait_events,
+ 12 * $_ + 4, 4));
+ }
}
sub epoll_wait_mod8 {
- # resize our static buffer if requested size is bigger than we've ever done
- if ($_[1] > $epoll_wait_size) {
- $epoll_wait_size = $_[1];
- $epoll_wait_events = "\0" x 16 x $epoll_wait_size;
- }
- my $ct;
- if ($no_deprecated) {
- $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0, undef);
- } else {
- $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0);
- }
- for (0..$ct-1) {
- # 16 byte epoll_event structs, with format:
- # 4 byte mask [idx 1]
- # 4 byte padding (we put it into idx 2, useless)
- # 8 byte data (first 4 bytes are fd, into idx 0)
- @{$_[3]->[$_]}[1,2,0] = unpack("LLL", substr($epoll_wait_events, 16*$_, 12));
- }
- return $ct;
+ my ($epfd, $maxevents, $timeout_msec, $events) = @_;
+
+ # resize our static buffer if maxevents bigger than we've ever done
+ if ($maxevents > $epoll_wait_size) {
+ $epoll_wait_size = $maxevents;
+ vec($epoll_wait_events, $maxevents * 16 * 8 - 1, 1) = 0;
+ }
+ @$events = ();
+ my $ct = syscall($SYS_epoll_wait, $epfd, $epoll_wait_events,
+ $maxevents, $timeout_msec,
+ $no_deprecated ? undef : ());
+ for (0..$ct - 1) {
+ # 16-byte struct epoll_event
+ # 4 bytes uint32_t events mask (skipped, useless to us)
+ # 4 bytes padding (skipped, useless)
+ # 8 bytes epoll_data_t union (first 4 bytes are the fd)
+ # So skip the first 8 bytes, take 4, and ignore the last 4:
+ $events->[$_] = unpack('L', substr($epoll_wait_events,
+ 16 * $_ + 8, 4));
+ }
}
sub signalfd ($$$) {
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# IO::Socket::SSL support code
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# internal APIs used only for tests
use POSIX qw(dup2);
use IO::Socket::INET;
our @EXPORT = qw(tmpdir tcp_server tcp_connect require_git require_mods
- run_script start_script key2sub xsys xqx eml_load tick
+ run_script start_script key2sub xsys xsys_e xqx eml_load tick
have_xapian_compact);
+# alias selected Test::More functions into this package at compile time,
+# so the paren-less calls further down (e.g. `BAIL_OUT "..."`,
+# `plan skip_all => ...`) are parsed as function calls
+BEGIN {
+ require Test::More;
+ *BAIL_OUT = \&Test::More::BAIL_OUT;
+ *plan = \&Test::More::plan;
+ *skip = \&Test::More::skip;
+}
sub eml_load ($) {
my ($path, $cb) = @_;
Type => Socket::SOCK_STREAM(),
Listen => 1024,
Blocking => 0,
- ) or Test::More::BAIL_OUT("failed to create TCP server: $!");
+ ) or BAIL_OUT "failed to create TCP server: $!";
}
sub tcp_connect {
Type => Socket::SOCK_STREAM(),
PeerAddr => $addr,
%opt,
- ) or Test::More::BAIL_OUT("failed to connect to $addr: $!");
+ ) or BAIL_OUT "failed to connect to $addr: $!";
$s->autoflush(1);
$s;
}
my $cur_int = ($cur_maj << 24) | ($cur_min << 16) | ($cur_sub // 0);
if ($cur_int < $req_int) {
return 0 if $maybe;
- Test::More::plan(skip_all =>
- "git $req+ required, have $cur_maj.$cur_min.$cur_sub");
+ plan skip_all =>
+ "git $req+ required, have $cur_maj.$cur_min.$cur_sub";
}
1;
}
my $maybe = pop @mods if $mods[-1] =~ /\A[0-9]+\z/;
my @need;
while (my $mod = shift(@mods)) {
+ if ($mod eq 'json') {
+ $mod = 'Cpanel::JSON::XS||JSON::MaybeXS||'.
+ 'JSON||JSON::PP'
+ }
if ($mod eq 'Search::Xapian') {
if (eval { require PublicInbox::Search } &&
PublicInbox::Search::load_xapian()) {
}
return unless @need;
my $m = join(', ', @need)." missing for $0";
- Test::More::skip($m, $maybe) if $maybe;
- Test::More::plan(skip_all => $m)
+ skip($m, $maybe) if $maybe;
+ plan(skip_all => $m)
}
sub key2script ($) {
for (my $fd = 0; $fd <= $#io_mode; $fd++) {
my $fh = $fhref->[$fd] or next;
my ($oldfh, $mode) = @{$io_mode[$fd]};
- open my $orig, $mode, $oldfh or die "$$oldfh $mode stash: $!";
+ open my $orig, $mode, $oldfh or die "$oldfh $mode stash: $!";
$orig_io->[$fd] = $orig;
- open $oldfh, $mode, $fh or die "$$oldfh $mode redirect: $!";
+ open $oldfh, $mode, $fh or die "$oldfh $mode redirect: $!";
}
$orig_io;
}
die RUN_SCRIPT_EXIT;
}
-my %cached_scripts;
+our %cached_scripts;
sub key2sub ($) {
my ($key) = @_;
$cached_scripts{$key} //= do {
my $orig_io = _prepare_redirects($fhref);
_run_sub($sub, $key, \@argv);
_undo_redirects($orig_io);
+ select STDOUT;
}
# slurp the redirects back into user-supplied strings
$? >> 8
}
+# Like xsys(), but aborts the whole test run on failure, similar to
+# "/bin/sh -e".  Accepts the same arguments as xsys(); the command may be
+# given as an array ref in $_[0] or as a flat list.
+# Returns true when the command exited with status 0.
+sub xsys_e { # like "/bin/sh -e"
+	# The whole message must sit inside the call parentheses: Perl parses
+	# `BAIL_OUT (...) . "..."` as `BAIL_OUT(...)` followed by a
+	# concatenation on its return value, which would silently drop the
+	# " failed $?=" suffix from the bail-out reason.
+	xsys(@_) == 0 or
+		BAIL_OUT((ref $_[0] ? "@{$_[0]}" : "@_") . " failed \$?=$?");
+}
+
# like `backtick` or qx{} op, but uses spawn() for env/rdr + vfork
sub xqx {
my ($cmd, $env, $rdr) = @_;
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
package PublicInbox::Tmpfile;
use strict;
-use warnings;
-use base qw(Exporter);
+use v5.10.1;
+use parent qw(Exporter);
our @EXPORT = qw(tmpfile);
use Fcntl qw(:DEFAULT);
use Errno qw(EEXIST);
# unlinked filename which makes sense when viewed with lsof
# (at least on Linux)
# And if we ever stop caring to have debuggable filenames, O_TMPFILE :)
+#
+# This is also for Perl <5.32 which lacks: open(..., '+>>', undef)
+# <https://rt.perl.org/Ticket/Display.html?id=134221>
sub tmpfile ($;$$) {
my ($id, $sock, $append) = @_;
if (defined $sock) {
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# cf. RFC 5092, which the `URI' package doesn't support
#
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Standalone PSGI app to handle HTTP(s) unsubscribe links generated
use Crypt::CBC;
use Plack::Util;
use MIME::Base64 qw(decode_base64url);
-my $CODE_URL = 'https://public-inbox.org/public-inbox.git';
+my @CODE_URL = qw(http://ou63pmih66umazou.onion/public-inbox.git
+ https://public-inbox.org/public-inbox.git);
my @CT_HTML = ('Content-Type', 'text/html; charset=UTF-8');
sub new {
my $unsubscribe = $opt{unsubscribe} or
die "`unsubscribe' callback not given\n";
+ my $code_url = $opt{code_url} || \@CODE_URL;
+ $code_url = [ $code_url ] if ref($code_url) ne 'ARRAY';
bless {
- pi_config => $opt{pi_config}, # PublicInbox::Config
+ pi_cfg => $opt{pi_config}, # PublicInbox::Config
owner_email => $opt{owner_email},
cipher => $cipher,
unsubscribe => $unsubscribe,
contact => qq(<a\nhref="mailto:$e">$e</a>),
- code_url => $opt{code_url} || $CODE_URL,
+ code_url => $code_url,
confirm => $opt{confirm},
}, $class;
}
"<html><head><title>$title</title></head><body><pre>".
join("\n", "<b>$title</b>\n", @body) . '</pre><hr>'.
"<pre>This page is available under AGPL-3.0+\n" .
- "git clone $self->{code_url}\n" .
+ join('', map { "git clone $_\n" } @{$self->{code_url}}) .
qq(Email $self->{contact} if you have any questions).
'</pre></body></html>'
] ];
my $archive_url = $self->{archive_urls}->{$list_addr};
unless ($archive_url) {
- if (my $config = $self->{pi_config}) {
+ if (my $cfg = $self->{pi_cfg}) {
# PublicInbox::Config::lookup
- my $ibx = $config->lookup($list_addr);
+ my $ibx = $cfg->lookup($list_addr);
# PublicInbox::Inbox::base_url
$archive_url = $ibx->base_url if $ibx;
}
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Self-updating module containing a sample CSS for client-side
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# This interface wraps and mimics PublicInbox::Import
use PublicInbox::InboxWritable;
use PublicInbox::OverIdx;
use PublicInbox::Msgmap;
-use PublicInbox::Spawn qw(spawn popen_rd);
-use PublicInbox::SearchIdx qw(log2stack crlf_adjust is_ancestor check_size);
+use PublicInbox::Spawn qw(spawn popen_rd run_die);
+use PublicInbox::Search;
+use PublicInbox::SearchIdx qw(log2stack is_ancestor check_size is_bad_blob);
use IO::Handle; # ->autoflush
use File::Temp ();
my $OID = qr/[a-f0-9]{40,}/;
# an estimate of the post-packed size to the raw uncompressed size
-my $PACKING_FACTOR = 0.4;
+our $PACKING_FACTOR = 0.4;
# SATA storage lags behind what CPUs are capable of, so relying on
# nproc(1) can be misleading and having extra Xapian shards is a
+# returns the number of existing shards; 0 when {ibx} has no search object
sub count_shards ($) {
my ($self) = @_;
- # always load existing shards in case core count changes:
- # Also, shard count may change while -watch is running
- my $srch = $self->{ibx}->search or return 0;
- delete $self->{ibx}->{search};
- $srch->{nshard} // 0
+ if (my $ibx = $self->{ibx}) {
+ # always load existing shards in case core count changes:
+ # Also, shard count may change while -watch is running
+ my $srch = $ibx->search or return 0;
+ # forget the cached search object once its shard count is read
+ # (presumably so a later ->search call reloads it; confirm)
+ delete $ibx->{search};
+ $srch->{nshard} // 0
+ } else { # ExtSearchIdx
+ # no {ibx}: count the flattened shard list once and memoize it
+ $self->{nshard} ||= scalar($self->xdb_shards_flat);
+ }
}
sub new {
die "$dir does not exist\n";
}
}
- $v2ibx->umask_prepare;
-
my $xpfx = "$dir/xap" . PublicInbox::Search::SCHEMA_VERSION;
my $self = {
ibx => $v2ibx,
}
$self->idx_init;
$self->{mm}->skip_artnum($skip_artnum) if defined $skip_artnum;
- my $epoch_max = -1;
- git_dir_latest($self, \$epoch_max);
- if (defined $skip_epoch && $epoch_max == -1) {
- $epoch_max = $skip_epoch;
- }
- $self->git_init($epoch_max >= 0 ? $epoch_max : 0);
+ my $max = $self->{ibx}->max_git_epoch;
+ $max = $skip_epoch if (defined($skip_epoch) && !defined($max));
+ $self->git_init($max // 0);
$self->done;
}
$self->{ibx}->with_umask(\&_add, $self, $eml, $check_cb);
}
+# Maps an article number to the shard object responsible for it:
+# shards are selected round-robin by ($num modulo shard count).
+sub idx_shard ($$) {
+	my ($self, $num) = @_;
+	my $shards = $self->{idx_shards};
+	my $slot = $num % @$shards;
+	$shards->[$slot];
+}
+
# indexes a message, returns true if checkpointing is needed
-sub do_idx ($$$$) {
- my ($self, $msgref, $mime, $smsg) = @_;
- $smsg->{bytes} = $smsg->{raw_bytes} + crlf_adjust($$msgref);
- $self->{oidx}->add_overview($mime, $smsg);
- my $idx = idx_shard($self, $smsg->{num} % $self->{shards});
- $idx->index_raw($msgref, $mime, $smsg);
- my $n = $self->{transact_bytes} += $smsg->{raw_bytes};
+sub do_idx ($$$) {
+ my ($self, $eml, $smsg) = @_;
+ # the overview is always updated; a shard is only touched when
+ # {-need_xapian} is set
+ $self->{oidx}->add_overview($eml, $smsg);
+ if ($self->{-need_xapian}) {
+ my $idx = idx_shard($self, $smsg->{num});
+ $idx->index_eml($eml, $smsg);
+ }
+ # accumulate the size of the current transaction; $smsg->{bytes} must
+ # already be set by the caller
+ my $n = $self->{transact_bytes} += $smsg->{bytes};
$n >= $self->{batch_bytes};
}
$cmt = $im->get_mark($cmt);
$self->{last_commit}->[$self->{epoch_max}] = $cmt;
- my $msgref = delete $smsg->{-raw_email};
- if (do_idx($self, $msgref, $mime, $smsg)) {
+ if (do_idx($self, $mime, $smsg)) {
$self->checkpoint;
}
($num, $mid0);
}
-sub idx_shard {
- my ($self, $shard_i) = @_;
- $self->{idx_shards}->[$shard_i];
-}
-
sub _idx_init { # with_umask callback
my ($self, $opt) = @_;
$self->lock_acquire unless $opt && $opt->{-skip_lock};
$self->{shards} = $nshards if $nshards && $nshards != $self->{shards};
$self->{batch_bytes} = $opt->{batch_size} //
$PublicInbox::SearchIdx::BATCH_BYTES;
- $self->{batch_bytes} *= $self->{shards} if $self->{parallel};
# need to create all shards before initializing msgmap FD
# idx_shards must be visible to all forked processes
my $max = $self->{shards} - 1;
my $idx = $self->{idx_shards} = [];
push @$idx, PublicInbox::SearchIdxShard->new($self, $_) for (0..$max);
+ $self->{-need_xapian} = $idx->[0]->need_xapian;
+
+ # SearchIdxShard may do their own flushing, so don't scale
+ # until after forking
+ $self->{batch_bytes} *= $self->{shards} if $self->{parallel};
+
+ my $ibx = $self->{ibx} or return; # ExtSearchIdx
# Now that all subprocesses are up, we can open the FDs
# for SQLite:
my $mm = $self->{mm} = PublicInbox::Msgmap->new_file(
- "$self->{ibx}->{inboxdir}/msgmap.sqlite3",
- $self->{ibx}->{-no_fsync} ? 2 : 1);
+ "$ibx->{inboxdir}/msgmap.sqlite3",
+ $ibx->{-no_fsync} ? 2 : 1);
$mm->{dbh}->begin_work;
}
+# Turns off parallel indexing when the index level is "basic".
+# An undefined $indexlevel is treated as "full" and leaves {parallel} as is.
+sub parallel_init ($$) {
+	my ($self, $indexlevel) = @_;
+	my $level = $indexlevel // 'full';
+	$self->{parallel} = 0 if $level eq 'basic';
+}
+
# idempotent
sub idx_init {
my ($self, $opt) = @_;
delete @$ibx{qw(mm search)};
$ibx->git->cleanup;
- $self->{parallel} = 0 if ($ibx->{indexlevel}//'') eq 'basic';
- if ($self->{parallel}) {
- pipe(my ($r, $w)) or die "pipe failed: $!";
- # pipe for barrier notifications doesn't need to be big,
- # 1031: F_SETPIPE_SZ
- fcntl($w, 1031, 4096) if $^O eq 'linux';
- $self->{bnote} = [ $r, $w ];
- $w->autoflush(1);
- }
-
- $ibx->umask_prepare;
+ parallel_init($self, $ibx->{indexlevel});
$ibx->with_umask(\&_idx_init, $self, $opt);
}
sub _replace_oids ($$$) {
my ($self, $mime, $replace_map) = @_;
$self->done;
- my $pfx = "$self->{ibx}->{inboxdir}/git";
+ my $ibx = $self->{ibx};
+ my $pfx = "$ibx->{inboxdir}/git";
my $rewrites = []; # epoch => commit
- my $max = $self->{epoch_max};
-
- unless (defined($max)) {
- defined(my $latest = git_dir_latest($self, \$max)) or return;
- $self->{epoch_max} = $max;
- }
+ my $max = $self->{epoch_max} //= $ibx->max_git_epoch // return;
foreach my $i (0..$max) {
my $git_dir = "$pfx/$i.git";
} else { # ->purge or ->remove
$self->{mm}->num_delete($num);
}
- unindex_oid_remote($self, $oid, $mid);
+ unindex_oid_aux($self, $oid, $mid);
}
}
my ($self, $raw) = @_;
# grab the expected OID we have to reindex:
pipe(my($in, $w)) or die "pipe: $!";
- my $git_dir = $self->{ibx}->git->{git_dir};
+ my $git_dir = $self->git->{git_dir};
my $cmd = ['git', "--git-dir=$git_dir", qw(hash-object --stdin)];
my $r = popen_rd($cmd, undef, { 0 => $in });
print $w $$raw or die "print \$w: $!";
}
# make sure we really got the OID:
- my ($blob, $type, $bytes) = $self->{ibx}->git->check($expect_oid);
+ my ($blob, $type, $bytes) = $self->git->check($expect_oid);
$blob eq $expect_oid or die "BUG: $expect_oid not found after replace";
# don't leak FDs to Xapian:
- $self->{ibx}->git->cleanup;
+ $self->git->cleanup;
# reindex modified messages:
for my $smsg (@$need_reindex) {
my $new_smsg = bless {
blob => $blob,
- raw_bytes => $bytes,
num => $smsg->{num},
mid => $smsg->{mid},
}, 'PublicInbox::Smsg';
my $sync = { autime => $smsg->{ds}, cotime => $smsg->{ts} };
$new_smsg->populate($new_mime, $sync);
- do_idx($self, \$raw, $new_mime, $new_smsg);
+ $new_smsg->set_bytes($raw, $bytes);
+ do_idx($self, $new_mime, $new_smsg);
}
$rewritten->{rewrites};
}
$self->{mm}->last_commit_xap($v, $i, $cmt);
}
-sub set_last_commits ($) {
+sub set_last_commits ($) { # this is NOT for ExtSearchIdx
my ($self) = @_;
defined(my $epoch_max = $self->{epoch_max}) or return;
my $last_commit = $self->{last_commit};
}
}
-sub barrier_init {
- my ($self, $n) = @_;
- $self->{bnote} or return;
- --$n;
- my $barrier = { map { $_ => 1 } (0..$n) };
-}
-
-sub barrier_wait {
- my ($self, $barrier) = @_;
- my $bnote = $self->{bnote} or return;
- my $r = $bnote->[0];
- while (scalar keys %$barrier) {
- defined(my $l = readline($r)) or die "EOF on barrier_wait: $!";
- $l =~ /\Abarrier (\d+)/ or die "bad line on barrier_wait: $l";
- delete $barrier->{$1} or die "bad shard[$1] on barrier wait";
- }
-}
-
# public
sub checkpoint ($;$) {
my ($self, $wait) = @_;
}
my $shards = $self->{idx_shards};
if ($shards) {
- my $dbh = $self->{mm}->{dbh};
+ my $mm = $self->{mm};
+ my $dbh = $mm->{dbh} if $mm;
# SQLite msgmap data is second in importance
- $dbh->commit;
+ $dbh->commit if $dbh;
# SQLite overview is third
$self->{oidx}->commit_lazy;
# Now deal with Xapian
- if ($wait) {
- my $barrier = $self->barrier_init(scalar @$shards);
- # each shard needs to issue a barrier command
- $_->shard_barrier for @$shards;
+ # start commit_txn_lazy asynchronously on all parallel shards
+ # (non-parallel waits here)
+ $_->ipc_do('commit_txn_lazy') for @$shards;
+
+ # transactions started on parallel shards,
+ # wait for them by issuing an echo command (echo can only
+ # run after commit_txn_lazy is done)
+ if ($wait && $self->{parallel}) {
+ my $i = 0;
+ for my $shard (@$shards) {
+ my $echo = $shard->ipc_do('echo', $i);
+ $echo == $i or die <<"";
+shard[$i] bad echo:$echo != $i waiting for txn commit
+
+ ++$i;
+ }
+ }
- # wait for each Xapian shard
- $self->barrier_wait($barrier);
- } else {
- $_->shard_commit for @$shards;
+ my $midx = $self->{midx}; # misc index
+ if ($midx) {
+ $midx->commit_txn;
+ $PublicInbox::Search::X{CLOEXEC_UNSET} and
+ $self->git->cleanup;
}
# last_commit is special, don't commit these until
- # remote shards are done:
- $dbh->begin_work;
+ # Xapian shards are done:
+ $dbh->begin_work if $dbh;
set_last_commits($self);
- $dbh->commit;
-
- $dbh->begin_work;
+ if ($dbh) {
+ $dbh->commit;
+ $dbh->begin_work;
+ }
+ if ($midx) {
+ $self->git->batch_prepare;
+ $midx->begin_txn;
+ }
}
$self->{total_bytes} += $self->{transact_bytes};
$self->{transact_bytes} = 0;
}
eval { $self->{oidx}->dbh_close };
$err .= "over close: $@\n" if $@;
- delete $self->{bnote};
+ delete $self->{midx};
my $nbytes = $self->{total_bytes};
$self->{total_bytes} = 0;
$self->lock_release(!!$nbytes) if $shards;
- $self->{ibx}->git->cleanup;
+ $self->git->cleanup;
die $err if $err;
}
+# Replaces "$info_dir/alternates" with the strings in @$out.
+# The content is written to a temporary file in the same directory,
+# given permission bits $mode, and then renamed over the destination.
+# Dies with a message naming the failed step on any error.
+sub write_alternates ($$$) {
+	my ($info_dir, $mode, $out) = @_;
+	my $alt = "$info_dir/alternates";
+	my $tmpfh = File::Temp->new(DIR => $info_dir,
+				TEMPLATE => 'alt-XXXXXXXX');
+	my $tmp = $tmpfh->filename;
+	print { $tmpfh } @$out or die "print $tmp: $!\n";
+	chmod($mode, $tmpfh) or die "fchmod $tmp: $!\n";
+	close($tmpfh) or die "close $tmp $!\n";
+	rename($tmp, $alt) or die "rename $tmp => $alt: $!\n";
+	# the temporary name is gone after rename; nothing left to unlink
+	$tmpfh->unlink_on_destroy(0);
+}
+
sub fill_alternates ($$) {
my ($self, $epoch) = @_;
}
}
return unless $new;
-
- my $fh = File::Temp->new(TEMPLATE => 'alt-XXXXXXXX', DIR => $info_dir);
- my $tmp = $fh->filename;
- print $fh join("\n", sort { $alt{$b} <=> $alt{$a} } keys %alt), "\n"
- or die "print $tmp: $!\n";
- chmod($mode, $fh) or die "fchmod $tmp: $!\n";
- close $fh or die "close $tmp $!\n";
- rename($tmp, $alt) or die "rename $tmp => $alt: $!\n";
- $fh->unlink_on_destroy(0);
+ write_alternates($info_dir, $mode,
+ [join("\n", sort { $alt{$b} <=> $alt{$a} } keys %alt), "\n"]);
}
+# sets up the git repository for $epoch under {ibx}->{inboxdir}/git and
+# returns its path
sub git_init {
my ($self, $epoch) = @_;
my $git_dir = "$self->{ibx}->{inboxdir}/git/$epoch.git";
PublicInbox::Import::init_bare($git_dir);
- my @cmd = (qw/git config/, "--file=$git_dir/config",
- 'include.path', '../../all.git/config');
- PublicInbox::Import::run_die(\@cmd);
+ # make the epoch's config include all.git/config via a relative
+ # include.path
+ run_die([qw(git config), "--file=$git_dir/config",
+ qw(include.path ../../all.git/config)]);
fill_alternates($self, $epoch);
$git_dir
}
-sub git_dir_latest {
- my ($self, $max) = @_;
- $$max = -1;
- my $pfx = "$self->{ibx}->{inboxdir}/git";
- return unless -d $pfx;
- my $latest;
- opendir my $dh, $pfx or die "opendir $pfx: $!\n";
- while (defined(my $git_dir = readdir($dh))) {
- $git_dir =~ m!\A([0-9]+)\.git\z! or next;
- if ($1 > $$max) {
- $$max = $1;
- $latest = "$pfx/$git_dir";
- }
- }
- $latest;
-}
-
sub importer {
my ($self) = @_;
my $im = $self->{im};
}
my $epoch = 0;
my $max;
- my $latest = git_dir_latest($self, \$max);
+ my $latest = $self->{ibx}->git_dir_latest(\$max);
if (defined $latest) {
my $git = PublicInbox::Git->new($latest);
my $packed_bytes = $git->packed_bytes;
+# child-side fork hook (NOTE(review): going by the name; callers are not
+# visible here): notifies every existing shard and the importer, if any
sub atfork_child {
my ($self) = @_;
- if (my $shards = $self->{idx_shards}) {
- $_->atfork_child foreach @$shards;
+ if (my $older_siblings = $self->{idx_shards}) {
+ $_->ipc_sibling_atfork_child for @$older_siblings;
}
if (my $im = $self->{im}) {
$im->atfork_child;
}
- die "unexpected mm" if $self->{mm};
- close $self->{bnote}->[0] or die "close bnote[0]: $!\n";
- $self->{bnote}->[1];
+ # {mm} must not be set at this point; treat it as a programming error
+ die "BUG: unexpected mm" if $self->{mm};
}
sub reindex_checkpoint ($$) {
my ($self, $sync) = @_;
- $self->{ibx}->git->cleanup; # *async_wait
+ # finish all in-flight async git requests and record the last commit
+ # before the need_checkpoint flag is cleared
+ $self->git->async_wait_all;
+ $self->update_last_commit($sync);
${$sync->{need_checkpoint}} = 0;
my $mm_tmp = $sync->{mm_tmp};
$mm_tmp->atfork_prepare if $mm_tmp;
- $self->done; # release lock
+ die 'BUG: {im} during reindex' if $self->{im};
+ # with {ibx_map} set and no {checkpoint_unlocks} request, commit in
+ # place; otherwise finish completely, which releases the lock
+ if ($self->{ibx_map} && !$sync->{checkpoint_unlocks}) {
+ checkpoint($self, 1); # no need to release lock on pure index
+ } else {
+ $self->done; # release lock
+ }
- if (my $pr = $sync->{-opt}->{-progress}) {
+ # progress is only reported when a {-regen_fmt} format string exists
+ if (my $pr = $sync->{-regen_fmt} ? $sync->{-opt}->{-progress} : undef) {
$pr->(sprintf($sync->{-regen_fmt}, ${$sync->{nr}}));
}
# allow -watch or -mda to write...
$self->idx_init($sync->{-opt}); # reacquire lock
+ # schedule the next check {check_intvl} from now, when configured
+ if (my $intvl = $sync->{check_intvl}) { # eidx
+ $sync->{next_check} = PublicInbox::DS::now() + $intvl;
+ }
$mm_tmp->atfork_parent if $mm_tmp;
}
+# Bookkeeping shared by the index/unindex callbacks: bumps the writer's
+# {nidx} counter and, when the request carries {cur_cmt}, stores it in the
+# scalar referenced by {latest_cmt}.
+# $is_index is true when called from an indexing path, where a missing
+# {cur_cmt} is a bug; unindexing leftovers does not set {cur_cmt}.
+sub index_finalize ($$) {
+	my ($req, $is_index) = @_;
+	$req->{self}->{nidx}++;
+	my $cmt = $req->{cur_cmt};
+	if (defined $cmt) {
+		${$req->{latest_cmt}} = $cmt;
+	} elsif ($is_index) {
+		die 'BUG: {cur_cmt} missing';
+	}
+}
+
sub index_oid { # cat_async callback
my ($bref, $oid, $type, $size, $arg) = @_;
- return if $size == 0; # purged
+ is_bad_blob($oid, $type, $size, $arg->{oid}) and
+ return index_finalize($arg, 1); # size == 0 purged returns here
+ my $self = $arg->{self};
+ local $self->{current_info} = "$self->{current_info} $oid";
my ($num, $mid0);
my $eml = PublicInbox::Eml->new($$bref);
my $mids = mids($eml);
my $chash = content_hash($eml);
- my $self = $arg->{v2w};
if (scalar(@$mids) == 0) {
warn "E: $oid has no Message-ID, skipping\n";
}
# {unindexed} is unlikely
- if ((my $unindexed = $arg->{unindexed}) && scalar(@$mids) == 1) {
- $num = delete($unindexed->{$mids->[0]});
+ if (my $unindexed = $arg->{unindexed}) {
+ my $oidbin = pack('H*', $oid);
+ my $u = $unindexed->{$oidbin};
+ ($num, $mid0) = splice(@$u, 0, 2) if $u;
if (defined $num) {
- $mid0 = $mids->[0];
$self->{mm}->mid_set($num, $mid0);
- delete($arg->{unindexed}) if !keys(%$unindexed);
+ if (scalar(@$u) == 0) { # done with current OID
+ delete $unindexed->{$oidbin};
+ delete($arg->{unindexed}) if !keys(%$unindexed);
+ }
}
}
if (!defined($num)) { # reuse if reindexing (or duplicates)
}
++${$arg->{nr}};
my $smsg = bless {
- raw_bytes => $size,
num => $num,
blob => $oid,
mid => $mid0,
}, 'PublicInbox::Smsg';
$smsg->populate($eml, $arg);
- if (do_idx($self, $bref, $eml, $smsg)) {
+ $smsg->set_bytes($$bref, $size);
+ if (do_idx($self, $eml, $smsg)) {
${$arg->{need_checkpoint}} = 1;
}
+ index_finalize($arg, 1);
}
# only update last_commit for $i on reindex iff newer than current
-sub update_last_commit ($$$$) {
- my ($self, $git, $i, $cmt) = @_;
- my $last = last_epoch_commit($self, $i);
- if (defined $last && is_ancestor($git, $last, $cmt)) {
- my @cmd = (qw(rev-list --count), "$last..$cmt");
- chomp(my $n = $git->qx(@cmd));
+sub update_last_commit {
+ my ($self, $sync, $stk) = @_;
+ # nothing to do unless a unit is being processed
+ my $unit = $sync->{unit} // return;
+ # prefer the stack's {latest_cmt} when a stack is given, otherwise
+ # use the commit tracked through $sync->{latest_cmt}
+ my $latest_cmt = $stk ? $stk->{latest_cmt} : ${$sync->{latest_cmt}};
+ defined($latest_cmt) or return;
+ my $last = last_epoch_commit($self, $unit->{epoch});
+ if (defined $last && is_ancestor($self->git, $last, $latest_cmt)) {
+ my @cmd = (qw(rev-list --count), "$last..$latest_cmt");
+ chomp(my $n = $unit->{git}->qx(@cmd));
+ # zero commits between $last and $latest_cmt: keep stored value
return if $n ne '' && $n == 0;
}
- last_epoch_commit($self, $i, $cmt);
+ last_epoch_commit($self, $unit->{epoch}, $latest_cmt);
}
-sub git_dir_n ($$) { "$_[0]->{ibx}->{inboxdir}/git/$_[1].git" }
-
-sub last_commits ($$) {
- my ($self, $epoch_max) = @_;
+# returns an array ref indexed by epoch number holding the result of
+# last_epoch_commit() for every epoch from 0 to $sync->{epoch_max}
+sub last_commits {
+ my ($self, $sync) = @_;
my $heads = [];
- for (my $i = $epoch_max; $i >= 0; $i--) {
+ for (my $i = $sync->{epoch_max}; $i >= 0; $i--) {
$heads->[$i] = last_epoch_commit($self, $i);
}
$heads;
}
# returns a revision range for git-log(1)
-sub log_range ($$$$$) {
- my ($self, $sync, $git, $i, $tip) = @_;
+sub log_range ($$$) {
+ my ($sync, $unit, $tip) = @_;
my $opt = $sync->{-opt};
my $pr = $opt->{-progress} if (($opt->{verbose} || 0) > 1);
+ my $i = $unit->{epoch};
my $cur = $sync->{ranges}->[$i] or do {
$pr->("$i.git indexing all of $tip\n") if $pr;
return $tip; # all of it
my $range = "$cur..$tip";
$pr->("$i.git checking contiguity... ") if $pr;
- if (is_ancestor($git, $cur, $tip)) { # common case
+ my $git = $unit->{git};
+ if (is_ancestor($sync->{self}->git, $cur, $tip)) { # common case
$pr->("OK\n") if $pr;
my $n = $git->qx(qw(rev-list --count), $range);
chomp($n);
warn "discarding history at $cur\n";
}
warn <<"";
-reindexing $git->{git_dir} starting at
-$range
-
- $sync->{unindex_range}->{$i} = "$base..$cur";
+reindexing $git->{git_dir}
+starting at $range
+
+ # $cur^0 may no longer exist if pruned by git
+ if ($git->qx(qw(rev-parse -q --verify), "$cur^0")) {
+ $unit->{unindex_range} = "$base..$cur";
+ } elsif ($base && $git->qx(qw(rev-parse -q --verify), $base)) {
+ $unit->{unindex_range} = "$base..";
+ } else {
+ warn "W: unable to unindex before $range\n";
+ }
}
$range;
}
-sub sync_prepare ($$$) {
- my ($self, $sync, $epoch_max) = @_;
+# Highest article number known so far, taken from the high-water mark of
+# {mm}.  Overridden by ExtSearchIdx.
+sub artnum_max {
+	my ($self) = @_;
+	$self->{mm}->num_highwater;
+}
+
+sub sync_prepare ($$) {
+ my ($self, $sync) = @_;
+ $sync->{ranges} = sync_ranges($self, $sync);
my $pr = $sync->{-opt}->{-progress};
my $regen_max = 0;
- my $head = $self->{ibx}->{ref_head} || 'refs/heads/master';
-
- # reindex stops at the current heads and we later rerun index_sync
- # without {reindex}
- my $reindex_heads = last_commits($self, $epoch_max) if $sync->{reindex};
-
- for (my $i = $epoch_max; $i >= 0; $i--) {
- my $git_dir = git_dir_n($self, $i);
+ my $head = $sync->{ibx}->{ref_head} || 'HEAD';
+ my $pfx;
+ if ($pr) {
+ ($pfx) = ($sync->{ibx}->{inboxdir} =~ m!([^/]+)\z!g);
+ $pfx //= $sync->{ibx}->{inboxdir};
+ }
+
+ my $reindex_heads;
+ if ($self->{ibx_map}) {
+ # ExtSearchIdx won't index messages unless they're in
+ # over.sqlite3 for a given inbox, so don't read beyond
+ # what's in the per-inbox index.
+ $reindex_heads = [];
+ my $v = PublicInbox::Search::SCHEMA_VERSION;
+ my $mm = $sync->{ibx}->mm;
+ for my $i (0..$sync->{epoch_max}) {
+ $reindex_heads->[$i] = $mm->last_commit_xap($v, $i);
+ }
+ } elsif ($sync->{reindex}) { # V2 inbox
+ # reindex stops at the current heads and we later
+ # rerun index_sync without {reindex}
+ $reindex_heads = $self->last_commits($sync);
+ }
+ if ($sync->{max_size} = $sync->{-opt}->{max_size}) {
+ $sync->{index_oid} = $self->can('index_oid');
+ }
+ my $git_pfx = "$sync->{ibx}->{inboxdir}/git";
+ for (my $i = $sync->{epoch_max}; $i >= 0; $i--) {
+ my $git_dir = "$git_pfx/$i.git";
-d $git_dir or next; # missing epochs are fine
my $git = PublicInbox::Git->new($git_dir);
+ my $unit = { git => $git, epoch => $i };
+ my $tip;
if ($reindex_heads) {
- $head = $reindex_heads->[$i] or next;
+ $tip = $head = $reindex_heads->[$i] or next;
+ } else {
+ $tip = $git->qx(qw(rev-parse -q --verify), $head);
+ next if $?; # new repo
+ chomp $tip;
}
- chomp(my $tip = $git->qx(qw(rev-parse -q --verify), $head));
-
- next if $?; # new repo
- my $range = log_range($self, $sync, $git, $i, $tip) or next;
+ my $range = log_range($sync, $unit, $tip) or next;
# can't use 'rev-list --count' if we use --diff-filter
- $pr->("$i.git counting $range ... ") if $pr;
+ $pr->("$pfx $i.git counting $range ... ") if $pr;
# Don't bump num_highwater on --reindex by using {D}.
# We intentionally do NOT use {D} in the non-reindex case
# because we want NNTP article number gaps from unindexed
# messages to show up in mirrors, too.
$sync->{D} //= $sync->{reindex} ? {} : undef; # OID_BIN => NR
- my $stk = log2stack($sync, $git, $range, $self->{ibx});
+ my $stk = log2stack($sync, $git, $range);
+ return 0 if $sync->{quit};
my $nr = $stk ? $stk->num_records : 0;
$pr->("$nr\n") if $pr;
- $sync->{stacks}->[$i] = $stk if $stk;
+ $unit->{stack} = $stk; # may be undef
+ unshift @{$sync->{todo}}, $unit;
$regen_max += $nr;
}
+ return 0 if $sync->{quit};
# XXX this should not happen unless somebody bypasses checks in
# our code and blindly injects "d" file history into git repos
if (my @leftovers = keys %{delete($sync->{D}) // {}}) {
warn('W: unindexing '.scalar(@leftovers)." leftovers\n");
- my $arg = { v2w => $self };
- my $all = $self->{ibx}->git;
+ local $self->{current_info} = 'leftover ';
+ my $unindex_oid = $self->can('unindex_oid');
for my $oid (@leftovers) {
+ last if $sync->{quit};
$oid = unpack('H*', $oid);
- $self->{current_info} = "leftover $oid";
- $all->cat_async($oid, \&unindex_oid, $arg);
+ my $req = { %$sync, oid => $oid };
+ $self->git->cat_async($oid, $unindex_oid, $req);
}
- $all->cat_async_wait;
+ $self->git->cat_async_wait;
}
- if (!$regen_max && !keys(%{$self->{unindex_range}})) {
+ return 0 if $sync->{quit};
+ if (!$regen_max) {
$sync->{-regen_fmt} = "%u/?\n";
return 0;
}
$sync->{-regen_fmt} = "% ${pad}u/$regen_max\n";
$sync->{nr} = \(my $nr = 0);
return -1 if $sync->{reindex};
- $regen_max + $self->{mm}->num_highwater() || 0;
+ $regen_max + $self->artnum_max || 0;
}
-sub unindex_oid_remote ($$$) {
+# removes ($oid, $mid) from the overview, then removes every article
+# number the overview reported from the shard responsible for it
+sub unindex_oid_aux ($$$) {
my ($self, $oid, $mid) = @_;
my @removed = $self->{oidx}->remove_oid($oid, $mid);
+ # shards are only touched when {-need_xapian} is set
+ return unless $self->{-need_xapian};
for my $num (@removed) {
- my $idx = idx_shard($self, $num % $self->{shards});
- $idx->shard_remove($oid, $num);
+ idx_shard($self, $num)->ipc_do('xdb_remove', $num);
}
}
sub unindex_oid ($$;$) { # git->cat_async callback
- my ($bref, $oid, $type, $size, $sync) = @_;
- my $self = $sync->{v2w};
- my $unindexed = $sync->{in_unindex} ? $sync->{unindexed} : undef;
+ my ($bref, $oid, $type, $size, $arg) = @_;
+ is_bad_blob($oid, $type, $size, $arg->{oid}) and
+ return index_finalize($arg, 0);
+ my $self = $arg->{self};
+ local $self->{current_info} = "$self->{current_info} $oid";
+ my $unindexed = $arg->{in_unindex} ? $arg->{unindexed} : undef;
my $mm = $self->{mm};
my $mids = mids(PublicInbox::Eml->new($bref));
undef $$bref;
warn "BUG: multiple articles linked to $oid\n",
join(',',sort keys %gone), "\n";
}
- foreach my $num (keys %gone) {
+ # reuse (num => mid) mapping in ascending numeric order
+ for my $num (sort { $a <=> $b } keys %gone) {
+ $num += 0;
if ($unindexed) {
my $mid0 = $mm->mid_for($num);
- $unindexed->{$mid0} = $num;
+ my $oidbin = pack('H*', $oid);
+ push @{$unindexed->{$oidbin}}, $num, $mid0;
}
$mm->num_delete($num);
}
- unindex_oid_remote($self, $oid, $mid);
+ unindex_oid_aux($self, $oid, $mid);
}
+ index_finalize($arg, 0);
}
+# Shortcut for whatever {ibx}->git returns.
+sub git {
+	my ($self) = @_;
+	$self->{ibx}->git;
+}
+
# this is rare, it only happens when we get discontiguous history in
# a mirror because the source used -purge or -edit
-sub unindex ($$$$) {
- my ($self, $sync, $git, $unindex_range) = @_;
- my $unindexed = $sync->{unindexed} //= {}; # $mid0 => $num
+sub unindex_todo ($$$) {
+ my ($self, $sync, $unit) = @_;
+ # the range is removed from $unit as it is read, so a unit is only
+ # unindexed once; units without a range are a no-op
+ my $unindex_range = delete($unit->{unindex_range}) // return;
+ my $unindexed = $sync->{unindexed} //= {}; # $oidbin => [$num, $mid0]
my $before = scalar keys %$unindexed;
# order does not matter, here:
- my @cmd = qw(log --raw -r
- --no-notes --no-color --no-abbrev --no-renames);
- my $fh = $git->popen(@cmd, $unindex_range);
- my $all = $self->{ibx}->git;
+ my $fh = $unit->{git}->popen(qw(log --raw -r --no-notes --no-color
+ --no-abbrev --no-renames), $unindex_range);
local $sync->{in_unindex} = 1;
+ # looked up through ->can (presumably so a subclass may supply its
+ # own unindex_oid; confirm)
+ my $unindex_oid = $self->can('unindex_oid');
while (<$fh>) {
/\A:\d{6} 100644 $OID ($OID) [AM]\tm$/o or next;
- $all->cat_async($1, \&unindex_oid, $sync);
+ # each request is a shallow copy of $sync plus the expected OID
+ $self->git->cat_async($1, $unindex_oid, { %$sync, oid => $1 });
}
close $fh or die "git log failed: \$?=$?";
- $all->cat_async_wait;
+ $self->git->cat_async_wait;
return unless $sync->{-opt}->{prune};
my $after = scalar keys %$unindexed;
return if $before == $after;
# ensure any blob can not longer be accessed via dumb HTTP
- PublicInbox::Import::run_die(['git', "--git-dir=$git->{git_dir}",
+ run_die(['git', "--git-dir=$unit->{git}->{git_dir}",
qw(-c gc.reflogExpire=now gc --prune=all --quiet)]);
}
-sub sync_ranges ($$$) {
- my ($self, $sync, $epoch_max) = @_;
+sub sync_ranges ($$) {
+ my ($self, $sync) = @_;
my $reindex = $sync->{reindex};
-
- return last_commits($self, $epoch_max) unless $reindex;
+ return $self->last_commits($sync) unless $reindex;
return [] if ref($reindex) ne 'HASH';
my $ranges = $reindex->{from}; # arrayref;
sub index_xap_only { # git->cat_async callback
my ($bref, $oid, $type, $size, $smsg) = @_;
- my $self = $smsg->{v2w};
- my $idx = idx_shard($self, $smsg->{num} % $self->{shards});
- $smsg->{raw_bytes} = $size;
- $idx->index_raw($bref, undef, $smsg);
- $self->{transact_bytes} += $size;
+ # the writer object travels inside the request itself ({self})
+ my $self = $smsg->{self};
+ my $idx = idx_shard($self, $smsg->{num});
+ $idx->index_eml(PublicInbox::Eml->new($bref), $smsg);
+ # count toward the current transaction using the size stored in
+ # $smsg->{bytes}, not the $size reported for the blob
+ $self->{transact_bytes} += $smsg->{bytes};
}
sub index_xap_step ($$$;$) {
"$beg..$end (% $step)\n");
}
for (my $num = $beg; $num <= $end; $num += $step) {
+ last if $sync->{quit};
my $smsg = $ibx->over->get_art($num) or next;
- $smsg->{v2w} = $self;
+ $smsg->{self} = $self;
$ibx->git->cat_async($smsg->{blob}, \&index_xap_only, $smsg);
if ($self->{transact_bytes} >= $self->{batch_bytes}) {
${$sync->{nr}} = $num;
}
}
-sub index_epoch ($$$) {
- my ($self, $sync, $i) = @_;
-
- my $git_dir = git_dir_n($self, $i);
- -d $git_dir or return; # missing epochs are fine
- my $git = PublicInbox::Git->new($git_dir);
- if (my $unindex_range = delete $sync->{unindex_range}->{$i}) { # rare
- unindex($self, $sync, $git, $unindex_range);
- }
- defined(my $stk = $sync->{stacks}->[$i]) or return;
- $sync->{stacks}->[$i] = undef;
- my $all = $self->{ibx}->git;
- while (my ($f, $at, $ct, $oid) = $stk->pop_rec) {
- $self->{current_info} = "$i.git $oid";
+# processes one unit of work ($unit carries {git} and optionally
+# {unindex_range} and {stack}): unindexes the pending range first, then
+# replays every record popped from the stack
+sub index_todo ($$$) {
+ my ($self, $sync, $unit) = @_;
+ return if $sync->{quit};
+ unindex_todo($self, $sync, $unit);
+ # the stack is removed from $unit as it is read; no stack, no work
+ my $stk = delete($unit->{stack}) or return;
+ my $all = $self->git;
+ my $index_oid = $self->can('index_oid');
+ my $unindex_oid = $self->can('unindex_oid');
+ # short label for {current_info}, derived from the repository path
+ my $pfx;
+ if ($unit->{git}->{git_dir} =~ m!/([^/]+)/git/([0-9]+\.git)\z!) {
+ $pfx = "$1 $2"; # v2
+ } else { # v1
+ ($pfx) = ($unit->{git}->{git_dir} =~ m!/([^/]+)\z!g);
+ $pfx //= $unit->{git}->{git_dir};
+ }
+ # these are restored automatically when this sub returns
+ local $self->{current_info} = "$pfx ";
+ local $sync->{latest_cmt} = \(my $latest_cmt);
+ local $sync->{unit} = $unit;
+ while (my ($f, $at, $ct, $oid, $cmt) = $stk->pop_rec) {
+ if ($sync->{quit}) {
+ warn "waiting to quit...\n";
+ $all->async_wait_all;
+ $self->update_last_commit($sync);
+ return;
+ }
+ # per-record request: shallow copy of $sync plus this record's
+ # times, expected OID and commit
+ my $req = {
+ %$sync,
+ autime => $at,
+ cotime => $ct,
+ oid => $oid,
+ cur_cmt => $cmt
+ };
if ($f eq 'm') {
- my $arg = { %$sync, autime => $at, cotime => $ct };
if ($sync->{max_size}) {
- $all->check_async($oid, \&check_size, $arg);
+ $all->check_async($oid, \&check_size, $req);
} else {
- $all->cat_async($oid, \&index_oid, $arg);
+ $all->cat_async($oid, $index_oid, $req);
}
} elsif ($f eq 'd') {
- $all->cat_async($oid, \&unindex_oid, $sync);
+ $all->cat_async($oid, $unindex_oid, $req);
}
if (${$sync->{need_checkpoint}}) {
reindex_checkpoint($self, $sync);
}
}
- $all->check_async_wait;
- $all->cat_async_wait;
- update_last_commit($self, $git, $i, $stk->{latest_cmt});
+ $all->async_wait_all;
+ $self->update_last_commit($sync, $stk);
}
sub xapian_only {
$sync //= {
need_checkpoint => \(my $bool = 0),
-opt => $opt,
- v2w => $self,
+ self => $self,
nr => \(my $nr = 0),
-regen_fmt => "%u/?\n",
};
if ($seq || !$self->{parallel}) {
my $shard_end = $self->{shards} - 1;
for my $i (0..$shard_end) {
+ last if $sync->{quit};
index_xap_step($self, $sync, $art_beg + $i);
if ($i != $shard_end) {
reindex_checkpoint($self, $sync);
index_xap_step($self, $sync, $art_beg, 1);
}
}
- $self->{ibx}->git->cat_async_wait;
+ $self->git->cat_async_wait;
$self->done;
}
$opt //= {};
return xapian_only($self, $opt) if $opt->{xapian_only};
- my $pr = $opt->{-progress};
my $epoch_max;
- my $latest = git_dir_latest($self, \$epoch_max);
- return unless defined $latest;
+ my $latest = $self->{ibx}->git_dir_latest(\$epoch_max) // return;
+ if ($opt->{'fast-noop'}) { # nanosecond (st_ctim) comparison
+ use Time::HiRes qw(stat);
+ if (my @mm = stat("$self->{ibx}->{inboxdir}/msgmap.sqlite3")) {
+ my $c = $mm[10]; # 10 = ctime (nsec NV)
+ my @hd = stat("$latest/refs/heads");
+ my @pr = stat("$latest/packed-refs");
+ return if $c > ($hd[10] // 0) && $c > ($pr[10] // 0);
+ }
+ }
+ my $pr = $opt->{-progress};
my $seq = $opt->{sequential_shard};
my $art_beg; # the NNTP article number we start xapian_only at
my $idxlevel = $self->{ibx}->{indexlevel};
$self->{oidx}->rethread_prepare($opt);
my $sync = {
need_checkpoint => \(my $bool = 0),
- unindex_range => {}, # EPOCH => oid_old..oid_new
reindex => $opt->{reindex},
-opt => $opt,
- v2w => $self,
+ self => $self,
+ ibx => $self->{ibx},
+ epoch_max => $epoch_max,
};
- $sync->{ranges} = sync_ranges($self, $sync, $epoch_max);
- if (sync_prepare($self, $sync, $epoch_max)) {
+ my $quit = PublicInbox::SearchIdx::quit_cb($sync);
+ local $SIG{QUIT} = $quit;
+ local $SIG{INT} = $quit;
+ local $SIG{TERM} = $quit;
+
+ if (sync_prepare($self, $sync)) {
# tmp_clone seems to fail if inside a transaction, so
# we rollback here (because we opened {mm} for reading)
# Note: we do NOT rely on DBI transactions for atomicity;
# xapian_only works incrementally w/o --reindex
if ($seq && !$opt->{reindex}) {
- $art_beg = $sync->{mm_tmp}->max;
- $art_beg++ if defined($art_beg);
+ $art_beg = $sync->{mm_tmp}->max || -1;
+ $art_beg++;
}
}
- if ($sync->{max_size} = $opt->{max_size}) {
- $sync->{index_oid} = \&index_oid;
- }
# work forwards through history
- index_epoch($self, $sync, $_) for (0..$epoch_max);
- $self->{oidx}->rethread_done($opt);
+ index_todo($self, $sync, $_) for @{delete($sync->{todo}) // []};
+ $self->{oidx}->rethread_done($opt) unless $sync->{quit};
$self->done;
if (my $nr = $sync->{nr}) {
$pr->('all.git '.sprintf($sync->{-regen_fmt}, $$nr)) if $pr;
}
+ my $quit_warn;
# deal with Xapian shards sequentially
if ($seq && delete($sync->{mm_tmp})) {
- $self->{ibx}->{indexlevel} = $idxlevel;
- xapian_only($self, $opt, $sync, $art_beg);
+ if ($sync->{quit}) {
+ $quit_warn = 1;
+ } else {
+ $self->{ibx}->{indexlevel} = $idxlevel;
+ xapian_only($self, $opt, $sync, $art_beg);
+ $quit_warn = 1 if $sync->{quit};
+ }
}
# --reindex on the command-line
- if ($opt->{reindex} && !ref($opt->{reindex}) && $idxlevel ne 'basic') {
+ if (!$sync->{quit} && $opt->{reindex} &&
+ !ref($opt->{reindex}) && $idxlevel ne 'basic') {
$self->lock_acquire;
my $s0 = PublicInbox::SearchIdx->new($self->{ibx}, 0, 0);
if (my $xdb = $s0->idx_acquire) {
}
# reindex does not pick up new changes, so we rerun w/o it:
- if ($opt->{reindex}) {
+ if ($opt->{reindex} && !$sync->{quit}) {
my %again = %$opt;
$sync = undef;
delete @again{qw(rethread reindex -skip_lock)};
index_sync($self, \%again);
+ $opt->{quit} = $again{quit}; # propagate to caller
}
+ warn <<EOF if $quit_warn;
+W: interrupted, --xapian-only --reindex required upon restart
+EOF
}
1;
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Used for displaying the HTML web interface.
# /$INBOX/$MSGID/ for unindexed v1 inboxes
sub no_over_html ($) {
my ($ctx) = @_;
- my $bref = $ctx->{-inbox}->msg_by_mid($ctx->{mid}) or return; # 404
+ my $bref = $ctx->{ibx}->msg_by_mid($ctx->{mid}) or return; # 404
my $eml = PublicInbox::Eml->new($bref);
$ctx->{mhref} = '';
PublicInbox::WwwStream::init($ctx);
sub msg_page {
my ($ctx) = @_;
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
my $over = $ctx->{over} = $ibx->over or return no_over_html($ctx);
my ($id, $prev);
'https://en.wikipedia.org/wiki/Posting_style#Interleaved_style';
my $info = '';
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
if (my $url = $ibx->{infourl}) {
$url = prurl($ctx->{env}, $url);
$info = qq(\n List information: <a\nhref="$url">$url</a>\n);
sub thread_html {
my ($ctx) = @_;
my $mid = $ctx->{mid};
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
my ($nr, $msgs) = $ibx->over->get_thread($mid);
return missing_thread($ctx) if $nr == 0;
sub add_text_body { # callback for each_part
my ($p, $ctx) = @_;
my $upfx = $ctx->{mhref};
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
my $l = $ctx->{-linkify} //= PublicInbox::Linkify->new;
# $p - from each_part: [ Email::MIME-like, depth, $idx ]
my ($part, $depth, $idx) = @$p;
sub _msg_page_prepare_obuf {
my ($eml, $ctx) = @_;
- my $over = $ctx->{-inbox}->over;
+ my $over = $ctx->{ibx}->over;
my $obfs_ibx = $ctx->{-obfs_ibx};
my $rv = '';
my $mids = mids_for_index($eml);
sub thread_skel ($$$) {
my ($skel, $ctx, $hdr) = @_;
my $mid = mids($hdr)->[0];
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
my ($nr, $msgs) = $ibx->over->get_thread($mid);
my $parent = in_reply_to($hdr);
$$skel .= "\n<b>Thread overview: </b>";
# returns a string buffer
sub html_footer {
my ($ctx, $hdr) = @_;
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
my $upfx = '../';
my $skel;
my $rv = '<pre>';
my ($ctx, $level, $smsg) = @_;
my $mid = $smsg->{mid};
my $has_blob = $smsg->{blob} // do {
- if (my $by_mid = $ctx->{-inbox}->smsg_by_mid($mid)) {
+ if (my $by_mid = $ctx->{ibx}->smsg_by_mid($mid)) {
%$smsg = (%$smsg, %$by_mid);
1;
}
}
my @out;
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef;
# sort by recency, this allows new posts to "bump" old topics...
$t =~ s/\A([0-9]{8,14})-// and $after = str2ts($1);
$t =~ /\A([0-9]{8,14})\z/ and $before = str2ts($1);
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
my $msgs = $ibx->recent($opts, $after, $before);
my $nr = scalar @$msgs;
if ($nr < $lim && defined($after)) {
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# used by PublicInbox::View
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# show any VCS object, similar to "git show"
$ctx->{'log'} = tmpfile("solve.$oid_b");
$ctx->{fn} = $fn;
- my $solver = PublicInbox::SolverGit->new($ctx->{-inbox},
+ my $solver = PublicInbox::SolverGit->new($ctx->{ibx},
\&solve_result, $ctx);
# PSGI server will call this immediately and give us a callback (-wcb)
sub {
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Main web interface for mailing list archives
our $OID_RE = qr![a-f0-9]{7,}!;
sub new {
- my ($class, $pi_config) = @_;
- $pi_config ||= PublicInbox::Config->new;
- bless { pi_config => $pi_config }, $class;
+ my ($class, $pi_cfg) = @_; # $pi_cfg is optional
+ bless { pi_cfg => $pi_cfg // PublicInbox::Config->new }, $class; # undefined $pi_cfg falls back to a newly built PublicInbox::Config
}
# backwards compatibility, do not use
eval "require PublicInbox::$_;";
}
if (ref($self)) {
- my $pi_config = $self->{pi_config};
- if (defined($pi_config->{'publicinbox.cgitrc'})) {
- $pi_config->limiter('-cgit');
+ my $pi_cfg = $self->{pi_cfg};
+ if (defined($pi_cfg->{'publicinbox.cgitrc'})) {
+ $pi_cfg->limiter('-cgit');
}
$self->cgit;
$self->stylesheets_prepare($_) for ('', '../', '../../');
$self->news_www;
- $pi_config->each_inbox(\&preload_inbox);
+ $pi_cfg->each_inbox(\&preload_inbox);
}
}
# returns undef if valid, array ref response if invalid
sub invalid_inbox ($$) {
my ($ctx, $inbox) = @_;
- my $ibx = $ctx->{www}->{pi_config}->lookup_name($inbox);
+ my $ibx = $ctx->{www}->{pi_cfg}->lookup_name($inbox) //
+ $ctx->{www}->{pi_cfg}->lookup_ei($inbox);
if (defined $ibx) {
- $ctx->{-inbox} = $ibx;
+ $ctx->{ibx} = $ibx;
return;
}
return $ret if $ret;
my $mid = $ctx->{mid} = uri_unescape($mid_ue);
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
if ($mid =~ m!\A([a-f0-9]{2})([a-f0-9]{38})\z!) {
my ($x2, $x38) = ($1, $2);
# this is horrifically wasteful for legacy URLs:
- my $str = $ctx->{-inbox}->msg_by_path("$x2/$x38") or return;
+ my $str = $ctx->{ibx}->msg_by_path("$x2/$x38") or return;
my $s = PublicInbox::Eml->new($str);
$mid = PublicInbox::MID::mid_clean($s->header_raw('Message-ID'));
return r301($ctx, $inbox, mid_escape($mid));
# /$INBOX/$MESSAGE_ID/t/
sub get_thread {
my ($ctx, $flat) = @_;
- $ctx->{-inbox}->over or return need($ctx, 'Overview');
+ $ctx->{ibx}->over or return need($ctx, 'Overview');
$ctx->{flat} = $flat;
require PublicInbox::View;
PublicInbox::View::thread_html($ctx);
# especially on older systems. Stick to zlib since that's what git uses.
sub get_thread_mbox {
my ($ctx, $sfx) = @_;
- my $over = $ctx->{-inbox}->over or return need($ctx, 'Overview');
+ my $over = $ctx->{ibx}->over or return need($ctx, 'Overview'); # no ->over object: return need()'s result instead
require PublicInbox::Mbox;
PublicInbox::Mbox::thread_mbox($ctx, $over, $sfx);
}
# /$INBOX/$MESSAGE_ID/t.atom -> thread as Atom feed
sub get_thread_atom {
my ($ctx) = @_;
- $ctx->{-inbox}->over or return need($ctx, 'Overview');
+ $ctx->{ibx}->over or return need($ctx, 'Overview'); # no ->over object: return need()'s result instead
require PublicInbox::Feed;
PublicInbox::Feed::generate_thread_atom($ctx);
}
sub r301 {
my ($ctx, $inbox, $mid_ue, $suffix) = @_;
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
unless ($ibx) {
my $r404 = invalid_inbox($ctx, $inbox);
return $r404 if $r404;
- $ibx = $ctx->{-inbox};
+ $ibx = $ctx->{ibx};
}
my $url = $ibx->base_url($ctx->{env});
my $qs = $ctx->{env}->{QUERY_STRING};
sub serve_git {
my ($ctx, $epoch, $path) = @_;
my $env = $ctx->{env};
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx}; # key renamed from {-inbox}; invalid_inbox assigns it
my $git = defined $epoch ? $ibx->git_epoch($epoch) : $ibx->git;
$git ? PublicInbox::GitHTTPBackend::serve($env, $git, $path) : r404();
}
sub mbox_results {
my ($ctx) = @_;
if ($ctx->{env}->{QUERY_STRING} =~ /(?:\A|[&;])q=/) {
- $ctx->{-inbox}->search or return need($ctx, 'search');
+ $ctx->{ibx}->isrch or return need($ctx, 'search');
require PublicInbox::SearchView;
return PublicInbox::SearchView::mbox_results($ctx);
}
my ($self) = @_;
$self->{news_www} ||= do {
require PublicInbox::NewsWWW;
- PublicInbox::NewsWWW->new($self->{pi_config});
+ PublicInbox::NewsWWW->new($self->{pi_cfg});
}
}
sub cgit {
my ($self) = @_;
$self->{cgit} ||= do {
- my $pi_config = $self->{pi_config};
+ my $pi_cfg = $self->{pi_cfg};
- if (defined($pi_config->{'publicinbox.cgitrc'})) {
+ if (defined($pi_cfg->{'publicinbox.cgitrc'})) {
require PublicInbox::Cgit;
- PublicInbox::Cgit->new($pi_config);
+ PublicInbox::Cgit->new($pi_cfg);
} else {
require Plack::Util;
Plack::Util::inline_object(call => sub { r404() });
} || sub { $_[0] };
my $css_map = {};
- my $stylesheets = $self->{pi_config}->{css} || [];
+ my $stylesheets = $self->{pi_cfg}->{css} || [];
my $links = [];
my $inline_ok = 1;
my $css = $css_map->{$key};
if (!defined($css) && $key eq 'userContent') {
my $env = $ctx->{env};
- $css = PublicInbox::UserContent::sample($ctx->{-inbox}, $env);
+ $css = PublicInbox::UserContent::sample($ctx->{ibx}, $env);
}
defined $css or return r404();
my $h = [ 'Content-Length', bytes::length($css),
sub get_description {
my ($ctx, $inbox) = @_;
invalid_inbox($ctx, $inbox) || do {
- my $d = $ctx->{-inbox}->description . "\n";
+ my $d = $ctx->{ibx}->description . "\n";
[ 200, [ 'Content-Length', bytes::length($d),
'Content-Type', 'text/plain' ], [ $d ] ];
};
=head1 COPYRIGHT
-Copyright (C) 2016-2020 all contributors L<mailto:meta@public-inbox.org>
+Copyright (C) 2016-2021 all contributors L<mailto:meta@public-inbox.org>
License: AGPL-3.0+ L<http://www.gnu.org/licenses/agpl-3.0.txt>
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# ref: https://cr.yp.to/proto/maildir.html
}
sub new {
- my ($class, $config) = @_;
+ my ($class, $cfg) = @_;
my (%mdmap, $spamc);
my (%imap, %nntp); # url => [inbox objects] or 'watchspam'
# indefinitely...
foreach my $pfx (qw(publicinboxwatch publicinboxlearn)) {
my $k = "$pfx.watchspam";
- defined(my $dirs = $config->{$k}) or next;
+ defined(my $dirs = $cfg->{$k}) or next;
$dirs = PublicInbox::Config::_array($dirs);
for my $dir (@$dirs) {
my $url;
my $k = 'publicinboxwatch.spamcheck';
my $default = undef;
- my $spamcheck = PublicInbox::Spamcheck::get($config, $k, $default);
+ my $spamcheck = PublicInbox::Spamcheck::get($cfg, $k, $default);
$spamcheck = _spamcheck_cb($spamcheck) if $spamcheck;
- $config->each_inbox(sub {
+ $cfg->each_inbox(sub {
# need to make all inboxes writable for spam removal:
my $ibx = $_[0] = PublicInbox::InboxWritable->new($_[0]);
spamcheck => $spamcheck,
mdmap => \%mdmap,
mdre => $mdre,
- config => $config,
+ pi_cfg => $cfg,
imap => scalar keys %imap ? \%imap : undef,
nntp => scalar keys %nntp? \%nntp : undef,
importers => {},
$path =~ /:2,[A-R]*S[T-Za-z]*\z/ or return;
my $eml = eml_from_path($path) or return;
local $SIG{__WARN__} = warn_ignore_cb();
- $self->{config}->each_inbox(\&remove_eml_i, $self, $eml, $path);
+ $self->{pi_cfg}->each_inbox(\&remove_eml_i, $self, $eml, $path);
}
sub import_eml ($$$) {
warn "unmappable dir: $1\n";
return;
}
- my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ };
+ my $warn_cb = $SIG{__WARN__} || \&CORE::warn;
local $SIG{__WARN__} = sub {
my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : '';
$warn_cb->($pfx, "path: $path\n", @_);
# flesh out common IMAP-specific data structures
sub imap_common_init ($) {
my ($self) = @_;
- my $cfg = $self->{config};
+ my $cfg = $self->{pi_cfg};
my $mic_args = {}; # scheme://authority => Mail:IMAPClient arg
for my $url (sort keys %{$self->{imap}}) {
my $uri = PublicInbox::URIimap->new($url);
if ($flags =~ /\\Seen\b/) {
local $SIG{__WARN__} = warn_ignore_cb();
my $eml = PublicInbox::Eml->new($raw);
- $self->{config}->each_inbox(\&remove_eml_i,
+ $self->{pi_cfg}->each_inbox(\&remove_eml_i,
$self, $eml, "$url UID:$uid");
}
} else {
my $key = $req;
$key =~ s/\.PEEK//;
my ($uids, $batch);
- my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ };
+ my $warn_cb = $SIG{__WARN__} || \&CORE::warn;
local $SIG{__WARN__} = sub {
my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : '';
$batch //= '?';
delete $self->{opendirs};
PublicInbox::DS->Reset;
%SIG = (%SIG, %{$self->{sig}}, CHLD => 'DEFAULT');
- PublicInbox::Sigfd::sig_setmask($self->{oldset});
+ PublicInbox::DS::sig_setmask($self->{oldset});
}
sub watch_atfork_parent ($) {
my ($self) = @_;
_done_for_now($self);
- PublicInbox::Sigfd::block_signals();
+ PublicInbox::DS::block_signals(); # last statement, so its result is returned; a caller keeps it as $oldset
}
sub imap_idle_requeue ($) { # DS::add_timer callback
my ($self, $url_intvl) = @_;
my ($url, $intvl) = @$url_intvl;
pipe(my ($r, $w)) or die "pipe: $!";
+ my $seed = rand(0xffffffff);
defined(my $pid = fork) or die "fork: $!";
if ($pid == 0) {
+ srand($seed);
+ eval { Net::SSLeay::randomize() };
close $r;
watch_atfork_child($self);
watch_imap_idle_1($self, $url, $intvl);
imap_idle_fork($self, $url_intvl);
}
};
- PublicInbox::Sigfd::sig_setmask($oldset);
+ PublicInbox::DS::sig_setmask($oldset);
die $@ if $@;
}
fs_scan_step($self) if $self->{mdre};
return if $self->{quit};
pipe(my ($r, $w)) or die "pipe: $!";
my $oldset = watch_atfork_parent($self);
+ my $seed = rand(0xffffffff);
my $pid = fork;
if (defined($pid) && $pid == 0) {
+ srand($seed);
+ eval { Net::SSLeay::randomize() };
close $r;
watch_atfork_child($self);
if ($urls->[0] =~ m!\Aimaps?://!i) {
close $w;
_exit(0);
}
- PublicInbox::Sigfd::sig_setmask($oldset);
+ PublicInbox::DS::sig_setmask($oldset);
die "fork: $!" unless defined $pid;
$self->{poll_pids}->{$pid} = [ $intvl, $urls ];
PublicInbox::EOFpipe->new($r, \&reap, [$pid, \&poll_fetch_reap, $self]);
# flesh out common NNTP-specific data structures
sub nntp_common_init ($) {
my ($self) = @_;
- my $cfg = $self->{config};
+ my $cfg = $self->{pi_cfg};
my $nn_args = {}; # scheme://authority => Net::NNTP->new arg
for my $url (sort keys %{$self->{nntp}}) {
my $sec = uri_section(uri_new($url));
$beg = $l_art + 1;
warn "I: $url fetching ARTICLE $beg..$end\n";
- my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ };
+ my $warn_cb = $SIG{__WARN__} || \&CORE::warn;
my ($err, $art);
local $SIG{__WARN__} = sub {
my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : '';
}
} elsif ($inboxes eq 'watchspam') {
my $eml = PublicInbox::Eml->new(\$raw);
- $self->{config}->each_inbox(\&remove_eml_i,
+ $self->{pi_cfg}->each_inbox(\&remove_eml_i,
$self, $eml, "$url ARTICLE $art");
} else {
die "BUG: destination unknown $inboxes";
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# dumps using the ".dump" command of sqlite3(1)
sub sqldump ($$) {
my ($ctx, $altid_pfx) = @_;
my $env = $ctx->{env};
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
my $altid_map = $ibx->altid_map;
my $fn = $altid_map->{$altid_pfx};
unless (defined $fn) {
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Atom body stream for HTTP responses
sub new {
my ($class, $ctx, $cb) = @_;
- $ctx->{feed_base_url} = $ctx->{-inbox}->base_url($ctx->{env});
+ $ctx->{feed_base_url} = $ctx->{ibx}->base_url($ctx->{env});
$ctx->{cb} = $cb || \&PublicInbox::GzipFilter::close;
$ctx->{emit_header} = 1;
bless $ctx, $class;
my ($self) = @_;
my $cb = $self->{cb} or return;
while (my $smsg = $cb->($self)) {
- my $eml = $self->{-inbox}->smsg_eml($smsg) or next;
+ my $eml = $self->{ibx}->smsg_eml($smsg) or next;
return $self->translate(feed_entry($self, $smsg, $eml));
}
delete $self->{cb};
sub atom_header {
my ($ctx, $title) = @_;
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
my $base_url = $ctx->{feed_base_url};
my $search_q = $ctx->{search_query};
my $self_url = $base_url;
$title = title_tag($title);
my $from = $eml->header('From') // $eml->header('Sender') //
- $ctx->{-inbox}->{-primary_address};
+ $ctx->{ibx}->{-primary_address};
my ($email) = PublicInbox::Address::emails($from);
my $name = ascii_html(join(', ', PublicInbox::Address::names($from)));
- $email = ascii_html($email // $ctx->{-inbox}->{-primary_address});
+ $email = ascii_html($email // $ctx->{ibx}->{-primary_address});
my $s = delete($ctx->{emit_header}) ? atom_header($ctx, $title) : '';
$s .= "<entry><author><name>$name</name><email>$email</email>" .
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# For retrieving attachments from messages in the WWW interface
return 1 if $referer eq ''; # no referer is always OK for wget/curl
# prevent deep-linking from other domains on some browsers (Firefox)
- # n.b.: $ctx->{-inbox}->base_url($env) with INBOX_URL won't work
+ # n.b.: $ctx->{ibx}->base_url($env) with INBOX_URL won't work
# with dillo, we can only match "$url_scheme://$HTTP_HOST/" without
# path components
my $base_url = $env->{'psgi.url_scheme'} . '://' .
$ctx->{idx} = $idx;
bless $ctx, __PACKAGE__;
my $eml;
- if ($ctx->{smsg} = $ctx->{-inbox}->smsg_by_mid($ctx->{mid})) {
+ if ($ctx->{smsg} = $ctx->{ibx}->smsg_by_mid($ctx->{mid})) {
return sub { # public-inbox-httpd-only
$ctx->{wcb} = $_[0];
scan_attach($ctx);
} if $ctx->{env}->{'pi-httpd.async'};
# generic PSGI:
- $eml = $ctx->{-inbox}->smsg_eml($ctx->{smsg});
- } elsif (!$ctx->{-inbox}->over) {
- if (my $bref = $ctx->{-inbox}->msg_by_mid($ctx->{mid})) {
+ $eml = $ctx->{ibx}->smsg_eml($ctx->{smsg});
+ } elsif (!$ctx->{ibx}->over) {
+ if (my $bref = $ctx->{ibx}->msg_by_mid($ctx->{mid})) {
$eml = PublicInbox::Eml->new($bref);
}
}
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Standalone PSGI app to provide syntax highlighting as-a-service
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Provide an HTTP-accessible listing of inboxes.
my ($ctx, $key, $default) = @_;
$key //= 'publicInbox.wwwListing';
$default //= '404';
- my $v = $ctx->{www}->{pi_config}->{lc $key} // $default;
+ my $v = $ctx->{www}->{pi_cfg}->{lc $key} // $default;
again:
if ($v eq 'match=domain') {
my $h = $ctx->{env}->{HTTP_HOST} // $ctx->{env}->{SERVER_NAME};
sub response {
my ($class, $ctx) = @_;
bless $ctx, $class;
+ if (my $ALL = $ctx->{www}->{pi_cfg}->ALL) {
+ $ALL->misc->reopen;
+ }
my $re = $ctx->url_regexp or return $ctx->psgi_triple;
- my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_config},
+ my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_cfg},
\&list_match_i, $re, $ctx);
sub {
$ctx->{-wcb} = $_[0]; # HTTP server callback
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# This package can either be a PSGI response body for a static file
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# HTML body stream for which yields getline+close methods for
use bytes (); # length
use PublicInbox::Hval qw(ascii_html prurl ts2str);
our $TOR_URL = 'https://www.torproject.org/';
-our $CODE_URL = 'https://public-inbox.org/public-inbox.git';
+our $CODE_URL = [ qw(http://ou63pmih66umazou.onion/public-inbox.git
+ https://public-inbox.org/public-inbox.git) ];
sub base_url ($) {
my $ctx = shift;
- my $base_url = $ctx->{-inbox}->base_url($ctx->{env});
+ my $base_url = $ctx->{ibx}->base_url($ctx->{env}); # inbox object is read from {ibx} (was {-inbox})
chop $base_url; # no trailing slash for clone
$base_url;
}
sub html_top ($) {
my ($ctx) = @_;
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
my $desc = ascii_html($ibx->description);
my $title = delete($ctx->{-title_html}) // $desc;
my $upfx = $ctx->{-upfx} || '';
qq(<a\nhref="$color">color</a> / ).
qq(<a\nhref=#mirror>mirror</a> / ).
qq(<a\nhref="$atom">Atom feed</a>);
- if ($ibx->search) {
+ if ($ibx->isrch) {
my $q_val = delete($ctx->{-q_value_html}) // '';
$q_val = qq(\nvalue="$q_val") if $q_val ne '';
# XXX gross, for SearchView.pm
sub coderepos ($) {
my ($ctx) = @_;
- my $ibx = $ctx->{-inbox};
+ my $cr = $ctx->{ibx}->{coderepo} // return (); # inbox has no coderepo entry: empty list
+ my $cfg = $ctx->{www}->{pi_cfg};
+ my $upfx = ($ctx->{-upfx} // ''). '../'; # prepended to relative cgit URLs below
my @ret;
- if (defined(my $cr = $ibx->{coderepo})) {
- my $cfg = $ctx->{www}->{pi_config};
- my $env = $ctx->{env};
- for my $cr_name (@$cr) {
- my $urls = $cfg->{"coderepo.$cr_name.cgiturl"};
- if ($urls) {
- $ret[0] //= <<EOF;
+ for my $cr_name (@$cr) {
+ my $urls = $cfg->{"coderepo.$cr_name.cgiturl"} // next; # skip names without a cgiturl value
+ $ret[0] //= <<EOF;
code repositories for the project(s) associated with this inbox:
EOF
- $ret[0] .= "\n\t".prurl($env, $_) for @$urls;
- }
+ for (@$urls) {
+ # relative or absolute URL?, prefix relative "foo.git"
+ # with appropriate number of "../"
+ my $u = m!\A(?:[a-z\+]+:)?//! ? $_ : $upfx.$_;
+ $u = ascii_html(prurl($ctx->{env}, $u)); # escaped because $u is interpolated into HTML on the next line
+ $ret[0] .= qq(\n\t<a\nhref="$u">$u</a>);
}
}
- @ret; # may be empty
+ @ret; # may be empty, this sub is called as an arg for join()
}
sub code_footer ($) {
id=mirror>This inbox may be cloned and mirrored by anyone:</a>
EOF
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
my $desc = ascii_html($ibx->description);
my @urls;
}
$urls .= "\n" . join('', map { "\tgit clone --mirror $_\n" } @urls);
- my $addrs = $ibx->{address};
- $addrs = join(' ', @$addrs) if ref($addrs) eq 'ARRAY';
- my $v = defined $max ? '-V2' : '-V1';
- $urls .= <<EOF;
+ if (my $addrs = $ibx->{address}) {
+ $addrs = join(' ', @$addrs) if ref($addrs) eq 'ARRAY';
+ my $v = defined $max ? '-V2' : '-V1';
+ $urls .= <<EOF;
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
$addrs
public-inbox-index $dir
EOF
+ }
my $cfg_link = ($ctx->{-upfx} // '').'_/text/config/raw';
$urls .= <<EOF;
my $cb = $ctx->{cb} or return;
while (defined(my $x = $cb->($ctx))) { # x = smsg or scalar non-ref
if (ref($x)) { # smsg
- my $eml = $ctx->{-inbox}->smsg_eml($x) or next;
+ my $eml = $ctx->{ibx}->smsg_eml($x) or next;
$ctx->{smsg} = $x;
return $ctx->translate($cb->($ctx, $eml));
} else { # scalar
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# used for displaying help texts and other non-mail content
# enforce trailing slash for "wget -r" compatibility
if (!$have_tslash && $code == 200) {
- my $url = $ctx->{-inbox}->base_url($env);
+ my $url = $ctx->{ibx}->base_url($env);
$url .= "_/text/$key/";
return [ 302, [ 'Content-Type', 'text/plain',
sub _colors_help ($$) {
my ($ctx, $txt) = @_;
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
my $env = $ctx->{env};
my $base_url = $ibx->base_url($env);
$$txt .= "color customization for $base_url\n";
# n.b. this is a perfect candidate for memoization
sub inbox_config ($$$) {
my ($ctx, $hdr, $txt) = @_;
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
push @$hdr, 'Content-Disposition', 'inline; filename=inbox.config';
my $name = dq_escape($ibx->{name});
my $inboxdir = '/path/to/top-level-inbox';
; line number ranges in `[PATCH]' emails link to /$INBOX_NAME/$OID/s/,
; an HTTP endpoint which reconstructs git blobs via git-apply(1).
EOF
- my $pi_config = $ctx->{www}->{pi_config};
+ my $pi_cfg = $ctx->{www}->{pi_cfg};
for my $cr_name (@$cr) {
- my $urls = $pi_config->{"coderepo.$cr_name.cgiturl"};
+ my $urls = $pi_cfg->{"coderepo.$cr_name.cgiturl"};
my $path = "/path/to/$cr_name";
$cr_name = dq_escape($cr_name);
return inbox_config($ctx, $hdr, $txt) if $key eq 'config';
return if $key ne 'help'; # TODO more keys?
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
my $base_url = $ibx->base_url($ctx->{env});
$$txt .= "public-inbox help for $base_url\n";
$$txt .= <<EOF;
# n.b. we use the Xapian DB for any regeneratable,
# order-of-arrival-independent data.
- my $srch = $ibx->search;
+ my $srch = $ibx->isrch;
if ($srch) {
$$txt .= <<EOF;
search
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
package PublicInbox::Xapcmd;
use strict;
# Forks a child that runs $cb->($args, $opt); the parent gets the child PID.
sub cb_spawn {
my ($cb, $args, $opt) = @_; # $cb = cpdb() or compact()
- defined(my $pid = fork) or die "fork: $!";
+ my $seed = rand(0xffffffff); # drawn in the parent, before the fork
+ my $pid = fork // die "fork: $!";
return $pid if $pid > 0;
+ srand($seed); # child only: reseed from the value the parent drew
$cb->($args, $opt);
POSIX::_exit(0);
}
$opt->{reindex}->{from} = $lc;
}
} else { # v2
- my $max;
- $im->git_dir_latest(\$max) or return;
+ my $max = $ibx->max_git_epoch // return;
my $from = $opt->{reindex}->{from};
my $mm = $ibx->mm;
my $v = PublicInbox::Search::SCHEMA_VERSION();
local %SIG = %SIG;
setup_signals();
- $ibx->umask_prepare;
$ibx->with_umask(\&_run, $ibx, $cb, $opt);
}
--- /dev/null
+/*
+ * Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+ * License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+ *
+ * libgit2 for Inline::C
+ * Avoiding Git::Raw since it doesn't guarantee a stable API,
+ * while libgit2 itself seems reasonably stable.
+ */
+#include <git2.h>
+#include <sys/uio.h>
+#include <errno.h>
+#include <poll.h>
+
+static void croak_if_err(int rc, const char *msg) /* msg: callers here pass the libgit2 function name */
+{
+ if (rc != GIT_OK) { /* any other return code ends in croak() */
+ const git_error *e = giterr_last(); /* may be NULL, see the fallback below */
+
+ croak("%d %s (%s)", rc, msg, e ? e->message : "unknown");
+ }
+}
+
+SV *new() /* returns a reference blessed into PublicInbox::Gcf2 wrapping a new git_odb */
+{
+ git_odb *odb;
+ SV *ref, *self;
+ int rc = git_odb_new(&odb);
+ croak_if_err(rc, "git_odb_new"); /* croaks (does not return) on failure */
+
+ ref = newSViv((IV)odb); /* the git_odb pointer is kept as an integer SV */
+ self = newRV_noinc(ref);
+ sv_bless(self, gv_stashpv("PublicInbox::Gcf2", GV_ADD));
+ SvREADONLY_on(ref); /* mark the SV holding the pointer read-only */
+
+ return self;
+}
+
+static git_odb *odb_ptr(SV *self) /* recovers the git_odb pointer stored by new() */
+{
+ return (git_odb *)SvIV(SvRV(self)); /* dereference, then cast the integer back */
+}
+
+void DESTROY(SV *self) /* releases the git_odb wrapped by self */
+{
+ git_odb_free(odb_ptr(self));
+}
+
+/*
+ * Adds objects_path as an on-disk alternate of the object database
+ * wrapped by self; croaks if libgit2 reports an error.
+ * needs "$GIT_DIR/objects", not $GIT_DIR
+ */
+void add_alternate(SV *self, const char *objects_path)
+{
+ int rc = git_odb_add_disk_alternate(odb_ptr(self), objects_path);
+ croak_if_err(rc, "git_odb_add_disk_alternate");
+}
+
+#define CAPA(v) (sizeof(v) / sizeof((v)[0]))
+
+/*
+ * Writes one object to fd using the framing of git-cat-file --batch:
+ * a "$OID $TYPE $SIZE\n" header, the object data, and a trailing "\n".
+ *
+ * returns true on success, false on failure (nothing is written when
+ * the OID cannot be parsed or read; the caller retries)
+ * croaks on any writev error other than EAGAIN or EINTR
+ * this requires an unabbreviated git OID
+ */
+int cat_oid(SV *self, int fd, SV *oidsv)
+{
+	/*
+	 * adjust when libgit2 gets SHA-256 support, we return the
+	 * same header as git-cat-file --batch "$OID $TYPE $SIZE\n"
+	 *
+	 * The sizeof() literal includes the "\n" so that the longest
+	 * possible header and the NUL appended by snprintf both fit;
+	 * otherwise snprintf would truncate while its return value
+	 * (used for iov_len below) still counted the missing byte.
+	 */
+	char hdr[GIT_OID_HEXSZ + sizeof(" commit 18446744073709551615\n")];
+	struct iovec vec[3];
+	size_t nvec = CAPA(vec);
+	git_oid oid;
+	git_odb_object *object = NULL;
+	int rc, err = 0;
+	STRLEN oidlen;
+	char *oidptr = SvPV(oidsv, oidlen);
+
+	/* same trailer as git-cat-file --batch */
+	vec[2].iov_len = 1;
+	vec[2].iov_base = "\n";
+
+	rc = git_oid_fromstrn(&oid, oidptr, oidlen);
+	if (rc == GIT_OK)
+		rc = git_odb_read(&object, odb_ptr(self), &oid);
+	if (rc == GIT_OK) {
+		vec[0].iov_base = hdr;
+		vec[1].iov_base = (void *)git_odb_object_data(object);
+		vec[1].iov_len = git_odb_object_size(object);
+
+		/* hex OID first, snprintf continues right after it */
+		git_oid_nfmt(hdr, GIT_OID_HEXSZ, git_odb_object_id(object));
+		vec[0].iov_len = GIT_OID_HEXSZ +
+				snprintf(hdr + GIT_OID_HEXSZ,
+					sizeof(hdr) - GIT_OID_HEXSZ,
+					" %s %zu\n",
+					git_object_type2string(
+						git_odb_object_type(object)),
+					vec[1].iov_len);
+	} else { /* caller retries */
+		nvec = 0;
+	}
+	/* vec + CAPA(vec) - nvec is always the first unfinished iovec */
+	while (nvec && !err) {
+		ssize_t w = writev(fd, vec + CAPA(vec) - nvec, nvec);
+
+		if (w > 0) {
+			size_t done = 0;
+			size_t i;
+
+			for (i = CAPA(vec) - nvec; i < CAPA(vec); i++) {
+				/* w is non-negative in this branch */
+				if ((size_t)w >= vec[i].iov_len) {
+					/* fully written vec */
+					w -= vec[i].iov_len;
+					done++;
+				} else { /* partially written vec */
+					char *p = vec[i].iov_base;
+					vec[i].iov_base = p + w;
+					vec[i].iov_len -= w;
+					break;
+				}
+			}
+			nvec -= done;
+		} else if (w < 0) {
+			err = errno;
+			switch (err) {
+			case EAGAIN: {
+				/* wait until fd is writable, then retry */
+				struct pollfd pfd;
+				pfd.events = POLLOUT;
+				pfd.fd = fd;
+				poll(&pfd, 1, -1);
+			}
+				/* fall-through */
+			case EINTR:
+				err = 0; /* both cases retry the writev */
+			}
+		} else { /* w == 0 */
+			err = ENOSPC;
+		}
+	}
+	/* release the object before croak() leaves this function */
+	if (object)
+		git_odb_object_free(object);
+	if (err)
+		croak("writev error: %s", strerror(err));
+
+	return rc == GIT_OK;
+}
--- /dev/null
+#!perl -w
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use Socket qw(AF_UNIX SOCK_SEQPACKET MSG_EOR pack_sockaddr_un);
+use Errno qw(EINTR ECONNRESET);
+use PublicInbox::CmdIPC4;
+my $narg = 5;
+my ($sock, $pwd);
+my $recv_cmd = PublicInbox::CmdIPC4->can('recv_cmd4');
+my $send_cmd = PublicInbox::CmdIPC4->can('send_cmd4') // do {
+ require PublicInbox::Spawn; # takes ~50ms even if built *sigh*
+ $recv_cmd = PublicInbox::Spawn->can('recv_cmd4');
+ PublicInbox::Spawn->can('send_cmd4');
+};
+
+sub sigchld { # reaps child processes; also installed as $SIG{CHLD} by exec_cmd
+ my ($sig) = @_; # set when invoked as a signal handler, undef for direct sigchld() calls
+ my $flags = $sig ? POSIX::WNOHANG() : 0; # handler: do not block; direct call: block
+ while (waitpid(-1, $flags) > 0) {} # keep reaping until waitpid returns <= 0
+}
+
+sub exec_cmd { # forks a child that execs a command with the received FDs as its stdio
+ my ($fds, $argc, @argv) = @_; # @argv: $argc command words followed by KEY=VALUE entries
+ my @old = (*STDIN{IO}, *STDOUT{IO}, *STDERR{IO});
+ my @rdr; # flat list of (standard handle, replacement handle) pairs
+ for my $fd (@$fds) {
+ open(my $tmpfh, '+<&=', $fd) or die "open +<&=$fd: $!";
+ push @rdr, shift(@old), $tmpfh;
+ }
+ require POSIX; # WNOHANG
+ $SIG{CHLD} = \&sigchld;
+ my $pid = fork // die "fork: $!";
+ if ($pid == 0) { # child; the parent simply falls through and returns
+ my %env = map { split(/=/, $_, 2) } splice(@argv, $argc); # leaves only the command words in @argv
+ while (my ($old_io, $tmpfh) = splice(@rdr, 0, 2)) {
+ open $old_io, '+<&', $tmpfh or die "open +<&=: $!";
+ }
+ %ENV = (%ENV, %env); # received entries override inherited ones
+ exec(@argv);
+ die "exec: @argv: $!";
+ }
+}
+
+if ($send_cmd && eval { # daemon path first; a die inside this eval selects the one-shot branch at the end
+ my $path = do {
+ my $runtime_dir = ($ENV{XDG_RUNTIME_DIR} // '') . '/lei';
+ if ($runtime_dir eq '/lei') { # XDG_RUNTIME_DIR unset or empty
+ require File::Spec;
+ $runtime_dir = File::Spec->tmpdir."/lei-$<";
+ }
+ unless (-d $runtime_dir) {
+ require File::Path;
+ File::Path::mkpath($runtime_dir, 0, 0700);
+ }
+ "$runtime_dir/$narg.seq.sock";
+ };
+ my $addr = pack_sockaddr_un($path);
+ socket($sock, AF_UNIX, SOCK_SEQPACKET, 0) or die "socket: $!";
+ unless (connect($sock, $addr)) { # start the daemon if not started
+ local $ENV{PERL5LIB} = join(':', @INC);
+ open(my $daemon, '-|', $^X, qw[-MPublicInbox::LEI
+ -E PublicInbox::LEI::lazy_start(@ARGV)],
+ $path, $! + 0, $narg) or die "popen: $!"; # $! + 0: numeric errno, presumably from the failed connect
+ while (<$daemon>) { warn $_ } # EOF when STDERR is redirected
+ close($daemon) or warn <<"";
+lei-daemon could not start, exited with \$?=$?
+
+ # try connecting again anyways, unlink+bind may be racy
+ connect($sock, $addr) or die <<"";
+connect($path): $! (after attempted daemon start)
+Falling back to (slow) one-shot mode
+
+ }
+ 1;
+}) { # (Socket::MsgHdr|Inline::C), $sock, $pwd are all available:
+ open my $dh, '<', '.' or die "open(.) $!";
+ my $buf = join("\0", scalar(@ARGV), @ARGV); # argument count, then the arguments, NUL-separated
+ while (my ($k, $v) = each %ENV) { $buf .= "\0$k=$v" } # then every KEY=VALUE from %ENV
+ $buf .= "\0\0";
+ $send_cmd->($sock, [ 0, 1, 2, fileno($dh) ], $buf, MSG_EOR); # FDs sent: stdin, stdout, stderr, cwd handle
+ my $x_it_code = 0;
+ while (1) {
+ my (@fds) = $recv_cmd->($sock, $buf, 4096 * 33);
+ if (scalar(@fds) == 1 && !defined($fds[0])) { # lone undef: treated as a receive error, $! decides
+ last if $! == ECONNRESET;
+ next if $! == EINTR;
+ die "recvmsg: $!";
+ }
+ last if $buf eq ''; # an empty message ends the loop
+ if ($buf =~ /\Ax_it ([0-9]+)\z/) {
+ $x_it_code = $1 + 0;
+ last;
+ } elsif ($buf =~ /\Achild_error ([0-9]+)\z/) {
+ $x_it_code = $1 + 0; # recorded, but keep reading messages
+ } elsif ($buf =~ /\Aexec (.+)\z/) {
+ exec_cmd(\@fds, split(/\0/, $1));
+ } else {
+ sigchld();
+ die $buf;
+ }
+ }
+ sigchld(); # called without a signal name: blocking reap of remaining children
+ if (my $sig = ($x_it_code & 127)) { # low 7 bits non-zero: used as a signal number sent to ourselves
+ kill $sig, $$;
+ sleep;
+ }
+ exit($x_it_code >> 8);
+} else { # for systems lacking Socket::MsgHdr or Inline::C
+ warn $@ if $@;
+ require PublicInbox::LEI;
+ PublicInbox::LEI::oneshot(__PACKAGE__);
+}
#!perl -w
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use v5.10.1;
#!/usr/bin/perl -w
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <http://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use v5.10.1;
die "$new_dir exists\n" if -d $new_dir;
die "$old_dir not a directory\n" unless -d $old_dir;
-require Cwd;
-Cwd->import('abs_path');
+require PublicInbox::Admin;
require PublicInbox::Config;
require PublicInbox::InboxWritable;
-my $abs = abs_path($old_dir);
-die "failed to resolve $old_dir: $!\n" if (!defined($abs));
-
my $cfg = PublicInbox::Config->new;
-my $old;
-$cfg->each_inbox(sub {
- $old = $_[0] if abs_path($_[0]->{inboxdir}) eq $old_dir;
-});
-if ($old) {
- $old = PublicInbox::InboxWritable->new($old);
-} else {
+my @old = PublicInbox::Admin::resolve_inboxes([$old_dir], undef, $cfg);
+@old > 1 and die "BUG: resolved several inboxes from $old_dir:\n",
+ map { "\t$_->{inboxdir}\n" } @old;
+my $old = PublicInbox::InboxWritable->new($old[0]);
+if (delete $old->{-unconfigured}) {
warn "W: $old_dir not configured in " .
PublicInbox::Config::default_file() . "\n";
- $old = PublicInbox::InboxWritable->new({
- inboxdir => $old_dir,
- name => 'ignored',
- -primary_address => 'old@example.com',
- address => [ 'old@example.com' ],
- });
}
die "Only conversion from v1 inboxes is supported\n" if $old->version >= 2;
-require File::Spec;
require PublicInbox::Admin;
my $detected = PublicInbox::Admin::detect_indexlevel($old);
$old->{indexlevel} //= $detected;
}
local %ENV = (%$env, %ENV) if $env;
my $new = { %$old };
-$new->{inboxdir} = File::Spec->canonpath($new_dir);
+$new->{inboxdir} = $cfg->rel2abs_collapsed($new_dir);
$new->{version} = 2;
$new = PublicInbox::InboxWritable->new($new, { nproc => $opt->{jobs} });
$new->{-no_fsync} = 1 if !$opt->{fsync};
my $v2w;
-$old->umask_prepare;
sub link_or_copy ($$) {
my ($src, $dst) = @_;
#!/usr/bin/perl -w
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Used for editing messages in a public-inbox.
# rename/relink $edit_fn
open my $new_fh, '<', $edit_fn or
die "can't read edited file ($edit_fn): $!\n";
- my $new_raw = do { local $/; <$new_fh> };
+ defined(my $new_raw = do { local $/; <$new_fh> }) or die
+ "read $edit_fn: $!\n";
if (!$opt->{raw}) {
# get rid of the From we added
--- /dev/null
+#!perl -w
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+# Basic tool to create a Xapian search index for a public-inbox.
+use strict;
+use v5.10.1;
+use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
+my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term:
+usage: public-inbox-extindex [options] [EXTINDEX_DIR] [INBOX_DIR...]
+
+ Create and update external (detached) search indices
+
+ --no-fsync speed up indexing, risk corruption on power outage
+ --watch run persistently and watch for inbox updates
+ -L LEVEL `medium', or `full' (default: full)
+ --all index all configured inboxes
+ --jobs=NUM set or disable parallelization (NUM=0)
+ --batch-size=BYTES flush changes to OS after a given number of bytes
+ --max-size=BYTES do not index messages larger than the given size
+ --gc perform garbage collection instead of indexing
+ --verbose | -v increase verbosity (may be repeated)
+
+BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes)
+See public-inbox-extindex(1) man page for full documentation.
+EOF
+my $opt = { quiet => -1, compact => 0, fsync => 1, scan => 1 }; # defaults; GetOptions below stores parsed values here
+GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i
+ fsync|sync!
+ indexlevel|index-level|L=s max_size|max-size=s
+ batch_size|batch-size=s
+ gc commit-interval=i watch scan!
+ all help|h))
+ or die $help; # option parsing failed: die with the usage text
+if ($opt->{help}) { print $help; exit 0 };
+die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0;
+require IO::Handle;
+STDOUT->autoflush(1);
+STDERR->autoflush(1);
+local $SIG{USR1} = 'IGNORE'; # to be overridden in eidx_sync
+# require lazily to speed up --help
+require PublicInbox::Admin;
+my $cfg = PublicInbox::Config->new; # presumably loaded via PublicInbox::Admin above -- confirm
+my $eidx_dir = shift(@ARGV); # first positional argument: EXTINDEX_DIR
+unless (defined $eidx_dir) {
+ if ($opt->{all} && $cfg->ALL) { # with --all, fall back to the topdir of $cfg->ALL
+ $eidx_dir = $cfg->ALL->{topdir};
+ } else {
+ die "E: $help";
+ }
+}
+my @ibxs; # stays empty for --gc
+if ($opt->{gc}) {
+ die "E: inbox paths must not be specified with --gc\n" if @ARGV;
+ die "E: --all not compatible with --gc\n" if $opt->{all};
+ die "E: --watch is not compatible with --gc\n" if $opt->{watch};
+} else {
+ @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);
+}
+PublicInbox::Admin::require_or_die(qw(-search));
+PublicInbox::Config::json() or die "Cpanel::JSON::XS or similar missing\n";
+PublicInbox::Admin::progress_prepare($opt);
+my $env = PublicInbox::Admin::index_prepare($opt, $cfg);
+local %ENV = (%ENV, %$env) if $env; # entries from index_prepare override the inherited environment
+require PublicInbox::ExtSearchIdx;
+my $eidx = PublicInbox::ExtSearchIdx->new($eidx_dir, $opt);
+if ($opt->{gc}) {
+ $eidx->attach_config($cfg);
+ $eidx->eidx_gc($opt);
+} else {
+ if ($opt->{all}) {
+ $eidx->attach_config($cfg);
+ } else {
+ $eidx->attach_inbox($_) for @ibxs; # only the inboxes resolved from the command line
+ }
+ if ($opt->{watch}) {
+ $cfg = undef; # save memory only after SIGHUP
+ $eidx->eidx_watch($opt);
+ } else {
+ $eidx->eidx_sync($opt);
+ }
+}
#!/usr/bin/perl -w
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Standalone HTTP server for public-inbox.
require PublicInbox::HTTP;
require PublicInbox::HTTPD;
}
+
my %httpds;
my $app;
my $refresh = sub {
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Standalone read-only IMAP server for public-inbox.
#!perl -w
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Basic tool to create a Xapian search index for a public-inbox.
# Usage with libeatmydata <https://www.flamingspork.com/projects/libeatmydata/>
my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term:
usage: public-inbox-index [options] INBOX_DIR
- Create and update search indices
+ Create and update per-inbox search indices
options:
--no-fsync speed up indexing, risk corruption on power outage
-L LEVEL `basic', `medium', or `full' (default: full)
+ -E EXTINDEX update extindex (default: `all')
--all index all configured inboxes
--compact | -c run public-inbox-compact(1) after indexing
--sequential-shard index Xapian shards sequentially for slow storage
BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes)
See public-inbox-index(1) man page for full documentation.
EOF
-my $opt = { quiet => -1, compact => 0, max_size => undef, fsync => 1 };
+my $opt = {
+ quiet => -1, compact => 0, max_size => undef, fsync => 1,
+ 'update-extindex' => [], # ":s@" optional arg sets '' if no arg given
+};
GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune
fsync|sync! xapian_only|xapian-only
indexlevel|index-level|L=s max_size|max-size=s
batch_size|batch-size=s
sequential_shard|seq-shard|sequential-shard
- skip-docdata all help|h))
+ no-update-extindex update-extindex|E=s@
+ fast-noop|F skip-docdata all help|h))
or die $help;
if ($opt->{help}) { print $help; exit 0 };
die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0;
if ($opt->{xapian_only} && !$opt->{reindex}) {
die "--xapian-only requires --reindex\n";
}
+if ($opt->{reindex} && delete($opt->{'fast-noop'})) {
+ warn "--fast-noop ignored with --reindex\n";
+}
# require lazily to speed up --help
require PublicInbox::Admin;
PublicInbox::Admin::require_or_die('-index');
my $cfg = PublicInbox::Config->new; # Config is loaded by Admin
+$opt->{-use_cwd} = 1;
my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);
PublicInbox::Admin::require_or_die('-index');
unless (@ibxs) { print STDERR $help; exit 1 }
+my (@eidx, %eidx_seen);
+my $update_extindex = $opt->{'update-extindex'};
+if (!scalar(@$update_extindex) && (my $ALL = $cfg->ALL)) {
+ # extindex and normal inboxes may have different owners
+ push(@$update_extindex, 'all') if -w $ALL->{topdir};
+}
+@$update_extindex = () if $opt->{'no-update-extindex'};
+if (scalar @$update_extindex) {
+ PublicInbox::Admin::require_or_die('-search');
+ require PublicInbox::ExtSearchIdx;
+}
+for my $ei_name (@$update_extindex) {
+ my $es = $cfg->lookup_ei($ei_name);
+ my $topdir;
+ if (!$es && -d $ei_name) { # allow dirname or config section name
+ $topdir = $ei_name;
+ } elsif ($es) {
+ $topdir = $es->{topdir};
+ } else {
+ die "extindex `$ei_name' not configured or found\n";
+ }
+ my $o = { %$opt };
+ delete $o->{indexlevel} if ($o->{indexlevel}//'') eq 'basic';
+ $eidx_seen{$topdir} //=
+ push(@eidx, PublicInbox::ExtSearchIdx->new($topdir, $o));
+}
my $mods = {};
+my @eidx_unconfigured;
foreach my $ibx (@ibxs) {
# detect_indexlevel may also set $ibx->{-skip_docdata}
my $detected = PublicInbox::Admin::detect_indexlevel($ibx);
$ibx->{indexlevel} //= $opt->{indexlevel} // ($opt->{xapian_only} ?
'full' : $detected);
PublicInbox::Admin::scan_ibx_modules($mods, $ibx);
+ if (@eidx && $ibx->{-unconfigured}) {
+ push @eidx_unconfigured, " $ibx->{inboxdir}\n";
+ }
}
+warn <<EOF if @eidx_unconfigured;
+The following inboxes are unconfigured and will not be updated in
+@$update_extindex:\n@eidx_unconfigured
+EOF
# "Search::Xapian" includes SWIG "Xapian", too:
$opt->{compact} = 0 if !$mods->{'Search::Xapian'};
EOL
$ibx_opt = { %$opt, sequential_shard => $v };
}
- PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt);
+ my $nidx = PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt);
+ last if $ibx_opt->{quit};
if (my $copt = $opt->{compact_opt}) {
local $copt->{jobs} = 0 if $ibx_opt->{sequential_shard};
PublicInbox::Xapcmd::run($ibx, 'compact', $copt);
}
+ last if $ibx_opt->{quit};
+ next if $ibx->{-unconfigured} || !$nidx;
+ for my $eidx (@eidx) {
+ $eidx->attach_inbox($ibx);
+ }
+}
+my $pr = $opt->{-progress};
+for my $eidx (@eidx) {
+ $pr->("indexing $eidx->{topdir} ...\n") if $pr;
+ $eidx->eidx_sync($opt);
+ last if $opt->{quit};
}
#!perl -w
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use v5.10.1;
warn "could not open config file: $lockfile: $!\n";
exit(255);
};
-my $auto_unlink = UnlinkMe->new($lockfile);
+require PublicInbox::OnDestroy;
+my $auto_unlink = PublicInbox::OnDestroy->new($$, sub { unlink $lockfile });
my ($perm, %seen);
if (-e $pi_config) {
open(my $oh, '<', $pi_config) or die "unable to read $pi_config: $!\n";
defined $perm or die "(f)stat failed on $pi_config: $!\n";
chmod($perm & 07777, $fh) or
die "(f)chmod failed on future $pi_config: $!\n";
- my $old;
- {
- local $/;
- $old = <$oh>;
- }
+ defined(my $old = do { local $/; <$oh> }) or die "read $pi_config: $!\n";
print $fh $old or die "failed to write: $!\n";
close $oh or die "failed to close $pi_config: $!\n";
my $pfx = "publicinbox.$name";
my @x = (qw/git config/, "--file=$pi_config_tmp");
-require File::Spec;
-$inboxdir = File::Spec->canonpath($inboxdir);
+$inboxdir = PublicInbox::Config::rel2abs_collapsed($inboxdir);
+die "`\\n' not allowed in `$inboxdir'\n" if index($inboxdir, "\n") >= 0;
-die "`\\n' not allowed in `$inboxdir'\n" if $inboxdir =~ /\n/s;
if (-f "$inboxdir/inbox.lock") {
if (!defined $version) {
$version = 2;
$ibx->{-skip_docdata} = $skip_docdata;
}
$ibx->init_inbox(0, $skip_epoch, $skip_artnum);
-require Cwd;
-my $tmp = Cwd::abs_path($inboxdir);
-defined($tmp) or die "failed to resolve $inboxdir: $!\n";
-$inboxdir = $tmp;
-die "`\\n' not allowed in `$inboxdir'\n" if $inboxdir =~ /\n/s;
# needed for git prior to v2.1.0
umask(0077) if defined $perm;
+require PublicInbox::Spawn;
+PublicInbox::Spawn->import(qw(run_die));
+
foreach my $addr (@address) {
next if $seen{lc($addr)};
- PublicInbox::Import::run_die([@x, "--add", "$pfx.address", $addr]);
+ run_die([@x, "--add", "$pfx.address", $addr]);
}
-PublicInbox::Import::run_die([@x, "$pfx.url", $http_url]);
-PublicInbox::Import::run_die([@x, "$pfx.inboxdir", $inboxdir]);
+run_die([@x, "$pfx.url", $http_url]);
+run_die([@x, "$pfx.inboxdir", $inboxdir]);
if (defined($indexlevel)) {
- PublicInbox::Import::run_die([@x, "$pfx.indexlevel", $indexlevel]);
+ run_die([@x, "$pfx.indexlevel", $indexlevel]);
}
-PublicInbox::Import::run_die([@x, "$pfx.newsgroup", $ng]) if $ng ne '';
+run_die([@x, "$pfx.newsgroup", $ng]) if $ng ne '';
# needed for git prior to v2.1.0
if (defined $perm) {
rename $pi_config_tmp, $pi_config or
die "failed to rename `$pi_config_tmp' to `$pi_config': $!\n";
-$auto_unlink->DESTROY;
-
-package UnlinkMe;
-use strict;
-
-sub new {
- my ($klass, $file) = @_;
- bless { file => $file }, $klass;
-}
-
-sub DESTROY {
- my $f = delete($_[0]->{file});
- unlink($f) if defined($f);
-}
-1;
+undef $auto_unlink; # trigger ->DESTROY
#!/usr/bin/perl -w
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Used for training spam (via SpamAssassin) and removing messages from a
die "--all only works with `rm'\n" if $opt{all} && $train ne 'rm';
my $spamc = PublicInbox::Spamcheck::Spamc->new;
-my $pi_config = PublicInbox::Config->new;
+my $pi_cfg = PublicInbox::Config->new;
my $err;
my $mime = PublicInbox::Eml->new(do{
- local $/;
- my $data = <STDIN>;
+ defined(my $data = do { local $/; <STDIN> }) or die "read STDIN: $!\n";
$data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
if ($train ne 'rm') {
# spam is removed from all known inboxes since it is often Bcc:-ed
if ($train eq 'spam' || ($train eq 'rm' && $opt{all})) {
- $pi_config->each_inbox(sub {
+ $pi_cfg->each_inbox(sub {
my ($ibx) = @_;
$ibx = PublicInbox::InboxWritable->new($ibx);
my $im = $ibx->importer(0);
for ($mime->header('Cc'), $mime->header('To')) {
foreach my $addr (PublicInbox::Address::emails($_)) {
$addr = lc($addr);
- $dests{$addr} //= $pi_config->lookup($addr) // 0;
+ $dests{$addr} //= $pi_cfg->lookup($addr) // 0;
}
}
next if $seen{"$ibx"}++;
remove_or_add($ibx, $train, $mime, $addr);
}
- my $dests = PublicInbox::MDA->inboxes_for_list_id($pi_config, $mime);
+ my $dests = PublicInbox::MDA->inboxes_for_list_id($pi_cfg, $mime);
for my $ibx (@$dests) {
next if $seen{"$ibx"}++;
remove_or_add($ibx, $train, $mime, $ibx->{-primary_address});
#!/usr/bin/perl -w
-# Copyright (C) 2013-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2013-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Mail delivery agent for public-inbox, run from your MTA upon mail delivery
$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
$ems->prepare(\$str);
my $eml = PublicInbox::Eml->new(\$str);
-my $config = PublicInbox::Config->new;
+my $cfg = PublicInbox::Config->new;
my $key = 'publicinboxmda.spamcheck';
my $default = 'PublicInbox::Spamcheck::Spamc';
-my $spamc = PublicInbox::Spamcheck::get($config, $key, $default);
+my $spamc = PublicInbox::Spamcheck::get($cfg, $key, $default);
my $dests = [];
my $recipient = $ENV{ORIGINAL_RECIPIENT};
if (defined $recipient) {
- my $ibx = $config->lookup($recipient); # first check
+ my $ibx = $cfg->lookup($recipient); # first check
push @$dests, $ibx if $ibx;
}
if (!scalar(@$dests)) {
- $dests = PublicInbox::MDA->inboxes_for_list_id($config, $eml);
+ $dests = PublicInbox::MDA->inboxes_for_list_id($cfg, $eml);
if (!scalar(@$dests) && !defined($recipient)) {
die "ORIGINAL_RECIPIENT not defined in ENV\n";
}
#!/usr/bin/perl -w
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Standalone NNTP server for public-inbox.
#!/usr/bin/perl -w
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Used for purging messages entirely from a public-inbox. Currently
my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt);
PublicInbox::AdminEdit::check_editable(\@ibxs);
-my $data = do { local $/; <STDIN> };
+defined(my $data = do { local $/; <STDIN> }) or die "read STDIN: $!\n";
$data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
my $n_purged = 0;
#!/usr/bin/perl -w
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
my $help = <<EOF;
usage: public-inbox-watch
use PublicInbox::Config;
use PublicInbox::DS;
use PublicInbox::Sigfd;
-use PublicInbox::Syscall qw($SFD_NONBLOCK);
+use PublicInbox::Syscall qw(SFD_NONBLOCK);
my $do_scan = 1;
GetOptions('scan!' => \$do_scan, # undocumented, testing only
'help|h' => \(my $show_help)) or do { print STDERR $help; exit 1 };
if ($show_help) { print $help; exit 0 };
-my $oldset = PublicInbox::Sigfd::block_signals();
+my $oldset = PublicInbox::DS::block_signals();
STDOUT->autoflush(1);
STDERR->autoflush(1);
local $0 = $0; # local since this script may be eval-ed
# --no-scan is only intended for testing atm, undocumented.
PublicInbox::DS::requeue($scan) if $do_scan;
- my $sigfd = PublicInbox::Sigfd->new($sig, $SFD_NONBLOCK);
- local %SIG = (%SIG, %$sig) if !$sigfd;
+ my $sigfd = PublicInbox::Sigfd->new($sig, SFD_NONBLOCK);
+ local @SIG{keys %$sig} = values(%$sig) unless $sigfd;
if (!$sigfd) {
- PublicInbox::Sigfd::sig_setmask($oldset);
+ PublicInbox::DS::sig_setmask($oldset);
PublicInbox::DS->SetLoopTimeout(1000);
}
$watch->watch($sig, $oldset) while ($watch);
#!perl -w
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use v5.10.1;
#!/usr/bin/perl -w
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ or later <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Enables using PublicInbox::WWW as a CGI script
#!/bin/sh
-# Copyright (C) 2008-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2008-2021 all contributors <meta@public-inbox.org>
# License: GPL-3.0+ <http://www.gnu.org/licenses/gpl-3.0.txt>
# This is installed as /etc/dc-dcvr on my system
# to use with postfix main.cf: mailbox_command = /etc/dc-dlvr "$EXTENSION"
#!/usr/bin/perl -w
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# ad-hoc tool for finding duplicates, unstable!
#!/usr/bin/perl -w
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Incremental (or one-shot) importer of a slrnpull news spool
my $spool = shift @ARGV or die usage();
my $recipient = $ENV{ORIGINAL_RECIPIENT};
defined $recipient or die usage();
-my $config = PublicInbox::Config->new;
-my $ibx = $config->lookup($recipient);
+my $cfg = PublicInbox::Config->new;
+my $ibx = $cfg->lookup($recipient);
my $git = $ibx->git;
my $im;
if ($ibx->version == 2) {
#!/usr/bin/perl -w
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
#!/usr/bin/perl -w
-# Copyright (C) 2013-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2013-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# One-off script to convert an slrnpull news spool to Maildir
#!/usr/bin/perl -w
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# A work-in-progress, but one day I hope this script is no longer
#!/usr/bin/perl -w
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Useful for mapping article IDs from existing NNTP servers to MIDs
use strict;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
sub test_pkg {
my ($pkg) = @_;
- my $emails = \&{"${pkg}::emails"};
- my $names = \&{"${pkg}::names"};
+ my $emails = $pkg->can('emails');
+ my $names = $pkg->can('names');
+ my $pairs = $pkg->can('pairs');
is_deeply([qw(e@example.com e@example.org)],
[$emails->('User <e@example.com>, e@example.org')],
'address extraction works as expected');
+ is_deeply($pairs->('User <e@example.com>, e@example.org'),
+ [[qw(User e@example.com)], [undef, 'e@example.org']],
+ "pair extraction works ($pkg)");
+
is_deeply(['user@example.com'],
[$emails->('<user@example.com (Comment)>')],
'comment after domain accepted before >');
+ is_deeply($pairs->('<user@example.com (Comment)>'),
+ [[qw(Comment user@example.com)]], "comment as name ($pkg)");
- my @names = $names->(
- 'User <e@e>, e@e, "John A. Doe" <j@d>, <x@x>, <y@x> (xyz), '.
- 'U Ser <u@x> (do not use)');
+ my $s = 'User <e@e>, e@e, "John A. Doe" <j@d>, <x@x>, <y@x> (xyz), '.
+ 'U Ser <u@x> (do not use)';
+ my @names = $names->($s);
is_deeply(\@names, ['User', 'e', 'John A. Doe', 'x', 'xyz', 'U Ser'],
'name extraction works as expected');
+ is_deeply($pairs->($s), [ [ 'User', 'e@e' ], [ undef, 'e@e' ],
+ [ 'John A. Doe', 'j@d' ], [ undef, 'x@x' ],
+ [ 'xyz', 'y@x' ], [ 'U Ser', 'u@x' ] ],
+ "pairs extraction works for $pkg");
@names = $names->('"user@example.com" <user@example.com>');
is_deeply(['user'], \@names,
'address-as-name extraction works as expected');
+ is_deeply($pairs->('"user@example.com" <user@example.com>'),
+ [ [ 'user@example.com', 'user@example.com' ] ],
+ "pairs for $pkg");
{
my $backwards = 'u@example.com (John Q. Public)';
is_deeply(\@names, ['John Q. Public'], 'backwards name OK');
my @emails = $emails->($backwards);
is_deeply(\@emails, ['u@example.com'], 'backwards emails OK');
+
+ is_deeply($pairs->($backwards),
+ [ [ 'John Q. Public', 'u@example.com' ] ],
+ "backwards pairs $pkg");
}
- @names = $names->('"Quote Unneeded" <user@example.com>');
+ $s = '"Quote Unneeded" <user@example.com>';
+ @names = $names->($s);
is_deeply(['Quote Unneeded'], \@names, 'extra quotes dropped');
+ is_deeply($pairs->($s), [ [ 'Quote Unneeded', 'user@example.com' ] ],
+ "extra quotes dropped in pairs $pkg");
my @emails = $emails->('Local User <user>');
is_deeply([], \@emails , 'no address for local address');
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
use Test::More;
use PublicInbox::TestCommon;
use PublicInbox::Import;
-use_ok 'PublicInbox::Admin', qw(resolve_repo_dir);
+use_ok 'PublicInbox::Admin';
my ($tmpdir, $for_destroy) = tmpdir();
my $git_dir = "$tmpdir/v1";
my $v2_dir = "$tmpdir/v2";
my ($res, $err, $v);
PublicInbox::Import::init_bare($git_dir);
+*resolve_inboxdir = \&PublicInbox::Admin::resolve_inboxdir;
# v1
-is(resolve_repo_dir($git_dir), $git_dir, 'top-level GIT_DIR resolved');
-is(resolve_repo_dir("$git_dir/objects"), $git_dir, 'GIT_DIR/objects resolved');
+is(resolve_inboxdir($git_dir), $git_dir, 'top-level GIT_DIR resolved');
+is(resolve_inboxdir("$git_dir/objects"), $git_dir, 'GIT_DIR/objects resolved');
ok(chdir($git_dir), 'chdir GIT_DIR works');
-is(resolve_repo_dir(), $git_dir, 'resolve_repo_dir works in GIT_DIR');
+is(resolve_inboxdir(), $git_dir, 'resolve_inboxdir works in GIT_DIR');
ok(chdir("$git_dir/objects"), 'chdir GIT_DIR/objects works');
-is(resolve_repo_dir(), $git_dir, 'resolve_repo_dir works in GIT_DIR');
-$res = resolve_repo_dir(undef, \$v);
+is(resolve_inboxdir(), $git_dir, 'resolve_inboxdir works in GIT_DIR');
+$res = resolve_inboxdir(undef, \$v);
is($v, 1, 'version 1 detected');
is($res, $git_dir, 'detects directory along with version');
ok(chdir($no_vcs_dir), 'chdir to a non-inbox');
open STDERR, '>&', $null or die "redirect stderr to /dev/null: $!";
- $res = eval { resolve_repo_dir() };
+ $res = eval { resolve_inboxdir() };
open STDERR, '>&', $olderr or die "restore stderr: $!";
is($res, undef, 'fails inside non-version-controlled dir');
ok(chdir($tmpdir), 'back to test-specific $tmpdir');
open STDERR, '>&', $null or die "redirect stderr to /dev/null: $!";
- $res = eval { resolve_repo_dir($no_vcs_dir) };
+ $res = eval { resolve_inboxdir($no_vcs_dir) };
$err = $@;
open STDERR, '>&', $olderr or die "restore stderr: $!";
is($res, undef, 'fails on non-version-controlled dir');
PublicInbox::V2Writable->new($ibx, 1)->idx_init;
ok(-e "$v2_dir/inbox.lock", 'exists');
- is(resolve_repo_dir($v2_dir), $v2_dir,
- 'resolve_repo_dir works on v2_dir');
- ok(chdir($v2_dir), 'chdir v2_dir OK');
- is(resolve_repo_dir(), $v2_dir, 'resolve_repo_dir works inside v2_dir');
- $res = resolve_repo_dir(undef, \$v);
+ is(resolve_inboxdir($v2_dir), $v2_dir,
+ 'resolve_inboxdir works on v2_dir');
+ chdir($v2_dir) or BAIL_OUT "chdir v2_dir: $!";
+ is(resolve_inboxdir(), $v2_dir, 'resolve_inboxdir works inside v2_dir');
+ $res = resolve_inboxdir(undef, \$v);
is($v, 2, 'version 2 detected');
is($res, $v2_dir, 'detects directory along with version');
# TODO: should work from inside Xapian dirs, and git dirs, here...
+ PublicInbox::Import::init_bare("$v2_dir/git/0.git");
+ my $objdir = "$v2_dir/git/0.git/objects";
+ is($v2_dir, resolve_inboxdir($objdir, \$v), 'at $objdir');
+ is($v, 2, 'version 2 detected at $objdir');
+ chdir($objdir) or BAIL_OUT "chdir objdir: $!";
+ is(resolve_inboxdir(undef, \$v), $v2_dir, 'inside $objdir');
+ is($v, 2, 'version 2 detected inside $objdir');
}
-chdir '/';
+chdir '/' or BAIL_OUT "chdir: $!";
my @pairs = (
'1g' => 1024 ** 3,
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# FIXME: this test is too slow and most non-CGI-requirements
# should be moved over to things which use test_psgi
#!/usr/bin/perl -w
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Parallel WWW checker
my $usage = "$0 [-j JOBS] [-s SLOW_THRESHOLD] URL_OF_INBOX\n";
--- /dev/null
+#!perl -w
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use Test::More;
+use PublicInbox::TestCommon;
+use Socket qw(AF_UNIX SOCK_STREAM MSG_EOR);
+pipe(my ($r, $w)) or BAIL_OUT;
+my ($send, $recv);
+require_ok 'PublicInbox::Spawn';
+my $SOCK_SEQPACKET = eval { Socket::SOCK_SEQPACKET() } // undef;
+use Time::HiRes qw(alarm);
+
+my $do_test = sub { SKIP: { # shared checks for the current $send/$recv pair over one AF_UNIX socketpair
+ my ($type, $flag, $desc) = @_; # socket type, 4th argument given to $send, label used in test names
+ defined $type or skip 'SOCK_SEQPACKET missing', 7; # callers pass an undef type when SOCK_SEQPACKET is unavailable
+ my ($s1, $s2);
+ my $src = 'some payload' x 40;
+ socketpair($s1, $s2, AF_UNIX, $type, 0) or BAIL_OUT $!;
+ my $sfds = [ fileno($r), fileno($w), fileno($s1) ]; # FD numbers to pass: both pipe ends plus the sending socket
+ $send->($s1, $sfds, $src, $flag);
+ my (@fds) = $recv->($s2, my $buf, length($src) + 1); # payload is expected in $buf, received FD numbers in @fds
+ is($buf, $src, 'got buffer payload '.$desc);
+ my ($r1, $w1, $s1a);
+ my $opens = sub { # wrap each received FD number in a Perl handle (fdopen-style open modes)
+ ok(open($r1, '<&=', $fds[0]), 'opened received $r');
+ ok(open($w1, '>&=', $fds[1]), 'opened received $w');
+ ok(open($s1a, '+>&=', $fds[2]), 'opened received $s1');
+ };
+ $opens->();
+ my @exp = stat $r; # stat fields 0 and 1 are device and inode; equal pairs mean the same underlying file
+ my @cur = stat $r1;
+ is("$exp[0]\0$exp[1]", "$cur[0]\0$cur[1]", '$r dev/ino matches');
+ @exp = stat $w;
+ @cur = stat $w1;
+ is("$exp[0]\0$exp[1]", "$cur[0]\0$cur[1]", '$w dev/ino matches');
+ @exp = stat $s1;
+ @cur = stat $s1a;
+ is("$exp[0]\0$exp[1]", "$cur[0]\0$cur[1]", '$s1 dev/ino matches');
+ if (defined($SOCK_SEQPACKET) && $type == $SOCK_SEQPACKET) { # everything below runs for seqpacket sockets only
+ $r1 = $w1 = $s1a = undef;
+ $src = (',' x 1023) . '-' .('.' x 1024); # 2048 bytes, twice the 1024 requested by the next $recv
+ $send->($s1, $sfds, $src, $flag);
+ (@fds) = $recv->($s2, $buf, 1024);
+ is($buf, (',' x 1023) . '-', 'silently truncated buf');
+ $opens->();
+ $r1 = $w1 = $s1a = undef;
+# the test expects nothing left to read here: a non-blocking receive must fail with EAGAIN
+ $s2->blocking(0);
+ @fds = $recv->($s2, $buf, length($src) + 1);
+ ok($!{EAGAIN}, "EAGAIN set by ($desc)");
+ is_deeply(\@fds, [ undef ], "EAGAIN $desc"); # failure is expected as a single undef element
+ $s2->blocking(1);
+# a blocking receive interrupted by a 1ms alarm is expected to fail with EINTR
+ my $alrm = 0;
+ local $SIG{ALRM} = sub { $alrm++ };
+ alarm(0.001);
+ @fds = $recv->($s2, $buf, length($src) + 1);
+ ok($!{EINTR}, "EINTR set by ($desc)");
+ is_deeply(\@fds, [ undef ], "EINTR $desc");
+ is($alrm, 1, 'SIGALRM hit');
+# after the peer is closed the next receive should see EOF: no FDs and an empty buffer
+ close $s1;
+ @fds = $recv->($s2, $buf, length($src) + 1);
+ is_deeply(\@fds, [], "no FDs on EOF $desc");
+ is($buf, '', "buffer cleared on EOF ($desc)");
+# fresh pair with a non-blocking sender and nobody reading: send until $send returns undef
+ socketpair($s1, $s2, AF_UNIX, $type, 0) or BAIL_OUT $!;
+ $s1->blocking(0);
+ my $nsent = 0;
+ while (defined(my $n = $send->($s1, $sfds, $src, $flag))) {
+ $nsent += $n;
+ fail "sent 0 bytes" if $n == 0;
+ }
+ ok($!{EAGAIN}, "hit EAGAIN on send $desc");
+ ok($nsent > 0, 'sent some bytes');
+# fresh pair again: an empty FD list must still deliver the payload
+ socketpair($s1, $s2, AF_UNIX, $type, 0) or BAIL_OUT $!;
+ is($send->($s1, [], $src, $flag), length($src), 'sent w/o FDs');
+ $buf = 'nope';
+ @fds = $recv->($s2, $buf, length($src));
+ is(scalar(@fds), 0, 'no FDs received');
+ is($buf, $src, 'recv w/o FDs');
+# grow the message in 2 MiB steps until $send fails; the failure is expected to be EMSGSIZE
+ my $nr = 2 * 1024 * 1024;
+ while (1) {
+ vec(my $vec = '', $nr * 8 - 1, 1) = 1; # setting the last bit makes $vec exactly $nr bytes long
+ my $n = $send->($s1, [], $vec, $flag);
+ if (defined($n)) {
+ $n == length($vec) or
+ fail "short send: $n != ".length($vec);
+ diag "sent $nr, retrying with more";
+ $nr += 2 * 1024 * 1024;
+ } else {
+ ok($!{EMSGSIZE}, 'got EMSGSIZE');
+ # diag "$nr bytes hits EMSGSIZE";
+ last;
+ }
+ }
+ }
+} };
+
+my $send_ic = PublicInbox::Spawn->can('send_cmd4'); # false when PublicInbox::Spawn provides no such sub
+my $recv_ic = PublicInbox::Spawn->can('recv_cmd4');
+SKIP: { # first pass: sender and receiver both come from PublicInbox::Spawn
+ ($send_ic && $recv_ic) or skip 'Inline::C not installed/enabled', 12;
+ $send = $send_ic;
+ $recv = $recv_ic;
+ $do_test->(SOCK_STREAM, 0, 'Inline::C stream');
+ $do_test->($SOCK_SEQPACKET, MSG_EOR, 'Inline::C seqpacket');
+}
+
+SKIP: { # second pass: sender and receiver both come from PublicInbox::CmdIPC4
+ require_mods('Socket::MsgHdr', 13);
+ require_ok 'PublicInbox::CmdIPC4';
+ $send = PublicInbox::CmdIPC4->can('send_cmd4');
+ $recv = PublicInbox::CmdIPC4->can('recv_cmd4');
+ $do_test->(SOCK_STREAM, 0, 'MsgHdr stream');
+ $do_test->($SOCK_SEQPACKET, MSG_EOR, 'MsgHdr seqpacket');
+ SKIP: { # third pass: mixed implementations, only when the PublicInbox::Spawn subs exist
+ ($send_ic && $recv_ic) or
+ skip 'Inline::C not installed/enabled', 12;
+ $recv = $recv_ic; # $send is left as the CmdIPC4 sub assigned above; only the receiver is swapped
+ $do_test->(SOCK_STREAM, 0, 'Inline::C -> MsgHdr stream');
+ $do_test->($SOCK_SEQPACKET, 0, 'Inline::C -> MsgHdr seqpacket'); # NOTE(review): flag is 0 here but MSG_EOR in the other seqpacket runs; confirm intended
+ }
+}
+
+done_testing;
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
'url' => [ 'http://example.com/meta' ],
-primary_address => 'meta@public-inbox.org',
'name' => 'meta',
- feedmax => 25,
-httpbackend_limiter => undef,
nntpserver => undef,
}, "lookup matches expected output");
'inboxdir' => '/home/pi/test-main.git',
'domain' => 'public-inbox.org',
'name' => 'test',
- feedmax => 25,
'url' => [ 'http://example.com/test' ],
-httpbackend_limiter => undef,
nntpserver => undef,
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
PublicInbox::Import::init_bare($ibx->{inboxdir});
ok(umask(077), 'set restrictive umask');
-ok(PublicInbox::Import::run_die([qw(git) , "--git-dir=$ibx->{inboxdir}",
- qw(config core.sharedRepository 0644)]), 'set sharedRepository');
+xsys_e(qw(git) , "--git-dir=$ibx->{inboxdir}",
+ qw(config core.sharedRepository 0644));
$ibx = PublicInbox::Inbox->new($ibx);
my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
my $mime = PublicInbox::Eml->new(<<'EOF');
--- /dev/null
+++ b/lib/PublicInbox/MailHeader.pm
@@ -0,0 +1,55 @@
-+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
++# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+package PublicInbox::MailHeader;
+use strict;
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use Test::More;
use_ok 'PublicInbox::DirIdle';
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# Licensed the same as Danga::Socket (and Perl5)
# License: GPL-1.0+ or Artistic-1.0-Perl
# <https://www.gnu.org/licenses/gpl-1.0.txt>
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# Licensed the same as Danga::Socket (and Perl5)
# License: GPL-1.0+ or Artistic-1.0-Perl
# <https://www.gnu.org/licenses/gpl-1.0.txt>
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# Licensed the same as Danga::Socket (and Perl5)
# License: GPL-1.0+ or Artistic-1.0-Perl
# <https://www.gnu.org/licenses/gpl-1.0.txt>
pipe($x, $y) or die;
is($p->epoll_ctl(EPOLL_CTL_ADD, fileno($r), EPOLLIN), 0, 'add EPOLLIN');
my $events = [];
-my $n = $p->epoll_wait(9, 0, $events);
+$p->epoll_wait(9, 0, $events);
is_deeply($events, [], 'no events set');
-is($n, 0, 'nothing ready, yet');
is($p->epoll_ctl(EPOLL_CTL_ADD, fileno($w), EPOLLOUT|EPOLLONESHOT), 0,
'add EPOLLOUT|EPOLLONESHOT');
-$n = $p->epoll_wait(9, -1, $events);
-is($n, 1, 'got POLLOUT event');
-is($events->[0]->[0], fileno($w), '$w ready');
+$p->epoll_wait(9, -1, $events);
+is(scalar(@$events), 1, 'got POLLOUT event');
+is($events->[0], fileno($w), '$w ready');
-$n = $p->epoll_wait(9, 0, $events);
-is($n, 0, 'nothing ready after oneshot');
+$p->epoll_wait(9, 0, $events);
+is(scalar(@$events), 0, 'nothing ready after oneshot');
is_deeply($events, [], 'no events set after oneshot');
syswrite($w, '1') == 1 or die;
for my $t (0..1) {
- $n = $p->epoll_wait(9, $t, $events);
- is($events->[0]->[0], fileno($r), "level-trigger POLLIN ready #$t");
- is($n, 1, "only event ready #$t");
+ $p->epoll_wait(9, $t, $events);
+ is($events->[0], fileno($r), "level-trigger POLLIN ready #$t");
+ is(scalar(@$events), 1, "only event ready #$t");
}
syswrite($y, '1') == 1 or die;
is($p->epoll_ctl(EPOLL_CTL_ADD, fileno($x), EPOLLIN|EPOLLONESHOT), 0,
'EPOLLIN|EPOLLONESHOT add');
-is($p->epoll_wait(9, -1, $events), 2, 'epoll_wait has 2 ready');
-my @fds = sort(map { $_->[0] } @$events);
+$p->epoll_wait(9, -1, $events);
+is(scalar @$events, 2, 'epoll_wait has 2 ready');
+my @fds = sort @$events;
my @exp = sort((fileno($r), fileno($x)));
is_deeply(\@fds, \@exp, 'got both ready FDs');
is($p->epoll_ctl(EPOLL_CTL_DEL, fileno($r), 0), 0, 'EPOLL_CTL_DEL OK');
-$n = $p->epoll_wait(9, 0, $events);
-is($n, 0, 'nothing ready after EPOLL_CTL_DEL');
+$p->epoll_wait(9, 0, $events);
+is(scalar @$events, 0, 'nothing ready after EPOLL_CTL_DEL');
done_testing;
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# edit frontend behavior test (t/replace.t for backend)
use strict;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use Test::More;
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# Copyright (C) 2004- Simon Cozens, Casey West, Ricardo SIGNES
# This library is free software; you can redistribute it and/or modify
# it under the same terms as Perl itself.
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# Copyright (C) 2004- Simon Cozens, Casey West, Ricardo SIGNES
# This library is free software; you can redistribute it and/or modify
# it under the same terms as Perl itself.
'epoll_ctl socket EPOLLOUT');
my @events;
-is(epoll_wait($epfd, 100, 10000, \@events), 1, 'epoll_wait returns');
+epoll_wait($epfd, 100, 10000, \@events);
is(scalar(@events), 1, 'got one event');
-is($events[0]->[0], fileno($w), 'got expected FD');
-is($events[0]->[1], EPOLLOUT, 'got expected event');
+is($events[0], fileno($w), 'got expected FD');
close $w;
-is(epoll_wait($epfd, 100, 0, \@events), 0, 'epoll_wait timeout');
+epoll_wait($epfd, 100, 0, \@events);
+is(@events, 0, 'epoll_wait timeout');
done_testing;
--- /dev/null
+#!perl -w
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use Test::More;
+use PublicInbox::TestCommon;
+use PublicInbox::Config;
+use PublicInbox::Search;
+use PublicInbox::InboxWritable;
+use Fcntl qw(:seek);
+require_git(2.6);
+require_mods(qw(json DBD::SQLite Search::Xapian));
+use_ok 'PublicInbox::ExtSearch';
+use_ok 'PublicInbox::ExtSearchIdx';
+use_ok 'PublicInbox::OverIdx';
+my $sock = tcp_server(); # setup section: server socket for the later NNTP check, then one v2 and one v1 inbox holding the same message
+my $host_port = $sock->sockhost . ':' . $sock->sockport;
+my ($home, $for_destroy) = tmpdir();
+local $ENV{HOME} = $home; # presumably so the scripts run below pick up the config written under $home
+mkdir "$home/.public-inbox" or BAIL_OUT $!;
+my $cfg_path = "$home/.public-inbox/config";
+open my $fh, '>', $cfg_path or BAIL_OUT $!;
+print $fh <<EOF or BAIL_OUT $!;
+[publicinboxMda]
+ spamcheck = none
+EOF
+close $fh or BAIL_OUT $!;
+my $v2addr = 'v2test@example.com';
+my $v1addr = 'v1test@example.com';
+ok(run_script([qw(-init -Lbasic -V2 v2test --newsgroup v2.example),
+ "$home/v2test", 'http://example.com/v2test', $v2addr ]), 'v2test init');
+my $env = { ORIGINAL_RECIPIENT => $v2addr };
+my $eml = eml_load('t/utf8.eml');
+
+$eml->header_set('List-Id', '<v2.example.com>');
+open($fh, '+>', undef) or BAIL_OUT $!; # an undef filename opens an anonymous temporary file
+$fh->autoflush(1);
+print $fh $eml->as_string or BAIL_OUT $!;
+seek($fh, 0, SEEK_SET) or BAIL_OUT $!; # rewind so the message is read from the start
+
+run_script(['-mda', '--no-precheck'], $env, { 0 => $fh }) or BAIL_OUT '-mda'; # handle mapped to key 0, presumably stdin of the script
+
+ok(run_script([qw(-init -V1 v1test --newsgroup v1.example), "$home/v1test",
+ 'http://example.com/v1test', $v1addr ]), 'v1test init');
+
+$eml->header_set('List-Id', '<v1.example.com>');
+seek($fh, 0, SEEK_SET) or BAIL_OUT $!;
+truncate($fh, 0) or BAIL_OUT $!; # reuse the same temporary file for the v1 copy of the message
+print $fh $eml->as_string or BAIL_OUT $!;
+seek($fh, 0, SEEK_SET) or BAIL_OUT $!;
+
+$env = { ORIGINAL_RECIPIENT => $v1addr }; # stays in effect for later -mda runs that use the file-level $env
+run_script(['-mda', '--no-precheck'], $env, { 0 => $fh }) or BAIL_OUT '-mda';
+
+run_script([qw(-index -Lbasic), "$home/v1test"]) or BAIL_OUT "index $?";
+
+ok(run_script([qw(-extindex --all), "$home/extindex"]), 'extindex init');
+{ # short-lived handle: only checks has_threadid on the extindex created just above
+ my $es = PublicInbox::ExtSearch->new("$home/extindex");
+ ok($es->has_threadid, '->has_threadid');
+}
+
+{ # TODO: -extindex should write this to config
+ open $fh, '>>', $cfg_path or BAIL_OUT $!; # append the extindex section by hand until the TODO above is done
+ print $fh <<EOF or BAIL_OUT $!;
+; for ->ALL
+[extindex "all"]
+ topdir = $home/extindex
+EOF
+ close $fh or BAIL_OUT $!;
+
+ my $pi_cfg = PublicInbox::Config->new; # fresh config object so the appended section is read
+ $pi_cfg->fill_all;
+ ok($pi_cfg->ALL, '->ALL');
+ my $ibx = $pi_cfg->{-by_newsgroup}->{'v2.example'};
+ my $ret = $pi_cfg->ALL->nntp_xref_for($ibx, $ibx->over->get_art(1)); # expected: one key per newsgroup carrying the message (values presumably article numbers)
+ is_deeply($ret, { 'v1.example' => 1, 'v2.example' => 1 },
+ '->nntp_xref_for');
+}
+
+SKIP: { # NNTP view of the same data; require_mods presumably skips this block when Net::NNTP is missing
+ require_mods(qw(Net::NNTP), 1);
+ my ($out, $err) = ("$home/nntpd.out.log", "$home/nntpd.err.log");
+ my $cmd = [ '-nntpd', '-W0', "--stdout=$out", "--stderr=$err" ];
+ my $td = start_script($cmd, undef, { 3 => $sock }); # server socket mapped to key 3, presumably inherited by the daemon as FD 3
+ my $n = Net::NNTP->new($host_port);
+ my @xp = $n->xpath('<testmessage@example.com>');
+ is_deeply(\@xp, [ qw(v1.example/1 v2.example/1) ]);
+ $n->group('v1.example');
+ my $res = $n->head(1);
+ @$res = grep(/^Xref: /, @$res); # keep only the Xref header line
+ like($res->[0], qr/ v1\.example:1 v2\.example:1/, 'nntp_xref works');
+}
+
+my $es = PublicInbox::ExtSearch->new("$home/extindex"); # long-lived handle, reused by the sections that follow
+{ # the message delivered to both inboxes must be stored once, with one xref3 entry per inbox
+ my $smsg = $es->over->get_art(1);
+ ok($smsg, 'got first article');
+ is($es->over->get_art(2), undef, 'only one added');
+ my $xref3 = $es->over->get_xref3(1);
+ like($xref3->[0], qr/\A\Qv2.example\E:1:/, 'order preserved 1'); # v2 was delivered first, so it is expected first
+ like($xref3->[1], qr/\A\Qv1.example\E:1:/, 'order preserved 2');
+ is(scalar(@$xref3), 2, 'only two entries'); # description typo fixed (was: only to entries)
+}
+
+if ('inbox edited') { # always-true string condition; the if only names and scopes this scenario
+ my ($in, $out, $err);
+ $in = $out = $err = '';
+ my $opt = { 0 => \$in, 1 => \$out, 2 => \$err }; # in-memory buffers keyed 0, 1, 2; $err is matched against a warning below
+ my $env = { MAIL_EDITOR => "$^X -i -p -e 's/test message/BEST MSG/'" }; # editor command: in-place substitution run with the current perl binary
+ my $cmd = [ qw(-edit -Ft/utf8.eml), "$home/v2test" ];
+ ok(run_script($cmd, $env, $opt), '-edit');
+ ok(run_script([qw(-extindex --all), "$home/extindex"], undef, $opt),
+ 'extindex again');
+ like($err, qr/discontiguous range/, 'warned about discontiguous range');
+ my $msg1 = $es->over->get_art(1) or BAIL_OUT 'msg1 missing'; # the checks below expect the original body in article 1
+ my $msg2 = $es->over->get_art(2) or BAIL_OUT 'msg2 missing'; # and the edited body in article 2
+ is($msg1->{mid}, $msg2->{mid}, 'edited message indexed');
+ isnt($msg1->{blob}, $msg2->{blob}, 'blobs differ');
+ my $eml2 = $es->smsg_eml($msg2);
+ like($eml2->body, qr/BEST MSG/, 'edited body in #2');
+ unlike($eml2->body, qr/test message/, 'old body discarded in #2');
+ my $eml1 = $es->smsg_eml($msg1);
+ like($eml1->body, qr/test message/, 'original body in #1');
+ my $x1 = $es->over->get_xref3(1);
+ my $x2 = $es->over->get_xref3(2);
+ is(scalar(@$x1), 1, 'original only has one xref3');
+ is(scalar(@$x2), 1, 'new message has one xref3');
+ isnt($x1->[0], $x2->[0], 'xref3 differs');
+
+ my $mset = $es->mset('b:"BEST MSG"');
+ is($mset->size, 1, 'new message found');
+ $mset = $es->mset('b:"test message"');
+ is($mset->size, 1, 'old message found');
+ delete @$es{qw(git over xdb)}; # fork preparation
+
+ my $pi_cfg = PublicInbox::Config->new;
+ $pi_cfg->fill_all;
+ is(scalar($pi_cfg->ALL->mset('s:Testing')->items), 2,
+ '2 results in ->ALL');
+ my $res = {}; # per-inbox search results, keyed by eidx_key
+ my $nr = 0;
+ $pi_cfg->each_inbox(sub {
+ $nr++;
+ my ($ibx) = @_;
+ local $SIG{__WARN__} = sub {}; # FIXME support --reindex
+ my $mset = $ibx->isrch->mset('s:Testing');
+ $res->{$ibx->eidx_key} = $ibx->isrch->mset_to_smsg($ibx, $mset);
+ });
+ is($nr, 2, 'two inboxes');
+ my $exp = {}; # expected results rebuilt from the over data of each inbox
+ for my $v (qw(v1 v2)) {
+ my $ibx = $pi_cfg->lookup_newsgroup("$v.example");
+ my $smsg = $ibx->over->get_art(1);
+ $smsg->psgi_cull;
+ $exp->{"$v.example"} = [ $smsg ];
+ }
+ is_deeply($res, $exp, 'isearch limited results');
+ $pi_cfg = $res = $exp = undef;
+
+ open my $rmfh, '+>', undef or BAIL_OUT $!; # anonymous temporary file holding the edited message
+ $rmfh->autoflush(1);
+ print $rmfh $eml2->as_string or BAIL_OUT $!;
+ seek($rmfh, 0, SEEK_SET) or BAIL_OUT $!;
+ $opt->{0} = $rmfh; # feed the edited message to -learn rm in place of the empty buffer
+ ok(run_script([qw(-learn rm --all)], undef, $opt), '-learn rm');
+
+ ok(run_script([qw(-extindex --all), "$home/extindex"], undef, undef),
+ 'extindex after rm');
+ is($es->over->get_art(2), undef, 'doc #2 gone');
+ $mset = $es->mset('b:"BEST MSG"');
+ is($mset->size, 0, 'new message gone');
+}
+
+my $misc = $es->misc; # kept at file level; the gc section at the end reuses it
+my @it = $misc->mset('')->items; # empty query; the checks below expect one document per inbox
+is(scalar(@it), 2, 'two inboxes');
+like($it[0]->get_document->get_data, qr/v2test/, 'docdata matched v2');
+like($it[1]->get_document->get_data, qr/v1test/, 'docdata matched v1');
+
+my $cfg = PublicInbox::Config->new;
+my $schema_version = PublicInbox::Search::SCHEMA_VERSION();
+my $f = "$home/extindex/ei$schema_version/over.sqlite3";
+my $oidx = PublicInbox::OverIdx->new($f); # direct handle on the extindex over.sqlite3, shared by the sections below
+if ('inject w/o indexing') { # scenario: messages added to git but not indexed must be ignored by -extindex until -index runs
+ use PublicInbox::Import; # use takes effect at compile time regardless of the enclosing if
+ my $v1ibx = $cfg->lookup_name('v1test');
+ my $last_v1_commit = $v1ibx->mm->last_commit;
+ my $v2ibx = $cfg->lookup_name('v2test');
+ my $last_v2_commit = $v2ibx->mm->last_commit_xap($schema_version, 0);
+ my $git0 = PublicInbox::Git->new("$v2ibx->{inboxdir}/git/0.git");
+ chomp(my $cmt = $git0->qx(qw(rev-parse HEAD^0)));
+ is($last_v2_commit, $cmt, 'v2 index up-to-date');
+
+ my $v2im = PublicInbox::Import->new($git0, undef, undef, $v2ibx); # plain Import writing straight into 0.git
+ $v2im->{lock_path} = undef;
+ $v2im->{path_type} = 'v2';
+ $v2im->add(eml_load('t/mda-mime.eml'));
+ $v2im->done;
+ chomp(my $tip = $git0->qx(qw(rev-parse HEAD^0)));
+ isnt($tip, $cmt, '0.git v2 updated');
+
+ # inject a message w/o updating index
+ rename("$home/v1test/public-inbox", "$home/v1test/skip-index") or
+ BAIL_OUT $!;
+ open(my $eh, '<', 't/iso-2202-jp.eml') or BAIL_OUT $!;
+ run_script(['-mda', '--no-precheck'], $env, { 0 => $eh}) or
+ BAIL_OUT '-mda';
+ rename("$home/v1test/skip-index", "$home/v1test/public-inbox") or
+ BAIL_OUT $!;
+
+ my ($in, $out, $err);
+ $in = $out = $err = '';
+ my $opt = { 0 => \$in, 1 => \$out, 2 => \$err }; # NOTE(review): $opt is never passed; the run_script call below gets undef instead, confirm intended
+ ok(run_script([qw(-extindex -v -v --all), "$home/extindex"],
+ undef, undef), 'extindex noop');
+ $es->{xdb}->reopen; # refresh the Xapian handle before searching again
+ my $mset = $es->mset('mid:199707281508.AAA24167@hoyogw.example');
+ is($mset->size, 0, 'did not attempt to index unindexed v1 message');
+ $mset = $es->mset('mid:multipart-html-sucks@11');
+ is($mset->size, 0, 'did not attempt to index unindexed v2 message');
+ ok(run_script([qw(-index --all)]), 'indexed v1 and v2 inboxes');
+
+ isnt($v1ibx->mm->last_commit, $last_v1_commit, '-index v1 worked');
+ isnt($v2ibx->mm->last_commit_xap($schema_version, 0),
+ $last_v2_commit, '-index v2 worked');
+ ok(run_script([qw(-extindex --all), "$home/extindex"]),
+ 'extindex updates');
+
+ $es->{xdb}->reopen;
+ $mset = $es->mset('mid:199707281508.AAA24167@hoyogw.example');
+ is($mset->size, 1, 'got v1 message');
+ $mset = $es->mset('mid:multipart-html-sucks@11');
+ is($mset->size, 1, 'got v2 message');
+}
+
+if ('reindex catches missed messages') { # scenario: the lc-v2 marker is advanced by hand, so only --reindex can notice the change
+ my $v2ibx = $cfg->lookup_name('v2test');
+ my $im = PublicInbox::InboxWritable->new($v2ibx)->importer(0);
+ my $cmt_a = $v2ibx->mm->last_commit_xap($schema_version, 0); # value before the import
+ my $eml = eml_load('t/data/0001.patch');
+ $im->add($eml);
+ $im->done;
+ my $cmt_b = $v2ibx->mm->last_commit_xap($schema_version, 0); # value after the import; asserted to differ
+ isnt($cmt_a, $cmt_b, 'v2 0.git HEAD updated');
+ $oidx->dbh; # presumably opens the DB handle that eidx_meta needs
+ my $uv = $v2ibx->uidvalidity;
+ my $lc_key = "lc-v2:v2.example//$uv;0";
+ is($oidx->eidx_meta($lc_key, $cmt_b), $cmt_a, # two-argument call stores $cmt_b; the test expects the previous value back
+ 'update lc-v2 meta, old is as expected');
+ my $max = $oidx->max;
+ $oidx->dbh_close; # closed before each external script run in this section
+ ok(run_script([qw(-extindex), "$home/extindex", $v2ibx->{inboxdir}]),
+ '-extindex noop');
+ is($oidx->max, $max, '->max unchanged');
+ is($oidx->eidx_meta($lc_key), $cmt_b, 'lc-v2 unchanged');
+ $oidx->dbh_close;
+ my $opt = { 2 => \(my $err = '') }; # collect what the script writes under key 2 (presumably stderr) in $err
+ ok(run_script([qw(-extindex --reindex), "$home/extindex",
+ $v2ibx->{inboxdir}], undef, $opt),
+ '--reindex for unseen');
+ is($oidx->max, $max + 1, '->max bumped');
+ is($oidx->eidx_meta($lc_key), $cmt_b, 'lc-v2 stays unchanged');
+ my @err = split(/^/, $err); # one element per output line
+ is(scalar(@err), 1, 'only one warning') or diag "err=$err";
+ like($err[0], qr/I: reindex_unseen/, 'got reindex_unseen message');
+ my $new = $oidx->get_art($max + 1);
+ is($new->{subject}, $eml->header('Subject'), 'new message added');
+
+ $es->{xdb}->reopen;
+ my $mset = $es->mset("mid:$new->{mid}");
+ is($mset->size, 1, 'previously unseen, now indexed in Xapian');
+
+ ok($im->remove($eml), 'remove new message from v2 inbox'); # second half: the same trick for a removal
+ $im->done;
+ my $cmt_c = $v2ibx->mm->last_commit_xap($schema_version, 0);
+ is($oidx->eidx_meta($lc_key, $cmt_c), $cmt_b,
+ 'bump lc-v2 meta again to skip v2 remove');
+ $err = ''; # reset the capture buffer shared through $opt
+ $oidx->dbh_close;
+ ok(run_script([qw(-extindex --reindex), "$home/extindex",
+ $v2ibx->{inboxdir}], undef, $opt),
+ '--reindex for stale');
+ @err = split(/^/, $err);
+ is(scalar(@err), 1, 'only one warning') or diag "err=$err";
+ like($err[0], qr/\(#$new->{num}\): stale/, 'got stale message warning');
+ is($oidx->get_art($new->{num}), undef,
+ 'stale message gone from over');
+ is_deeply($oidx->get_xref3($new->{num}), [],
+ 'stale message has no xref3');
+ $es->{xdb}->reopen;
+ $mset = $es->mset("mid:$new->{mid}");
+ is($mset->size, 0, 'stale mid gone Xapian');
+}
+
+if ('reindex catches content bifurcation') { # scenario: a foreign xref3 is attached to article 3 and --reindex must split it back out
+ use PublicInbox::MID qw(mids); # use takes effect at compile time regardless of the enclosing if
+ my $v2ibx = $cfg->lookup_name('v2test');
+ my $im = PublicInbox::InboxWritable->new($v2ibx)->importer(0);
+ my $eml = eml_load('t/data/message_embed.eml');
+ my $cmt_a = $v2ibx->mm->last_commit_xap($schema_version, 0);
+ $im->add($eml);
+ $im->done;
+ my $cmt_b = $v2ibx->mm->last_commit_xap($schema_version, 0);
+ my $uv = $v2ibx->uidvalidity;
+ my $lc_key = "lc-v2:v2.example//$uv;0";
+ $oidx->dbh;
+ is($oidx->eidx_meta($lc_key, $cmt_b), $cmt_a, # advance the marker by hand so a plain -extindex run finds nothing new
+ 'update lc-v2 meta, old is as expected');
+ my $mid = mids($eml)->[0];
+ my $smsg = $v2ibx->over->next_by_mid($mid, \(my $id), \(my $prev)); # look up the freshly imported message in the v2 inbox
+ my $oldmax = $oidx->max;
+ my $x3_orig = $oidx->get_xref3(3);
+ is(scalar(@$x3_orig), 1, '#3 has one xref');
+ $oidx->add_xref3(3, $smsg->{num}, $smsg->{blob}, 'v2.example'); # inject the new message as an extra xref3 on article 3
+ my $x3 = $oidx->get_xref3(3);
+ is(scalar(@$x3), 2, 'injected xref3');
+ $oidx->commit_lazy;
+ my $opt = { 2 => \(my $err = '') }; # $err accumulates output across both script runs below
+ ok(run_script([qw(-extindex --all), "$home/extindex"], undef, $opt),
+ 'extindex --all is noop');
+ is($err, '', 'no warnings in index');
+ $oidx->dbh;
+ is($oidx->max, $oldmax, 'oidx->max unchanged');
+ $oidx->dbh_close;
+ ok(run_script([qw(-extindex --reindex --all), "$home/extindex"],
+ undef, $opt), 'extindex --reindex');
+ $oidx->dbh;
+ ok($oidx->max > $oldmax, 'oidx->max bumped');
+ like($err, qr/split into 2 due to deduplication change/,
+ 'bifurcation noted');
+ my $added = $oidx->get_art($oidx->max); # the newest article is expected to be the split-out message
+ is($added->{blob}, $smsg->{blob}, 'new blob indexed');
+ is_deeply(["v2.example:$smsg->{num}:$smsg->{blob}"],
+ $oidx->get_xref3($added->{num}),
+ 'xref3 corrected for bifurcated message');
+ is_deeply($oidx->get_xref3(3), $x3_orig, 'xref3 restored for #3');
+}
+
+if ('--reindex --rethread') { # compares the largest tid before rethreading with the smallest tid afterwards
+ my $before = $oidx->dbh->selectrow_array(<<'');
+SELECT MAX(tid) FROM over WHERE num > 0
+
+ my $opt = {};
+ ok(run_script([qw(-extindex --reindex --rethread --all),
+ "$home/extindex"], undef, $opt),
+ '--rethread');
+ my $after = $oidx->dbh->selectrow_array(<<'');
+SELECT MIN(tid) FROM over WHERE num > 0
+
+ # actual rethread logic is identical to v1/v2 and tested elsewhere
+ ok($after > $before, '--rethread updates MIN(tid)'); # passes only if every remaining tid is newer than all earlier ones
+}
+
+if ('remove v1test and test gc') { # scenario: drop v1test from the config, then --gc must purge its data from the extindex
+ xsys([qw(git config --unset publicinbox.v1test.inboxdir)],
+ { GIT_CONFIG => $cfg_path }); # GIT_CONFIG points git at the test config file
+ my $opt = { 2 => \(my $err = '') }; # collect what the script writes under key 2 (presumably stderr) in $err
+ ok(run_script([qw(-extindex --gc), "$home/extindex"], undef, $opt),
+ 'extindex --gc');
+ like($err, qr/^I: remove #1 v1\.example /ms, 'removed v1 message');
+ is(scalar(grep(!/^I:/, split(/^/m, $err))), 0,
+ 'no non-informational messages');
+ $misc->{xdb}->reopen; # refresh the handle so the query below sees the result of --gc
+ @it = $misc->mset('')->items;
+ is(scalar(@it), 1, 'only one inbox left');
+}
+
+done_testing;
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Ensure FakeInotify can pick up rename(2) and link(2) operations
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
{
# check initial feed
{
- my $feed = string_feed({ -inbox => $ibx });
+ my $feed = string_feed({ ibx => $ibx });
SKIP: {
skip 'XML::TreePP missing', 3 unless $have_xml_treepp;
my $t = XML::TreePP->new->parse($feed);
# check spam shows up
{
- my $spammy_feed = string_feed({ -inbox => $ibx });
+ my $spammy_feed = string_feed({ ibx => $ibx });
SKIP: {
skip 'XML::TreePP missing', 2 unless $have_xml_treepp;
my $t = XML::TreePP->new->parse($spammy_feed);
# spam no longer shows up
{
- my $feed = string_feed({ -inbox => $ibx });
+ my $feed = string_feed({ ibx => $ibx });
SKIP: {
skip 'XML::TreePP missing', 2 unless $have_xml_treepp;
my $t = XML::TreePP->new->parse($feed);
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2017-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2017-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
];
my $ibx = PublicInbox::Inbox->new({ inboxdir => $git_dir,
altid => $altid });
- $f = PublicInbox::Filter::RubyLang->new(-inbox => $ibx);
+ $f = PublicInbox::Filter::RubyLang->new(ibx => $ibx);
$msg = <<'EOF';
X-Mail-Count: 12
Message-ID: <a@b>
-# Copyright (C) 2017-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2017-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
--- /dev/null
+#!perl -w
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use PublicInbox::TestCommon;
+use Test::More;
+use Fcntl qw(:seek);
+use IO::Handle ();
+use POSIX qw(_exit);
+use Cwd qw(abs_path);
+require_mods('PublicInbox::Gcf2');
+use_ok 'PublicInbox::Gcf2';
+use PublicInbox::Import;
+my ($tmpdir, $for_destroy) = tmpdir();
+
+my $gcf2 = PublicInbox::Gcf2::new();
+is(ref($gcf2), 'PublicInbox::Gcf2', '::new works');
+my $COPYING = 'dba13ed2ddf783ee8118c6a581dbf75305f816a3';
+open my $agpl, '<', 'COPYING' or BAIL_OUT "AGPL-3 missing: $!";
+$agpl = do { local $/; <$agpl> };
+
+PublicInbox::Import::init_bare($tmpdir);
+my $fi_data = './t/git.fast-import-data';
+my $rdr = {};
+open $rdr->{0}, '<', $fi_data or BAIL_OUT $!;
+xsys([qw(git fast-import --quiet)], { GIT_DIR => $tmpdir }, $rdr);
+is($?, 0, 'fast-import succeeded');
+$gcf2->add_alternate("$tmpdir/objects");
+
+# cat a tree object from the fast-imported repo into a pipe, then check
+# the "<oid> <type> <size>\n" header line, the body length and the
+# trailing newline written after the body
+{
+	my ($r, $w);
+	pipe($r, $w) or BAIL_OUT $!;
+	my $tree = 'fdbc43725f21f485051c17463b50185f4c3cf88c';
+	$gcf2->cat_oid(fileno($w), $tree);
+	close $w; # so the slurp below sees EOF
+	is("$tree tree 30\n", <$r>, 'tree header ok');
+	$r = do { local $/; <$r> }; # $r is reused to hold the remaining bytes
+	is(chop($r), "\n", 'got trailing newline');
+	is(length($r), 30, 'tree length matches');
+}
+
+# Locate the object directory of the git repository the tests run from
+# and append it to the alternates file of the scratch repo in $tmpdir.
+# $objdir is left undef when no such directory exists; later sections
+# test defined($objdir) before using it.
+chomp(my $objdir = xqx([qw(git rev-parse --git-path objects)]));
+if ($objdir =~ /\A--git-path\n/) { # git <2.5
+	chomp($objdir = xqx([qw(git rev-parse --git-dir)]));
+	$objdir .= '/objects';
+}
+if ($objdir && -d $objdir) {
+	$objdir = abs_path($objdir);
+	open my $alt, '>>', "$tmpdir/objects/info/alternates" or
+			BAIL_OUT $!;
+	print $alt $objdir, "\n" or BAIL_OUT $!;
+	close $alt or BAIL_OUT $!;
+
+	# calling gcf2->add_alternate on an already-added path won't
+	# cause alternates to be reloaded, so we do
+	# $gcf2->add_alternate($objdir) later on instead of
+	# $gcf2->add_alternate("$tmpdir/objects");
+	# $objdir = "$tmpdir/objects";
+} else {
+	$objdir = undef
+}
+
+my $nr = $ENV{TEST_LEAK_NR};
+my $cat = $ENV{TEST_LEAK_CAT} // 10;
+diag "checking for leaks... (TEST_LEAK_NR=$nr TEST_LEAK_CAT=$cat)" if $nr;
+
+# Main cat_oid checks; they need the object directory of the enclosing
+# git worktree ($objdir) so the COPYING blob can be found.
+# The skip count covers every test in this block: 13 before the pipe
+# loop plus 7 per loop iteration (2 iterations) = 27.
+SKIP: {
+	skip 'not in git worktree', 27 unless defined($objdir);
+	$gcf2->add_alternate($objdir);
+	eval { $gcf2->add_alternate($objdir) };
+	ok(!$@, 'no error adding alternate redundantly');
+	if ($nr) {
+		diag "adding alternate $nr times redundantly";
+		$gcf2->add_alternate($objdir) for (1..$nr);
+		diag 'done adding redundant alternates';
+	}
+
+	open my $fh, '+>', undef or BAIL_OUT "open: $!";
+	$fh->autoflush(1);
+
+	# failed lookups must return false and write nothing to the fd
+	ok(!$gcf2->cat_oid(fileno($fh), 'invalid'), 'invalid fails');
+	seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
+	is(do { local $/; <$fh> }, '', 'nothing written');
+
+	open $fh, '+>', undef or BAIL_OUT "open: $!";
+	ok(!$gcf2->cat_oid(fileno($fh), '0'x40), 'z40 fails');
+	seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
+	is(do { local $/; <$fh> }, '', 'nothing written for z40');
+
+	open $fh, '+>', undef or BAIL_OUT "open: $!";
+	# rewinds $fh (whatever it currently refers to) and verifies it
+	# holds the header line, the COPYING contents and a trailing "\n";
+	# runs 3 tests per call
+	my $ck_copying = sub {
+		my ($desc) = @_;
+		seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
+		is(<$fh>, "$COPYING blob 34520\n", "got expected header $desc");
+		my $buf = do { local $/; <$fh> };
+		is(chop($buf), "\n", 'got trailing \\n');
+		is($buf, $agpl, "AGPL matches ($desc)");
+	};
+	ok($gcf2->cat_oid(fileno($fh), $COPYING), 'cat_oid normal');
+	$ck_copying->('regular file');
+
+	# a fresh object only told about $tmpdir/objects; the blob is
+	# presumably reached through the alternates file appended earlier
+	$gcf2 = PublicInbox::Gcf2::new();
+	$gcf2->add_alternate("$tmpdir/objects");
+	open $fh, '+>', undef or BAIL_OUT "open: $!";
+	ok($gcf2->cat_oid(fileno($fh), $COPYING), 'cat_oid alternate');
+	$ck_copying->('alternates after reopen');
+
+	$^O eq 'linux' or skip('pipe tests are Linux-only', 14);
+	for my $blk (1, 0) { # blocking pipe first, then non-blocking
+		my ($r, $w);
+		pipe($r, $w) or BAIL_OUT $!;
+		# 1031 is F_SETPIPE_SZ on Linux: shrink the pipe to 4096
+		# bytes so the blob cannot fit in a single write
+		fcntl($w, 1031, 4096) or
+			skip('Linux too old for F_SETPIPE_SZ', 14);
+		$w->blocking($blk);
+		seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
+		truncate($fh, 0) or BAIL_OUT "truncate: $!";
+		defined(my $pid = fork) or BAIL_OUT "fork: $!";
+		if ($pid == 0) {
+			# child: drain the pipe into the shared temp file
+			close $w;
+			tick; # wait for parent to block on writev
+			my $buf = do { local $/; <$r> };
+			print $fh $buf or _exit(1);
+			_exit(0);
+		}
+		ok($gcf2->cat_oid(fileno($w), $COPYING), "cat blocking=$blk");
+		close $w or BAIL_OUT "close: $!";
+		is(waitpid($pid, 0), $pid, 'child exited');
+		is($?, 0, 'no error in child');
+		$ck_copying->("pipe blocking($blk)");
+
+		# writing to a pipe with no reader must raise an error
+		pipe($r, $w) or BAIL_OUT $!;
+		fcntl($w, 1031, 4096) or BAIL_OUT $!;
+		$w->blocking($blk);
+		close $r;
+		local $SIG{PIPE} = 'IGNORE';
+		eval { $gcf2->cat_oid(fileno($w), $COPYING) };
+		like($@, qr/writev error:/, 'got writev error');
+	}
+}
+
+# Optional leak check, enabled by TEST_LEAK_NR: repeatedly create Gcf2
+# objects and exercise successful, failing and broken-pipe cat_oid
+# calls (TEST_LEAK_CAT rounds each) so memory growth can be observed
+# externally.  No assertions are made here.
+if ($nr) {
+	open my $null, '>', '/dev/null' or BAIL_OUT "open /dev/null: $!";
+	my $fd = fileno($null);
+	local $SIG{PIPE} = 'IGNORE';
+	my ($r, $w);
+	# checked like every other pipe() in this file; an unnoticed
+	# failure would leave fileno($w) undefined below
+	pipe($r, $w) or BAIL_OUT "pipe: $!";
+	close $r; # no reader: writes to $w are expected to fail
+	my $broken = fileno($w);
+	for (1..$nr) {
+		my $obj = PublicInbox::Gcf2::new();
+		if (defined($objdir)) {
+			$obj->add_alternate($objdir);
+			for (1..$cat) {
+				$obj->cat_oid($fd, $COPYING);
+				eval { $obj->cat_oid($broken, $COPYING) };
+				$obj->cat_oid($fd, '0'x40);
+				$obj->cat_oid($fd, 'invalid');
+			}
+		}
+	}
+}
+done_testing;
--- /dev/null
+#!perl -w
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use PublicInbox::TestCommon;
+use Test::More;
+use Cwd qw(getcwd);
+use PublicInbox::Import;
+use PublicInbox::DS;
+
+require_mods('PublicInbox::Gcf2');
+use_ok 'PublicInbox::Gcf2Client';
+my ($tmpdir, $for_destroy) = tmpdir();
+my $git_a = "$tmpdir/a.git";
+my $git_b = "$tmpdir/b.git";
+PublicInbox::Import::init_bare($git_a);
+PublicInbox::Import::init_bare($git_b);
+my $fi_data = './t/git.fast-import-data';
+my $rdr = {};
+open $rdr->{0}, '<', $fi_data or BAIL_OUT $!;
+xsys([qw(git fast-import --quiet)], { GIT_DIR => $git_a }, $rdr);
+is($?, 0, 'fast-import succeeded');
+
+my $tree = 'fdbc43725f21f485051c17463b50185f4c3cf88c';
+my $called = 0;
+my $err_f = "$tmpdir/err";
+# Exercise Gcf2Client: each gcf2_async request is "<oid> <git_dir>\n"
+# and its callback receives ($bref, $oid, $type, $size, $arg).  The
+# client's stderr (fd 2) is redirected to $err_f so warnings can be
+# inspected.  $called counts callbacks and is checked after this block.
+{
+	PublicInbox::DS->Reset;
+	open my $err, '>>', $err_f or BAIL_OUT $!;
+	my $gcf2c = PublicInbox::Gcf2Client::new({ 2 => $err });
+	$gcf2c->gcf2_async(\"$tree $git_a\n", sub {
+		my ($bref, $oid, $type, $size, $arg) = @_;
+		is($oid, $tree, 'got expected OID');
+		is($size, 30, 'got expected length');
+		is($type, 'tree', 'got tree type');
+		is(length($$bref), 30, 'got a tree');
+		is($arg, 'hi', 'arg passed');
+		$called++;
+	}, 'hi');
+	$gcf2c->cat_async_step($gcf2c->{inflight});
+
+	open $err, '<', $err_f or BAIL_OUT $!;
+	my $estr = do { local $/; <$err> };
+	is($estr, '', 'nothing in stderr');
+
+	# a 39-character OID cannot be resolved
+	my $trunc = substr($tree, 0, 39);
+	$gcf2c->gcf2_async(\"$trunc $git_a\n", sub {
+		my ($bref, $oid, $type, $size, $arg) = @_;
+		is($bref, undef, 'missing bref is undef');
+		is($oid, $trunc, 'truncated OID printed');
+		is($type, 'missing', 'type is "missing"');
+		is($size, undef, 'size is undef');
+		is($arg, 'bye', 'arg passed when missing');
+		$called++;
+	}, 'bye');
+	$gcf2c->cat_async_step($gcf2c->{inflight});
+
+	open $err, '<', $err_f or BAIL_OUT $!;
+	$estr = do { local $/; <$err> };
+	like($estr, qr/retrying/, 'warned about retry');
+
+	# try failed alternates lookup
+	PublicInbox::DS->Reset;
+	open $err, '>', $err_f or BAIL_OUT $!; # '>' truncates old warnings
+	$gcf2c = PublicInbox::Gcf2Client::new({ 2 => $err });
+	$gcf2c->gcf2_async(\"$tree $git_b\n", sub {
+		my ($bref, $oid, $type, $size, $arg) = @_;
+		is($bref, undef, 'missing bref from alt is undef');
+		$called++;
+	});
+	$gcf2c->cat_async_step($gcf2c->{inflight});
+	open $err, '<', $err_f or BAIL_OUT $!;
+	$estr = do { local $/; <$err> };
+	like($estr, qr/retrying/, 'warned about retry before alt update');
+
+	# now try successful alternates lookup
+	open my $alt, '>>', "$git_b/objects/info/alternates" or BAIL_OUT $!;
+	print $alt "$git_a/objects\n" or BAIL_OUT $!;
+	close $alt or BAIL_OUT "close: $!";
+	my $expect = xqx(['git', "--git-dir=$git_a", qw(cat-file tree), $tree]);
+	# NOTE(review): this request names $git_a, which holds $tree
+	# directly, so the alternates entry just written to $git_b does
+	# not appear to be exercised -- confirm whether $git_b was intended
+	$gcf2c->gcf2_async(\"$tree $git_a\n", sub {
+		my ($bref, $oid, $type, $size, $arg) = @_;
+		is($oid, $tree, 'oid match on alternates retry');
+		is($$bref, $expect, 'tree content matched');
+		$called++;
+	});
+	$gcf2c->cat_async_step($gcf2c->{inflight});
+}
+is($called, 4, 'gcf2_async callbacks hit');
+done_testing;
#!/usr/bin/perl -w
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
is(length($$x), $size, 'read correct number of bytes');
my $ref = $gcf->qx(qw(cat-file blob), $buf);
+ is($?, 0, 'no error on scalar success');
my @ref = $gcf->qx(qw(cat-file blob), $buf);
+ is($?, 0, 'no error on wantarray success');
my $nl = scalar @ref;
ok($nl > 1, "qx returned array length of $nl");
+ is(join('', @ref), $ref, 'qx array and scalar context both work');
$gcf->qx(qw(repack -adq));
ok($gcf->packed_bytes > 0, 'packed size is positive');
+ $gcf->qx(qw(rev-parse --verify bogus));
+ isnt($?, 0, '$? set on failure'.$?);
}
SKIP: {
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use Test::More;
#!/usr/bin/perl -w
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# corner case tests for the generic PSGI server
# Usage: plackup [OPTIONS] /path/to/this/file
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# note: our HTTP server should be standalone and capable of running
# generic PSGI/Plack apps.
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Tests for binding Unix domain sockets
use strict;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2017-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2017-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use Test::More;
use_ok 'PublicInbox::IdxStack';
my $oid_a = '03c21563cf15c241687966b5b2a3f37cdc193316';
my $oid_b = '963caad026055ab9bcbe3ee9550247f9d8840feb';
+my $cmt_a = 'df8e4a0612545d53672036641e9f076efc94c2f6';
+my $cmt_b = '3ba7c9fa4a083c439e768882c571c2026a981ca5';
my $stk = PublicInbox::IdxStack->new;
is($stk->read_prepare, $stk, 'nothing');
is($stk->pop_rec, undef, 'undef on empty');
$stk = PublicInbox::IdxStack->new;
-$stk->push_rec('m', 1234, 5678, $oid_a);
+$stk->push_rec('m', 1234, 5678, $oid_a, $cmt_a);
is($stk->read_prepare, $stk, 'read_prepare');
is($stk->num_records, 1, 'num_records');
-is_deeply([$stk->pop_rec], ['m', 1234, 5678, $oid_a], 'pop once');
+is_deeply([$stk->pop_rec], ['m', 1234, 5678, $oid_a, $cmt_a], 'pop once');
is($stk->pop_rec, undef, 'undef on empty');
$stk = PublicInbox::IdxStack->new;
-$stk->push_rec('m', 1234, 5678, $oid_a);
-$stk->push_rec('d', 1234, 5678, $oid_b);
+$stk->push_rec('m', 1234, 5678, $oid_a, $cmt_a);
+$stk->push_rec('d', 1234, 5678, $oid_b, $cmt_b);
is($stk->read_prepare, $stk, 'read_prepare');
is($stk->num_records, 2, 'num_records');
-is_deeply([$stk->pop_rec], ['d', 1234, 5678, $oid_b], 'pop');
-is_deeply([$stk->pop_rec], ['m', 1234, 5678, $oid_a], 'pop-pop');
+is_deeply([$stk->pop_rec], ['d', 1234, 5678, $oid_b, $cmt_b], 'pop');
+is_deeply([$stk->pop_rec], ['m', 1234, 5678, $oid_a, $cmt_a], 'pop-pop');
is($stk->pop_rec, undef, 'empty');
SKIP: {
while (<$fh>) {
chomp;
my ($at, $ct, $H) = split(/\./);
- $stk //= PublicInbox::IdxStack->new($H);
+ $stk //= PublicInbox::IdxStack->new;
# not bothering to parse blobs here, just using commit OID
# as a blob OID since they're the same size + format
- $stk->push_rec('m', $at + 0, $ct + 0, $H);
- push(@expect, [ 'm', $at, $ct, $H ]);
+ $stk->push_rec('m', $at + 0, $ct + 0, $H, $H);
+ push(@expect, [ 'm', $at, $ct, $H, $H ]);
}
$stk or skip('nothing from git log', 3);
is($stk->read_prepare, $stk, 'read_prepare');
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# unit tests (no network) for IMAP, see t/imapd.t for end-to-end tests
use strict;
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use Test::More;
$q = $parse->($s = qq{BEFORE 2-Oct-1993});
is_deeply($q->{sql}, \" AND ts <= $t0", 'BEFORE SQL');
$q = $parse->("FROM z $s");
- is($q->{xap}, qq{f:"z" ts:..$t0}, 'BEFORE Xapian');
+ is($q->{xap}, qq{f:"z" rt:..$t0}, 'BEFORE Xapian');
$q = $parse->($s = qq{SINCE 2-Oct-1993});
is_deeply($q->{sql}, \" AND ts >= $t0", 'SINCE SQL');
$q = $parse->("FROM z $s");
- is($q->{xap}, qq{f:"z" ts:$t0..}, 'SINCE Xapian');
+ is($q->{xap}, qq{f:"z" rt:$t0..}, 'SINCE Xapian');
$q = $parse->($s = qq{ON 2-Oct-1993});
is_deeply($q->{sql}, \" AND ts >= $t0 AND ts <= $t1", 'ON SQL');
$q = $parse->("FROM z $s");
- is($q->{xap}, qq{f:"z" ts:$t0..$t1}, 'ON Xapian');
+ is($q->{xap}, qq{f:"z" rt:$t0..$t1}, 'ON Xapian');
}
{
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use Test::More;
use strict;
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# end-to-end IMAP tests, see unit tests in t/imap.t, too
use strict;
my $have_inotify = eval { require Linux::Inotify2; 1 };
-my $pi_config = PublicInbox::Config->new;
-$pi_config->each_inbox(sub {
+my $pi_cfg = PublicInbox::Config->new;
+$pi_cfg->each_inbox(sub {
my ($ibx) = @_;
my $env = { ORIGINAL_RECIPIENT => $ibx->{-primary_address} };
my $name = $ibx->{name};
SKIP: {
# do any clients use non-UID IMAP SEARCH?
- skip 'Xapian missing', 2 if $level eq 'basic';
+ skip 'Xapian missing', 3 if $level eq 'basic';
my $x = $mic->search('all');
is_deeply($x, [1, 2, 3], 'MSN SEARCH works before rm');
$x = $mic->search(qw(header subject embedded));
is_deeply($x, [2], 'MSN SEARCH on Subject works before rm');
+ $x = $mic->search('FROM scraper@example.com');
+ is_deeply($x, [], "MSN SEARCH miss won't trigger warnings");
}
{
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
use PublicInbox::Smsg;
use PublicInbox::Git;
use PublicInbox::Import;
-use PublicInbox::Spawn qw(spawn);
use Fcntl qw(:DEFAULT SEEK_SET);
use PublicInbox::TestCommon;
use MIME::Base64 3.05; # Perl 5.10.0 / 5.9.2
if ($v2) {
like($smsg->{blob}, qr/\A[a-f0-9]{40}\z/, 'got last object_id');
- my $raw_email = $smsg->{-raw_email};
- is($mime->as_string, $$raw_email, 'string matches');
- is($smsg->{raw_bytes}, length($$raw_email), 'length matches');
my @cmd = ('git', "--git-dir=$git->{git_dir}", qw(hash-object --stdin));
open my $in, '+<', undef or BAIL_OUT "open(+<): $!";
print $in $mime->as_string or die "write failed: $!";
$in->flush or die "flush failed: $!";
- seek($in, 0, SEEK_SET);
- open my $out, '+<', undef or BAIL_OUT "open(+<): $!";
- my $pid = spawn(\@cmd, {}, { 0 => $in, 1 => $out });
- is(waitpid($pid, 0), $pid, 'waitpid succeeds on hash-object');
+ seek($in, 0, SEEK_SET) or die "seek: $!";
+ chomp(my $hashed_obj = xqx(\@cmd, undef, { 0 => $in }));
is($?, 0, 'hash-object');
- seek($out, 0, SEEK_SET);
- chomp(my $hashed_obj = <$out>);
is($hashed_obj, $smsg->{blob}, "blob object_id matches exp");
}
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use Test::More;
use PublicInbox::TestCommon;
$sidx->set_metadata_once;
$sidx->idx_release; # allow watching on lockfile
}
- my $pi_config = PublicInbox::Config->new(\<<EOF);
+ my $pi_cfg = PublicInbox::Config->new(\<<EOF);
publicinbox.inbox-idle.inboxdir=$inboxdir
publicinbox.inbox-idle.indexlevel=basic
publicinbox.inbox-idle.address=test\@example.com
EOF
my $ident = 'whatever';
- $pi_config->each_inbox(sub { shift->subscribe_unlock($ident, $obj) });
- my $ii = PublicInbox::InboxIdle->new($pi_config);
+ $pi_cfg->each_inbox(sub { shift->subscribe_unlock($ident, $obj) });
+ my $ii = PublicInbox::InboxIdle->new($pi_cfg);
ok($ii, 'InboxIdle created');
SKIP: {
skip('inotify or kqueue missing', 1) unless $ii->{sock};
PublicInbox::SearchIdx->new($ibx)->index_sync if $V == 1;
$ii->event_step;
is(scalar @{$obj->{called}}, 1, 'called on unlock');
- $pi_config->each_inbox(sub { shift->unsubscribe_unlock($ident) });
+ $pi_cfg->each_inbox(sub { shift->unsubscribe_unlock($ident) });
ok($im->add(eml_load('t/data/0001.patch')), "$V added #2");
$im->done;
PublicInbox::SearchIdx->new($ibx)->index_sync if $V == 1;
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use v5.10.1;
use Test::More;
use PublicInbox::TestCommon;
-use PublicInbox::Import;
use PublicInbox::Config;
use PublicInbox::Admin;
use File::Path qw(remove_tree);
print $w $data or die;
close $w or die;
my $cmd = ['git', "--git-dir=$v1dir", 'fast-import', '--quiet'];
- PublicInbox::Import::run_die($cmd, undef, { 0 => $r });
+ xsys_e($cmd, undef, { 0 => $r });
}
run_script(['-index', '--skip-docdata', $v1dir]) or die 'v1 index failed';
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
local $ENV{PI_TEST_VERSION} = 1;
require './t/indexlevels-mirror.t';
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
$import_index_incremental->($PI_TEST_VERSION, 'basic', $mime);
SKIP: {
- require PublicInbox::Search;
- PublicInbox::Search::load_xapian() or
- skip('Xapian perl binding missing', 2);
+ require_mods(qw(Search::Xapian), 2);
foreach my $l (qw(medium full)) {
$import_index_incremental->($PI_TEST_VERSION, $l, $mime);
}
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
--- /dev/null
+#!perl -w
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use Test::More;
+use PublicInbox::TestCommon;
+use Fcntl qw(SEEK_SET);
+use Digest::SHA qw(sha1_hex);
+require_mods(qw(Storable||Sereal));
+require_ok 'PublicInbox::IPC';
+# Add test-only methods to PublicInbox::IPC by string-eval'ing the
+# heredoc below; the heredoc is terminated by the first empty line.
+state $once = eval <<'';
+package PublicInbox::IPC;
+use strict;
+use Digest::SHA qw(sha1_hex);
+sub test_array { qw(test array) }
+sub test_scalar { 'scalar' }
+sub test_scalarref { \'scalarref' }
+sub test_undef { undef }
+sub test_die { shift; die @_; 'unreachable' }
+sub test_pid { $$ }
+sub test_write_each_fd {
+	my ($self, @args) = @_;
+	for my $fd (0..2) {
+		print { $self->{$fd} } "i=$fd $$ ", @args, "\n";
+		$self->{$fd}->flush;
+	}
+}
+sub test_sha {
+	my ($self, $buf) = @_;
+	print { $self->{1} } sha1_hex($buf), "\n";
+	$self->{1}->flush;
+}
+1;
+
+my $ipc = bless {}, 'PublicInbox::IPC';
+my @t = qw(array scalar scalarref undef);
+# Shared assertions for ipc_do / ipc_async, run once before a worker
+# is spawned and once with a worker process.
+# $x: label interpolated into test descriptions
+my $test = sub {
+	my $x = shift;
+	my @res; # filled by ipc_async callbacks
+	for my $type (@t) {
+		my $m = "test_$type";
+		# list context: result must equal a direct method call
+		my @ret = $ipc->ipc_do($m);
+		my @exp = $ipc->$m;
+		is_deeply(\@ret, \@exp, "wantarray $m $x");
+
+		$ipc->ipc_do($m); # void context
+
+		$ipc->ipc_async($m, [], sub { push @res, \@_ }, \$m);
+
+		# scalar context
+		my $ret = $ipc->ipc_do($m);
+		my $exp = $ipc->$m;
+		is_deeply($ret, $exp, "!wantarray $m $x");
+
+		# by now the async callback must have run exactly once,
+		# receiving the extra arg (\$m) followed by the result list
+		is_deeply(\@res, [ [ \$m, \@exp ] ], "async $m $x");
+		@res = ();
+	}
+	$ipc->ipc_async_wait(-1);
+	is_deeply(\@res, [], 'no leftover results');
+	# an async call that dies hands a PublicInbox::IPC::Die object
+	# to the callback in place of a result
+	$ipc->ipc_async('test_die', ['die test'],
+		sub { push @res, \@_ }, 'die arg');
+	$ipc->ipc_async_wait(1);
+	is(scalar(@res), 1, 'only one result');
+	is(scalar(@{$res[0]}), 2, 'result has 2-element array');
+	is($res[0]->[0], 'die arg', 'got async die arg '.$x);
+	is(ref($res[0]->[1]), 'PublicInbox::IPC::Die',
+		"exception type $x");
+	{
+		# queue PIPE_BUF async calls before waiting on any of them
+		my $nr = PublicInbox::IPC::PIPE_BUF();
+		my $count = 0;
+		my $cb = sub { ++$count };
+		$ipc->ipc_async('test_undef', [], $cb) for (1..$nr);
+		$ipc->ipc_async_wait(-1);
+		is($count, $nr, "$x async runs w/o deadlock");
+	}
+
+	# a string exception via ipc_do must match the one raised by a
+	# direct call, including the reported line number
+	my $ret = eval { $ipc->test_die('phail') };
+	my $exp = $@;
+	$ret = eval { $ipc->ipc_do('test_die', 'phail') };
+	my $err = $@;
+	my %lines;
+	for ($err, $exp) {
+		# strip " line N..." from both messages, counting each N
+		s/ line (\d+).*//s and $lines{$1}++;
+	}
+	is(scalar keys %lines, 1, 'line numbers match');
+	is((values %lines)[0], 2, '2 hits on same line number');
+	is($err, $exp, "$x die matches");
+	is($ret, undef, "$x die did not return");
+
+	# reference exceptions (unblessed, then blessed) are propagated
+	eval { $ipc->test_die(['arrayref']) };
+	$exp = $@;
+	$ret = eval { $ipc->ipc_do('test_die', ['arrayref']) };
+	$err = $@;
+	is_deeply($err, $exp, 'die with unblessed ref');
+	is(ref($err), 'ARRAY', 'got an array ref');
+
+	$exp = bless ['blessed'], 'PublicInbox::WTF';
+	$ret = eval { $ipc->ipc_do('test_die', $exp) };
+	$err = $@;
+	is_deeply($err, $exp, 'die with blessed ref');
+	is(ref($err), 'PublicInbox::WTF', 'got blessed ref');
+};
+$test->('local');
+
+# Repeat the shared assertions with a spawned worker process, then
+# check that the worker is gone after ipc_worker_stop.
+{
+	my $pid = $ipc->ipc_worker_spawn('test worker');
+	# bail before $pid is used numerically, so a failed spawn does
+	# not also emit an "uninitialized value" warning
+	defined($pid) or BAIL_OUT 'no spawn, no test';
+	ok($pid > 0 && kill(0, $pid), 'worker spawned and running');
+	is($ipc->ipc_do('test_pid'), $pid, 'worker pid returned');
+	$test->('worker');
+	{
+		# the same worker must still answer once a lock is set up
+		my ($tmp, $for_destroy) = tmpdir();
+		$ipc->ipc_lock_init("$tmp/lock");
+		is($ipc->ipc_do('test_pid'), $pid, 'worker pid returned');
+	}
+	$ipc->ipc_worker_stop;
+	ok(!kill(0, $pid) && $!{ESRCH}, 'worker stopped');
+}
+$ipc->ipc_worker_stop; # idempotent
+
+# work queues
+pipe(my ($ra, $wa)) or BAIL_OUT $!;
+pipe(my ($rb, $wb)) or BAIL_OUT $!;
+pipe(my ($rc, $wc)) or BAIL_OUT $!;
+open my $warn, '+>', undef or BAIL_OUT;
+$warn->autoflush(0);
+local $SIG{__WARN__} = sub { print $warn "PID:$$ ", @_ };
+my @ppids;
+open my $agpl, '<', 'COPYING' or BAIL_OUT "AGPL-3 missing: $!";
+my $big = do { local $/; <$agpl> } // BAIL_OUT "read: $!";
+close $agpl or BAIL_OUT "close: $!";
+
+# Run the same wq_do calls three times.  wq_workers_start is only
+# called at the end of each pass, so no workers have been started yet
+# during the first ('local') pass.
+for my $t ('local', 'worker', 'worker again') {
+	# the handles in the array ref are what the method sees as
+	# $self->{0}, $self->{1} and $self->{2}
+	$ipc->wq_do('test_write_each_fd', [ $wa, $wb, $wc ], 'hello world');
+	my $i = 0;
+	for my $fh ($ra, $rb, $rc) {
+		my $buf = readline($fh);
+		is(chop($buf), "\n", "trailing CR ($t)");
+		like($buf, qr/\Ai=$i \d+ hello world\z/, "got expected ($t)");
+		$i++;
+	}
+	# the die becomes a warning, captured in $warn by the
+	# $SIG{__WARN__} handler installed above
+	$ipc->wq_do('test_die', [ $wa, $wb, $wc ]);
+	# test_sha prints to $self->{1}, so the digest is read from $rb
+	$ipc->wq_do('test_sha', [ $wa, $wb ], 'hello world');
+	is(readline($rb), sha1_hex('hello world')."\n", "SHA small ($t)");
+	{
+		my $bigger = $big x 10;
+		$ipc->wq_do('test_sha', [ $wa, $wb ], $bigger);
+		my $exp = sha1_hex($bigger)."\n";
+		undef $bigger;
+		is(readline($rb), $exp, "SHA big ($t)");
+	}
+	# return values are collected and checked after the loop
+	my $ppid = $ipc->wq_workers_start('wq', 1);
+	push(@ppids, $ppid);
+}
+
+# wq_do works across fork (siblings can feed)
+SKIP: {
+ skip 'Socket::MsgHdr or Inline::C missing', 3 if !$ppids[0];
+ is_deeply(\@ppids, [$$, undef, undef],
+ 'parent pid returned in wq_workers_start');
+ my $pid = fork // BAIL_OUT $!;
+ if ($pid == 0) {
+ use POSIX qw(_exit);
+ $ipc->wq_do('test_write_each_fd', [ $wa, $wb, $wc ], $$);
+ _exit(0);
+ } else {
+ my $i = 0;
+ my ($wpid, @rest) = keys %{$ipc->{-wq_workers}};
+ is(scalar(@rest), 0, 'only one worker');
+ for my $fh ($ra, $rb, $rc) {
+ my $buf = readline($fh);
+ is(chop($buf), "\n", "trailing CR #$i");
+ like($buf, qr/^i=$i $wpid $pid\z/,
+ 'got expected from sibling');
+ $i++;
+ }
+ is(waitpid($pid, 0), $pid, 'waitpid complete');
+ is($?, 0, 'child wq producer exited');
+ }
+}
+
+$ipc->wq_close;
+# Inspect the warnings captured in $warn by the $SIG{__WARN__} handler
+# (presumably one per wq_do('test_die', ...) call made earlier), then
+# restart the work queue and exercise worker-count adjustments.
+SKIP: {
+	skip 'Socket::MsgHdr or Inline::C missing', 11 if !$ppids[0];
+	seek($warn, 0, SEEK_SET) or BAIL_OUT;
+	my @warn = <$warn>;
+	is(scalar(@warn), 3, 'warned 3 times');
+	like($warn[0], qr/ wq_do: /, '1st warned from wq_do');
+	like($warn[1], qr/ wq_worker: /, '2nd warned from wq_worker');
+	# each line starts with "PID:$$ ", so identical lines mean the
+	# same process emitted both warnings
+	is($warn[2], $warn[1], 'worker did not die');
+
+	$SIG{__WARN__} = 'DEFAULT';
+	is($ipc->wq_workers_start('wq', 1), $$, 'workers started again');
+	is($ipc->wq_workers, 1, '1 worker started');
+	SKIP: {
+		$ipc->WQ_MAX_WORKERS > 1 or
+			skip 'Inline::C or Socket::MsgHdr not available', 4;
+		$ipc->wq_worker_incr;
+		is($ipc->wq_workers, 2, 'worker count bumped');
+		$ipc->wq_worker_decr;
+		$ipc->wq_worker_decr_wait(10);
+		is($ipc->wq_workers, 1, 'worker count lowered');
+		is($ipc->wq_workers(2), 2, 'worker count set');
+		is($ipc->wq_workers, 2, 'worker count stayed set');
+	}
+	$ipc->wq_close;
+	is($ipc->wq_workers, undef, 'workers undef after close');
+}
+
+done_testing;
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Ensure KQNotify can pick up rename(2) and link(2) operations
--- /dev/null
+#!perl -w
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use PublicInbox::TestCommon;
+# Re-run the whole of t/lei.t with TEST_LEI_ONESHOT=1 in the
+# environment (presumably selecting lei's one-shot mode -- see how
+# t/lei.t and TestCommon consume the variable).
+local $ENV{TEST_LEI_ONESHOT} = '1';
+require './t/lei.t';
--- /dev/null
+#!perl -w
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use Test::More;
+use PublicInbox::TestCommon;
+use PublicInbox::Config;
+use File::Path qw(rmtree);
+use Fcntl qw(SEEK_SET);
+use PublicInbox::Spawn qw(which);
+require_git 2.6;
+require_mods(qw(json DBD::SQLite Search::Xapian));
+my $opt = { 1 => \(my $out = ''), 2 => \(my $err = '') };
+my ($home, $for_destroy) = tmpdir();
+my $err_filter;
+my @onions = qw(http://hjrcffqmbrq6wope.onion/meta/
+ http://czquwvybam4bgbro.onion/meta/
+ http://ou63pmih66umazou.onion/meta/);
+# Run the `lei' script through run_script and return its result.
+# Two call shapes are accepted:
+#   $lei->([ @cmd ], $env, $xopt)
+#   $lei->(@cmd, $env, $xopt)  -- defined non-ref args become the
+#                                 command; refs/undef become $env, $xopt
+# $out and $err are reset before every run; $opt (which captures
+# stdout/stderr into them) is used when no $xopt is given.
+my $lei = sub {
+	my ($cmd, $env, $xopt) = @_;
+	$out = $err = '';
+	if (!ref($cmd)) {
+		($env, $xopt) = grep { (!defined) || ref } @_;
+		$cmd = [ grep { defined && !ref } @_ ];
+	}
+	my $res = run_script(['lei', @$cmd], $env, $xopt // $opt);
+	# drop stderr lines matching $err_filter, when one is set
+	$err_filter and
+		$err = join('', grep(!/$err_filter/, split(/^/m, $err)));
+	$res;
+};
+
+delete local $ENV{XDG_DATA_HOME};
+delete local $ENV{XDG_CONFIG_HOME};
+local $ENV{GIT_COMMITTER_EMAIL} = 'lei@example.com';
+local $ENV{GIT_COMMITTER_NAME} = 'lei user';
+local $ENV{XDG_RUNTIME_DIR} = "$home/xdg_run";
+local $ENV{HOME} = $home;
+local $ENV{FOO} = 'BAR';
+mkdir "$home/xdg_run", 0700 or BAIL_OUT "mkdir: $!";
+my $home_trash = [ "$home/.local", "$home/.config" ];
+my $cleanup = sub { rmtree([@$home_trash, @_]) };
+my $config_file = "$home/.config/lei/config";
+my $store_dir = "$home/.local/share/lei";
+
+# Usage/help behavior: failures print usage to stderr and exit 1,
+# explicit help requests print usage to stdout with a clean stderr.
+my $test_help = sub {
+	ok(!$lei->(), 'no args fails');
+	is($? >> 8, 1, '$? is 1');
+	is($out, '', 'nothing in stdout');
+	like($err, qr/^usage:/sm, 'usage in stderr');
+
+	# valid ways of asking for help
+	for my $arg (['-h'], ['--help'], ['help'], [qw(daemon-pid --help)]) {
+		ok($lei->($arg), "lei @$arg");
+		like($out, qr/^usage:/sm, "usage in stdout (@$arg)");
+		is($err, '', "nothing in stderr (@$arg)");
+	}
+
+	# empty or misspelled commands/switches must fail
+	for my $arg ([''], ['--halp'], ['halp'], [qw(daemon-pid --halp)]) {
+		ok(!$lei->($arg), "lei @$arg");
+		is($? >> 8, 1, '$? set correctly');
+		isnt($err, '', 'something in stderr');
+		is($out, '', 'nothing in stdout');
+	}
+	# help output shows the paths actually in effect, following
+	# XDG_DATA_HOME / XDG_CONFIG_HOME when they are set
+	ok($lei->(qw(init -h)), 'init -h');
+	like($out, qr! \Q$home\E/\.local/share/lei/store\b!,
+		'actual path shown in init -h');
+	ok($lei->(qw(init -h), { XDG_DATA_HOME => '/XDH' }),
+		'init with XDG_DATA_HOME');
+	like($out, qr! /XDH/lei/store\b!, 'XDG_DATA_HOME in init -h');
+	is($err, '', 'no errors from init -h');
+
+	ok($lei->(qw(config -h)), 'config-h');
+	like($out, qr! \Q$home\E/\.config/lei/config\b!,
+		'actual path shown in config -h');
+	ok($lei->(qw(config -h), { XDG_CONFIG_HOME => '/XDC' }),
+		'config with XDG_CONFIG_HOME');
+	like($out, qr! /XDC/lei/config\b!, 'XDG_CONFIG_HOME in config -h');
+	is($err, '', 'no errors from config -h');
+};
+
+# Passes when every line captured in $err starts with "I:"; on
+# failure the whole of $err is shown via diag.
+# $msg: test description
+my $ok_err_info = sub {
+	my ($msg) = @_;
+	my @non_info = grep { !/^I:/ } split(/^/, $err);
+	is(scalar(@non_info), 0, $msg) or diag "$msg: err=$err";
+};
+
+# `lei init': idempotent with and without a path argument, refuses a
+# path that conflicts with an existing setup, and rejects extra
+# arguments without creating anything.
+my $test_init = sub {
+	$cleanup->();
+	ok($lei->('init'), 'init w/o args');
+	$ok_err_info->('after init w/o args');
+	ok($lei->('init'), 'idempotent init w/o args');
+	$ok_err_info->('after idempotent init w/o args');
+
+	# a different path conflicts with the init done above
+	ok(!$lei->('init', "$home/x"), 'init conflict');
+	is(grep(/^E:/, split(/^/, $err)), 1, 'got error on conflict');
+	ok(!-e "$home/x", 'nothing created on conflict');
+	$cleanup->();
+
+	# after cleanup the same path is accepted
+	ok($lei->('init', "$home/x"), 'init conflict resolved');
+	$ok_err_info->('init w/ arg');
+	ok($lei->('init', "$home/x"), 'init idempotent w/ path');
+	$ok_err_info->('init idempotent w/ arg');
+	ok(-d "$home/x", 'created dir');
+	$cleanup->("$home/x");
+
+	ok(!$lei->('init', "$home/x", "$home/2"), 'too many args fails');
+	like($err, qr/too many/, 'noted excessive');
+	ok(!-e "$home/x", 'x not created on excessive');
+	for my $d (@$home_trash) {
+		my $base = (split(m!/!, $d))[-1];
+		ok(!-d $d, "$base not created");
+	}
+	is($out, '', 'nothing in stdout on init failure');
+};
+
+# `lei config': setting and listing a variable works, while -f (an
+# alternate config file) is rejected and must not create that file.
+my $test_config = sub {
+	$cleanup->();
+	ok($lei->(qw(config a.b c)), 'config set var');
+	is($out.$err, '', 'no output on var set');
+	ok($lei->(qw(config -l)), 'config -l');
+	is($err, '', 'no errors on listing');
+	is($out, "a.b=c\n", 'got expected output');
+	my $alt_cfg = "$home/.config/f";
+	ok(!$lei->(qw(config -f), $alt_cfg, qw(x.y z)),
+			'config set var with -f fails');
+	like($err, qr/not supported/, 'not supported noted');
+	# check the very path given to -f; a differently-spelled path
+	# would make this assertion pass unconditionally
+	ok(!-f $alt_cfg, 'no file created');
+};
+
+# Creates one v1 and one v2 inbox under $home and imports the sample
+# messages found under t/ into each.  The work is done at most once per
+# $home value; bails out of the whole test run on any failure.
+my $setup_publicinboxes = sub {
+	state $done = '';
+	return if $done eq $home;
+	use PublicInbox::InboxWritable;
+	foreach my $ver (1, 2) {
+		my @init = ('-init', "-V$ver", "t$ver",
+			'--newsgroup', "t.$ver",
+			"$home/t$ver", "http://example.com/t$ver",
+			"t$ver\@example.com");
+		run_script(\@init) or BAIL_OUT "init v$ver";
+	}
+	my $nr_imported = 0;
+	my $import_into = sub {
+		my ($ibx) = @_;
+		my $im = PublicInbox::InboxWritable->new($ibx)->importer(0);
+		my $ver = $ibx->version;
+		my @paths = glob('t/*.eml');
+		push(@paths, 't/data/0001.patch') if $ver == 2;
+		foreach my $path (@paths) {
+			next if $path eq 't/psgi_v2-old.eml'; # dup mid
+			$im->add(eml_load($path)) or
+				BAIL_OUT "v$ver add $path";
+			++$nr_imported;
+		}
+		$im->done;
+		# only the v1 inbox gets an explicit -index run here
+		if ($ver == 1) {
+			run_script(['-index', $ibx->{inboxdir}]) or
+				BAIL_OUT 'index v1';
+		}
+	};
+	PublicInbox::Config->new->each_inbox($import_into);
+	$done = $home;
+	$nr_imported or BAIL_OUT 'no imports';
+};
+
+# Queries a real remote inbox at $url through lei.  $k is the name of
+# the environment variable which enables this test; it only appears in
+# the skip message.  Everything is skipped when $url is empty, when curl
+# is missing, or when torsocks is missing for a .onion URL.
+my $test_external_remote = sub {
+	my ($url, $k) = @_;
+SKIP: {
+	my $nr = 6; # must match the number of assertions below
+	skip "$k unset", $nr if !$url;
+	which('curl') or skip 'no curl', $nr;
+	which('torsocks') or skip 'no torsocks', $nr if $url =~ m!\.onion/!;
+	# forget every external currently configured
+	# (presumably so that only $url gets searched -- confirm)
+	$lei->('ls-external');
+	for my $e (split(/^/ms, $out)) {
+		$e =~ s/\s+boost.*//s; # keep only the location part
+		$lei->('forget-external', '-q', $e) or
+			fail "error forgetting $e: $err"
+	}
+	# not an assertion, but say so if the external could not be added
+	$lei->('add-external', $url) or
+		diag "add-external $url failed: $err";
+	my $mid = '20140421094015.GA8962@dcvr.yhbt.net';
+	ok($lei->('q', "m:$mid"), "query $url");
+	is($err, '', "no errors on $url");
+	my $res = PublicInbox::Config->json->decode($out);
+	is($res->[0]->{'m'}, "<$mid>", "got expected mid from $url");
+	# same Message-ID restricted to an earlier date range: no match
+	ok($lei->('q', "m:$mid", 'd:..20101002'), 'no results, no error');
+	like($err, qr/404/, 'noted 404');
+	is($out, "[null]\n", 'got null results');
+	$lei->('forget-external', $url);
+} # /SKIP
+}; # /sub
+
+# Exercises add-external / ls-external / forget-external against the
+# local test inboxes, then "lei q" output handling (bad destinations,
+# plain and gzipped mboxcl2 output, --augment) and finally the optional
+# remote externals enabled via the TEST_LEI_EXTERNAL_* variables.
+my $test_external = sub {
+	$setup_publicinboxes->();
+	$cleanup->();
+	$lei->('ls-external');
+	is($out.$err, '', 'ls-external no output, yet');
+	ok(!-e $config_file && !-e $store_dir,
+		'nothing created by ls-external');
+
+	ok(!$lei->('add-external', "$home/nonexistent"),
+		"fails on non-existent dir");
+	$lei->('ls-external');
+	is($out.$err, '', 'ls-external still has no output');
+	my $cfg = PublicInbox::Config->new;
+	$cfg->each_inbox(sub {
+		my ($ibx) = @_;
+		ok($lei->(qw(add-external -q), $ibx->{inboxdir}),
+			'added external');
+		is($out.$err, '', 'no output');
+	});
+	ok(-s $config_file && -e $store_dir,
+		'add-external created config + store');
+	my $lcfg = PublicInbox::Config->new($config_file);
+	$cfg->each_inbox(sub {
+		my ($ibx) = @_;
+		is($lcfg->{"external.$ibx->{inboxdir}.boost"}, 0,
+			"configured boost on $ibx->{name}");
+	});
+	$lei->('ls-external');
+	like($out, qr/boost=0\n/s, 'ls-external has output');
+	# mixed-case host name: the stored URL is expected in canonical form
+	ok($lei->(qw(add-external -q https://EXAMPLE.com/ibx)), 'add remote');
+	is($err, '', 'no warnings after add-external');
+	$lei->('ls-external');
+	like($out, qr!https://example\.com/ibx/!s, 'added canonical URL');
+	is($err, '', 'no warnings on ls-external');
+	ok($lei->(qw(forget-external -q https://EXAMPLE.com/ibx)),
+		'forget');
+	$lei->('ls-external');
+	unlike($out, qr!https://example\.com/ibx/!s, 'removed canonical URL');
+
+	ok(!$lei->(qw(q s:prefix -o /dev/null -f maildir)), 'bad maildir');
+	like($err, qr!/dev/null exists and is not a directory!,
+		'error shown');
+	is($? >> 8, 1, 'errored out with exit 1');
+
+	ok(!$lei->(qw(q s:prefix -f mboxcl2 -o), $home), 'bad mbox');
+	like($err, qr!\Q$home\E exists and is not a writable file!,
+		'error shown');
+	is($? >> 8, 1, 'errored out with exit 1');
+
+	ok(!$lei->(qw(q s:prefix -o /dev/stdout -f Mbox2)), 'bad format');
+	like($err, qr/bad mbox --format=mbox2/, 'error shown');
+	is($? >> 8, 1, 'errored out with exit 1');
+
+	# note, on a Bourne shell users should be able to use either:
+	#	s:"use boolean prefix"
+	#	"s:use boolean prefix"
+	# or use single quotes, it should not matter.  Users only need
+	# to know shell quoting rules, not Xapian quoting rules.
+	# No double-quoting should be imposed on users on the CLI
+	$lei->('q', 's:use boolean prefix');
+	like($out, qr/search: use boolean prefix/, 'phrase search got result');
+	require IO::Uncompress::Gunzip;
+	for my $sfx ('', '.gz') {
+		my $f = "$home/mbox$sfx";
+		$lei->('q', '-o', "mboxcl2:$f", 's:use boolean prefix');
+		# $cat returns every line of $f, gunzipping when needed
+		my $cat = $sfx eq '' ? sub {
+			open my $mb, '<', $f or fail "no mbox: $!";
+			<$mb>
+		} : sub {
+			my $z = IO::Uncompress::Gunzip->new($f, MultiStream=>1);
+			<$z>;
+		};
+		my @s = grep(/^Subject:/, $cat->());
+		is(scalar(@s), 1, "1 result in mbox$sfx");
+		$lei->('q', '-a', '-o', "mboxcl2:$f", 's:see attachment');
+		is($err, '', 'no errors from augment');
+		@s = grep(/^Subject:/, $cat->());
+		is(scalar(@s), 2, "2 results in mbox$sfx");
+
+		$lei->('q', '-a', '-o', "mboxcl2:$f", 's:nonexistent');
+		is($err, '', "no errors on no results ($sfx)");
+
+		my @s2 = grep(/^Subject:/, $cat->());
+		is_deeply(\@s2, \@s,
+			"same 2 old results w/ --augment and bad search $sfx");
+
+		# without -a, an empty result set truncates the destination
+		$lei->('q', '-o', "mboxcl2:$f", 's:nonexistent');
+		my @res = $cat->();
+		is_deeply(\@res, [], "clobber w/o --augment $sfx");
+	}
+	ok(!$lei->('q', '-o', "$home/mbox", 's:nope'),
+		'fails if mbox format unspecified');
+	ok(!$lei->(qw(q --no-local s:see)), '--no-local');
+	is($? >> 8, 1, 'proper exit code');
+	like($err, qr/no local or remote.+? to search/, 'no inbox');
+	# default URLs, used when the matching variable is set to "1";
+	# any other non-empty value is taken as the URL itself
+	my %e = (
+		TEST_LEI_EXTERNAL_HTTPS => 'https://public-inbox.org/meta/',
+		TEST_LEI_EXTERNAL_ONION => $onions[int(rand(scalar(@onions)))],
+	);
+	for my $k (keys %e) {
+		my $url = $ENV{$k} // '';
+		$url = $e{$k} if $url eq '1';
+		$test_external_remote->($url, $k);
+	}
+};
+
+# Runs, in a fixed order, every sub-test shared by the one-shot and
+# the daemon code paths.
+my $test_lei_common = sub {
+	for my $t ($test_help, $test_config, $test_init, $test_external) {
+		$t->();
+	}
+};
+
+# Driver: run the common tests either in one-shot mode (when
+# TEST_LEI_ONESHOT is set) or through a lei daemon over a real socket,
+# followed by daemon lifecycle checks.
+if ($ENV{TEST_LEI_ONESHOT}) {
+ require_ok 'PublicInbox::LEI';
+ # force sun_path[108] overflow, ($lei->() filters out this path)
+ my $xrd = "$home/1shot-test".('.sun_path' x 108);
+ local $ENV{XDG_RUNTIME_DIR} = $xrd;
+ $err_filter = qr!\Q$xrd!;
+ $test_lei_common->();
+} else {
+SKIP: { # real socket
+ # needs either Socket::MsgHdr or a PublicInbox::Spawn which
+ # provides send_cmd4; otherwise everything below is skipped
+ eval { require Socket::MsgHdr; 1 } // do {
+ require PublicInbox::Spawn;
+ PublicInbox::Spawn->can('send_cmd4');
+ } // skip 'Socket::MsgHdr or Inline::C missing or unconfigured', 115;
+ local $ENV{XDG_RUNTIME_DIR} = "$home/xdg_run";
+ my $sock = "$ENV{XDG_RUNTIME_DIR}/lei/5.seq.sock";
+ my $err_log = "$ENV{XDG_RUNTIME_DIR}/lei/errors.log";
+
+ ok($lei->('daemon-pid'), 'daemon-pid');
+ is($err, '', 'no error from daemon-pid');
+ like($out, qr/\A[0-9]+\n\z/s, 'pid returned') or BAIL_OUT;
+ chomp(my $pid = $out);
+ ok(kill(0, $pid), 'pid is valid');
+ ok(-S $sock, 'sock created');
+
+ $test_lei_common->();
+ is(-s $err_log, 0, 'nothing in errors.log');
+ # plant a fake error in errors.log; the next command is expected
+ # to show it on stderr (checked via qr/phail/ below)
+ open my $efh, '>>', $err_log or BAIL_OUT $!;
+ print $efh "phail\n" or BAIL_OUT $!;
+ close $efh or BAIL_OUT $!;
+
+ ok($lei->('daemon-pid'), 'daemon-pid');
+ chomp(my $pid_again = $out);
+ is($pid, $pid_again, 'daemon-pid idempotent');
+ like($err, qr/phail/, 'got mock "phail" error previous run');
+
+ ok($lei->(qw(daemon-kill)), 'daemon-kill');
+ is($out, '', 'no output from daemon-kill');
+ is($err, '', 'no error from daemon-kill');
+ # poll until the old daemon PID is gone (at most 101 ticks)
+ for (0..100) {
+ kill(0, $pid) or last;
+ tick();
+ }
+ ok(-S $sock, 'sock still exists');
+ ok(!kill(0, $pid), 'pid gone after stop');
+
+ # the next command gets a daemon again, on the same socket path
+ ok($lei->(qw(daemon-pid)), 'daemon-pid');
+ chomp(my $new_pid = $out);
+ ok(kill(0, $new_pid), 'new pid is running');
+ ok(-S $sock, 'sock still exists');
+
+ for my $sig (qw(-0 -CHLD)) {
+ ok($lei->('daemon-kill', $sig), "handles $sig");
+ }
+ is($out.$err, '', 'no output on innocuous signals');
+ ok($lei->('daemon-pid'), 'daemon-pid');
+ chomp $out;
+ is($out, $new_pid, 'PID unchanged after -0/-CHLD');
+
+ # the string constant is always true; it only labels this block
+ if ('socket inaccessible') {
+ chmod 0000, $sock or BAIL_OUT "chmod 0000: $!";
+ ok($lei->('help'), 'connect fail, one-shot fallback works');
+ like($err, qr/\bconnect\(/, 'connect error noted');
+ like($out, qr/^usage: /, 'help output works');
+ chmod 0700, $sock or BAIL_OUT "chmod 0700: $!";
+ }
+ # the daemon is expected to exit once its socket is unlinked;
+ # keep sending SIGCHLD until kill() reports the process gone
+ unlink $sock or BAIL_OUT "unlink($sock) $!";
+ for (0..100) {
+ kill('CHLD', $new_pid) or last;
+ tick();
+ }
+ ok(!kill(0, $new_pid), 'daemon exits after unlink');
+ # success over socket, can't test without
+}; # SKIP
+} # else
+
+done_testing;
--- /dev/null
+#!perl -w
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use Test::More;
+use PublicInbox::TestCommon;
+use PublicInbox::Eml;
+use PublicInbox::Smsg;
+require_mods(qw(DBD::SQLite));
+use_ok 'PublicInbox::LeiDedupe';
+my $eml = eml_load('t/plack-qp.eml');
+my $mid = $eml->header_raw('Message-ID');
+my $different = eml_load('t/msg_iter-order.eml');
+$different->header_set('Message-ID', $mid);
+my $smsg = bless { ds => time }, 'PublicInbox::Smsg';
+$smsg->populate($eml);
+$smsg->{$_} //= '' for (qw(to cc references)) ;
+
+# Checks that the given dedupe object is unchanged by a Storable
+# freeze/thaw cycle.  Element [3] of the object is used in the test
+# name.  NOTE(review): require_mods('Storable', 1) presumably skips
+# this one test when Storable is unavailable -- confirm in TestCommon.
+my $check_storable = sub {
+	my ($dedupe) = @_;
+	SKIP: {
+		require_mods('Storable', 1);
+		my $frozen = Storable::freeze($dedupe);
+		my $thawed = Storable::thaw($frozen);
+		my $label = $dedupe->[3];
+		is_deeply($thawed, $dedupe,
+			"$label round-trips through storable");
+	}
+};
+
+# $eml and $different share a Message-ID but differ in content;
+# each dedupe strategy below is checked against both.
+
+# "none": nothing is ever reported as a duplicate
+my $lei = { opt => { dedupe => 'none' } };
+my $dd = PublicInbox::LeiDedupe->new($lei);
+$check_storable->($dd);
+$dd->prepare_dedupe;
+ok(!$dd->is_dup($eml), '1st is_dup w/o dedupe');
+ok(!$dd->is_dup($eml), '2nd is_dup w/o dedupe');
+ok(!$dd->is_dup($different), 'different is_dup w/o dedupe');
+ok(!$dd->is_smsg_dup($smsg), 'smsg dedupe none 1');
+ok(!$dd->is_smsg_dup($smsg), 'smsg dedupe none 2');
+
+# an unset strategy must behave exactly like "content"
+for my $strat (undef, 'content') {
+	$lei->{opt}->{dedupe} = $strat;
+	$dd = PublicInbox::LeiDedupe->new($lei);
+	$check_storable->($dd);
+	$dd->prepare_dedupe;
+	my $desc = $strat // 'default';
+	ok(!$dd->is_dup($eml), "1st is_dup with $desc dedupe");
+	ok($dd->is_dup($eml), "2nd seen with $desc dedupe");
+	ok(!$dd->is_dup($different), "different is_dup with $desc dedupe");
+	ok(!$dd->is_smsg_dup($smsg), "is_smsg_dup pass w/ $desc dedupe");
+	ok($dd->is_smsg_dup($smsg), "is_smsg_dup reject w/ $desc dedupe");
+}
+$lei->{opt}->{dedupe} = 'bogus';
+eval { PublicInbox::LeiDedupe->new($lei) };
+like($@, qr/unsupported.*bogus/, 'died on bogus strategy');
+
+# "mid": same Message-ID is a duplicate regardless of content
+$lei->{opt}->{dedupe} = 'mid';
+$dd = PublicInbox::LeiDedupe->new($lei);
+$check_storable->($dd);
+$dd->prepare_dedupe;
+ok(!$dd->is_dup($eml), '1st is_dup with mid dedupe');
+ok($dd->is_dup($eml), '2nd seen with mid dedupe');
+ok($dd->is_dup($different), 'different seen with mid dedupe');
+ok(!$dd->is_smsg_dup($smsg), 'smsg mid dedupe pass');
+ok($dd->is_smsg_dup($smsg), 'smsg mid dedupe reject');
+
+$lei->{opt}->{dedupe} = 'oid';
+$dd = PublicInbox::LeiDedupe->new($lei);
+$check_storable->($dd);
+$dd->prepare_dedupe;
+
+# --augment won't have OIDs:
+ok(!$dd->is_dup($eml), '1st is_dup with oid dedupe (augment)');
+ok($dd->is_dup($eml), '2nd seen with oid dedupe (augment)');
+# description fixed: this section tests the oid strategy, not mid
+ok(!$dd->is_dup($different), 'different is_dup with oid dedupe (augment)');
+$different->header_set('Status', 'RO');
+ok($dd->is_dup($different), 'different seen with oid dedupe Status removed');
+
+ok(!$dd->is_dup($eml, '01d'), '1st is_dup with oid dedupe');
+ok($dd->is_dup($different, '01d'), 'different content ignored if oid matches');
+ok($dd->is_dup($eml, '01D'), 'case insensitive oid comparison :P');
+# description fixed: this one is about a longer oid which merely shares
+# a prefix with a seen oid, not about letter case
+ok(!$dd->is_dup($eml, '01dbad'), 'oid sharing a prefix is not a dup');
+
+$smsg->{blob} = 'dead';
+ok(!$dd->is_smsg_dup($smsg), 'smsg dedupe pass');
+ok($dd->is_smsg_dup($smsg), 'smsg dedupe reject');
+
+done_testing;
--- /dev/null
+#!perl -w
+use strict;
+use v5.10.1;
+use Test::More;
+my $cls = 'PublicInbox::LeiExternal';
+require_ok $cls;
+my $canon = $cls->can('_canonicalize');
+my $exp = 'https://example.com/my-inbox/';
+# each case: [ input, expected canonical form, test description ]
+my @cases = (
+	[ 'https://example.com/my-inbox', $exp, 'trailing slash added' ],
+	[ 'https://example.com/my-inbox//', $exp, 'trailing slash removed' ],
+	[ 'https://example.com//my-inbox/', $exp, 'leading slash removed' ],
+	[ 'https://EXAMPLE.com/my-inbox/', $exp, 'lowercased' ],
+	[ '/this/path/is/nonexistent/', '/this/path/is/nonexistent',
+		'non-existent pathname canonicalized' ],
+	[ '/this//path/', '/this/path', 'extra slashes gone' ],
+	[ '/ALL/CAPS', '/ALL/CAPS', 'caps preserved' ],
+);
+for my $case (@cases) {
+	my ($input, $want, $desc) = @$case;
+	is($canon->($input), $want, $desc);
+}
+
+done_testing;
--- /dev/null
+#!perl -w
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use Test::More;
+use PublicInbox::TestCommon;
+use POSIX qw(_exit);
+require_ok 'PublicInbox::LeiOverview';
+
+# ovv_out_lk_init creates a lock file whose path is kept in {lock_path};
+# the file must disappear when the owning object is destroyed
+my $ovv = bless {}, 'PublicInbox::LeiOverview';
+$ovv->ovv_out_lk_init;
+my $lock_path = $ovv->{lock_path};
+ok(-f $lock_path, 'lock init');
+undef $ovv;
+ok(!-f $lock_path, 'lock DESTROY');
+
+# a forked child which drops its copy of the object must NOT remove
+# the lock file; only destruction in the parent does
+$ovv = bless {}, 'PublicInbox::LeiOverview';
+$ovv->ovv_out_lk_init;
+$lock_path = $ovv->{lock_path};
+ok(-f $lock_path, 'lock init #2');
+my $pid = fork // BAIL_OUT "fork $!";
+if ($pid == 0) {
+ undef $ovv;
+ _exit(0); # POSIX::_exit, imported at the top of this test file
+}
+is(waitpid($pid, 0), $pid, 'child exited');
+is($?, 0, 'no error in child process');
+ok(-f $lock_path, 'lock was not destroyed by child');
+undef $ovv;
+ok(!-f $lock_path, 'lock DESTROY #2');
+
+done_testing;
--- /dev/null
+#!perl -w
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use Test::More;
+use PublicInbox::TestCommon;
+require_mods(qw(DBD::SQLite Search::Xapian));
+require_git 2.6;
+require_ok 'PublicInbox::LeiStore';
+require_ok 'PublicInbox::ExtSearch';
+my ($home, $for_destroy) = tmpdir();
+my $opt = { 1 => \(my $out = ''), 2 => \(my $err = '') };
+my $store_dir = "$home/lst";
+local $ENV{GIT_COMMITTER_EMAIL} = 'lei@example.com';
+local $ENV{GIT_COMMITTER_NAME} = 'lei user';
+my $lst = PublicInbox::LeiStore->new($store_dir, { creat => 1 });
+ok($lst, '->new');
+my $smsg = $lst->add_eml(eml_load('t/data/0001.patch'));
+like($smsg->{blob}, qr/\A[0-9a-f]+\z/, 'add returned OID');
+my $eml = eml_load('t/data/0001.patch');
+is($lst->add_eml($eml), undef, 'idempotent');
+$lst->done;
+is_deeply([$lst->mbox_keywords($eml)], [], 'no keywords');
+$eml->header_set('Status', 'RO');
+is_deeply([$lst->mbox_keywords($eml)], ['seen'], 'seen extracted');
+$eml->header_set('X-Status', 'A');
+is_deeply([$lst->mbox_keywords($eml)], [qw(answered seen)],
+ 'seen+answered extracted');
+$eml->header_set($_) for qw(Status X-Status);
+
+is_deeply([$lst->maildir_keywords('/foo:2,')], [], 'Maildir no keywords');
+is_deeply([$lst->maildir_keywords('/foo:2,S')], ['seen'], 'Maildir seen');
+is_deeply([$lst->maildir_keywords('/foo:2,RS')], ['answered', 'seen'],
+ 'Maildir answered + seen');
+is_deeply([$lst->maildir_keywords('/foo:2,RSZ')], ['answered', 'seen'],
+ 'Maildir answered + seen w/o Z');
+{
+ my $es = $lst->search;
+ my $msgs = $es->over->query_xover(0, 1000);
+ is(scalar(@$msgs), 1, 'one message');
+ is($msgs->[0]->{blob}, $smsg->{blob}, 'blob matches');
+ my $mset = $es->mset("mid:$msgs->[0]->{mid}");
+ is($mset->size, 1, 'search works');
+ is_deeply($es->mset_to_artnums($mset), [ $msgs->[0]->{num} ],
+ 'mset_to_artnums');
+ my @kw = $es->msg_keywords(($mset->items)[0]);
+ is_deeply(\@kw, [], 'no flags');
+}
+
+# Run the same keyword set/add/remove sequence twice, with
+# {priv_eidx}->{parallel} off and then on.  Every keyword list read
+# back from msg_keywords is expected in sorted order.
+for my $parallel (0, 1) {
+ $lst->{priv_eidx}->{parallel} = $parallel;
+ my $docids = $lst->set_eml_keywords($eml, qw(seen draft));
+ is(scalar @$docids, 1, 'set keywords on one doc');
+ $lst->done;
+ my @kw = $lst->search->msg_keywords($docids->[0]);
+ is_deeply(\@kw, [qw(draft seen)], 'kw matches');
+
+ # adding keywords which are already present changes nothing
+ $docids = $lst->add_eml_keywords($eml, qw(seen draft));
+ $lst->done;
+ is(scalar @$docids, 1, 'idempotently added keywords to doc');
+ @kw = $lst->search->msg_keywords($docids->[0]);
+ is_deeply(\@kw, [qw(draft seen)], 'kw matches after noop');
+
+ $docids = $lst->remove_eml_keywords($eml, qw(seen draft));
+ is(scalar @$docids, 1, 'removed from one doc');
+ $lst->done;
+ @kw = $lst->search->msg_keywords($docids->[0]);
+ is_deeply(\@kw, [], 'kw matches after remove');
+
+ # removing a keyword which is not set still reports the doc
+ $docids = $lst->remove_eml_keywords($eml, qw(answered));
+ is(scalar @$docids, 1, 'removed from one doc (idempotently)');
+ $lst->done;
+ @kw = $lst->search->msg_keywords($docids->[0]);
+ is_deeply(\@kw, [], 'kw matches after remove (idempotent)');
+
+ $docids = $lst->add_eml_keywords($eml, qw(answered));
+ is(scalar @$docids, 1, 'added to empty doc');
+ $lst->done;
+ @kw = $lst->search->msg_keywords($docids->[0]);
+ is_deeply(\@kw, ['answered'], 'kw matches after add');
+
+ # set_eml_keywords without any keyword clears all of them
+ $docids = $lst->set_eml_keywords($eml);
+ is(scalar @$docids, 1, 'set to clobber');
+ $lst->done;
+ @kw = $lst->search->msg_keywords($docids->[0]);
+ is_deeply(\@kw, [], 'set clobbers all');
+
+ # set_eml returns a PublicInbox::Smsg the first time a message is
+ # stored, and an array ref of numbers on later calls
+ my $set = eml_load('t/plack-qp.eml');
+ $set->header_set('Message-ID', "<set\@$parallel>");
+ my $ret = $lst->set_eml($set, 'seen');
+ is(ref $ret, 'PublicInbox::Smsg', 'initial returns smsg');
+ my $ids = $lst->set_eml($set, qw(seen));
+ is_deeply($ids, [ $ret->{num} ], 'set_eml idempotent');
+ $ids = $lst->set_eml($set, qw(seen answered));
+ is_deeply($ids, [ $ret->{num} ], 'set_eml to change kw');
+ $lst->done;
+ @kw = $lst->search->msg_keywords($ids->[0]);
+ is_deeply(\@kw, [qw(answered seen)], 'set changed kw');
+}
+
+# Same set_eml checks, routed through an IPC worker (needs Storable),
+# then once more through ipc_do after the worker has been stopped.
+SKIP: {
+	require_mods(qw(Storable), 1);
+	ok($lst->can('ipc_do'), 'ipc_do works if we have Storable');
+	$eml->header_set('Message-ID', '<ipc-test@example>');
+	my $pid = $lst->ipc_worker_spawn('lei-store');
+	ok($pid > 0, 'got a worker');
+	my $smsg = $lst->ipc_do('set_eml', $eml, qw(seen));
+	is(ref($smsg), 'PublicInbox::Smsg', 'set_eml works over ipc');
+	my $ids = $lst->ipc_do('set_eml', $eml, qw(seen));
+	is_deeply($ids, [ $smsg->{num} ], 'docid returned');
+
+	# header_set without a value removes the Message-ID header
+	$eml->header_set('Message-ID');
+	my $no_mid = $lst->ipc_do('set_eml', $eml, qw(seen));
+	# inspect the result of the Message-ID-less call itself (this
+	# used to re-check $smsg from above, which proved nothing) and
+	# do so before $no_mid is dereferenced
+	is(ref($no_mid), 'PublicInbox::Smsg', 'no mid works ipc');
+	my $wait = $lst->ipc_do('done');
+	my @kw = $lst->search->msg_keywords($no_mid->{num});
+	is_deeply(\@kw, [qw(seen)], 'ipc set changed kw');
+
+	$ids = $lst->ipc_do('set_eml', $eml, qw(seen));
+	is_deeply($ids, [ $no_mid->{num} ], 'docid returned w/o mid w/ ipc');
+	$lst->ipc_do('done');
+	$lst->ipc_worker_stop;
+	# the worker is stopped, yet ipc_do must keep working
+	$ids = $lst->ipc_do('set_eml', $eml, qw(seen answered));
+	is_deeply($ids, [ $no_mid->{num} ], 'docid returned w/o mid w/o ipc');
+	$wait = $lst->ipc_do('done');
+	@kw = $lst->search->msg_keywords($no_mid->{num});
+	is_deeply(\@kw, [qw(answered seen)], 'set changed kw w/o ipc');
+}
+
+done_testing;
--- /dev/null
+#!perl -w
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use Test::More;
+use PublicInbox::TestCommon;
+use PublicInbox::Eml;
+use Fcntl qw(SEEK_SET);
+use PublicInbox::Spawn qw(popen_rd which);
+use List::Util qw(shuffle);
+require_mods(qw(DBD::SQLite));
+require PublicInbox::MboxReader;
+require PublicInbox::LeiOverview;
+use_ok 'PublicInbox::LeiToMail';
+my $from = "Content-Length: 10\nSubject: x\n\nFrom hell\n";
+my $noeol = "Subject: x\n\nFrom hell";
+my $crlf = $noeol;
+$crlf =~ s/\n/\r\n/g;
+my $kw = [qw(seen answered flagged)];
+my $smsg = { kw => $kw, blob => '0'x40 };
+my @MBOX = qw(mboxcl2 mboxrd mboxcl mboxo);
+# For every mbox variant, convert three inputs ($from with a trailing
+# LF, $noeol without one, $crlf with CRLF line endings) through the
+# matching eml2<variant> function, then check the Status/X-Status
+# headers built from $smsg->{kw}, "From " escaping, Content-Length
+# handling and the trailing line ending.
+for my $mbox (@MBOX) {
+	my $m = "eml2$mbox";
+	my $cb = PublicInbox::LeiToMail->can($m);
+	my $s = $cb->(PublicInbox::Eml->new($from), $smsg);
+	is(substr($$s, -1, 1), "\n", "trailing LF in normal $mbox");
+	my $eml = PublicInbox::Eml->new($s);
+	is($eml->header('Status'), 'OR', "Status: set by $m");
+	is($eml->header('X-Status'), 'AF', "X-Status: set by $m");
+	# only mboxcl2 leaves body lines starting with "From " unescaped
+	if ($mbox eq 'mboxcl2') {
+		like($eml->body_raw, qr/^From /, "From not escaped $m");
+	} else {
+		like($eml->body_raw, qr/^>From /, "From escaped once by $m");
+	}
+	my @cl = $eml->header('Content-Length');
+	if ($mbox =~ /mboxcl/) {
+		is(scalar(@cl), 1, "$m only has one Content-Length header");
+		is($cl[0] + length("\n"),
+			length($eml->body_raw), "$m Content-Length matches");
+	} else {
+		is(scalar(@cl), 0, "$m clobbered Content-Length");
+	}
+	$s = $cb->(PublicInbox::Eml->new($noeol), $smsg);
+	is(substr($$s, -1, 1), "\n",
+		"trailing LF added by $m when original lacks EOL");
+	$eml = PublicInbox::Eml->new($s);
+	if ($mbox eq 'mboxcl2') {
+		is($eml->body_raw, "From hell\n", "From not escaped by $m");
+	} else {
+		is($eml->body_raw, ">From hell\n", "From escaped once by $m");
+	}
+	$s = $cb->(PublicInbox::Eml->new($crlf), $smsg);
+	# description fixed: it read "added $m by original lacks EOL"
+	is(substr($$s, -2, 2), "\r\n",
+		"trailing CRLF added by $m when original lacks EOL");
+	$eml = PublicInbox::Eml->new($s);
+	if ($mbox eq 'mboxcl2') {
+		is($eml->body_raw, "From hell\r\n", "From not escaped by $m");
+	} else {
+		is($eml->body_raw, ">From hell\r\n", "From escaped once by $m");
+	}
+	if ($mbox =~ /mboxcl/) {
+		is($eml->header('Content-Length') + length("\r\n"),
+			length($eml->body_raw), "$m Content-Length matches");
+	} elsif ($mbox eq 'mboxrd') {
+		# mboxrd adds another ">" to lines which are already escaped
+		$s = $cb->($eml, $smsg);
+		$eml = PublicInbox::Eml->new($s);
+		is($eml->body_raw,
+			">>From hell\r\n\r\n", "From escaped again by $m");
+	}
+}
+
+my ($tmpdir, $for_destroy) = tmpdir();
+local $ENV{TMPDIR} = $tmpdir;
+open my $err, '>>', "$tmpdir/lei.err" or BAIL_OUT $!;
+my $lei = { 2 => $err };
+my $buf = <<'EOM';
+From: x@example.com
+Subject: x
+
+blah
+EOM
+my $fn = "$tmpdir/x.mbox";
+my ($mbox) = shuffle(@MBOX); # pick one, shouldn't matter
+# Builds a LeiToMail writer for output format $fmt and destination $dst,
+# runs its augment steps in order and returns the write callback.
+# Also checks that the writer object round-trips through Storable
+# (NOTE(review): presumably skipped when Storable is missing).
+# Side effects on the shared $lei: {dedupe} and {1} are deleted and
+# {ovv} is replaced.
+my $wcb_get = sub {
+	my ($fmt, $dst) = @_;
+	delete $lei->{dedupe};
+	my %ovv = (fmt => $fmt, dst => $dst);
+	$lei->{ovv} = bless \%ovv, 'PublicInbox::LeiOverview';
+	my $to_mail = PublicInbox::LeiToMail->new($lei);
+	SKIP: {
+		require_mods('Storable', 1);
+		my $thawed = Storable::thaw(Storable::freeze($to_mail));
+		is_deeply($thawed, $to_mail,
+			"$fmt round-trips through storable");
+	}
+	my $zpipe = $to_mail->pre_augment($lei);
+	$to_mail->do_augment($lei);
+	$to_mail->post_augment($lei, $zpipe);
+	my $write_cb = $to_mail->write_cb($lei);
+	delete $lei->{1};
+	return $write_cb;
+};
+
+my $deadbeef = { blob => 'deadbeef', kw => [ qw(seen) ] };
+my $orig = do {
+ my $wcb = $wcb_get->($mbox, $fn);
+ is(ref $wcb, 'CODE', 'write_cb returned callback');
+ ok(-f $fn && !-s _, 'empty file created');
+ $wcb->(\(my $dup = $buf), $deadbeef);
+ undef $wcb;
+ open my $fh, '<', $fn or BAIL_OUT $!;
+ my $raw = do { local $/; <$fh> };
+ like($raw, qr/^blah\n/sm, 'wrote content');
+ unlink $fn or BAIL_OUT $!;
+
+ local $lei->{opt} = { jobs => 2 };
+ $wcb = $wcb_get->($mbox, $fn);
+ ok(-f $fn && !-s _, 'truncated mbox destination');
+ $wcb->(\($dup = $buf), $deadbeef);
+ undef $wcb;
+ open $fh, '<', $fn or BAIL_OUT $!;
+ is(do { local $/; <$fh> }, $raw, 'jobs > 1');
+ $raw;
+};
+for my $zsfx (qw(gz bz2 xz)) { # XXX should we support zst, zz, lzo, lzma?
+ my $zsfx2cmd = PublicInbox::LeiToMail->can('zsfx2cmd');
+ SKIP: {
+ my $cmd = eval { $zsfx2cmd->($zsfx, 0, $lei) };
+ skip $@, 3 if $@;
+ my $dc_cmd = eval { $zsfx2cmd->($zsfx, 1, $lei) };
+ ok($dc_cmd, "decompressor for .$zsfx");
+ my $f = "$fn.$zsfx";
+ my $wcb = $wcb_get->($mbox, $f);
+ $wcb->(\(my $dup = $buf), $deadbeef);
+ undef $wcb;
+ my $uncompressed = xqx([@$dc_cmd, $f]);
+ is($uncompressed, $orig, "$zsfx works unlocked");
+
+ local $lei->{opt} = { jobs => 2 }; # for atomic writes
+ unlink $f or BAIL_OUT "unlink $!";
+ $wcb = $wcb_get->($mbox, $f);
+ $wcb->(\($dup = $buf), $deadbeef);
+ undef $wcb;
+ is(xqx([@$dc_cmd, $f]), $orig, "$zsfx matches with lock");
+
+ local $lei->{opt} = { augment => 1 };
+ $wcb = $wcb_get->($mbox, $f);
+ $wcb->(\($dup = $buf . "\nx\n"), $deadbeef);
+ undef $wcb; # commit
+
+ my $cat = popen_rd([@$dc_cmd, $f]);
+ my @raw;
+ PublicInbox::MboxReader->$mbox($cat,
+ sub { push @raw, shift->as_string });
+ like($raw[1], qr/\nblah\n\nx\n\z/s, "augmented $zsfx");
+ like($raw[0], qr/\nblah\n\z/s, "original preserved $zsfx");
+
+ local $lei->{opt} = { augment => 1, jobs => 2 };
+ $wcb = $wcb_get->($mbox, $f);
+ $wcb->(\($dup = $buf . "\ny\n"), $deadbeef);
+ undef $wcb; # commit
+
+ my @raw3;
+ $cat = popen_rd([@$dc_cmd, $f]);
+ PublicInbox::MboxReader->$mbox($cat,
+ sub { push @raw3, shift->as_string });
+ my $y = pop @raw3;
+ is_deeply(\@raw3, \@raw, 'previous messages preserved');
+ like($y, qr/\nblah\n\ny\n\z/s, "augmented $zsfx (atomic)");
+ }
+}
+
+# Removes the Status header from the given message object and returns
+# the message as a string, for comparison against the original input.
+my $as_orig = sub {
+	my $msg = shift;
+	$msg->header_set('Status');
+	return $msg->as_string;
+};
+
+unlink $fn or BAIL_OUT $!;
+if ('default deduplication uses content_hash') {
+ my $wcb = $wcb_get->('mboxo', $fn);
+ $deadbeef->{kw} = [];
+ $wcb->(\(my $x = $buf), $deadbeef) for (1..2);
+ undef $wcb; # undef to commit changes
+ my $cmp = '';
+ open my $fh, '<', $fn or BAIL_OUT $!;
+ PublicInbox::MboxReader->mboxo($fh, sub { $cmp .= $as_orig->(@_) });
+ is($cmp, $buf, 'only one message written');
+
+ local $lei->{opt} = { augment => 1 };
+ $wcb = $wcb_get->('mboxo', $fn);
+ $wcb->(\($x = $buf . "\nx\n"), $deadbeef) for (1..2);
+ undef $wcb; # undef to commit changes
+ open $fh, '<', $fn or BAIL_OUT $!;
+ my @x;
+ PublicInbox::MboxReader->mboxo($fh, sub { push @x, $as_orig->(@_) });
+ is(scalar(@x), 2, 'augmented mboxo');
+ is($x[0], $cmp, 'original message preserved');
+ is($x[1], $buf . "\nx\n", 'new message appended');
+}
+
+{ # stdout support
+ # an anonymous temporary file stands in for lei's stdout ($lei->{1})
+ open my $tmp, '+>', undef or BAIL_OUT $!;
+ local $lei->{1} = $tmp;
+ my $wcb = $wcb_get->('mboxrd', '/dev/stdout');
+ $wcb->(\(my $x = $buf), $deadbeef);
+ undef $wcb; # commit
+ # rewind and read back what was written
+ seek($tmp, 0, SEEK_SET) or BAIL_OUT $!;
+ my $cmp = '';
+ PublicInbox::MboxReader->mboxrd($tmp, sub { $cmp .= $as_orig->(@_) });
+ is($cmp, $buf, 'message written to stdout');
+}
+
+SKIP: { # FIFO support
+ use POSIX qw(mkfifo);
+ my $fn = "$tmpdir/fifo";
+ mkfifo($fn, 0600) or skip("mkfifo not supported: $!", 1);
+ # start the reading end (cat) before the writer is created
+ my $cat = popen_rd([which('cat'), $fn]);
+ my $wcb = $wcb_get->('mboxo', $fn);
+ $wcb->(\(my $x = $buf), $deadbeef);
+ undef $wcb; # commit
+ my $cmp = '';
+ PublicInbox::MboxReader->mboxo($cat, sub { $cmp .= $as_orig->(@_) });
+ is($cmp, $buf, 'message written to FIFO');
+}
+
+{ # Maildir support
+	# Without augment, a fresh writer clobbers existing Maildir contents;
+	# with augment, existing files are kept and duplicates are skipped.
+	# All reads use 3-arg open with an explicit '<' mode so that a
+	# Maildir filename can never be interpreted as an open mode.
+	my $md = "$tmpdir/maildir/";
+	my $wcb = $wcb_get->('maildir', $md);
+	is(ref($wcb), 'CODE', 'got Maildir callback');
+	my $b4dc0ffee = { blob => 'badc0ffee', kw => [] };
+	$wcb->(\(my $x = $buf), $b4dc0ffee);
+
+	my @f;
+	PublicInbox::LeiToMail::_maildir_each_file($md, sub { push @f, shift });
+	open my $fh, '<', $f[0] or BAIL_OUT $!;
+	is(do { local $/; <$fh> }, $buf, 'wrote to Maildir');
+
+	$wcb = $wcb_get->('maildir', $md);
+	my $deadcafe = { blob => 'deadcafe', kw => [] };
+	$wcb->(\($x = $buf."\nx\n"), $deadcafe);
+
+	my @x = ();
+	PublicInbox::LeiToMail::_maildir_each_file($md, sub { push @x, shift });
+	is(scalar(@x), 1, 'wrote one new file');
+	ok(!-f $f[0], 'old file clobbered');
+	open $fh, '<', $x[0] or BAIL_OUT $!;
+	is(do { local $/; <$fh> }, $buf."\nx\n", 'wrote new file to Maildir');
+
+	local $lei->{opt}->{augment} = 1;
+	$wcb = $wcb_get->('maildir', $md);
+	$wcb->(\($x = $buf."\ny\n"), $deadcafe);
+	$wcb->(\($x = $buf."\ny\n"), $b4dc0ffee); # skipped by dedupe
+	@f = ();
+	PublicInbox::LeiToMail::_maildir_each_file($md, sub { push @f, shift });
+	is(scalar grep(/\A\Q$x[0]\E\z/, @f), 1, 'old file still there');
+	my @new = grep(!/\A\Q$x[0]\E\z/, @f);
+	is(scalar @new, 1, '1 new file written (b4dc0ffee skipped)');
+	open $fh, '<', $x[0] or BAIL_OUT $!;
+	is(do { local $/; <$fh> }, $buf."\nx\n", 'old file untouched');
+	open $fh, '<', $new[0] or BAIL_OUT $!;
+	is(do { local $/; <$fh> }, $buf."\ny\n", 'new file written');
+}
+
+done_testing;
--- /dev/null
+#!perl -w
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use Test::More;
+use List::Util qw(shuffle max);
+use PublicInbox::TestCommon;
+use PublicInbox::ExtSearchIdx;
+use PublicInbox::Eml;
+use PublicInbox::InboxWritable;
+require_mods(qw(DBD::SQLite Search::Xapian));
+require_git 2.6;
+require_ok 'PublicInbox::LeiXSearch';
+my ($home, $for_destroy) = tmpdir();
+my @ibx;
+# Create eight inboxes (v1 and v2, numbered 3..6), each holding ten
+# messages whose Date, Subject, Message-ID and body encode the
+# version ($V), inbox number ($i) and message number ($j).
+for my $V (1..2) {
+ for my $i (3..6) {
+ my $ibx = PublicInbox::InboxWritable->new({
+ inboxdir => "$home/v$V-$i",
+ name => "test-v$V-$i",
+ version => $V,
+ indexlevel => 'medium',
+ -primary_address => "v$V-$i\@example.com",
+ }, { nproc => int(rand(8)) + 1 }); # random nproc in 1..8
+ push @ibx, $ibx;
+ my $im = $ibx->importer(0);
+ for my $j (0..9) {
+ my $eml = PublicInbox::Eml->new(<<EOF);
+From: x\@example.com
+To: $ibx->{-primary_address}
+Date: Fri, 02 Oct 1993 0$V:0$i:0$j +0000
+Subject: v${V}i${i}j$j
+Message-ID: <v${V}i${i}j$j\@example>
+
+${V}er ${i}on j$j
+EOF
+ $im->add($eml);
+ }
+ $im->done;
+ }
+}
+my $first = shift @ibx; is($first->{name}, 'test-v1-3', 'first plucked');
+my $last = pop @ibx; is($last->{name}, 'test-v2-6', 'last plucked');
+my $eidx = PublicInbox::ExtSearchIdx->new("$home/eidx");
+$eidx->attach_inbox($first);
+$eidx->attach_inbox($last);
+$eidx->eidx_sync({fsync => 0});
+my $es = PublicInbox::ExtSearch->new("$home/eidx");
+my $lxs = PublicInbox::LeiXSearch->new;
+for my $ibxish (shuffle($es, @ibx)) {
+ $lxs->prepare_external($ibxish);
+}
+for my $loc ($lxs->locals) {
+ $lxs->attach_external($loc);
+}
+my $nr = $lxs->xdb->get_doccount;
+my $mset = $lxs->mset('d:19931002..19931003', { limit => $nr });
+is($mset->size, $nr, 'got all messages');
+my @msgs;
+for my $mi ($mset->items) {
+ if (my $smsg = $lxs->smsg_for($mi)) {
+ push @msgs, $smsg;
+ } else {
+ diag "E: ${\$mi->get_docid} missing";
+ }
+}
+is(scalar(@msgs), $nr, 'smsgs retrieved for all');
+
+$mset = $lxs->recent(undef, { limit => 1 });
+is($mset->size, 1, 'one result');
+my $max = max(map { $_->{docid} } @msgs);
+is($lxs->smsg_for(($mset->items)[0])->{docid}, $max,
+ 'got highest docid');
+
+my @ibxish = $lxs->locals;
+is(scalar(@ibxish), scalar(@ibx) + 1, 'got locals back');
+is($lxs->search, $lxs, '->search works');
+is($lxs->over, undef, '->over fails');
+
+done_testing;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
--- /dev/null
+#!perl -w
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use Test::More;
+use PublicInbox::TestCommon;
+use List::Util qw(shuffle);
+use PublicInbox::Eml;
+use Fcntl qw(SEEK_SET);
+require_ok 'PublicInbox::MboxReader';
+require_ok 'PublicInbox::LeiToMail';
+my %raw = (
+ hdr_only => "From: header-only\@example.com\n\n",
+ small_from => "From: small-from\@example.com\n\nFrom hell\n",
+ small => "From: small\@example.com\n\nfrom hell\n",
+ big_hdr_only => "From: big-header\@example.com\n" .
+ (('A: '.('a' x 72)."\n") x 1000)."\n",
+ big_body => "From: big-body\@example.com\n\n".
+ (('b: '.('b' x 72)."\n") x 1000) .
+ "From hell\n",
+ big_all => "From: big-all\@example.com\n".
+ (("A: ".('a' x 72)."\n") x 1000). "\n" .
+ (("b: ".('b' x 72)."\n") x 1000) .
+ "From hell\n",
+);
+
+if ($ENV{TEST_EXTRA}) {
+ for my $fn (glob('t/*.eml'), glob('t/*/*.{patch,eml}')) {
+ $raw{$fn} = eml_load($fn)->as_string;
+ }
+}
+
+my $reader = PublicInbox::MboxReader->new;
+# Round-trip check for one mbox format ($fmt): every message in %raw is
+# written, in shuffled order, to an anonymous temporary file using
+# LeiToMail's eml2$fmt, then read back with the MboxReader method of
+# the same name and compared against the original.
+my $check_fmt = sub {
+ my $fmt = shift;
+ my @order = shuffle(keys %raw);
+ my $eml2mbox = PublicInbox::LeiToMail->can("eml2$fmt");
+ open my $fh, '+>', undef or BAIL_OUT "open: $!";
+ for my $k (@order) {
+ my $eml = PublicInbox::Eml->new($raw{$k});
+ my $buf = $eml2mbox->($eml);
+ print $fh $$buf or BAIL_OUT "print $!";
+ }
+ seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
+ $reader->$fmt($fh, sub {
+ my ($eml) = @_;
+ # drop Status and Lines, if present, before comparing
+ $eml->header_set('Status');
+ $eml->header_set('Lines');
+ # messages must come back in the order they were written
+ my $cur = shift @order;
+ my @cl = $eml->header_raw('Content-Length');
+ if ($fmt =~ /\Amboxcl/) {
+ is(scalar(@cl), 1, "Content-Length set $fmt $cur");
+ my $raw = $eml->body_raw;
+ my $adj = 0;
+ # for mboxcl, the expected length is reduced by one for
+ # each body line starting with "From "
+ if ($fmt eq 'mboxcl') {
+ my @from = ($raw =~ /^(From )/smg);
+ $adj = scalar(@from);
+ }
+ is(length($raw), $cl[0] - $adj,
+ "Content-Length is correct $fmt $cur");
+ # clobber for ->as_string comparison below
+ $eml->header_set('Content-Length');
+ } else {
+ is(scalar(@cl), 0, "Content-Length unset $fmt $cur");
+ }
+ my $orig = PublicInbox::Eml->new($raw{$cur});
+ is($eml->as_string, $orig->as_string,
+ "read back original $fmt $cur");
+ });
+};
+my @mbox = qw(mboxrd mboxo mboxcl mboxcl2);
+for my $fmt (@mbox) { $check_fmt->($fmt) }
+s/\n/\r\n/sg for (values %raw);
+for my $fmt (@mbox) { $check_fmt->($fmt) }
+
+# Reading an mbox from a pipe whose writer exits non-zero: the child
+# (source given in the heredoc below, which ends at the first empty
+# line) prints one complete message, then a second one, and exits
+# with status 1.  The reader must die with "error closing mbox" and
+# must have delivered only the first message.
+SKIP: {
+ use PublicInbox::Spawn qw(popen_rd);
+ use Time::HiRes qw(alarm);
+ my $fh = popen_rd([ $^X, '-E', <<'' ]);
+say "From x@y Fri Oct 2 00:00:00 1993";
+print "a: b\n\n", "x" x 70000, "\n\n";
+say "From x@y Fri Oct 2 00:00:00 2010";
+print "Final: bit\n\n", "Incomplete\n\n";
+exit 1
+
+ my @x;
+ eval { $reader->mboxrd($fh, sub { push @x, shift->as_string }) };
+ like($@, qr/error closing mbox/, 'detects error reading from pipe');
+ is(scalar(@x), 1, 'only saw one message');
+ is(scalar(grep(/Final/, @x)), 0, 'no incomplete bit');
+}
+
+done_testing;
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
EOF
ok(run_script(['-mda'], $env, $opt), 'message delivered');
}
- my $config = PublicInbox::Config->new;
- my $ibx = $config->lookup_name($v);
+ my $cfg = PublicInbox::Config->new;
+ my $ibx = $cfg->lookup_name($v);
# make sure all serials are searchable:
for my $i (1..2) {
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use Test::More;
#!perl -w
-# Copyright (C) 2017-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2017-2021 all contributors <meta@public-inbox.org>
# This library is free software; you can redistribute it and/or modify
# it under the same terms as Perl itself.
# Artistic or GPL-1+ <https://www.gnu.org/licenses/gpl-1.0.txt>
--- /dev/null
+#!perl -w
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use Test::More;
+use PublicInbox::TestCommon;
+use PublicInbox::InboxWritable;
+require_mods(qw(Search::Xapian DBD::SQLite));
+use_ok 'PublicInbox::MiscSearch';
+use_ok 'PublicInbox::MiscIdx';
+# Index one v1 inbox with MiscIdx, then query the result through MiscSearch.
+my ($tmp, $for_destroy) = tmpdir();
+my $eidx = { xpfx => "$tmp/eidx", -no_fsync => 1 }; # mock ExtSearchIdx
+{ # write the description text that the description queries below are expected to match
+ mkdir "$tmp/v1" or BAIL_OUT "mkdir $!";
+ open my $fh, '>', "$tmp/v1/description" or BAIL_OUT "open: $!";
+ print $fh "Everything sucks this year\n" or BAIL_OUT "print $!";
+ close $fh or BAIL_OUT "close $!";
+}
+{ # create the inbox object and index it inside one begin_txn/commit_txn pair
+ my $v1 = PublicInbox::InboxWritable->new({
+ inboxdir => "$tmp/v1",
+ name => 'hope',
+ address => [ 'nope@example.com' ],
+ indexlevel => 'basic',
+ version => 1,
+ });
+ $v1->init_inbox;
+ my $mi = PublicInbox::MiscIdx->new($eidx);
+ $mi->begin_txn;
+ $mi->index_ibx($v1);
+ $mi->commit_txn;
+}
+# the path below is the xpfx given above plus "/misc"; presumably where MiscIdx wrote its index (confirm)
+my $ms = PublicInbox::MiscSearch->new("$tmp/eidx/misc");
+my $mset = $ms->mset('"everything sucks today"'); # quoted phrase that differs from the description in its last words
+is(scalar($mset->items), 0, 'no match on description phrase');
+# quoted phrase with the same words as the description file
+$mset = $ms->mset('"everything sucks this year"');
+is(scalar($mset->items), 1, 'match phrase on description');
+# unquoted words taken from the description
+$mset = $ms->mset('everything sucks');
+is(scalar($mset->items), 1, 'match words in description');
+# the complete address given to the inbox above
+$mset = $ms->mset('nope@example.com');
+is(scalar($mset->items), 1, 'match full address');
+# only the part of that address before the "@"
+$mset = $ms->mset('nope');
+is(scalar($mset->items), 1, 'match partial address');
+# the inbox name
+$mset = $ms->mset('hope');
+is(scalar($mset->items), 1, 'match name');
+my $mi = ($mset->items)[0]; # first (and, per the check above, only) item of the name match
+my $doc = $mi->get_document;
+is($doc->get_data, '{}', 'stored empty data');
+# end of checks
+done_testing;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
my %mid2num;
my %num2mid;
my @mids = qw(a@b c@d e@f g@h aa@bb aa@cc);
-is_deeply([$d->minmax], [undef,undef], "empty min max on new DB");
+is_deeply([$d->minmax], [0,0], "zero min max on new DB");
foreach my $mid (@mids) {
my $n = $d->mid_insert($mid);
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use Test::More;
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
require_mods(qw(DBD::SQLite Data::Dumper));
use_ok 'PublicInbox::NNTP';
use_ok 'PublicInbox::Inbox';
+use PublicInbox::Config;
{
sub quote_str {
{ # test setting NNTP headers in HEAD and ARTICLE requests
my $u = 'https://example.com/a/';
- my $ng = PublicInbox::Inbox->new({ name => 'test',
+ my $ibx = PublicInbox::Inbox->new({ name => 'test',
inboxdir => 'test.git',
address => 'a@example.com',
-primary_address => 'a@example.com',
newsgroup => 'test',
domain => 'example.com',
url => [ '//example.com/a' ]});
- is($ng->base_url, $u, 'URL expanded');
+ is($ibx->base_url, $u, 'URL expanded');
my $mid = 'a@b';
my $mime = PublicInbox::Eml->new("Message-ID: <$mid>\r\n\r\n");
my $hdr = $mime->header_obj;
my $mock_self = {
- nntpd => { grouplist => [], servername => 'example.com' },
- ng => $ng,
+ nntpd => {
+ servername => 'example.com',
+ pi_cfg => bless {}, 'PublicInbox::Config',
+ },
+ ibx => $ibx,
};
- my $smsg = { num => 1, mid => $mid, nntp => $mock_self, -ibx => $ng };
+ my $smsg = { num => 1, mid => $mid, nntp => $mock_self, -ibx => $ibx };
PublicInbox::NNTP::set_nntp_headers($hdr, $smsg);
is_deeply([ $mime->header('Message-ID') ], [ "<$mid>" ],
'Message-ID unchanged');
- is_deeply([ $mime->header('Archived-At') ], [ "<${u}a\@b/>" ],
- 'Archived-At: set');
- is_deeply([ $mime->header('List-Archive') ], [ "<$u>" ],
- 'List-Archive: set');
- is_deeply([ $mime->header('List-Post') ], [ '<mailto:a@example.com>' ],
- 'List-Post: set');
is_deeply([ $mime->header('Newsgroups') ], [ 'test' ],
'Newsgroups: set');
is_deeply([ $mime->header('Xref') ], [ 'example.com test:1' ],
'Xref: set');
- $ng->{-base_url} = 'http://mirror.example.com/m/';
+ $ibx->{-base_url} = 'http://mirror.example.com/m/';
$smsg->{num} = 2;
PublicInbox::NNTP::set_nntp_headers($hdr, $smsg);
is_deeply([ $mime->header('Message-ID') ], [ "<$mid>" ],
'Message-ID unchanged');
- is_deeply([ $mime->header('Archived-At') ],
- [ "<${u}a\@b/>", '<http://mirror.example.com/m/a@b/>' ],
- 'Archived-At: appended');
is_deeply([ $mime->header('Xref') ], [ 'example.com test:2' ],
'Old Xref: clobbered');
}
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
local $ENV{PI_TEST_VERSION} = 2;
require './t/nntpd.t';
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use Test::More;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
--- /dev/null
+#!perl -w
+use strict;
+use v5.10.1;
+use Test::More;
+require_ok 'PublicInbox::OnDestroy'; # each case below builds an object, drops it, then inspects what the callback did
+my @x; # written to by the callbacks
+my $od = PublicInbox::OnDestroy->new(sub { push @x, 'hi' });
+is_deeply(\@x, [], 'not called, yet');
+undef $od; # drop the only reference held by this script
+is_deeply(\@x, [ 'hi' ], 'no args works');
+$od = PublicInbox::OnDestroy->new(sub { $x[0] = $_[0] }, 'bye'); # values after the sub are expected back in @_
+is_deeply(\@x, [ 'hi' ], 'nothing changed while alive');
+undef $od;
+is_deeply(\@x, [ 'bye' ], 'arg passed');
+$od = PublicInbox::OnDestroy->new(sub { @x = @_ }, qw(x y));
+undef $od;
+is_deeply(\@x, [ 'x', 'y' ], '2 args passed');
+# next: a number given before the sub; per the test labels it is compared against the current PID
+open my $tmp, '+>>', undef or BAIL_OUT $!;
+$tmp->autoflush(1);
+$od = PublicInbox::OnDestroy->new(1, sub { print $tmp "$$ DESTROY\n" }); # 1 is assumed not to be this process's PID
+undef $od;
+is(-s $tmp, 0, '$tmp is empty on pid mismatch'); # nothing was printed to the temp file
+$od = PublicInbox::OnDestroy->new($$, sub { $tmp = $$ }); # matching PID; the callback replaces $tmp with $$
+undef $od;
+is($tmp, $$, '$tmp set to $$ by callback');
+# optional: with TEST_LEAK_NR set, keep replacing $od; presumably for watching memory use with an external tool (confirm)
+if (my $nr = $ENV{TEST_LEAK_NR}) {
+ for (0..$nr) { # runs $nr + 1 times
+ $od = PublicInbox::OnDestroy->new(sub { @x = @_ }, qw(x y));
+ }
+}
+
+done_testing;
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
'WAL journal_mode not clobbered if manually set');
}
+# ext index additions
+# Exercise the xref3 helpers on $over: add_xref3 (called twice with the same
+# arguments), get_xref3 and remove_xref3, then call rollback_lazy at the end.
+$over->eidx_prep;
+{
+ my @arg = qw(1349 2019 adeadba7cafe example.key);
+ ok($over->add_xref3(@arg), 'first add');
+ ok($over->add_xref3(@arg), 'add idempotent');
+ my $xref3 = $over->get_xref3(1349);
+ # entries read back as "<4th arg>:<2nd arg>:<3rd arg>" of add_xref3
+ is_deeply($xref3, [ 'example.key:2019:adeadba7cafe' ], 'xref3 works');
+
+ # a second, different entry under the same first argument (1349)
+ @arg = qw(1349 2018 deadbeefcafe example.kee);
+ ok($over->add_xref3(@arg), 'add another xref3');
+ $xref3 = $over->get_xref3(1349);
+ is_deeply($xref3, [ 'example.key:2019:adeadba7cafe',
+ 'example.kee:2018:deadbeefcafe' ],
+ 'xref3 works for two');
+
+ # remove_xref3 takes three arguments here: the 2019 value is not passed
+ @arg = qw(1349 adeadba7cafe example.key);
+ is($over->remove_xref3(@arg), 1, 'remove first');
+ $xref3 = $over->get_xref3(1349);
+ is_deeply($xref3, [ 'example.kee:2018:deadbeefcafe' ],
+ 'confirm removal successful');
+ $over->rollback_lazy;
+}
+
done_testing();
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
my $pfx = 'http://example.com/test';
ok(run_script(['-init', 'test', $inboxdir, "$pfx/", $addr]),
'initialized repo');
-PublicInbox::Import::run_die([qw(git config -f), $pi_config,
- 'publicinbox.test.newsgroup', 'inbox.test']);
+xsys_e(qw(git config -f), $pi_config,
+ qw(publicinbox.test.newsgroup inbox.test));
open my $fh, '>', "$inboxdir/description" or die "open: $!\n";
print $fh "test for public-inbox\n";
close $fh or die "close: $!\n";
-# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
use PublicInbox::Import;
use PublicInbox::Git;
use PublicInbox::Config;
-my $config = PublicInbox::Config->new(\<<EOF);
+my $cfg = PublicInbox::Config->new(\<<EOF);
$cfgpfx.address=$addr
$cfgpfx.inboxdir=$maindir
EOF
$im->done;
}
-my $www = PublicInbox::WWW->new($config);
+my $www = PublicInbox::WWW->new($cfg);
my $app = builder(sub {
enable('Head');
mount('/a' => builder(sub { sub { $www->call(@_) } }));
$res = $cb->(GET('/a/test/blah%40example.com/raw'));
is($res->code, 200, 'OK with URLMap mount');
- like($res->content, qr!^List-Archive: <http://[^/]+/a/test/>!m,
- 'List-Archive set in /raw mboxrd');
like($res->content,
- qr!^Archived-At: <http://[^/]+/a/test/blah\@example\.com/>!m,
- 'Archived-At set in /raw mboxrd');
+ qr/^Message-Id: <blah\@example\.com>\n/sm,
+ 'headers appear in /raw');
# redirects
$res = $cb->(GET('/a/test/m/blah%40example.com.html'));
SKIP: {
require_mods(qw(DBD::SQLite Search::Xapian IO::Uncompress::Gunzip), 3);
- my $ibx = $config->lookup_name('test');
+ my $ibx = $cfg->lookup_name('test');
require_ok 'PublicInbox::SearchIdx';
PublicInbox::SearchIdx->new($ibx, 1)->index_sync;
test_psgi($app, sub {
my $gz = $res->content;
my $raw;
IO::Uncompress::Gunzip::gunzip(\$gz => \$raw);
- like($raw, qr!^List-Archive: <http://[^/]+/a/test/>!m,
- 'List-Archive set in /t.mbox.gz mboxrd');
- like($raw,
- qr!^Archived-At:\x20
- <http://[^/]+/a/test/blah\@example\.com/>!mx,
- 'Archived-At set in /t.mbox.gz mboxrd');
+ like($raw, qr!^Message-Id:\x20<blah\@example\.com>\n!sm,
+ 'headers appear in /t.mbox.gz mboxrd');
});
}
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2017-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2017-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
PublicInbox::SearchIdx->new($ibx, 1)->index_sync;
my $cfgpfx = "publicinbox.test";
-my $config = PublicInbox::Config->new(\<<EOF);
+my $cfg = PublicInbox::Config->new(\<<EOF);
$cfgpfx.address=git\@vger.kernel.org
$cfgpfx.inboxdir=$tmpdir
EOF
-my $www = PublicInbox::WWW->new($config);
+my $www = PublicInbox::WWW->new($cfg);
test_psgi(sub { $www->call(@_) }, sub {
my ($cb) = @_;
my $res;
$xdb->set_metadata('has_threadid', '0');
$sidx->idx_release;
}
- $config->each_inbox(sub { delete $_[0]->{search} });
+ $cfg->each_inbox(sub { delete $_[0]->{search} });
$res = $cb->(GET('/test/?q=s:test'));
is($res->code, 200, 'successful search w/o has_threadid');
unlike($html, qr/download mbox\.gz: .*?"full threads"/s,
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
'"From_" line stored to test old bug workaround');
my $cfgpfx = "publicinbox.v2test";
-my $cfg = <<EOF;
+my $cfg = PublicInbox::Config->new(\<<EOF);
$cfgpfx.address=$ibx->{-primary_address}
$cfgpfx.inboxdir=$inboxdir
EOF
-my $config = PublicInbox::Config->new(\$cfg);
-my $www = PublicInbox::WWW->new($config);
+my $www = PublicInbox::WWW->new($cfg);
my ($res, $raw, @from_);
my $client0 = sub {
my ($cb) = @_;
like($raw, qr/^hello ghosts$/m, 'got third message');
@from_ = ($raw =~ m/^From /mg);
is(scalar(@from_), 3, 'three From_ lines');
- $config->each_inbox(sub { $_[0]->search->reopen });
+ $cfg->each_inbox(sub { $_[0]->search->reopen });
SKIP: {
eval { require IO::Uncompress::Gunzip };
$im->done;
my @h = $mime->header('Message-ID');
is_deeply($exp, \@h, 'reused existing Message-ID');
- $config->each_inbox(sub { $_[0]->search->reopen });
+ $cfg->each_inbox(sub { $_[0]->search->reopen });
}
my $client2 = sub {
ok($im->add($mime), "added attachment $body");
}
$im->done;
- $config->each_inbox(sub { $_[0]->search->reopen });
+ $cfg->each_inbox(sub { $_[0]->search->reopen });
}
my $client3 = sub {
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use Test::More;
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
test_replace(2, 'basic', $opt = { %$opt, rotate_bytes => 1 });
SKIP: {
- require PublicInbox::Search;
- PublicInbox::Search::load_xapian() or skip 'Search::Xapian missing', 8;
+ require_mods(qw(Search::Xapian), 8);
for my $l (qw(medium)) {
test_replace(2, $l, {});
$opt = { pre => \&pad_msgs };
-# Copyright (C) 2017-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2017-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
#!/usr/bin/perl -w
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Parallel test runner which preloads code and reuses worker processes
my $skip = '';
if (open my $fh, '<', $log) {
my @not_ok = grep(!/^(?:ok |[ \t]*#)/ms, <$fh>);
- pop @not_ok if $not_ok[-1] =~ /^[0-9]+\.\.[0-9]+$/;
+ my $last = $not_ok[-1] // '';
+ pop @not_ok if $last =~ /^[0-9]+\.\.[0-9]+$/;
my $pfx = "# $log: ";
print $OLDERR map { $pfx.$_ } @not_ok;
seek($fh, 0, SEEK_SET) or die "seek: $!";
-# Copyright (C) 2017-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2017-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
}
{
- my $crlf_adjust = \&PublicInbox::SearchIdx::crlf_adjust;
+ my $crlf_adjust = \&PublicInbox::Smsg::crlf_adjust;
is($crlf_adjust->("hi\r\nworld\r\n"), 0, 'no adjustment needed');
is($crlf_adjust->("hi\nworld\n"), 2, 'LF-only counts two CR');
is($crlf_adjust->("hi\r\nworld\n"), 1, 'CRLF/LF-mix 1 counts 1 CR');
like($smsg->{to}, qr/\blist\@example\.com\b/, 'to appears');
my $doc = $m->get_document;
my $col = PublicInbox::Search::BYTES();
- my $bytes = PublicInbox::Smsg::get_val($doc, $col);
+ my $bytes = PublicInbox::Search::int_val($doc, $col);
like($bytes, qr/\A[0-9]+\z/, '$bytes stored as digit');
ok($bytes > 0, '$bytes is > 0');
is($bytes, $smsg->{bytes}, 'bytes Xapian value matches Over');
$col = PublicInbox::Search::UID();
- my $uid = PublicInbox::Smsg::get_val($doc, $col);
+ my $uid = PublicInbox::Search::int_val($doc, $col);
is($uid, $smsg->{num}, 'UID column matches {num}');
is($uid, $m->get_docid, 'UID column matches docid');
}
});
done_testing();
-
-1;
--- /dev/null
+#!perl -w
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use Test::More;
+use PublicInbox::TestCommon;
+use_ok 'PublicInbox::SharedKV'; # the checks below form one sequence: each depends on the state left by the previous ones
+my ($tmpdir, $for_destroy) = tmpdir();
+local $ENV{TMPDIR} = $tmpdir; # presumably so the no-argument constructor below creates its directory under $tmpdir (confirm)
+my $skv = PublicInbox::SharedKV->new;
+my $skv_tmpdir = $skv->{tmpdir}; # remembered so its removal can be checked after undef $skv
+ok(-d $skv_tmpdir, 'created a temporary dir');
+$skv->dbh;
+my $dead = "\xde\xad"; # two-byte binary strings used as both keys and values
+my $beef = "\xbe\xef";
+my $cafe = "\xca\xfe";
+ok($skv->set($dead, $beef), 'set');
+is($skv->get($dead), $beef, 'get');
+ok($skv->set($dead, $beef), 'set idempotent');
+ok(!$skv->set_maybe($dead, $cafe), 'set_maybe ignores'); # key $dead is already set
+ok($skv->set_maybe($cafe, $dead), 'set_maybe sets'); # key $cafe is new
+is($skv->xchg($dead, $cafe), $beef, 'xchg'); # expected to hand back the previous value
+is($skv->get($dead), $cafe, 'get after xchg');
+is($skv->xchg($dead, undef), $cafe, 'xchg to undef');
+is($skv->get($dead), undef, 'get after xchg to undef');
+is($skv->get($cafe), $dead, 'get after set_maybe');
+ok($skv->index_values, 'index_values works');
+is($skv->replace_values($dead, $cafe), 1, 'replaced one by value'); # only key $cafe holds value $dead at this point
+is($skv->get($cafe), $cafe, 'value updated');
+is($skv->replace_values($dead, $cafe), 0, 'replaced none by value');
+is($skv->xchg($dead, $cafe), undef, 'xchg from undef');
+is($skv->count, 2, 'count works'); # keys $dead and $cafe
+# walk all pairs; both keys are expected to hold $cafe by now
+my %seen;
+my $sth = $skv->each_kv_iter;
+while (my ($k, $v) = $sth->fetchrow_array) {
+ $seen{$k} = $v;
+}
+is($seen{$dead}, $cafe, '$dead has expected value');
+is($seen{$cafe}, $cafe, '$cafe has expected value');
+is(scalar keys %seen, 2, 'iterated through all');
+# bulk operations that select rows by value
+is($skv->replace_values($cafe, $dead), 2, 'replaced 2 by value');
+is($skv->delete_by_val('bogus'), 0, 'delete_by_val misses');
+is($skv->delete_by_val($dead), 2, 'delete_by_val hits');
+is($skv->delete_by_val($dead), 0, 'delete_by_val misses again');
+# dropping the object is expected to remove the directory it created
+undef $skv;
+ok(!-d $skv_tmpdir, 'temporary dir gone');
+$skv = PublicInbox::SharedKV->new("$tmpdir/dir", 'base'); # two-argument form: directory and base name
+ok(-e "$tmpdir/dir/base.sqlite3", 'file created');
+
+done_testing;
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
use strict;
use Test::More;
use IO::Handle;
use POSIX qw(:signal_h);
use Errno qw(ENOSYS);
-use PublicInbox::Syscall qw($SFD_NONBLOCK);
+use PublicInbox::Syscall qw(SFD_NONBLOCK);
require_ok 'PublicInbox::Sigfd';
SKIP: {
}
$sigfd = undef;
- my $nbsig = PublicInbox::Sigfd->new($sig, $SFD_NONBLOCK);
- ok($nbsig, 'Sigfd->new $SFD_NONBLOCK works');
+ my $nbsig = PublicInbox::Sigfd->new($sig, SFD_NONBLOCK);
+ ok($nbsig, 'Sigfd->new SFD_NONBLOCK works');
is($nbsig->wait_once, undef, 'nonblocking ->wait_once');
ok($! == Errno::EAGAIN, 'got EAGAIN');
kill('HUP', $$) or die "kill $!";
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
select(undef, undef, undef, 0.01) while 1;
}
EOF
- my $oldset = PublicInbox::Sigfd::block_signals();
+ my $oldset = PublicInbox::DS::block_signals();
my $rd = popen_rd([$^X, '-e', $script]);
diag 'waiting for child to reap grandchild...';
chomp(my $line = readline($rd));
ok(kill('CHLD', $pid), 'sent SIGCHLD to child');
is(readline($rd), "HI\n", '$SIG{CHLD} works in child');
ok(close $rd, 'popen_rd close works');
- PublicInbox::Sigfd::sig_setmask($oldset);
+ PublicInbox::DS::sig_setmask($oldset);
}
{
isnt($?, 0, '$? set properly: '.$?);
}
+{ # ->CLOSE vs ->DESTROY waitpid caller distinction
+ my @c; # caller() info recorded by the cb; empty until the cb runs
+ my $fh = popen_rd(['true'], undef, { cb => sub { @c = caller } });
+ ok(close($fh), '->CLOSE fired and successful');
+ ok(scalar(@c), 'callback fired by ->CLOSE');
+ ok(grep(!m[/PublicInbox/DS\.pm\z], @c), 'callback not invoked by DS');
+ # NOTE(review): both grep(!m[...]) checks in this block pass as soon as any one element of @c fails to match
+ @c = ();
+ $fh = popen_rd(['true'], undef, { cb => sub { @c = caller } });
+ undef $fh; # ->DESTROY
+ ok(scalar(@c), 'callback fired by ->DESTROY');
+ ok(grep(!m[/PublicInbox/ProcessPipe\.pm\z], @c),
+ 'callback not invoked by ProcessPipe');
+}
+
+{ # children don't wait on siblings
+ use POSIX qw(_exit);
+ pipe(my ($r, $w)) or BAIL_OUT $!;
+ my $cb = sub { warn "x=$$\n" }; # the warning text carries the PID of whichever process runs the cb
+ my $fh = popen_rd(['cat'], undef, { 0 => $r, cb => $cb }); # presumably $r becomes cat's stdin (confirm)
+ my $pp = tied *$fh; # NOTE(review): not referenced again in this block
+ my $pid = fork // BAIL_OUT $!;
+ local $SIG{__WARN__} = sub { _exit(1) }; # set after the fork in both processes: any warn exits with status 1
+ if ($pid == 0) { # forked child
+ local $SIG{__DIE__} = sub { _exit(2) };
+ undef $fh; # if this ran $cb here, its warn would make the exit status 1
+ _exit(0);
+ }
+ waitpid($pid, 0);
+ is($?, 0, 'forked process exited'); # status 0: neither the warn nor the die handler fired in the child
+ my @w;
+ local $SIG{__WARN__} = sub { push @w, @_ }; # from here on, collect warnings instead of exiting
+ close $w; # close our write end of the pipe before closing $fh
+ close $fh;
+ is($?, 0, 'cat exited');
+ is_deeply(\@w, [ "x=$$\n" ], 'callback fired from owner'); # exactly one warning, carrying this process's PID
+}
+
SKIP: {
eval {
require BSD::Resource;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use v5.10.1;
}
$im->done;
my $over = $ibx->over;
- my @tid = $over->dbh->selectall_array('SELECT DISTINCT(tid) FROM over');
+ my $dbh = $over->dbh;
+ my $tid = $dbh->selectall_arrayref('SELECT DISTINCT(tid) FROM over');
+ my @tid = map { $_->[0] } @$tid;
is(scalar(@tid), 1, "only one thread initially ($desc)");
$over->dbh_close;
run_script([qw(-index --reindex --rethread), $ibx->{inboxdir}]) or
BAIL_OUT 'rethread';
- @tid = $over->dbh->selectall_array('SELECT DISTINCT(tid) FROM over');
+ $tid = $dbh->selectall_arrayref('SELECT DISTINCT(tid) FROM over');
+ @tid = map { $_->[0] } @$tid;
is(scalar(@tid), 1, "only one thread after rethread ($desc)");
}
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use Test::More;
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# we can index a message from a mirror which bypasses dedupe.
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
$mime->header_set('Message-ID', "<$y>");
$mime->header_set('References', "<$x>");
ok($im->add($mime), 'add excessively long References');
- $im->barrier;
+ $im->done;
my $msgs = $ibx->over->get_thread('x'x244);
is(2, scalar(@$msgs), 'got both messages');
is($msgs->[0]->{mid}, 'x'x244, 'stored truncated mid');
is($msgs->[1]->{references}, '<'.('x'x244).'>', 'stored truncated ref');
is($msgs->[1]->{mid}, 'y'x244, 'stored truncated mid(2)');
- $im->done;
}
my $tmp = {
-# Copyright (C) 2013-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2013-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use Test::More;
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
$cfgpfx.altid=serial:alerts:file=msgmap.sqlite3
publicinboxwatch.watchspam=maildir:$spamdir
EOF
- my $config = PublicInbox::Config->new(\$orig);
- my $ibx = $config->lookup_name($v);
+ my $cfg = PublicInbox::Config->new(\$orig);
+ my $ibx = $cfg->lookup_name($v);
ok($ibx, 'found inbox by name');
- my $w = PublicInbox::Watch->new($config);
+ my $w = PublicInbox::Watch->new($cfg);
for my $i (1..2) {
$w->scan('full');
}
}
$w->scan('full');
- $config = PublicInbox::Config->new(\$orig);
- $ibx = $config->lookup_name($v);
+ $cfg = PublicInbox::Config->new(\$orig);
+ $ibx = $cfg->lookup_name($v);
is($ibx->search->reopen->mset('b:spam')->size, 0, 'spam removed');
is_deeply([], \@warn, 'no warnings');
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use Test::More;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use Test::More;
{
my @w;
local $SIG{__WARN__} = sub { push @w, @_ };
- my $config = PublicInbox::Config->new(\<<EOF);
+ my $cfg = PublicInbox::Config->new(\<<EOF);
$cfgpfx.address=$addr
$cfgpfx.inboxdir=$git_dir
$cfgpfx.watch=maildir:$spamdir
publicinboxlearn.watchspam=maildir:$spamdir
EOF
- my $wm = PublicInbox::Watch->new($config);
+ my $wm = PublicInbox::Watch->new($cfg);
is(scalar grep(/is a spam folder/, @w), 1, 'got warning about spam');
is_deeply($wm->{mdmap}, { "$spamdir/cur" => 'watchspam' },
'only got the spam folder to watch');
close $fh or BAIL_OUT $!;
}
-my $config = PublicInbox::Config->new($cfg_path);
-PublicInbox::Watch->new($config)->scan('full');
+my $cfg = PublicInbox::Config->new($cfg_path);
+PublicInbox::Watch->new($cfg)->scan('full');
my $git = PublicInbox::Git->new($git_dir);
my @list = $git->qx(qw(rev-list refs/heads/master));
is(scalar @list, 1, 'one revision in rev-list');
};
$write_spam->();
is(unlink(glob("$maildir/new/*")), 1, 'unlinked old spam');
-PublicInbox::Watch->new($config)->scan('full');
+PublicInbox::Watch->new($cfg)->scan('full');
@list = $git->qx(qw(rev-list refs/heads/master));
is(scalar @list, 2, 'two revisions in rev-list');
@list = $git->qx(qw(ls-tree -r --name-only refs/heads/master));
the body of a message to majordomo\@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html\n);
PublicInbox::Emergency->new($maildir)->prepare(\$msg);
- PublicInbox::Watch->new($config)->scan('full');
+ PublicInbox::Watch->new($cfg)->scan('full');
@list = $git->qx(qw(ls-tree -r --name-only refs/heads/master));
is(scalar @list, 1, 'tree has one file');
my $mref = $git->cat_file('HEAD:'.$list[0]);
is(unlink(glob("$maildir/new/*")), 1, 'unlinked spam');
$write_spam->();
- PublicInbox::Watch->new($config)->scan('full');
+ PublicInbox::Watch->new($cfg)->scan('full');
@list = $git->qx(qw(ls-tree -r --name-only refs/heads/master));
is(scalar @list, 0, 'tree is empty');
@list = $git->qx(qw(rev-list refs/heads/master));
my $fail_path = "$fail_bin:$ENV{PATH}"; # for spamc ham mock
local $ENV{PATH} = $fail_path;
PublicInbox::Emergency->new($maildir)->prepare(\$msg);
- $config->{'publicinboxwatch.spamcheck'} = 'spamc';
+ $cfg->{'publicinboxwatch.spamcheck'} = 'spamc';
{
local $SIG{__WARN__} = sub {}; # quiet spam check warning
- PublicInbox::Watch->new($config)->scan('full');
+ PublicInbox::Watch->new($cfg)->scan('full');
}
@list = $git->qx(qw(ls-tree -r --name-only refs/heads/master));
is(scalar @list, 0, 'tree has no files spamc checked');
my $main_path = "$main_bin:$ENV{PATH}"; # for spamc ham mock
local $ENV{PATH} = $main_path;
PublicInbox::Emergency->new($maildir)->prepare(\$msg);
- $config->{'publicinboxwatch.spamcheck'} = 'spamc';
+ $cfg->{'publicinboxwatch.spamcheck'} = 'spamc';
@list = $git->qx(qw(ls-tree -r --name-only refs/heads/master));
- PublicInbox::Watch->new($config)->scan('full');
+ PublicInbox::Watch->new($cfg)->scan('full');
@list = $git->qx(qw(ls-tree -r --name-only refs/heads/master));
is(scalar @list, 1, 'tree has one file after spamc checked');
$delivered++;
};
PublicInbox::DS->Reset;
- my $ii = PublicInbox::InboxIdle->new($config);
+ my $ii = PublicInbox::InboxIdle->new($cfg);
my $obj = bless \$cb, 'PublicInbox::TestCommon::InboxWakeup';
- $config->each_inbox(sub { $_[0]->subscribe_unlock('ident', $obj) });
+ $cfg->each_inbox(sub { $_[0]->subscribe_unlock('ident', $obj) });
PublicInbox::DS->SetPostLoopCallback(sub { $delivered == 0 });
# wait for -watch to setup inotify watches
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use Test::More;
$cfgpfx.filter=PublicInbox::Filter::Vger
publicinboxlearn.watchspam=maildir:$spamdir
EOF
-my $config = PublicInbox::Config->new(\$orig);
-my $ibx = $config->lookup_name('test');
+my $cfg = PublicInbox::Config->new(\$orig);
+my $ibx = $cfg->lookup_name('test');
ok($ibx, 'found inbox by name');
-PublicInbox::Watch->new($config)->scan('full');
+PublicInbox::Watch->new($cfg)->scan('full');
my $total = scalar @{$ibx->over->recent};
is($total, 1, 'got one revision');
};
$write_spam->();
is(unlink(glob("$maildir/new/*")), 1, 'unlinked old spam');
-PublicInbox::Watch->new($config)->scan('full');
+PublicInbox::Watch->new($cfg)->scan('full');
is_deeply($ibx->over->recent, [], 'deleted file');
is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam');
the body of a message to majordomo\@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html\n);
PublicInbox::Emergency->new($maildir)->prepare(\$msg);
- PublicInbox::Watch->new($config)->scan('full');
+ PublicInbox::Watch->new($cfg)->scan('full');
my $msgs = $ibx->over->recent;
is(scalar(@$msgs), 1, 'got one file back');
my $mref = $ibx->msg_by_smsg($msgs->[0]);
is(unlink(glob("$maildir/new/*")), 1, 'unlinked spam');
$write_spam->();
- PublicInbox::Watch->new($config)->scan('full');
+ PublicInbox::Watch->new($cfg)->scan('full');
$msgs = $ibx->over->recent;
is(scalar(@$msgs), 0, 'inbox is empty again');
is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam');
my $fail_path = "$fail_bin:$ENV{PATH}"; # for spamc ham mock
local $ENV{PATH} = $fail_path;
PublicInbox::Emergency->new($maildir)->prepare(\$msg);
- $config->{'publicinboxwatch.spamcheck'} = 'spamc';
+ $cfg->{'publicinboxwatch.spamcheck'} = 'spamc';
{
local $SIG{__WARN__} = sub {}; # quiet spam check warning
- PublicInbox::Watch->new($config)->scan('full');
+ PublicInbox::Watch->new($cfg)->scan('full');
}
my $msgs = $ibx->over->recent;
is(scalar(@$msgs), 0, 'inbox is still empty');
my $main_path = "$main_bin:$ENV{PATH}"; # for spamc ham mock
local $ENV{PATH} = $main_path;
PublicInbox::Emergency->new($maildir)->prepare(\$msg);
- $config->{'publicinboxwatch.spamcheck'} = 'spamc';
- PublicInbox::Watch->new($config)->scan('full');
+ $cfg->{'publicinboxwatch.spamcheck'} = 'spamc';
+ PublicInbox::Watch->new($cfg)->scan('full');
my $msgs = $ibx->over->recent;
is(scalar(@$msgs), 1, 'inbox has one mail after spamc OK-ed a message');
my $mref = $ibx->msg_by_smsg($msgs->[0]);
like($$mref, qr/something\n\z/s, 'message scrubbed on import');
- delete $config->{'publicinboxwatch.spamcheck'};
+ delete $cfg->{'publicinboxwatch.spamcheck'};
}
{
open my $fh, '<', $patch or die "failed to open $patch: $!\n";
$msg = do { local $/; <$fh> };
PublicInbox::Emergency->new($maildir)->prepare(\$msg);
- PublicInbox::Watch->new($config)->scan('full');
+ PublicInbox::Watch->new($cfg)->scan('full');
my $post = $ibx->search->reopen->mset('dfpost:6e006fd7');
is($post->size, 1, 'diff postimage found');
my $pre = $ibx->search->mset('dfpre:090d998b6c2c');
my $v1pfx = "publicinbox.v1";
my $v1addr = 'v1-public@example.com';
PublicInbox::Import::init_bare($v1repo);
- my $cfg2 = <<EOF;
+ my $raw = <<EOF;
$orig$v1pfx.address=$v1addr
$v1pfx.inboxdir=$v1repo
$v1pfx.watch=maildir:$maildir
EOF
- my $config = PublicInbox::Config->new(\$cfg2);
+ my $cfg = PublicInbox::Config->new(\$raw);
my $both = <<EOF;
From: user\@example.com
To: $addr, $v1addr
both
EOF
PublicInbox::Emergency->new($maildir)->prepare(\$both);
- PublicInbox::Watch->new($config)->scan('full');
+ PublicInbox::Watch->new($cfg)->scan('full');
my $mset = $ibx->search->reopen->mset('m:both@b.com');
my $msgs = $ibx->search->mset_to_smsg($ibx, $mset);
- my $v1 = $config->lookup_name('v1');
+ my $v1 = $cfg->lookup_name('v1');
my $msg = $v1->git->cat_file($msgs->[0]->{blob});
is($both, $$msg, 'got original message back from v1');
$msg = $ibx->git->cat_file($msgs->[0]->{blob});
X-Mailing-List: no@example.com
Message-ID: <do.not.want@example.com>
EOF
- my $cfg = $orig."$cfgpfx.listid=i.want.you.to.want.me\n";
+ my $raw = $orig."$cfgpfx.listid=i.want.you.to.want.me\n";
PublicInbox::Emergency->new($maildir)->prepare(\$want);
PublicInbox::Emergency->new($maildir)->prepare(\$do_not_want);
- my $config = PublicInbox::Config->new(\$cfg);
- PublicInbox::Watch->new($config)->scan('full');
- $ibx = $config->lookup_name('test');
+ my $cfg = PublicInbox::Config->new(\$raw);
+ PublicInbox::Watch->new($cfg)->scan('full');
+ $ibx = $cfg->lookup_name('test');
my $num = $ibx->mm->num_for('do.want@example.com');
ok(defined $num, 'List-ID matched for watch');
$num = $ibx->mm->num_for('do.not.want@example.com');
is($num, undef, 'unaccepted List-ID matched for watch');
- $cfg = $orig."$cfgpfx.watchheader=X-Mailing-List:no\@example.com\n";
- $config = PublicInbox::Config->new(\$cfg);
- PublicInbox::Watch->new($config)->scan('full');
- $ibx = $config->lookup_name('test');
+ $raw = $orig."$cfgpfx.watchheader=X-Mailing-List:no\@example.com\n";
+ $cfg = PublicInbox::Config->new(\$raw);
+ PublicInbox::Watch->new($cfg)->scan('full');
+ $ibx = $cfg->lookup_name('test');
$num = $ibx->mm->num_for('do.not.want@example.com');
ok(defined $num, 'X-Mailing-List matched');
}
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use Test::More;
PublicInbox::Emergency->new($maildir)->prepare(\$msg_cc);
PublicInbox::Emergency->new($maildir)->prepare(\$msg_none);
-my $cfg = <<EOF;
+my $raw = <<EOF;
$cfgpfx.address=$addr
$cfgpfx.inboxdir=$inboxdir
$cfgpfx.watch=maildir:$maildir
$cfgpfx.watchheader=To:$addr
$cfgpfx.watchheader=Cc:$addr
EOF
-my $config = PublicInbox::Config->new(\$cfg);
-PublicInbox::Watch->new($config)->scan('full');
-my $ibx = $config->lookup_name('test');
+my $cfg = PublicInbox::Config->new(\$raw);
+PublicInbox::Watch->new($cfg)->scan('full');
+my $ibx = $cfg->lookup_name('test');
ok($ibx, 'found inbox by name');
my $num = $ibx->mm->num_for('to@a.com');
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use Test::More;
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use Test::More;
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# manifest.js.gz generation and grok-pull integration test
use strict;
use PublicInbox::Spawn qw(which);
use PublicInbox::TestCommon;
use PublicInbox::Import;
-require_mods(qw(URI::Escape Plack::Builder Digest::SHA
+require_mods(qw(json URI::Escape Plack::Builder Digest::SHA
IO::Compress::Gzip IO::Uncompress::Gunzip HTTP::Tiny));
require PublicInbox::WwwListing;
require PublicInbox::ManifestJsGz;
-my $json = do {
- no warnings 'once';
- $PublicInbox::ManifestJsGz::json;
-} or plan skip_all => "JSON module missing";
+use PublicInbox::Config;
+my $json = PublicInbox::Config::json();
use_ok 'PublicInbox::Git';
my ($tmpdir, $for_destroy) = tmpdir();
my $bare = PublicInbox::Git->new("$tmpdir/bare.git");
PublicInbox::Import::init_bare($bare->{git_dir});
-is(PublicInbox::ManifestJsGz::fingerprint($bare), undef,
- 'empty repo has no fingerprint');
+is($bare->manifest_entry, undef, 'empty repo has no manifest entry');
{
my $fi_data = './t/git.fast-import-data';
open my $fh, '<', $fi_data or die "open $fi_data: $!";
'fast-import');
}
-like(PublicInbox::ManifestJsGz::fingerprint($bare), qr/\A[a-f0-9]{40}\z/,
+like($bare->manifest_entry->{fingerprint}, qr/\A[a-f0-9]{40}\z/,
'got fingerprint with non-empty repo');
sub tiny_test {
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use Test::More;
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use Test::More;
select($vec, undef, undef, 60) or die "timed out waiting for --batch-check";
my $mime_ctx = {
env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' },
- -inbox => $ibx,
+ ibx => $ibx,
www => Plack::Util::inline_object(style => sub {''}),
obuf => \(my $mime_buf = ''),
mhref => '../',
--- /dev/null
+#!perl -w
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use Test::More;
+use PublicInbox::TestCommon;
+use PublicInbox::Eml;
+use File::Path qw(mkpath);
+use IO::Handle (); # autoflush
+use POSIX qw(_exit);
+use Cwd qw(getcwd abs_path);
+use File::Spec;
+# Test setup: everything is created beneath $ENV{TEST_MANY_ROOT}, and the
+# whole test is skipped when that variable is unset or false.
+my $many_root = $ENV{TEST_MANY_ROOT} or
+ plan skip_all => 'TEST_MANY_ROOT not defined';
+my $cwd = getcwd();
+mkpath($many_root);
+-d $many_root or BAIL_OUT "$many_root: $!";
+$many_root = abs_path($many_root);
+# refuse to run when the resolved root lies inside the current directory
+$many_root =~ m!\A\Q$cwd\E/! and BAIL_OUT "$many_root must not be in $cwd";
+require_git 2.6;
+require_mods(qw(DBD::SQLite Search::Xapian));
+use_ok 'PublicInbox::V2Writable';
+# tunables taken from the environment, with defaults
+my $nr_inbox = $ENV{NR_INBOX} // 10;
+# number of worker processes to fork; falls back to 2 if nothing else is set
+my $nproc = $ENV{NPROC} || PublicInbox::V2Writable::detect_nproc() || 2;
+my $indexlevel = $ENV{TEST_INDEXLEVEL} // 'basic';
+diag "NR_INBOX=$nr_inbox NPROC=$nproc TEST_INDEXLEVEL=$indexlevel";
+diag "TEST_MANY_ROOT=$many_root";
+# extra config line appended to each inbox section; empty for 'full'
+my $level_cfg = $indexlevel eq 'full' ? '' : "\tindexlevel = $indexlevel\n";
+# directory for this run, named after the inbox count and index level
+my $pfx = "$many_root/$nr_inbox-$indexlevel";
+mkpath($pfx);
+# the config is opened in append mode and shared with the forked workers,
+# which print their inbox sections to it from $v2_init_add
+open my $cfg_fh, '>>', "$pfx/config" or BAIL_OUT $!;
+# NOTE(review): autoflush presumably keeps each worker's entry from sitting
+# in (or being duplicated from) a stdio buffer across fork -- confirm
+$cfg_fh->autoflush(1);
+# Creates the v2 inbox numbered $i under $pfx: builds the Inbox object,
+# appends a matching [publicinbox "test-$i"] section to the shared config
+# file, initializes the inbox and adds a single message to it.
+# Invoked by the forked workers with a number read from their pipe.
+my $v2_init_add = sub {
+ my ($i) = @_;
+ my $ibx = PublicInbox::Inbox->new({
+ inboxdir => "$pfx/test-$i",
+ name => "test-$i",
+ newsgroup => "inbox.comp.test.foo.test-$i",
+ address => [ "test-$i\@example.com" ],
+ url => [ "//example.com/test-$i" ],
+ version => 2,
+ });
+ # $level_cfg is empty only for the 'full' level, so indexlevel is set
+ # on the object exactly when it is also written to the config below
+ $ibx->{indexlevel} = $indexlevel if $level_cfg ne '';
+ # config section for this inbox, built from the object's own fields
+ my $entry = <<EOF;
+[publicinbox "$ibx->{name}"]
+ address = $ibx->{-primary_address}
+ url = $ibx->{url}->[0]
+ newsgroup = $ibx->{newsgroup}
+ inboxdir = $ibx->{inboxdir}
+EOF
+ $entry .= $level_cfg;
+ # the whole section goes out in a single print to the append-mode handle
+ print $cfg_fh $entry or die $!;
+ my $v2w = PublicInbox::V2Writable->new($ibx, { nproc => 0 });
+ $v2w->init_inbox(0);
+ # one message per inbox; Message-ID and Subject carry the inbox number
+ $v2w->add(PublicInbox::Eml->new(<<EOM));
+Date: Sat, 02 Oct 2010 00:00:00 +0000
+From: Lorelei <l\@example.com>
+To: test-$i\@example.com
+Message-ID: <20101002-000000-$i\@example.com>
+Subject: hello world $i
+
+hi
+EOM
+ $v2w->done;
+};
+
+# Fork $nproc worker processes.  Each worker reads inbox numbers, one per
+# line, from its own pipe and calls $v2_init_add on every number until it
+# sees EOF, then leaves via _exit(0).  The parent records
+# [ pipe write end, pid ] for every worker in @children.
+my @children;
+for my $i (1..$nproc) {
+	my ($r, $w);
+	pipe($r, $w) or BAIL_OUT $!;
+	my $pid = fork;
+	# Check for failure before comparing against 0: fork returns undef
+	# on error and "undef == 0" is true, so testing "$pid == 0" first
+	# would send the parent down the child branch and make it _exit(0)
+	# without ever reporting the failed fork.
+	defined $pid or BAIL_OUT "fork: $!";
+	if ($pid == 0) { # child
+		close $w;
+		while (my $num = <$r>) {
+			chomp $num;
+			$v2_init_add->($num);
+		}
+		_exit(0);
+	}
+	# parent: keep only the write end, unbuffered so that every number
+	# printed to it is written to the pipe immediately
+	close $r or BAIL_OUT $!;
+	push @children, [ $w, $pid ];
+	$w->autoflush(1);
+}
+
+# Hand the inbox numbers to the workers round-robin, one number per line.
+# NOTE(review): the range is inclusive, so $nr_inbox + 1 inboxes are
+# requested (0 through $nr_inbox) -- confirm this is intended.
+for my $i (0..$nr_inbox) {
+ print { $children[$i % @children]->[0] } "$i\n" or BAIL_OUT $!;
+}
+
+# close the parent's write ends so the workers' read loops can reach EOF
+for my $c (@children) {
+ close $c->[0] or BAIL_OUT "close $!";
+}
+# reap every worker and require a zero wait status from each;
+# $i only numbers the test descriptions
+my $i = 0;
+for my $c (@children) {
+ my $pid = waitpid($c->[1], 0);
+ is($?, 0, ++$i.' exited ok');
+}
+ok(close($cfg_fh), 'config written');
+done_testing;
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use v5.10.1;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Ensure buffering behavior in -httpd doesn't cause runaway memory use
#!perl -w
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use Test::More;
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Expensive test to validate compression and TLS.
use strict;
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# ensure mbsync and offlineimap compatibility
use strict;
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Expensive test to validate compression and TLS.
use strict;
--- /dev/null
+#!perl -w
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use Test::More;
+use PublicInbox::TestCommon;
+use POSIX qw(WTERMSIG WIFSIGNALED SIGPIPE);
+require_mods(qw(json DBD::SQLite Search::Xapian));
+# XXX this needs an already configured lei instance with many messages
+
+# Runs "lei q -t ... bytes:1.." twice, once with the default output and
+# once with "-f mboxcl2", with its stdout connected to a pipe.  After
+# reading a single byte the read end is closed, and the command is then
+# expected to die from SIGPIPE without leaving errors on stderr.
+# The optional hashref argument is passed to start_script as its second
+# argument (presumably environment overrides, going by the
+# XDG_RUNTIME_DIR caller -- confirm against start_script).
+my $do_test = sub {
+ my $env = shift // {};
+ for my $out ([], [qw(-f mboxcl2)]) {
+ pipe(my ($r, $w)) or BAIL_OUT $!;
+ # an undef filename with '+>' gives an anonymous temporary file
+ open my $err, '+>', undef or BAIL_OUT $!;
+ # NOTE(review): keys 1 and 2 look like stdout/stderr redirects
+ # for the spawned script -- confirm against start_script
+ my $opt = { run_mode => 0, 1 => $w, 2 => $err };
+ my $cmd = [qw(lei q -t), @$out, 'bytes:1..'];
+ my $tp = start_script($cmd, $env, $opt);
+ # drop our copy of the write end; only the script writes to the pipe
+ close $w;
+ sysread($r, my $buf, 1);
+ close $r; # trigger SIGPIPE
+ $tp->join;
+ ok(WIFSIGNALED($?), "signaled @$out");
+ is(WTERMSIG($?), SIGPIPE, "got SIGPIPE @$out");
+ # rewind the captured stderr and ignore lines about "mkdir /dev/null",
+ # presumably expected when XDG_RUNTIME_DIR is /dev/null
+ seek($err, 0, 0);
+ my @err = grep(!m{mkdir /dev/null\b}, <$err>);
+ is_deeply(\@err, [], "no errors @$out");
+ }
+};
+
+# first with no extra environment, then with XDG_RUNTIME_DIR set to
+# /dev/null
+$do_test->();
+$do_test->({XDG_RUNTIME_DIR => '/dev/null'});
+
+done_testing;
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Idle client memory usage test, particularly after EXAMINE when
# Message Sequence Numbers are loaded
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Note: this may be altered as-needed to demonstrate improvements.
# See history in git for this file.
#!perl -w
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use Test::More;
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Integration test to validate compression.
-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
my $ctx = {
env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' },
- -inbox => $ibx,
+ ibx => $ibx,
www => Plack::Util::inline_object(style => sub {''}),
};
my ($mime, $res, $oid, $type);
-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# real-world testing of search threading
diag "enquire: ".timestr($elapsed)." for $n";
$elapsed = timeit(1, sub {
- PublicInbox::View::thread_results({-inbox => $ibx}, $msgs);
+ PublicInbox::View::thread_results({ibx => $ibx}, $msgs);
});
diag "thread_results ".timestr($elapsed);
#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use Test::More;