From: Eric Wong Date: Wed, 22 Apr 2020 06:44:20 +0000 (+0000) Subject: Merge branch '1.4.0-tag-merge' X-Git-Tag: v1.5.0~50 X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=commitdiff_plain;h=4bebfa0c80ad7f4596a7dca98b39121470a42af0;hp=6d33ac52f252dc2d3a9e11ee7f7c46e1e1458e69 Merge branch '1.4.0-tag-merge' * 1.4.0-tag-merge: public-inbox 1.4.0 --- diff --git a/Documentation/RelNotes/v1.4.0.eml b/Documentation/RelNotes/v1.4.0.eml index ae7c1457..845895b5 100644 --- a/Documentation/RelNotes/v1.4.0.eml +++ b/Documentation/RelNotes/v1.4.0.eml @@ -1,9 +1,11 @@ +Date: Fri, 17 Apr 2020 08:48:59 +0000 From: Eric Wong To: meta@public-inbox.org Subject: [ANNOUNCE] public-inbox 1.4.0 -Message-Id: <20200417084800.public-inbox-1.4.0-rele@sed> +Message-ID: <20200417084800.public-inbox-1.4.0-rele@sed> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 +Content-Disposition: inline This release focuses on reproducibility improvements and bugfixes for corner-cases. Busy instances of PublicInbox::WWW diff --git a/Documentation/RelNotes/v1.5.0.eml b/Documentation/RelNotes/v1.5.0.eml new file mode 100644 index 00000000..4b01eef2 --- /dev/null +++ b/Documentation/RelNotes/v1.5.0.eml @@ -0,0 +1,13 @@ +From: Eric Wong +To: meta@public-inbox.org +Subject: [WIP] public-inbox 1.5.0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=utf-8 +Content-Disposition: inline + +TBD + +Please report bugs via plain-text mail to: meta@public-inbox.org + +See archives at https://public-inbox.org/meta/ for all history. +See https://public-inbox.org/TODO for what the future holds. 
diff --git a/Documentation/mknews.perl b/Documentation/mknews.perl index adb83832..a9dede00 100755 --- a/Documentation/mknews.perl +++ b/Documentation/mknews.perl @@ -127,7 +127,7 @@ sub atom_start { require PublicInbox::WwwAtomStream; # WwwAtomStream stats this dir for mtime my $astream = PublicInbox::WwwAtomStream->new($ctx); - delete $ctx->{emit_header}; + delete $astream->{emit_header}; my $ibx = $ctx->{-inbox}; my $title = PublicInbox::WwwAtomStream::title_tag($ibx->description); my $updated = PublicInbox::WwwAtomStream::feed_updated(gmtime($mtime)); diff --git a/Documentation/public-inbox-config.pod b/Documentation/public-inbox-config.pod index 4c9994dc..f3b6c8b7 100644 --- a/Documentation/public-inbox-config.pod +++ b/Documentation/public-inbox-config.pod @@ -88,7 +88,8 @@ Default: none; only for L users watchheader = List-Id: If specified, L will only process mail matching -the given header. Multiple values are not currently supported. +the given header. If specified multiple times, mail will be processed +if it matches any of the values. Default: none; only for L users @@ -287,6 +288,10 @@ or /usr/share/cgit/ See L +=item publicinbox.indexMaxSize + +See L + =item publicinbox.wwwlisting Enable a HTML listing style when the root path of the URL '/' is accessed. diff --git a/Documentation/public-inbox-index.pod b/Documentation/public-inbox-index.pod index dede5d2e..398ac516 100644 --- a/Documentation/public-inbox-index.pod +++ b/Documentation/public-inbox-index.pod @@ -66,6 +66,12 @@ is detected. This is intended to be used in mirrors after running L or L to ensure data is expunged from mirrors. +=item --max-size SIZE + +Sets or overrides L on a +per-invocation basis. See L +below. + =back =head1 FILES @@ -76,6 +82,23 @@ C<$GIT_DIR/public-inbox/> directory. v2 inboxes are described in L. +=head1 CONFIGURATION + +=over 8 + +=item publicinbox.indexMaxSize + +Prevents indexing of messages larger than the specified size +value. 
A single suffix modifier of C<k>, C<m> or C<g> is +supported, thus a value of C<1m> prevents indexing of +messages larger than one megabyte. + +This is useful for avoiding memory exhaustion in mirrors. + +Default: none + +=back + =head1 ENVIRONMENT =over 8 diff --git a/Documentation/technical/data_structures.txt b/Documentation/technical/data_structures.txt index 08dfc7ac..46d5acff 100644 --- a/Documentation/technical/data_structures.txt +++ b/Documentation/technical/data_structures.txt @@ -61,15 +61,13 @@ Per-message classes There may be hundreds or thousands of these objects in memory at-a-time, so fields are pruned if unneeded. -* PublicInbox::SearchThread::Msg - container for message threading +* PublicInbox::SearchThread::Msg - subclass of Smsg Common abbreviation: $cont or $node Used by: PublicInbox::WWW - The container we use for a non-recursive[1] variant of + The structure we use for a non-recursive[1] variant of JWZ's algorithm: . - This holds a $smsg and is only used for message threading. - This wrapper class may go away in the future and handled - directly by PublicInbox::Smsg to save memory. + Nowadays, this is a re-blessed $smsg with additional fields. As with $smsg objects, there may be hundreds or thousands of these objects in memory at-a-time. diff --git a/Documentation/technical/memory.txt b/Documentation/technical/memory.txt new file mode 100644 index 00000000..bb1c92fd --- /dev/null +++ b/Documentation/technical/memory.txt @@ -0,0 +1,50 @@ +semi-automatic memory management in public-inbox +------------------------------------------------ + +The majority of public-inbox is implemented in Perl 5, a +language and interpreter not particularly known for being +memory-efficient. + +We strive to keep processes small to improve locality, allow +the kernel to cache more files, and to be a good neighbor to +other processes running on the machine. 
Taking advantage of +automatic reference counting (ARC) in Perl allows us to +deterministically release memory back to the heap. + +We start with a simple data model with few circular +references. This both eases human understanding and reduces +the likelihood of bugs. + +Knowing the relative sizes and quantities of our data +structures, we limit the scope of allocations as much as +possible and keep large allocations shortest-lived. This +minimizes both the cognitive overhead on humans in addition +to reducing memory pressure on the machine. + +Short-lived non-immortal closures (aka "anonymous subs") are +avoided in long-running daemons unless required for +compatibility with PSGI. Closures are memory-intensive and +may make allocation lifetimes less obvious to humans. They +are also the source of memory leaks in older versions of +Perl, including 5.16.3 found in enterprise distros. + +We also use Perl's `delete' and `undef' built-ins to drop +reference counts sooner than scope allows. These functions +are required to break the few reference cycles we have that +would otherwise lead to leaks. + +Of note, `undef' may be used in two ways: + +1. to free(3) the underlying buffer: + + undef $scalar; + +2. to reset a buffer but reduce realloc(3) on subsequent growth: + + $scalar = ""; # useful when repeated appending + $scalar = undef; # usually not needed + +In the future, our internal data model will be further +flattened and simplified to reduce the overhead imposed by +small objects. Large allocations may also be avoided by +optionally using Inline::C. 
diff --git a/Documentation/txt2pre b/Documentation/txt2pre index c3a7657e..e9b5eb7d 100755 --- a/Documentation/txt2pre +++ b/Documentation/txt2pre @@ -39,9 +39,9 @@ for (qw[copydatabase(1) xapian-compact(1)]) { $xurls{$_} = ".$n.1.html" } -for (qw[flock(2) setrlimit(2) vfork(2)]) { +for (qw[make(1) flock(2) setrlimit(2) vfork(2) tmpfs(5)]) { my ($n, $s) = (/([\w\-]+)\((\d)\)/); - $xurls{$_} = "http://www.man7.org/linux/man-pages/man2/$n.$s.html"; + $xurls{$_} = "http://www.man7.org/linux/man-pages/man$s/$n.$s.html"; } for (qw[git(1) @@ -79,9 +79,10 @@ $xurls{'grok-pull'} = 'https://git.kernel.org/pub/scm/utils/grokmirror/grokmirror.git' . '/tree/man/grok-pull.1.rst'; $xurls{'git-filter-repo(1)'} = 'https://github.com/newren/git-filter-repo'. - './blob/master/Documentation/git-filter-repo.txt'; + '/blob/master/Documentation/git-filter-repo.txt'; $xurls{'ssoma(1)'} = 'https://ssoma.public-inbox.org/ssoma.txt'; $xurls{'cgitrc(5)'} = 'https://git.zx2c4.com/cgit/tree/cgitrc.5.txt'; +$xurls{'prove(1)'} = 'https://perldoc.perl.org/prove.html'; my $str = do { local $/; }; my ($title) = ($str =~ /\A([^\n]+)/); diff --git a/HACKING b/HACKING index cceb686f..74a3096f 100644 --- a/HACKING +++ b/HACKING @@ -59,6 +59,24 @@ directory for design decisions made during development. See Documentation/technical/ in the source tree for more details on specific topics, in particular data_structures.txt +Faster tests +------------ + +The `make test' target provided by MakeMaker does not run in +parallel. Our `make check' target supports parallel runs, and +it also creates a `.prove' file to optimize `make check-run'. + +The prove(1) command (distributed with Perl) may also be used +for finer-grained testing: prove -bvw t/foo.t + +If using a make(1) (e.g. GNU make) with `include' support, the +`config.mak' Makefile snippet can be used to set environment +variables such as PERL_INLINE_DIRECTORY and TMPDIR. 
+ +With PERL_INLINE_DIRECTORY set to enable Inline::C support and +TMPDIR pointed to a tmpfs(5) mount, `make check-run' takes 6-10s +(load-dependent) on a busy workstation built in 2010. + Perl notes ---------- diff --git a/INSTALL b/INSTALL index 3984df71..2dd7dcff 100644 --- a/INSTALL +++ b/INSTALL @@ -191,7 +191,7 @@ install the system (into /usr/local) with: perl Makefile.PL make - make test + make test # see HACKING for faster tests for hackers make install # root permissions may be needed When installing Search::Xapian, make sure the underlying Xapian diff --git a/MANIFEST b/MANIFEST index cb7d52a7..b06aa679 100644 --- a/MANIFEST +++ b/MANIFEST @@ -8,6 +8,7 @@ Documentation/RelNotes/v1.1.0-pre1.eml Documentation/RelNotes/v1.2.0.eml Documentation/RelNotes/v1.3.0.eml Documentation/RelNotes/v1.4.0.eml +Documentation/RelNotes/v1.5.0.eml Documentation/dc-dlvr-spam-flow.txt Documentation/design_notes.txt Documentation/design_www.txt @@ -40,6 +41,7 @@ Documentation/reproducibility.txt Documentation/standards.perl Documentation/technical/data_structures.txt Documentation/technical/ds.txt +Documentation/technical/memory.txt Documentation/technical/whyperl.txt Documentation/txt2pre HACKING @@ -252,7 +254,7 @@ t/index-git-times.t t/indexlevels-mirror-v1.t t/indexlevels-mirror.t t/init.t -t/iso-2202-jp.mbox +t/iso-2202-jp.eml t/linkify.t t/main-bin/spamc t/mda.t @@ -293,7 +295,7 @@ t/spamcheck_spamc.t t/spawn.t t/thread-cycle.t t/time.t -t/utf8.mbox +t/utf8.eml t/v1-add-remove-add.t t/v1reindex.t t/v2-add-remove-add.t @@ -305,6 +307,7 @@ t/view.t t/watch_filter_rubylang.t t/watch_maildir.t t/watch_maildir_v2.t +t/watch_multiple_headers.t t/www_altid.t t/www_listing.t t/www_static.t diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm index 336b7d4c..62ddbe82 100644 --- a/lib/PublicInbox/Admin.pm +++ b/lib/PublicInbox/Admin.pm @@ -71,7 +71,7 @@ sub resolve_inboxes ($;$$) { my ($argv, $opt, $cfg) = @_; $opt ||= {}; - $cfg //= eval { PublicInbox::Config->new }; 
+ $cfg //= PublicInbox::Config->new; if ($opt->{all}) { my $cfgfile = PublicInbox::Config::default_file(); $cfg or die "--all specified, but $cfgfile not readable\n"; @@ -234,4 +234,15 @@ sub progress_prepare ($) { } } +# same unit factors as git: +sub parse_unsigned ($) { + my ($max_size) = @_; + + $$max_size =~ /\A([0-9]+)([kmg])?\z/i or return; + my ($n, $unit_factor) = ($1, $2 // ''); + my %u = ( k => 1024, m => 1024**2, g => 1024**3 ); + $$max_size = $n * ($u{lc($unit_factor)} // 1); + 1; +} + 1; diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index 917939ca..458f29b2 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -367,7 +367,7 @@ sub _fill { my $ibx = {}; foreach my $k (qw(inboxdir filter newsgroup - watch watchheader httpbackendmax + watch httpbackendmax replyto feedmax nntpserver indexlevel)) { my $v = $self->{"$pfx.$k"}; $ibx->{$k} = $v if defined $v; @@ -388,7 +388,7 @@ sub _fill { # TODO: more arrays, we should support multi-value for # more things to encourage decentralization foreach my $k (qw(address altid nntpmirror coderepo hide listid url - infourl)) { + infourl watchheader)) { if (defined(my $v = $self->{"$pfx.$k"})) { $ibx->{$k} = _array($v); } diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index c72c1e92..95d654f6 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -440,14 +440,31 @@ sub run_die ($;$$) { $? == 0 or die join(' ', @$cmd) . 
" failed: $?\n"; } +my @INIT_FILES = ('HEAD' => "ref: refs/heads/master\n", + 'description' => < <{git}->{git_dir} if ref($dir); + require File::Path; + File::Path::mkpath([ map { "$dir/$_" } qw(objects/info refs/heads) ]); + for (my $i = 0; $i < @INIT_FILES; $i++) { + my $f = $dir.'/'.$INIT_FILES[$i++]; + next if -f $f; + open my $fh, '>', $f or die "open $f: $!"; + print $fh $INIT_FILES[$i] or die "print $f: $!"; + close $fh or die "close $f: $!"; + } } sub done { diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 95ffd039..186eb420 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -82,7 +82,7 @@ sub _set_uint ($$$) { sub _set_limiter ($$$) { my ($self, $pi_config, $pfx) = @_; my $lkey = "-${pfx}_limiter"; - $self->{$lkey} ||= eval { + $self->{$lkey} ||= do { # full key is: publicinbox.$NAME.httpbackendmax my $mkey = $pfx.'max'; my $val = $self->{$mkey} or return; @@ -130,7 +130,7 @@ sub version { $_[0]->{version} // 1 } sub git_epoch { my ($self, $epoch) = @_; $self->version == 2 or return; - $self->{"$epoch.git"} ||= eval { + $self->{"$epoch.git"} ||= do { my $git_dir = "$self->{inboxdir}/git/$epoch.git"; my $g = PublicInbox::Git->new($git_dir); $g->{-httpbackend_limiter} = $self->{-httpbackend_limiter}; @@ -141,7 +141,7 @@ sub git_epoch { sub git { my ($self) = @_; - $self->{git} ||= eval { + $self->{git} ||= do { my $git_dir = $self->{inboxdir}; $git_dir .= '/all.git' if $self->version == 2; my $g = PublicInbox::Git->new($git_dir); @@ -219,19 +219,22 @@ sub try_cat { sub description { my ($self) = @_; - $self->{description} //= do { + ($self->{description} //= do { my $desc = try_cat("$self->{inboxdir}/description"); local $/ = "\n"; chomp $desc; $desc =~ s/\s+/ /smg; - $desc eq '' ? '($INBOX_DIR/description missing)' : $desc; - }; + $desc eq '' ? 
undef : $desc; + }) // '($INBOX_DIR/description missing)'; } sub cloneurl { my ($self) = @_; - $self->{cloneurl} //= - [ split(/\s+/s, try_cat("$self->{inboxdir}/cloneurl")) ]; + ($self->{cloneurl} //= do { + my $s = try_cat("$self->{inboxdir}/cloneurl"); + my @urls = split(/\s+/s, $s); + scalar(@urls) ? \@urls : undef + }) // []; } sub base_url { @@ -308,9 +311,7 @@ sub nntp_usable { # for v1 users w/o SQLite only sub msg_by_path ($$;$) { my ($self, $path, $ref) = @_; - my $str = git($self)->cat_file('HEAD:'.$path, $ref); - $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s if $str; - $str; + git($self)->cat_file('HEAD:'.$path, $ref); } sub msg_by_smsg ($$;$) { @@ -321,9 +322,7 @@ sub msg_by_smsg ($$;$) { return unless defined $smsg; defined(my $blob = $smsg->{blob}) or return; - my $str = git($self)->cat_file($blob, $ref); - $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s if $str; - $str; + git($self)->cat_file($blob, $ref); } sub smsg_mime { diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm index f2ba21fc..31aa76c6 100644 --- a/lib/PublicInbox/InboxWritable.pm +++ b/lib/PublicInbox/InboxWritable.pm @@ -111,7 +111,7 @@ sub is_maildir_path ($) { (is_maildir_basename($p[-1]) && -f $path) ? 
1 : 0; } -sub maildir_path_load ($) { +sub mime_from_path ($) { my ($path) = @_; if (open my $fh, '<', $path) { local $/; @@ -138,7 +138,7 @@ sub import_maildir { opendir my $dh, "$dir/$sub" or die "opendir $dir/$sub: $!\n"; while (defined(my $fn = readdir($dh))) { next unless is_maildir_basename($fn); - my $mime = maildir_path_load("$dir/$fn") or next; + my $mime = mime_from_path("$dir/$fn") or next; if (my $filter = $self->filter($im)) { my $ret = $filter->scrub($mime) or return; diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index d5beceaf..9995140c 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -106,8 +106,11 @@ sub msg_hdr ($$;$) { 'List-Post', "{-primary_address}>", ); my $crlf = $header_obj->crlf; - my $buf = "From mboxrd\@z Thu Jan 1 00:00:00 1970\n" . - $header_obj->as_string; + my $buf = $header_obj->as_string; + # fixup old bug from import (pre-a0c07cba0e5d8b6a) + $buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; + $buf = "From mboxrd\@z Thu Jan 1 00:00:00 1970" . $crlf . $buf; + for (my $i = 0; $i < @append; $i += 2) { my $k = $append[$i]; my $v = $append[$i + 1]; diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index d1f75f6f..c79f198b 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -506,6 +506,8 @@ sub set_art { sub msg_hdr_write ($$$) { my ($self, $hdr, $body_follows) = @_; $hdr = $hdr->as_string; + # fixup old bug from import (pre-a0c07cba0e5d8b6a) + $hdr =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; utf8::encode($hdr); $hdr =~ s/(?{hdr_buf}}, $filter); $wcb->($r); } - - # Workaround a leak under Perl 5.16.3 when combined with - # Plack::Middleware::Deflater: - $wcb = undef; } sub psgi_return_start { # may run later, much later... 
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 05689941..25118f43 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -64,6 +64,7 @@ sub new { $self->{lock_path} = "$inboxdir/ssoma.lock"; my $dir = $self->xdir; $self->{over} = PublicInbox::OverIdx->new("$dir/over.sqlite3"); + $self->{index_max_size} = $ibx->{index_max_size}; } elsif ($version == 2) { defined $shard or die "shard is required for v2\n"; # shard is a number @@ -551,13 +552,9 @@ sub unindex_both { sub do_cat_mail { my ($git, $blob, $sizeref) = @_; - my $mime = eval { - my $str = $git->cat_file($blob, $sizeref); - # fixup bugs from import: - $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; - PublicInbox::MIME->new($str); - }; - $@ ? undef : $mime; + my $str = $git->cat_file($blob, $sizeref) or + die "BUG: $blob not found in $git->{git_dir}"; + PublicInbox::MIME->new($str); } # called by public-inbox-index @@ -576,6 +573,16 @@ sub batch_adjust ($$$$$) { } } +sub too_big ($$$) { + my ($self, $git, $oid) = @_; + my $max_size = $self->{index_max_size} or return; + my (undef, undef, $size) = $git->check($oid); + die "E: bad $oid in $git->{git_dir}\n" if !defined($size); + return if $size <= $max_size; + warn "W: skipping $oid ($size > $max_size)\n"; + 1; +} + # only for v1 sub read_log { my ($self, $log, $add_cb, $del_cb, $batch_cb) = @_; @@ -602,7 +609,8 @@ sub read_log { } next; } - my $mime = do_cat_mail($git, $blob, \$bytes) or next; + next if too_big($self, $git, $blob); + my $mime = do_cat_mail($git, $blob, \$bytes); my $smsg = bless {}, 'PublicInbox::Smsg'; batch_adjust(\$max, $bytes, $batch_cb, $latest, ++$nr); $smsg->{blob} = $blob; @@ -610,7 +618,7 @@ sub read_log { $add_cb->($self, $mime, $smsg); } elsif ($line =~ /$delmsg/o) { my $blob = $1; - $D{$blob} = 1; + $D{$blob} = 1 unless too_big($self, $git, $blob); } elsif ($line =~ /^commit ($h40)/o) { $latest = $1; $newest ||= $latest; @@ -623,7 +631,7 @@ sub read_log { close($log) or die "git log 
failed: \$?=$?"; # get the leftovers foreach my $blob (keys %D) { - my $mime = do_cat_mail($git, $blob, \$bytes) or next; + my $mime = do_cat_mail($git, $blob, \$bytes); $del_cb->($self, $mime); } $batch_cb->($nr, $latest, $newest); diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm index 38d1aa6e..60f692b2 100644 --- a/lib/PublicInbox/SearchThread.pm +++ b/lib/PublicInbox/SearchThread.pm @@ -24,7 +24,16 @@ use PublicInbox::MID qw($MID_EXTRACT); sub thread { my ($msgs, $ordersub, $ctx) = @_; - my $id_table = {}; + + # A. put all current $msgs (non-ghosts) into %id_table + my %id_table = map {; + # this delete saves around 4K across 1K messages + # TODO: move this to a more appropriate place, breaks tests + # if we do it during psgi_cull + delete $_->{num}; + + $_->{mid} => PublicInbox::SearchThread::Msg::cast($_); + } @$msgs; # Sadly, we sort here anyways since the fill-in-the-blanks References: # can be shakier if somebody used In-Reply-To with multiple, disparate @@ -32,36 +41,21 @@ sub thread { # always determine ordering when somebody uses multiple In-Reply-To. # We'll trust the client Date: header here instead of the Received: # time since this is for display (and not retrieval) - _add_message($id_table, $_) for sort { $a->{ds} <=> $b->{ds} } @$msgs; + _set_parent(\%id_table, $_) for sort { $a->{ds} <=> $b->{ds} } @$msgs; my $ibx = $ctx->{-inbox}; my $rootset = [ grep { !delete($_->{parent}) && $_->visible($ibx) - } values %$id_table ]; - $id_table = undef; + } values %id_table ]; $rootset = $ordersub->($rootset); $_->order_children($ordersub, $ctx) for @$rootset; $rootset; } -sub _get_cont_for_id ($$) { - my ($id_table, $mid) = @_; - $id_table->{$mid} ||= PublicInbox::SearchThread::Msg->new($mid); -} - -sub _add_message ($$) { - my ($id_table, $smsg) = @_; - - # A. if id_table... 
- my $this = _get_cont_for_id($id_table, $smsg->{mid}); - $this->{smsg} = $smsg; - - # saves around 4K across 1K messages - # TODO: move this to a more appropriate place, breaks tests - # if we do it during psgi_cull - delete $smsg->{num}; +sub _set_parent ($$) { + my ($id_table, $this) = @_; # B. For each element in the message's References field: - defined(my $refs = $smsg->{references}) or return; + defined(my $refs = $this->{references}) or return; # This loop exists to help fill in gaps left from missing # messages. It is not needed in a perfect world where @@ -70,7 +64,8 @@ sub _add_message ($$) { my $prev; foreach my $ref ($refs =~ m/$MID_EXTRACT/go) { # Find a Container object for the given Message-ID - my $cont = _get_cont_for_id($id_table, $ref); + my $cont = $id_table->{$ref} //= + PublicInbox::SearchThread::Msg::ghost($ref); # Link the References field's Containers together in # the order implied by the References header @@ -96,22 +91,31 @@ sub _add_message ($$) { } package PublicInbox::SearchThread::Msg; +use base qw(PublicInbox::Smsg); use strict; use warnings; use Carp qw(croak); -sub new { +# declare a ghost smsg (determined by absence of {blob}) +sub ghost { bless { - id => $_[1], + mid => $_[0], children => {}, # becomes an array when sorted by ->order(...) - }, $_[0]; + }, __PACKAGE__; +} + +# give a existing smsg the methods of this class +sub cast { + my ($smsg) = @_; + $smsg->{children} = {}; + bless $smsg, __PACKAGE__; } sub topmost { my ($self) = @_; my @q = ($self); while (my $cont = shift @q) { - return $cont if $cont->{smsg}; + return $cont if $cont->{blob}; push @q, values %{$cont->{children}}; } undef; @@ -122,7 +126,7 @@ sub add_child { croak "Cowardly refusing to become my own parent: $self" if $self == $child; - my $cid = $child->{id}; + my $cid = $child->{mid}; # reparenting: if (defined(my $parent = $child->{parent})) { @@ -148,8 +152,13 @@ sub has_descendent { # being folded/mangled by a MUA, and not a missing message. 
sub visible ($$) { my ($self, $ibx) = @_; - ($self->{smsg} ||= eval { $ibx->smsg_by_mid($self->{id}) }) || - (scalar values %{$self->{children}}); + return 1 if $self->{blob}; + if (my $by_mid = $ibx->smsg_by_mid($self->{mid})) { + %$self = (%$self, %$by_mid); + 1; + } else { + (scalar values %{$self->{children}}); + } } sub order_children { diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index 4fbf59ef..4336e4d9 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -243,8 +243,8 @@ sub search_nav_bot { sub sort_relevance { [ sort { - (eval { $b->topmost->{smsg}->{pct} } // 0) <=> - (eval { $a->topmost->{smsg}->{pct} } // 0) + (eval { $b->topmost->{pct} } // 0) <=> + (eval { $a->topmost->{pct} } // 0) } @{$_[0]} ] } diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index e9efbac7..b50871e8 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -9,7 +9,7 @@ use Fcntl qw(FD_CLOEXEC F_SETFD F_GETFD :seek); use POSIX qw(dup2); use IO::Socket::INET; our @EXPORT = qw(tmpdir tcp_server tcp_connect require_git require_mods - run_script start_script key2sub); + run_script start_script key2sub xsys xqx); sub tmpdir (;$) { my ($base) = @_; @@ -87,7 +87,7 @@ sub require_mods { sub key2script ($) { my ($key) = @_; - return $key if (index($key, '/') >= 0); + return $key if ($key eq 'git' || index($key, '/') >= 0); # n.b. we may have scripts which don't start with "public-inbox" in # the future: $key =~ s/\A([-\.])/public-inbox$1/; @@ -244,6 +244,28 @@ sub run_script ($;$$) { sub wait_for_tail () { sleep(2) } +# like system() built-in, but uses spawn() for env/rdr + vfork +sub xsys { + my ($cmd, $env, $rdr) = @_; + if (ref($cmd)) { + $rdr ||= {}; + } else { + $cmd = [ @_ ]; + $env = undef; + $rdr = {}; + } + run_script($cmd, $env, { %$rdr, run_mode => 0 }); + $? 
>> 8 +} + +# like `backtick` or qx{} op, but uses spawn() for env/rdr + vfork +sub xqx { + my ($cmd, $env, $rdr) = @_; + $rdr //= {}; + run_script($cmd, $env, { %$rdr, run_mode => 0, 1 => \(my $out) }); + wantarray ? split(/^/m, $out) : $out; +} + sub start_script { my ($cmd, $env, $opt) = @_; my ($key, @argv) = @$cmd; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 1c78ef24..01b8bed6 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -120,6 +120,7 @@ sub new { last_commit => [], # git repo -> commit }; $self->{shards} = count_shards($self) || nproc_shards($creat); + $self->{index_max_size} = $v2ibx->{index_max_size}; bless $self, $class; } @@ -730,9 +731,8 @@ sub fill_alternates ($$) { sub git_init { my ($self, $epoch) = @_; my $git_dir = "$self->{-inbox}->{inboxdir}/git/$epoch.git"; - my @cmd = (qw(git init --bare -q), $git_dir); - PublicInbox::Import::run_die(\@cmd); - @cmd = (qw/git config/, "--file=$git_dir/config", + PublicInbox::Import::init_bare($git_dir); + my @cmd = (qw/git config/, "--file=$git_dir/config", 'include.path', '../../all.git/config'); PublicInbox::Import::run_die(\@cmd); fill_alternates($self, $epoch); @@ -868,6 +868,7 @@ sub atfork_child { sub mark_deleted ($$$$) { my ($self, $sync, $git, $oid) = @_; + return if PublicInbox::SearchIdx::too_big($self, $git, $oid); my $msgref = $git->cat_file($oid); my $mime = PublicInbox::MIME->new($$msgref); my $mids = mids($mime->header_obj); @@ -980,18 +981,6 @@ sub check_unindexed ($$$) { } } -# reuse Msgmap to store num => oid mapping (rather than num => mid) -sub multi_mid_q_new () { - my ($fh, $fn) = tempfile('multi_mid-XXXXXXX', EXLOCK => 0, TMPDIR => 1); - my $multi_mid = PublicInbox::Msgmap->new_file($fn, 1); - $multi_mid->{dbh}->do('PRAGMA synchronous = OFF'); - # for Msgmap->DESTROY: - $multi_mid->{tmp_name} = $fn; - $multi_mid->{pid} = $$; - close $fh or die "failed to close $fn: $!"; - $multi_mid -} - sub multi_mid_q_push ($$$) { 
my ($self, $sync, $oid) = @_; my $multi_mid = $sync->{multi_mid} //= PublicInbox::MultiMidQueue->new; @@ -1006,6 +995,7 @@ sub multi_mid_q_push ($$$) { sub reindex_oid ($$$$) { my ($self, $sync, $git, $oid) = @_; + return if PublicInbox::SearchIdx::too_big($self, $git, $oid); my ($num, $mid0, $len); my $msgref = $git->cat_file($oid, \$len); return if $len == 0; # purged diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index b6d7acaf..9b62ed3c 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -297,11 +297,9 @@ sub _th_index_lite { my $nr_c = scalar @$children; my $nr_s = 0; my $siblings; - if (my $smsg = $node->{smsg}) { - # delete saves about 200KB on a 1K message thread - if (my $refs = delete $smsg->{references}) { - ($$irt) = ($refs =~ m/$MID_EXTRACT\z/o); - } + # delete saves about 200KB on a 1K message thread + if (my $refs = delete $node->{references}) { + ($$irt) = ($refs =~ m/$MID_EXTRACT\z/o); } my $irt_map = $mapping->{$$irt} if defined $$irt; if (defined $irt_map) { @@ -310,12 +308,12 @@ sub _th_index_lite { $rv .= $pad . $irt_map->[0]; if ($idx > 0) { my $prev = $siblings->[$idx - 1]; - my $pmid = $prev->{id}; + my $pmid = $prev->{mid}; if ($idx > 2) { my $s = ($idx - 1). ' preceding siblings ...'; $rv .= pad_link($pmid, $level, $s); } elsif ($idx == 2) { - my $ppmid = $siblings->[0]->{id}; + my $ppmid = $siblings->[0]->{mid}; $rv .= $pad . $mapping->{$ppmid}->[0]; } $rv .= $pad . $mapping->{$pmid}->[0]; @@ -328,26 +326,26 @@ sub _th_index_lite { $attr =~ s!]+>([^<]+)!$1!s; # no point linking to self $rv .= "@ $attr"; if ($nr_c) { - my $cmid = $children->[0]->{id}; + my $cmid = $children->[0]->{mid}; $rv .= $pad . $mapping->{$cmid}->[0]; if ($nr_c > 2) { my $s = ($nr_c - 1). ' more replies'; $rv .= pad_link($cmid, $level + 1, $s); } elsif (my $cn = $children->[1]) { - $rv .= $pad . $mapping->{$cn->{id}}->[0]; + $rv .= $pad . 
$mapping->{$cn->{mid}}->[0]; } } my $next = $siblings->[$idx+1] if $siblings && $idx >= 0; if ($next) { - my $nmid = $next->{id}; + my $nmid = $next->{mid}; $rv .= $pad . $mapping->{$nmid}->[0]; my $nnext = $nr_s - $idx; if ($nnext > 2) { my $s = ($nnext - 1).' subsequent siblings'; $rv .= pad_link($nmid, $level, $s); } elsif (my $nn = $siblings->[$idx + 2]) { - $rv .= $pad . $mapping->{$nn->{id}}->[0]; + $rv .= $pad . $mapping->{$nn->{mid}}->[0]; } } $rv .= $pad ."$s_s, $s_c; $ctx->{s_nr}\n"; @@ -369,7 +367,7 @@ sub walk_thread ($$$) { sub pre_thread { # walk_thread callback my ($ctx, $level, $node, $idx) = @_; - $ctx->{mapping}->{$node->{id}} = [ '', $node, $idx, $level ]; + $ctx->{mapping}->{$node->{mid}} = [ '', $node, $idx, $level ]; skel_dump($ctx, $level, $node); } @@ -388,8 +386,8 @@ sub stream_thread_i { # PublicInbox::WwwStream::getline callback my $node = shift @$q or next; my $cl = $level + 1; unshift @$q, map { ($cl, $_) } @{$node->{children}}; - if (my $smsg = $ctx->{-inbox}->smsg_mime($node->{smsg})) { - return thread_index_entry($ctx, $level, $smsg); + if ($ctx->{-inbox}->smsg_mime($node)) { + return thread_index_entry($ctx, $level, $node); } else { return ghost_index_entry($ctx, $level, $node); } @@ -407,7 +405,7 @@ sub stream_thread ($$) { my $node = shift @q or next; my $cl = $level + 1; unshift @q, map { ($cl, $_) } @{$node->{children}}; - $smsg = $ibx->smsg_mime($node->{smsg}) and last; + $smsg = $ibx->smsg_mime($node) and last; } return missing_thread($ctx) unless $smsg; @@ -825,7 +823,7 @@ sub indent_for { sub find_mid_root { my ($ctx, $level, $node, $idx) = @_; ++$ctx->{root_idx} if $level == 0; - if ($node->{id} eq $ctx->{mid}) { + if ($node->{mid} eq $ctx->{mid}) { $ctx->{found_mid_at} = $ctx->{root_idx}; return 0; } @@ -899,8 +897,8 @@ sub dedupe_subject { } sub skel_dump { # walk_thread callback - my ($ctx, $level, $node) = @_; - my $smsg = $node->{smsg} or return _skel_ghost($ctx, $level, $node); + my ($ctx, $level, $smsg) = @_; + 
$smsg->{blob} or return _skel_ghost($ctx, $level, $smsg); my $skel = $ctx->{skel}; my $cur = $ctx->{cur}; @@ -983,7 +981,7 @@ sub skel_dump { # walk_thread callback sub _skel_ghost { my ($ctx, $level, $node) = @_; - my $mid = $node->{id}; + my $mid = $node->{mid}; my $d = ' [not found] '; $d .= ' ' if exists $ctx->{searchview}; $d .= indent_for($level) . th_pfx($level); @@ -1006,18 +1004,23 @@ sub _skel_ghost { sub sort_ds { [ sort { - (eval { $a->topmost->{smsg}->{ds} } || 0) <=> - (eval { $b->topmost->{smsg}->{ds} } || 0) + (eval { $a->topmost->{ds} } || 0) <=> + (eval { $b->topmost->{ds} } || 0) } @{$_[0]} ]; } # accumulate recent topics if search is supported # returns 200 if done, 404 if not sub acc_topic { # walk_thread callback - my ($ctx, $level, $node) = @_; - my $mid = $node->{id}; - my $smsg = $node->{smsg} // $ctx->{-inbox}->smsg_by_mid($mid); - if ($smsg) { + my ($ctx, $level, $smsg) = @_; + my $mid = $smsg->{mid}; + my $has_blob = $smsg->{blob} // do { + if (my $by_mid = $ctx->{-inbox}->smsg_by_mid($mid)) { + %$smsg = (%$smsg, %$by_mid); + 1; + } + }; + if ($has_blob) { my $subj = subject_normalized($smsg->{subject}); $subj = '(no subject)' if $subj eq ''; my $ds = $smsg->{ds}; @@ -1208,7 +1211,7 @@ sub thread_adj_level { sub ghost_index_entry { my ($ctx, $level, $node) = @_; my ($beg, $end) = thread_adj_level($ctx, $level); - $beg . '
'. ghost_parent($ctx->{-upfx}, $node->{id})
+	$beg . '
'. ghost_parent($ctx->{-upfx}, $node->{mid})
 		. '
' . $end; } diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm index e2024640..7b9e8915 100644 --- a/lib/PublicInbox/WatchMaildir.pm +++ b/lib/PublicInbox/WatchMaildir.pm @@ -11,7 +11,7 @@ use PublicInbox::InboxWritable; use File::Temp 0.19 (); # 0.19 for ->newdir use PublicInbox::Filter::Base qw(REJECT); use PublicInbox::Spamcheck; -*maildir_path_load = *PublicInbox::InboxWritable::maildir_path_load; +*mime_from_path = \&PublicInbox::InboxWritable::mime_from_path; sub new { my ($class, $config) = @_; @@ -59,9 +59,11 @@ sub new { my $watch = $ibx->{watch} or return; if (is_maildir($watch)) { my $watch_hdrs = []; - if (my $wh = $ibx->{watchheader}) { - my ($k, $v) = split(/:/, $wh, 2); - push @$watch_hdrs, [ $k, qr/\Q$v\E/ ]; + if (my $whs = $ibx->{watchheader}) { + for (@$whs) { + my ($k, $v) = split(/:/, $_, 2); + push @$watch_hdrs, [ $k, qr/\Q$v\E/ ]; + } } if (my $list_ids = $ibx->{listid}) { for (@$list_ids) { @@ -123,7 +125,7 @@ sub _remove_spam { my ($self, $path) = @_; # path must be marked as (S)een $path =~ /:2,[A-R]*S[T-Za-z]*\z/ or return; - my $mime = maildir_path_load($path) or return; + my $mime = mime_from_path($path) or return; $self->{config}->each_inbox(sub { my ($ibx) = @_; eval { @@ -165,7 +167,7 @@ sub _try_path { $warn_cb->(@_); }; foreach my $ibx (@$inboxes) { - my $mime = maildir_path_load($path) or next; + my $mime = mime_from_path($path) or next; my $im = _importer_for($self, $ibx); # any header match means it's eligible for the inbox: diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm index aa917ed8..c3fbb1a7 100644 --- a/lib/PublicInbox/WwwAtomStream.pm +++ b/lib/PublicInbox/WwwAtomStream.pm @@ -20,9 +20,8 @@ sub close {} sub new { my ($class, $ctx, $cb) = @_; - $ctx->{emit_header} = 1; $ctx->{feed_base_url} = $ctx->{-inbox}->base_url($ctx->{env}); - bless { cb => $cb || \&close, ctx => $ctx }, $class; + bless { cb => $cb || \&close, ctx => $ctx, emit_header => 1 }, $class; } sub 
response { @@ -130,7 +129,7 @@ sub feed_entry { $email = ascii_html($email); my $s = ''; - if (delete $ctx->{emit_header}) { + if (delete $self->{emit_header}) { $s .= atom_header($ctx, $title); } $s .= "$name$email" . diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm index 2008ba09..b23a415e 100644 --- a/lib/PublicInbox/WwwText.pm +++ b/lib/PublicInbox/WwwText.pm @@ -151,7 +151,7 @@ sub inbox_config ($$$) { url = https://example.com/$name/ url = http://example.onion/$name/ EOS - for my $k (qw(address listid infourl)) { + for my $k (qw(address listid infourl watchheader)) { defined(my $v = $ibx->{$k}) or next; $$txt .= "\t$k = $_\n" for @$v; } @@ -171,7 +171,7 @@ EOF } } - for my $k (qw(filter newsgroup obfuscate replyto watchheader)) { + for my $k (qw(filter newsgroup obfuscate replyto)) { defined(my $v = $ibx->{$k}) or next; $$txt .= "\t$k = $v\n"; } diff --git a/script/public-inbox-convert b/script/public-inbox-convert index e13c13f4..4c220b36 100755 --- a/script/public-inbox-convert +++ b/script/public-inbox-convert @@ -24,7 +24,7 @@ my $old_dir = shift(@ARGV) or die $usage; my $new_dir = shift(@ARGV) or die $usage; die "$new_dir exists\n" if -d $new_dir; die "$old_dir not a directory\n" unless -d $old_dir; -my $config = eval { PublicInbox::Config->new }; +my $config = PublicInbox::Config->new; $old_dir = abs_path($old_dir); my $old; if ($config) { diff --git a/script/public-inbox-edit b/script/public-inbox-edit index ae5d8289..42f914a8 100755 --- a/script/public-inbox-edit +++ b/script/public-inbox-edit @@ -22,7 +22,7 @@ my @opt = qw(mid|m=s file|F=s raw); GetOptions($opt, @PublicInbox::AdminEdit::OPT, @opt) or die "bad command-line args\n$usage\n"; -my $cfg = eval { PublicInbox::Config->new }; +my $cfg = PublicInbox::Config->new; my $editor = $ENV{MAIL_EDITOR}; # e.g. 
"mutt -f" unless (defined $editor) { my $k = 'publicinbox.mailEditor'; @@ -92,9 +92,8 @@ Multiple messages with different content found matching warn "Will edit all of them\n"; } } else { - open my $fh, '<', $file or die "open($file) failed: $!"; - my $orig = do { local $/; <$fh> }; - my $mime = PublicInbox::MIME->new(\$orig); + my $mime = PublicInbox::InboxWritable::mime_from_path($file) or + die "open($file) failed: $!"; my $mids = mids($mime->header_obj); find_mid($found, $_, \@ibxs) for (@$mids); # populates $found my $cid = content_id($mime); diff --git a/script/public-inbox-index b/script/public-inbox-index index 7def9964..2d0f0eca 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -14,8 +14,9 @@ PublicInbox::Admin::require_or_die('-index'); use PublicInbox::Xapcmd; my $compact_opt; -my $opt = { quiet => -1, compact => 0 }; -GetOptions($opt, qw(verbose|v+ reindex compact|c+ jobs|j=i prune indexlevel|L=s)) +my $opt = { quiet => -1, compact => 0, maxsize => undef }; +GetOptions($opt, qw(verbose|v+ reindex compact|c+ jobs|j=i prune + indexlevel|L=s maxsize|max-size=s)) or die "bad command-line args\n$usage"; die "--jobs must be positive\n" if defined $opt->{jobs} && $opt->{jobs} <= 0; @@ -25,14 +26,22 @@ if ($opt->{compact}) { $compact_opt = { -coarse_lock => 1, compact => 1 }; } -my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV); +my $cfg = PublicInbox::Config->new; +my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, undef, $cfg); PublicInbox::Admin::require_or_die('-index'); unless (@ibxs) { print STDERR "Usage: $usage\n"; exit 1 } my $mods = {}; +my $max_size = $opt->{maxsize} // $cfg->{lc('publicInbox.indexMaxSize')}; +if (defined $max_size) { + PublicInbox::Admin::parse_unsigned(\$max_size) or + die "`publicInbox.indexMaxSize=$max_size' not parsed\n"; +} + foreach my $ibx (@ibxs) { # XXX: users can shoot themselves in the foot, with opt->{indexlevel} $ibx->{indexlevel} //= $opt->{indexlevel} // 
PublicInbox::Admin::detect_indexlevel($ibx); + $ibx->{index_max_size} = $max_size; PublicInbox::Admin::scan_ibx_modules($mods, $ibx); } diff --git a/script/public-inbox-learn b/script/public-inbox-learn index 0d6c989b..4c10b68b 100644 --- a/script/public-inbox-learn +++ b/script/public-inbox-learn @@ -20,9 +20,9 @@ if ($train !~ /\A(?:ham|spam|rm)\z/) { my $spamc = PublicInbox::Spamcheck::Spamc->new; my $pi_config = PublicInbox::Config->new; my $err; -my $mime = PublicInbox::MIME->new(eval { +my $mime = PublicInbox::MIME->new(do{ local $/; - my $data = scalar ; + my $data = ; $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; if ($train ne 'rm') { @@ -36,7 +36,7 @@ my $mime = PublicInbox::MIME->new(eval { }; $err = $@; } - $data + \$data }); sub remove_or_add ($$$$) { diff --git a/script/public-inbox-mda b/script/public-inbox-mda index f37c7492..54d0af01 100755 --- a/script/public-inbox-mda +++ b/script/public-inbox-mda @@ -29,7 +29,7 @@ use PublicInbox::Spamcheck; # in case there's bugs in our code or user error. 
my $emergency = $ENV{PI_EMERGENCY} || "$ENV{HOME}/.public-inbox/emergency/"; $ems = PublicInbox::Emergency->new($emergency); -my $str = eval { local $/; }; +my $str = do { local $/; }; $str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; $ems->prepare(\$str); my $simple = Email::Simple->new(\$str); diff --git a/script/public-inbox-purge b/script/public-inbox-purge index c9b69c3d..8301b06d 100755 --- a/script/public-inbox-purge +++ b/script/public-inbox-purge @@ -21,7 +21,7 @@ GetOptions($opt, @PublicInbox::AdminEdit::OPT) or my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt); PublicInbox::AdminEdit::check_editable(\@ibxs); -my $data = do { local $/; scalar }; +my $data = do { local $/; }; $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; my $n_purged = 0; diff --git a/scripts/import_maildir b/scripts/import_maildir index fbf3f649..f4e82543 100755 --- a/scripts/import_maildir +++ b/scripts/import_maildir @@ -28,7 +28,7 @@ my @msgs; foreach my $sub (qw(cur new)) { foreach my $fn (glob("$dir/$sub/*")) { open my $fh, '<', $fn or next; - my $s = Email::Simple->new(eval { local $/; <$fh> }); + my $s = Email::Simple->new(do { local $/; <$fh> }); my $date = $s->header('Date'); my $t = eval { str2time($date) }; defined $t or next; @@ -45,7 +45,7 @@ my $im = PublicInbox::Import->new($git, $name, $email); while (my $ary = pop @msgs) { my $fn = "$dir/$ary->[1]"; open my $fh, '<', $fn or next; - my $mime = PublicInbox::MIME->new(eval { local $/; <$fh> }); + my $mime = PublicInbox::MIME->new(do { local $/; <$fh> }); $im->add($mime); } $im->done; diff --git a/scripts/import_slrnspool b/scripts/import_slrnspool index e569d004..480e7b4f 100755 --- a/scripts/import_slrnspool +++ b/scripts/import_slrnspool @@ -70,7 +70,7 @@ for (; $exit == 0 && $n < $max; $n++) { $max = $n + $max_gap; print STDERR $fn, "\n"; - my $mime = PublicInbox::MIME->new(eval { local $/; <$fh> }); + my $mime = PublicInbox::MIME->new(do { local $/; <$fh> }); $filter->scrub($mime); $im->add($mime); diff --git 
a/scripts/slrnspool2maildir b/scripts/slrnspool2maildir index 0c21806a..8e444e84 100755 --- a/scripts/slrnspool2maildir +++ b/scripts/slrnspool2maildir @@ -23,7 +23,7 @@ foreach my $sub (qw(cur new tmp)) { foreach my $n (grep(/\d+\z/, glob("$spool/*"))) { if (open my $fh, '<', $n) { - my $f = Email::Filter->new(data => eval { local $/; <$fh> }); + my $f = Email::Filter->new(data => do { local $/; <$fh> }); my $s = $f->simple; # gmane rewrites Received headers, which increases spamminess diff --git a/scripts/ssoma-replay b/scripts/ssoma-replay index 3e928084..07121423 100755 --- a/scripts/ssoma-replay +++ b/scripts/ssoma-replay @@ -30,10 +30,7 @@ use Email::Simple; use URI::Escape qw/uri_escape_utf8/; use File::Temp qw/tempfile/; my ($fh, $filename) = tempfile('ssoma-replay-XXXXXXXX', TMPDIR => 1); -my $msg = eval { - local $/; - Email::Simple->new(); -}; +my $msg = Email::Simple->new(do { local $/; }); select $fh; # Note: the archive URL makes assumptions about where the diff --git a/t/admin.t b/t/admin.t index 41aebe46..c25667b2 100644 --- a/t/admin.t +++ b/t/admin.t @@ -4,13 +4,14 @@ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; +use PublicInbox::Import; use_ok 'PublicInbox::Admin', qw(resolve_repo_dir); my ($tmpdir, $for_destroy) = tmpdir(); my $git_dir = "$tmpdir/v1"; my $v2_dir = "$tmpdir/v2"; my ($res, $err, $v); -is(0, system(qw(git init -q --bare), $git_dir), 'git init v1'); +PublicInbox::Import::init_bare($git_dir); # v1 is(resolve_repo_dir($git_dir), $git_dir, 'top-level GIT_DIR resolved'); @@ -77,4 +78,24 @@ SKIP: { } chdir '/'; + +my @pairs = ( + '1g' => 1024 ** 3, + 666 => 666, + '1500K' => 1500 * 1024, + '15m' => 15 * (1024 ** 2), +); + +while (@pairs) { + my ($in, $out) = splice(@pairs, 0, 2); + my $orig = $in; + ok(PublicInbox::Admin::parse_unsigned(\$in), "parse_unsigned $orig"); + is($in, $out, "got $orig => ($in == $out)"); +} + +for my $v ('', 'bogus', '1p', '1gig') { + ok(!PublicInbox::Admin::parse_unsigned(\$v), + 
"parse_unsigned rejects $v"); +} + done_testing(); diff --git a/t/altid.t b/t/altid.t index 6c34cdd6..3134e627 100644 --- a/t/altid.t +++ b/t/altid.t @@ -23,9 +23,9 @@ my $ibx; } { - is(system(qw(git init -q --bare), $git_dir), 0, 'git init ok'); my $git = PublicInbox::Git->new($git_dir); my $im = PublicInbox::Import->new($git, 'testbox', 'test@example'); + $im->init_bare; $im->add(Email::MIME->create( header => [ From => 'a@example.com', diff --git a/t/cgi.t b/t/cgi.t index 97bdebd9..bceb83e5 100644 --- a/t/cgi.t +++ b/t/cgi.t @@ -7,6 +7,7 @@ use warnings; use Test::More; use Email::MIME; use PublicInbox::TestCommon; +use PublicInbox::Import; require_mods(qw(Plack::Handler::CGI Plack::Util)); my ($tmpdir, $for_destroy) = tmpdir(); my $home = "$tmpdir/pi-home"; @@ -18,7 +19,7 @@ my $addr = 'test-public@example.com'; { is(1, mkdir($home, 0755), "setup ~/ for testing"); is(1, mkdir($pi_home, 0755), "setup ~/.public-inbox"); - is(0, system(qw(git init -q --bare), $maindir), "git init (main)"); + PublicInbox::Import::init_bare($maindir); open my $fh, '>', "$maindir/description" or die "open: $!\n"; print $fh "test for public-inbox\n"; @@ -54,10 +55,14 @@ Date: Thu, 01 Jan 1970 00:00:00 +0000 zzzzzz EOF - $im->add($mime); + ok($im->add($mime), 'added initial message'); + + $mime->header_set('Message-ID', ''); + $mime->body_str_set("z\n" x 1024); + ok($im->add($mime), 'added big message'); # deliver a reply, too - my $reply = Email::MIME->new(<new(< To: Me Cc: $addr @@ -71,7 +76,7 @@ Me wrote: what? 
EOF - $im->add($reply); + ok($im->add($mime), 'added reply'); my $slashy_mid = 'slashy/asdf@example.com'; my $slashy = Email::MIME->new(<add($slashy); + ok($im->add($slashy), 'added slash'); $im->done; my $res = cgi_run("/test/slashy/asdf\@example.com/raw"); @@ -98,14 +103,9 @@ EOF my $path = "/test/blahblah\@example.com/t.mbox.gz"; my $res = cgi_run($path); like($res->{head}, qr/^Status: 501 /, "search not-yet-enabled"); - my $indexed; - eval { - require DBD::SQLite; - require PublicInbox::SearchIdx; - my $s = PublicInbox::SearchIdx->new($ibx, 1); - $s->index_sync; - $indexed = 1; - }; + my $cmd = ['-index', $ibx->{inboxdir}, '--max-size=2k']; + my $opt = { 2 => \(my $err) }; + my $indexed = run_script($cmd, undef, $opt); if ($indexed) { $res = cgi_run($path); like($res->{head}, qr/^Status: 200 /, "search returned mbox"); @@ -116,9 +116,14 @@ EOF IO::Uncompress::Gunzip::gunzip(\$in => \$out); like($out, qr/^From /m, "From lines in mbox"); }; + $res = cgi_run('/test/toobig@example.com/'); + like($res->{head}, qr/^Status: 300 /, + 'did not index or return >max-size message'); + like($err, qr/skipping [a-f0-9]{40,}/, + 'warned about skipping large OID'); } else { like($res->{head}, qr/^Status: 501 /, "search not available"); - SKIP: { skip 'DBD::SQLite not available', 2 }; + SKIP: { skip 'DBD::SQLite not available', 4 }; } my $have_xml_treepp = eval { require XML::TreePP; 1 } if $indexed; diff --git a/t/config.t b/t/config.t index d08c3b9c..1f50bb86 100644 --- a/t/config.t +++ b/t/config.t @@ -5,12 +5,13 @@ use warnings; use Test::More; use PublicInbox::Config; use PublicInbox::TestCommon; +use PublicInbox::Import; my ($tmpdir, $for_destroy) = tmpdir(); { - is(system(qw(git init -q --bare), $tmpdir), 0, "git init successful"); + PublicInbox::Import::init_bare($tmpdir); my @cmd = ('git', "--git-dir=$tmpdir", qw(config foo.bar), "hi\nhi"); - is(system(@cmd), 0, "set config"); + is(xsys(@cmd), 0, "set config"); my $tmp = PublicInbox::Config->new("$tmpdir/config"); diff 
--git a/t/convert-compact.t b/t/convert-compact.t index 70609c7d..af16b701 100644 --- a/t/convert-compact.t +++ b/t/convert-compact.t @@ -20,8 +20,7 @@ my $ibx = { -primary_address => 'test@example.com', }; -ok(PublicInbox::Import::run_die([qw(git init --bare -q), $ibx->{inboxdir}]), - 'initialized v1 repo'); +PublicInbox::Import::init_bare($ibx->{inboxdir}); ok(umask(077), 'set restrictive umask'); ok(PublicInbox::Import::run_die([qw(git) , "--git-dir=$ibx->{inboxdir}", qw(config core.sharedRepository 0644)]), 'set sharedRepository'); diff --git a/t/ds-leak.t b/t/ds-leak.t index ea0eeaa6..72bf0379 100644 --- a/t/ds-leak.t +++ b/t/ds-leak.t @@ -6,10 +6,11 @@ use strict; use warnings; use Test::More; +use PublicInbox::TestCommon; use_ok 'PublicInbox::DS'; if ('close-on-exec for epoll and kqueue') { - use PublicInbox::Spawn qw(spawn); + use PublicInbox::Spawn qw(spawn which); my $pid; my $evfd_re = qr/(?:kqueue|eventpoll)/i; @@ -30,10 +31,12 @@ if ('close-on-exec for epoll and kqueue') { my $l = <$r>; is($l, undef, 'cloexec works and sleep(1) is running'); - my @of = grep(/$evfd_re/, `lsof -p $pid 2>/dev/null`); - my $err = $?; SKIP: { - skip "lsof missing? (\$?=$err)", 1 if $err; + my $lsof = which('lsof') or skip 'lsof missing', 1; + my $rdr = { 2 => \(my $null) }; + my @of = grep(/$evfd_re/, xqx([$lsof, '-p', $pid], {}, $rdr)); + my $err = $?; + skip "lsof broken ? (\$?=$err)", 1 if $err; is_deeply(\@of, [], 'no FDs leaked to subprocess'); }; if (defined $pid) { @@ -44,8 +47,9 @@ if ('close-on-exec for epoll and kqueue') { } SKIP: { - # not bothering with BSD::Resource - chomp(my $n = `/bin/sh -c 'ulimit -n'`); + require_mods('BSD::Resource', 1); + my $rlim = BSD::Resource::RLIMIT_NOFILE(); + my ($n,undef) = BSD::Resource::getrlimit($rlim); # FreeBSD 11.2 with 2GB RAM gives RLIMIT_NOFILE=57987! 
if ($n > 1024 && !$ENV{TEST_EXPENSIVE}) { diff --git a/t/edit.t b/t/edit.t index 2803dd01..d8833f9c 100644 --- a/t/edit.t +++ b/t/edit.t @@ -118,7 +118,7 @@ $t = 'non-interactive editor failure'; { $t = 'mailEditor set in config'; { $in = $out = $err = ''; - my $rc = system(qw(git config), "--file=$cfgfile", + my $rc = xsys(qw(git config), "--file=$cfgfile", 'publicinbox.maileditor', "$^X -i -p -e 's/boolean prefix/bool pfx/'"); is($rc, 0, 'set publicinbox.mailEditor'); diff --git a/t/feed.t b/t/feed.t index cfa09a7c..ffd5ca7e 100644 --- a/t/feed.t +++ b/t/feed.t @@ -34,7 +34,7 @@ my $git = $ibx->git; my $im = PublicInbox::Import->new($git, $ibx->{name}, 'test@example'); { - is(0, system(qw(git init -q --bare), $git_dir), "git init"); + $im->init_bare; local $ENV{GIT_DIR} = $git_dir; foreach my $i (1..6) { diff --git a/t/git.t b/t/git.t index 8224969d..b05ac123 100644 --- a/t/git.t +++ b/t/git.t @@ -6,15 +6,17 @@ use Test::More; use PublicInbox::TestCommon; my ($dir, $for_destroy) = tmpdir(); use PublicInbox::Spawn qw(popen_rd); +use PublicInbox::Import; use_ok 'PublicInbox::Git'; { - is(system(qw(git init -q --bare), $dir), 0, 'created git directory'); + PublicInbox::Import::init_bare($dir); my $fi_data = './t/git.fast-import-data'; - ok(-r $fi_data, "fast-import data readable (or run test at top level)"); - local $ENV{GIT_DIR} = $dir; - system("git fast-import --quiet <$fi_data"); + open my $fh, '<', $fi_data or die + "fast-import data readable (or run test at top level: $!"; + my $rdr = { 0 => $fh }; + xsys([qw(git fast-import --quiet)], { GIT_DIR => $dir }, $rdr); is($?, 0, 'fast-import succeeded'); } @@ -57,18 +59,14 @@ use_ok 'PublicInbox::Git'; } if (1) { - my $cmd = [ 'git', "--git-dir=$dir", qw(hash-object -w --stdin) ]; - # need a big file, use the AGPL-3.0 :p my $big_data = './COPYING'; ok(-r $big_data, 'COPYING readable'); my $size = -s $big_data; ok($size > 8192, 'file is big enough'); - - my $buf = do { - local $ENV{GIT_DIR} = $dir; - `git hash-object 
-w --stdin <$big_data`; - }; + open my $fh, '<', $big_data or die; + my $cmd = [ 'git', "--git-dir=$dir", qw(hash-object -w --stdin) ]; + my $buf = xqx($cmd, { GIT_DIR => $dir }, { 0 => $fh }); is(0, $?, 'hashed object successfully'); chomp $buf; @@ -90,7 +88,7 @@ if (1) { if ('alternates reloaded') { my ($alt, $alt_obj) = tmpdir(); my @cmd = ('git', "--git-dir=$alt", qw(hash-object -w --stdin)); - is(system(qw(git init -q --bare), $alt), 0, 'create alt directory'); + PublicInbox::Import::init_bare($alt); open my $fh, '<', "$alt/config" or die "open failed: $!\n"; my $rd = popen_rd(\@cmd, {}, { 0 => $fh } ); close $fh or die "close failed: $!"; diff --git a/t/hl_mod.t b/t/hl_mod.t index a4ef4a28..95057354 100644 --- a/t/hl_mod.t +++ b/t/hl_mod.t @@ -4,7 +4,8 @@ use strict; use warnings; use Test::More; -use PublicInbox::Spawn qw(which spawn); +use PublicInbox::Spawn qw(which); +use PublicInbox::TestCommon; use IO::Handle; # ->autoflush use Fcntl qw(:seek); eval { require highlight } or @@ -29,21 +30,14 @@ my $orig = $str; is($$ref, $$lref, 'do_hl_lang matches do_hl'); SKIP: { - which('w3m') or skip 'w3m(1) missing to check output', 1; - my $cmd = [ qw(w3m -T text/html -dump -config /dev/null) ]; - open my $in, '+>', undef or die; - open my $out, '+>', undef or die; - my $rdr = { 0 => fileno($in), 1 => fileno($out) }; - $in->autoflush(1); - print $in '
', $$ref, '
' or die; - $in->seek(0, SEEK_SET) or die; - my $pid = spawn($cmd, undef, $rdr); - waitpid($pid, 0); + my $w3m = which('w3m') or + skip('w3m(1) missing to check output', 1); + my $cmd = [ $w3m, qw(-T text/html -dump -config /dev/null) ]; + my $in = '
' . $$ref . '
'; + my $out = xqx($cmd, undef, { 0 => \$in }); # expand tabs and normalize whitespace, # w3m doesn't preserve tabs $orig =~ s/\t/ /gs; - $out->seek(0, SEEK_SET) or die; - $out = do { local $/; <$out> }; $out =~ s/\s*\z//sg; $orig =~ s/\s*\z//sg; is($out, $orig, 'w3m output matches'); diff --git a/t/html_index.t b/t/html_index.t index 158a7862..fda3962a 100644 --- a/t/html_index.t +++ b/t/html_index.t @@ -22,7 +22,7 @@ my $im = PublicInbox::Import->new($git, 'tester', 'test@example'); # setup { - is(0, system(qw(git init -q --bare), $git_dir), "git init"); + $im->init_bare; my $prev = ""; foreach my $i (1..6) { diff --git a/t/httpd-corner.t b/t/httpd-corner.t index f25a9a9c..7a6bcc66 100644 --- a/t/httpd-corner.t +++ b/t/httpd-corner.t @@ -6,7 +6,7 @@ use strict; use warnings; use Test::More; use Time::HiRes qw(gettimeofday tv_interval); -use PublicInbox::Spawn qw(which spawn); +use PublicInbox::Spawn qw(which spawn popen_rd); use PublicInbox::TestCommon; require_mods(qw(Plack::Util Plack::Builder HTTP::Date HTTP::Status)); use Digest::SHA qw(sha1_hex); @@ -26,9 +26,6 @@ my $psgi = "./t/httpd-corner.psgi"; my $sock = tcp_server() or die; my @zmods = qw(PublicInbox::GzipFilter IO::Uncompress::Gunzip); -# make sure stdin is not a pipe for lsof test to check for leaking pipes -open(STDIN, '<', '/dev/null') or die 'no /dev/null: $!'; - # Make sure we don't clobber socket options set by systemd or similar # using socket activation: my ($defer_accept_val, $accf_arg, $TCP_DEFER_ACCEPT); @@ -308,12 +305,12 @@ my $check_self = sub { }; SKIP: { - which('curl') or skip('curl(1) missing', 4); + my $curl = which('curl') or skip('curl(1) missing', 4); my $base = 'http://' . $sock->sockhost . ':' . 
$sock->sockport; my $url = "$base/sha1"; my ($r, $w); pipe($r, $w) or die "pipe: $!"; - my $cmd = [qw(curl --tcp-nodelay --no-buffer -T- -HExpect: -sS), $url]; + my $cmd = [$curl, qw(--tcp-nodelay -T- -HExpect: -sSN), $url]; open my $cout, '+>', undef or die; open my $cerr, '>', undef or die; my $rdr = { 0 => $r, 1 => $cout, 2 => $cerr }; @@ -330,7 +327,7 @@ SKIP: { seek($cout, 0, SEEK_SET); is(<$cout>, sha1_hex($str), 'read expected body'); - open my $fh, '-|', qw(curl -sS), "$base/async-big" or die $!; + my $fh = popen_rd([$curl, '-sS', "$base/async-big"]); my $n = 0; my $non_zero = 0; while (1) { @@ -338,15 +335,14 @@ SKIP: { $n += $r; $buf =~ /\A\0+\z/ or $non_zero++; } - close $fh or die "curl errored out \$?=$?"; + close $fh or die "close curl pipe: $!"; + is($?, 0, 'curl succesful'); is($n, 30 * 1024 * 1024, 'got expected output from curl'); is($non_zero, 0, 'read all zeros'); - require_mods(@zmods, 1); - open $fh, '-|', qw(curl -sS), "$base/psgi-return-gzip" or die; - binmode $fh; - my $buf = do { local $/; <$fh> }; - close $fh or die "curl errored out \$?=$?"; + require_mods(@zmods, 2); + my $buf = xqx([$curl, '-sS', "$base/psgi-return-gzip"]); + is($?, 0, 'curl succesful'); IO::Uncompress::Gunzip::gunzip(\$buf => \(my $out)); is($out, "hello world\n"); } @@ -605,12 +601,14 @@ SKIP: { SKIP: { skip 'only testing lsof(8) output on Linux', 1 if $^O ne 'linux'; - skip 'no lsof in PATH', 1 unless which('lsof'); - my @lsof = `lsof -p $td->{pid}`; + my $lsof = which('lsof') or skip 'no lsof in PATH', 1; + my $null_in = ''; + my $rdr = { 2 => \(my $null_err), 0 => \$null_in }; + my @lsof = xqx([$lsof, '-p', $td->{pid}], undef, $rdr); is_deeply([grep(/\bdeleted\b/, @lsof)], [], 'no lingering deleted inputs'); # filter out pipes inherited from the parent - my @this = `lsof -p $$`; + my @this = xqx([$lsof, '-p', $$], undef, $rdr); my $bad; my $extract_inodes = sub { map {; diff --git a/t/httpd.t b/t/httpd.t index 11511c73..61aec3b4 100644 --- a/t/httpd.t +++ 
b/t/httpd.t @@ -64,22 +64,22 @@ EOF is($conn->read($buf, 1), 0, "EOF"); } - is(system(qw(git clone -q --mirror), + is(xsys(qw(git clone -q --mirror), "http://$host:$port/$group", "$tmpdir/clone.git"), 0, 'smart clone successful'); # ensure dumb cloning works, too: - is(system('git', "--git-dir=$maindir", + is(xsys('git', "--git-dir=$maindir", qw(config http.uploadpack false)), 0, 'disable http.uploadpack'); - is(system(qw(git clone -q --mirror), + is(xsys(qw(git clone -q --mirror), "http://$host:$port/$group", "$tmpdir/dumb.git"), 0, 'clone successful'); ok($td->kill, 'killed httpd'); $td->join; - is(system('git', "--git-dir=$tmpdir/clone.git", + is(xsys('git', "--git-dir=$tmpdir/clone.git", qw(fsck --no-verbose)), 0, 'fsck on cloned directory successful'); } diff --git a/t/import.t b/t/import.t index 703aa362..79af9846 100644 --- a/t/import.t +++ b/t/import.t @@ -12,10 +12,9 @@ use File::Temp qw/tempfile/; use PublicInbox::TestCommon; my ($dir, $for_destroy) = tmpdir(); -is(system(qw(git init -q --bare), $dir), 0, 'git init successful'); my $git = PublicInbox::Git->new($dir); - my $im = PublicInbox::Import->new($git, 'testbox', 'test@example'); +$im->init_bare; my $mime = PublicInbox::MIME->create( header => [ From => 'a@example.com', diff --git a/t/inbox.t b/t/inbox.t index 5f86440d..b59d5dba 100644 --- a/t/inbox.t +++ b/t/inbox.t @@ -4,12 +4,31 @@ use strict; use warnings; use Test::More; use_ok 'PublicInbox::Inbox'; +use File::Temp 0.19 (); my $x = PublicInbox::Inbox->new({url => [ '//example.com/test/' ]}); is($x->base_url, 'https://example.com/test/', 'expanded protocol-relative'); $x = PublicInbox::Inbox->new({url => [ 'http://example.com/test' ]}); is($x->base_url, 'http://example.com/test/', 'added trailing slash'); $x = PublicInbox::Inbox->new({}); + is($x->base_url, undef, 'undef base_url allowed'); +my $tmpdir = File::Temp->newdir('pi-inbox-XXXXXX', TMPDIR => 1); +$x->{inboxdir} = $tmpdir->dirname; +is_deeply($x->cloneurl, [], 'no cloneurls'); 
+is($x->description, '($INBOX_DIR/description missing)', 'default description'); +{ + open my $fh, '>', "$x->{inboxdir}/cloneurl" or die; + print $fh "https://example.com/inbox\n" or die; + close $fh or die; + open $fh, '>', "$x->{inboxdir}/description" or die; + print $fh "blah\n" or die; + close $fh or die; +} +is_deeply($x->cloneurl, ['https://example.com/inbox'], 'cloneurls update'); +is($x->description, 'blah', 'description updated'); +is(unlink(glob("$x->{inboxdir}/*")), 2, 'unlinked cloneurl & description'); +is_deeply($x->cloneurl, ['https://example.com/inbox'], 'cloneurls memoized'); +is($x->description, 'blah', 'description memoized'); done_testing(); diff --git a/t/indexlevels-mirror.t b/t/indexlevels-mirror.t index 77c52718..f2200306 100644 --- a/t/indexlevels-mirror.t +++ b/t/indexlevels-mirror.t @@ -59,7 +59,7 @@ sub import_index_incremental { push @cmd, "$ibx->{inboxdir}/git/0.git", "$mirror/git/0.git"; } my $fetch_dir = $cmd[-1]; - is(system(@cmd), 0, "v$v clone OK"); + is(xsys(@cmd), 0, "v$v clone OK"); # inbox init local $ENV{PI_CONFIG} = "$tmpdir/.picfg"; @@ -86,7 +86,7 @@ sub import_index_incremental { $im->done; # mirror updates - is(system('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK'); + is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK'); ok(run_script(['-index', $mirror]), "v$v index mirror again OK"); ($nr, $msgs) = $ro_mirror->recent; is($nr, 2, '2nd message seen in mirror'); @@ -123,7 +123,7 @@ sub import_index_incremental { } # sync the mirror - is(system('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK'); + is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK'); ok(run_script(['-index', $mirror]), "v$v index mirror again OK"); ($nr, $msgs) = $ro_mirror->recent; is($nr, 1, '2nd message gone from mirror'); @@ -148,7 +148,7 @@ sub import_index_incremental { push @expect, $i; } $im->done; - is(system('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK'); + is(xsys('git', 
"--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK'); ok(run_script(['-index', '--reindex', $mirror]), "v$v index --reindex mirror OK"); @ro_nums = map { $_->{num} } @{$ro_mirror->over->query_ts(0, 0)}; diff --git a/t/init.t b/t/init.t index a78c2fc8..94c6184e 100644 --- a/t/init.t +++ b/t/init.t @@ -105,7 +105,8 @@ done_testing(); sub read_indexlevel { my ($inbox) = @_; - local $ENV{GIT_CONFIG} = "$ENV{PI_DIR}/config"; - chomp(my $lvl = `git config publicinbox.$inbox.indexlevel`); + my $cmd = [ qw(git config), "publicinbox.$inbox.indexlevel" ]; + my $env = { GIT_CONFIG => "$ENV{PI_DIR}/config" }; + chomp(my $lvl = xqx($cmd, $env)); $lvl; } diff --git a/t/iso-2202-jp.mbox b/t/iso-2202-jp.eml similarity index 84% rename from t/iso-2202-jp.mbox rename to t/iso-2202-jp.eml index 1a8e1974..9e0bbad4 100644 --- a/t/iso-2202-jp.mbox +++ b/t/iso-2202-jp.eml @@ -1,4 +1,3 @@ -From historical@ruby-dev Thu Jan 1 00:00:00 1970 Message-Id: <199707281508.AAA24167@hoyogw.example> Date: Tue, 29 Jul 97 00:08:29 +0900 From: matz@example.com diff --git a/t/mda.t b/t/mda.t index ddc0c279..dc691616 100644 --- a/t/mda.t +++ b/t/mda.t @@ -7,7 +7,9 @@ use Email::MIME; use Cwd qw(getcwd); use PublicInbox::MID qw(mid2path); use PublicInbox::Git; +use PublicInbox::InboxWritable; use PublicInbox::TestCommon; +use PublicInbox::Import; my ($tmpdir, $for_destroy) = tmpdir(); my $home = "$tmpdir/pi-home"; my $pi_home = "$home/.public-inbox"; @@ -47,7 +49,7 @@ my $fail_bad_header = sub ($$$) { "spamc mock found (run in top of source tree"); is(1, mkdir($home, 0755), "setup ~/ for testing"); is(1, mkdir($pi_home, 0755), "setup ~/.public-inbox"); - is(0, system(qw(git init -q --bare), $maindir), "git init (main)"); + PublicInbox::Import::init_bare($maindir); open my $fh, '>>', $pi_config or die; print $fh < }; - close $fh; - my $msg = Email::MIME->new($str); - + my $eml = 't/utf8.eml'; + my $msg = PublicInbox::InboxWritable::mime_from_path($eml) or + die "failed to open $eml: $!"; my $from = 
$msg->header('From'); my ($author) = PublicInbox::Address::names($from); my ($email) = PublicInbox::Address::emails($from); @@ -299,7 +298,7 @@ Subject: this message will be trained as spam Date: Thu, 01 Jan 1970 00:00:00 +0000 EOF - system(qw(git config --file), $pi_config, "$cfgpfx.listid", $list_id); + xsys(qw(git config --file), $pi_config, "$cfgpfx.listid", $list_id); $? == 0 or die "failed to set listid $?"; my $in = $simple->as_string; ok(run_script(['-mda'], undef, { 0 => \$in }), diff --git a/t/mda_filter_rubylang.t b/t/mda_filter_rubylang.t index 6f288b7e..f2cbe9d5 100644 --- a/t/mda_filter_rubylang.t +++ b/t/mda_filter_rubylang.t @@ -14,7 +14,7 @@ my $pi_config = "$tmpdir/pi_config"; local $ENV{PI_CONFIG} = $pi_config; local $ENV{PI_EMERGENCY} = "$tmpdir/emergency"; my @cfg = ('git', 'config', "--file=$pi_config"); -is(system(@cfg, 'publicinboxmda.spamcheck', 'none'), 0); +is(xsys(@cfg, 'publicinboxmda.spamcheck', 'none'), 0); for my $v (qw(V1 V2)) { my @warn; @@ -26,8 +26,8 @@ for my $v (qw(V1 V2)) { "http://example.com/$v", $addr ]; ok(run_script($cmd), 'public-inbox-init'); ok(run_script(['-index', $inboxdir]), 'public-inbox-index'); - is(system(@cfg, "$cfgpfx.filter", 'PublicInbox::Filter::RubyLang'), 0); - is(system(@cfg, "$cfgpfx.altid", + is(xsys(@cfg, "$cfgpfx.filter", 'PublicInbox::Filter::RubyLang'), 0); + is(xsys(@cfg, "$cfgpfx.altid", 'serial:alerts:file=msgmap.sqlite3'), 0); for my $i (1..2) { diff --git a/t/msg_iter.t b/t/msg_iter.t index d303564f..573ee412 100644 --- a/t/msg_iter.t +++ b/t/msg_iter.t @@ -5,6 +5,7 @@ use warnings; use Test::More; use Email::MIME; use PublicInbox::Hval qw(ascii_html); +use PublicInbox::InboxWritable; use_ok('PublicInbox::MsgIter'); { @@ -42,12 +43,9 @@ use_ok('PublicInbox::MsgIter'); } { - my $f = 't/iso-2202-jp.mbox'; - my $mime = Email::MIME->new(do { - open my $fh, '<', $f or die "open($f): $!"; - local $/; - <$fh>; - }); + my $f = 't/iso-2202-jp.eml'; + my $mime = 
PublicInbox::InboxWritable::mime_from_path($f) or + die "open $f: $!"; my $raw = ''; msg_iter($mime, sub { my ($part, $level, @ex) = @{$_[0]}; @@ -61,12 +59,8 @@ use_ok('PublicInbox::MsgIter'); { my $f = 't/x-unknown-alpine.eml'; - my $mime = Email::MIME->new(do { - open my $fh, '<', $f or die "open($f): $!"; - local $/; - binmode $fh; - <$fh>; - }); + my $mime = PublicInbox::InboxWritable::mime_from_path($f) or + die "open $f: $!"; my $raw = ''; msg_iter($mime, sub { my ($part, $level, @ex) = @{$_[0]}; diff --git a/t/multi-mid.t b/t/multi-mid.t index 31a8fd74..5afb9693 100644 --- a/t/multi-mid.t +++ b/t/multi-mid.t @@ -65,7 +65,7 @@ for my $order ([$bad, $good], [$good, $bad]) { my @v2 = ($ibx->over->get_art(1), $ibx->over->get_art(2)); is_deeply(\@v2, \@old, 'v2 conversion times match'); - system(qw(git clone -sq --mirror), "$tmpdir/v2/git/0.git", + xsys(qw(git clone -sq --mirror), "$tmpdir/v2/git/0.git", "$tmpdir/v2-clone/git/0.git") == 0 or die "clone: $?"; $cmd = [ '-init', '-Lbasic', '-V2', 'v2c', "$tmpdir/v2-clone", 'http://example.com/v2c', 'v2c@example.com' ]; diff --git a/t/nntpd-tls.t b/t/nntpd-tls.t index 0714631d..a0522e1f 100644 --- a/t/nntpd-tls.t +++ b/t/nntpd-tls.t @@ -63,11 +63,9 @@ EOF { my $im = $ibx->importer(0); - my $mime = PublicInbox::MIME->new(do { - open my $fh, '<', 't/data/0001.patch' or die; - local $/; - <$fh> - }); + my $eml = 't/data/0001.patch'; + my $mime = PublicInbox::InboxWritable::mime_from_path($eml) or + die "open $eml: $!"; ok($im->add($mime), 'message added'); $im->done; if ($version == 1) { diff --git a/t/nntpd.t b/t/nntpd.t index 826e3f3d..5a3a62fb 100644 --- a/t/nntpd.t +++ b/t/nntpd.t @@ -4,9 +4,8 @@ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; +use PublicInbox::Spawn qw(which); require_mods(qw(DBD::SQLite)); -require PublicInbox::SearchIdx; -require PublicInbox::Msgmap; require PublicInbox::InboxWritable; use Email::Simple; use IO::Socket; @@ -48,7 +47,7 @@ $ibx = 
PublicInbox::Inbox->new($ibx); my @cmd = ('-init', $group, $inboxdir, 'http://example.com/', $addr); push @cmd, "-V$version", '-Lbasic'; ok(run_script(\@cmd), 'init OK'); - is(system(qw(git config), "--file=$home/.public-inbox/config", + is(xsys(qw(git config), "--file=$home/.public-inbox/config", "publicinbox.$group.newsgroup", $group), 0, 'enabled newsgroup'); my $len; @@ -78,8 +77,8 @@ EOF $im->add($mime); $im->done; if ($version == 1) { - my $s = PublicInbox::SearchIdx->new($ibx, 1); - $s->index_sync; + ok(run_script(['-index', $ibx->{inboxdir}]), + 'indexed v1'); } } @@ -253,8 +252,8 @@ Date: Fri, 02 Oct 1993 00:00:00 +0000 $im->add($for_leafnode); $im->done; if ($version == 1) { - my $s = PublicInbox::SearchIdx->new($ibx, 1); - $s->index_sync; + ok(run_script(['-index', $ibx->{inboxdir}]), + 'indexed v1'); } my $hdr = $n->head("<$long_hdr>"); my $expect = qr/\AMessage-ID: /i . qr/\Q<$long_hdr>\E/; @@ -304,7 +303,9 @@ Date: Fri, 02 Oct 1993 00:00:00 +0000 if ($INC{'Search/Xapian.pm'} && ($ENV{TEST_RUN_MODE}//2)) { skip 'Search/Xapian.pm pre-loaded (by t/run.perl?)', 1; } - my @of = `lsof -p $td->{pid} 2>/dev/null`; + my $lsof = which('lsof') or skip 'lsof missing', 1; + my $rdr = { 2 => \(my $null) }; + my @of = xqx([$lsof, '-p', $td->{pid}], undef, $rdr); skip('lsof broken', 1) if (!scalar(@of) || $?); my @xap = grep m!Search/Xapian!, @of; is_deeply(\@xap, [], 'Xapian not loaded in nntpd'); @@ -328,12 +329,12 @@ Date: Fri, 02 Oct 1993 00:00:00 +0000 $n = $s = undef; $td->join; - my $eout = eval { - local $/; + is($?, 0, 'no error in exited process'); + my $eout = do { open my $fh, '<', $err or die "open $err failed: $!"; + local $/; <$fh>; }; - is($?, 0, 'no error in exited process'); unlike($eout, qr/wide/i, 'no Wide character warnings'); } diff --git a/t/nulsubject.t b/t/nulsubject.t index fec6c1ae..1ded88d3 100644 --- a/t/nulsubject.t +++ b/t/nulsubject.t @@ -11,9 +11,9 @@ my ($tmpdir, $for_destroy) = tmpdir(); my $git_dir = "$tmpdir/a.git"; { - 
is(system(qw(git init -q --bare), $git_dir), 0, 'git init ok'); my $git = PublicInbox::Git->new($git_dir); my $im = PublicInbox::Import->new($git, 'testbox', 'test@example'); + $im->init_bare; $im->add(Email::MIME->create( header => [ From => 'a@example.com', @@ -25,7 +25,8 @@ my $git_dir = "$tmpdir/a.git"; body => "hello world\n", )); $im->done; - is(system(qw(git --git-dir), $git_dir, 'fsck', '--strict'), 0, 'git fsck ok'); + is(xsys(qw(git --git-dir), $git_dir, 'fsck', '--strict'), 0, + 'git fsck ok'); } done_testing(); diff --git a/t/psgi_attach.t b/t/psgi_attach.t index 297e92e1..a47f3754 100644 --- a/t/psgi_attach.t +++ b/t/psgi_attach.t @@ -21,9 +21,9 @@ my $config = PublicInbox::Config->new(\<new($maindir); my $im = PublicInbox::Import->new($git, 'test', $addr); +$im->init_bare; { open my $fh, '<', '/dev/urandom' or die "unable to open urandom: $!\n"; diff --git a/t/psgi_mount.t b/t/psgi_mount.t index 8cac3502..3afb1fb5 100644 --- a/t/psgi_mount.t +++ b/t/psgi_mount.t @@ -21,9 +21,9 @@ my $config = PublicInbox::Config->new(\<new($maindir); my $im = PublicInbox::Import->new($git, 'test', $addr); +$im->init_bare; { my $mime = Email::MIME->new(< diff --git a/t/psgi_text.t b/t/psgi_text.t index 6d8a518d..77fc9ee2 100644 --- a/t/psgi_text.t +++ b/t/psgi_text.t @@ -21,7 +21,7 @@ my $config = PublicInbox::Config->new(\<new($config); test_psgi(sub { $www->call(@_) }, sub { diff --git a/t/psgi_v2.t b/t/psgi_v2.t index c4f80869..57017de1 100644 --- a/t/psgi_v2.t +++ b/t/psgi_v2.t @@ -26,16 +26,16 @@ my $new_mid; my $im = PublicInbox::V2Writable->new($ibx, 1); $im->{parallel} = 0; -my $mime = PublicInbox::MIME->create( - header => [ - From => 'a@example.com', - To => 'test@example.com', - Subject => 'this is a subject', - 'Message-ID' => '', - Date => 'Fri, 02 Oct 1993 00:00:00 +0000', - ], - body => "hello world\n", -); +my $mime = PublicInbox::MIME->new(<<'EOF'); +From oldbug-pre-a0c07cba0e5d8b6a Fri Oct 2 00:00:00 1993 +From: a@example.com +To: test@example.com 
+Subject: this is a subject +Message-ID: +Date: Fri, 02 Oct 1993 00:00:00 +0000 + +hello world +EOF ok($im->add($mime), 'added one message'); $mime->body_set("hello world!\n"); @@ -48,6 +48,10 @@ my $mids = mids($mime->header_obj); $new_mid = $mids->[1]; $im->done; +my $msg = $ibx->msg_by_mid('a-mid@b'); +like($$msg, qr/\AFrom oldbug/s, + '"From_" line stored to test old bug workaround'); + my $cfgpfx = "publicinbox.v2test"; my $cfg = <{-primary_address} @@ -63,6 +67,7 @@ test_psgi(sub { $www->call(@_) }, sub { 'got v2 description missing message'); $res = $cb->(GET('/v2test/a-mid@b/raw')); $raw = $res->content; + unlike($raw, qr/^From oldbug/sm, 'buggy "From_" line omitted'); like($raw, qr/^hello world$/m, 'got first message'); like($raw, qr/^hello world!$/m, 'got second message'); @from_ = ($raw =~ m/^From /mg); @@ -123,6 +128,7 @@ test_psgi(sub { $www->call(@_) }, sub { my $out; my $in = $res->content; my $status = IO::Uncompress::Gunzip::gunzip(\$in => \$out); + unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted'); like($out, qr/^hello world$/m, 'got first in t.mbox.gz'); like($out, qr/^hello world!$/m, 'got second in t.mbox.gz'); like($out, qr/^hello ghosts$/m, 'got third in t.mbox.gz'); @@ -133,6 +139,7 @@ test_psgi(sub { $www->call(@_) }, sub { $res = $cb->(POST('/v2test/?q=m:a-mid@b&x=m')); $in = $res->content; $status = IO::Uncompress::Gunzip::gunzip(\$in => \$out); + unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted'); like($out, qr/^hello world$/m, 'got first in mbox POST'); like($out, qr/^hello world!$/m, 'got second in mbox POST'); like($out, qr/^hello ghosts$/m, 'got third in mbox POST'); @@ -143,6 +150,7 @@ test_psgi(sub { $www->call(@_) }, sub { $res = $cb->(GET('/v2test/all.mbox.gz')); $in = $res->content; $status = IO::Uncompress::Gunzip::gunzip(\$in => \$out); + unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted'); like($out, qr/^hello world$/m, 'got first in all.mbox'); like($out, qr/^hello world!$/m, 'got second 
in all.mbox'); like($out, qr/^hello ghosts$/m, 'got third in all.mbox'); diff --git a/t/replace.t b/t/replace.t index a44560fc..2efa25f1 100644 --- a/t/replace.t +++ b/t/replace.t @@ -95,7 +95,7 @@ EOF for my $dir (glob("$ibx->{inboxdir}/git/*.git")) { my ($bn) = ($dir =~ m!([^/]+)\z!); - is(system(qw(git --git-dir), $dir, + is(xsys(qw(git --git-dir), $dir, qw(fsck --strict --no-progress)), 0, "git fsck is clean in epoch $bn"); } diff --git a/t/search-thr-index.t b/t/search-thr-index.t index f073304a..6c564b19 100644 --- a/t/search-thr-index.t +++ b/t/search-thr-index.t @@ -11,10 +11,11 @@ require_mods(qw(DBD::SQLite Search::Xapian)); require PublicInbox::SearchIdx; require PublicInbox::Smsg; require PublicInbox::Inbox; +use PublicInbox::Import; my ($tmpdir, $for_destroy) = tmpdir(); my $git_dir = "$tmpdir/a.git"; -is(0, system(qw(git init -q --bare), $git_dir), "git init (main)"); +PublicInbox::Import::init_bare($git_dir); my $ibx = PublicInbox::Inbox->new({inboxdir => $git_dir}); my $rw = PublicInbox::SearchIdx->new($ibx, 1); ok($rw, "search indexer created"); diff --git a/t/search.t b/t/search.t index 839a320a..8508f273 100644 --- a/t/search.t +++ b/t/search.t @@ -7,13 +7,14 @@ use PublicInbox::TestCommon; require_mods(qw(DBD::SQLite Search::Xapian)); require PublicInbox::SearchIdx; require PublicInbox::Inbox; +require PublicInbox::InboxWritable; use Email::MIME; my ($tmpdir, $for_destroy) = tmpdir(); my $git_dir = "$tmpdir/a.git"; my $ibx = PublicInbox::Inbox->new({ inboxdir => $git_dir }); my ($root_id, $last_id); -is(0, system(qw(git init --shared -q --bare), $git_dir), "git init (main)") +is(0, xsys(qw(git init --shared -q --bare), $git_dir), "git init (main)") or BAIL_OUT("`git init --shared' failed, weird FS or seccomp?"); eval { PublicInbox::Search->new($ibx)->xdb }; ok($@, "exception raised on non-existent DB"); @@ -290,14 +291,9 @@ $ibx->with_umask(sub { }); $ibx->with_umask(sub { - my $str = eval { - my $mbox = 't/utf8.mbox'; - open(my $fh, '<', $mbox) 
or die "failed to open mbox: $mbox\n"; - local $/; - <$fh> - }; - $str =~ s/\AFrom [^\n]+\n//s; - my $mime = Email::MIME->new($str); + my $eml = 't/utf8.eml'; + my $mime = PublicInbox::InboxWritable::mime_from_path($eml) or + die "open $eml: $!"; my $doc_id = $rw->add_message($mime); ok($doc_id > 0, 'message indexed doc_id with UTF-8'); my $msg = $rw->query('m:testmessage@example.com', {limit => 1})->[0]; diff --git a/t/solver_git.t b/t/solver_git.t index 2dbb07b0..7f79ff4c 100644 --- a/t/solver_git.t +++ b/t/solver_git.t @@ -8,8 +8,9 @@ use PublicInbox::TestCommon; require_git(2.6); use PublicInbox::Spawn qw(popen_rd); require_mods(qw(DBD::SQLite Search::Xapian Plack::Util)); -chomp(my $git_dir = `git rev-parse --git-dir 2>/dev/null`); -plan skip_all => "$0 must be run from a git working tree" if $?; +my $git_dir = xqx([qw(git rev-parse --git-dir)], undef, {2 => \(my $null)}); +$? == 0 or plan skip_all => "$0 must be run from a git working tree"; +chomp $git_dir; # needed for alternates, and --absolute-git-dir is only in git 2.13+ $git_dir = abs_path($git_dir); @@ -28,8 +29,8 @@ my $im = PublicInbox::V2Writable->new($ibx, 1); $im->{parallel} = 0; my $deliver_patch = sub ($) { - open my $fh, '<', $_[0] or die "open: $!"; - my $mime = PublicInbox::MIME->new(do { local $/; <$fh> }); + my $mime = PublicInbox::InboxWritable::mime_from_path($_[0]) or + die "open $_[0]: $!"; $im->add($mime); $im->done; }; @@ -109,7 +110,8 @@ SKIP: { require_mods(@psgi, 7 + scalar(@psgi)); use_ok($_) for @psgi; my $binfoo = "$inboxdir/binfoo.git"; - system(qw(git init --bare -q), $binfoo) == 0 or die "git init: $?"; + require PublicInbox::Import; + PublicInbox::Import::init_bare($binfoo); require_ok 'PublicInbox::ViewVCS'; my $big_size = do { no warnings 'once'; diff --git a/t/thread-cycle.t b/t/thread-cycle.t index e9ea0a27..d6545c6d 100644 --- a/t/thread-cycle.t +++ b/t/thread-cycle.t @@ -19,6 +19,7 @@ sub make_objs { my $msg = $_; $msg->{ds} ||= ++$n; $msg->{references} =~ s/\s+/ /sg 
if $msg->{references}; + $msg->{blob} = '0'x40; # any dummy value will do, here my $simple = Email::Simple->create(header => [ 'Message-ID' => "<$msg->{mid}>", 'References' => $msg->{references}, @@ -100,13 +101,13 @@ done_testing(); sub thread_to_s { my ($msgs) = @_; my $rootset = PublicInbox::SearchThread::thread($msgs, sub { - [ sort { $a->{id} cmp $b->{id} } @{$_[0]} ] }); + [ sort { $a->{mid} cmp $b->{mid} } @{$_[0]} ] }); my $st = ''; my @q = map { (0, $_) } @$rootset; while (@q) { my $level = shift @q; my $node = shift @q or next; - $st .= (" "x$level). "$node->{id}\n"; + $st .= (" "x$level). "$node->{mid}\n"; my $cl = $level + 1; unshift @q, map { ($cl, $_) } @{$node->{children}}; } diff --git a/t/utf8.mbox b/t/utf8.eml similarity index 90% rename from t/utf8.mbox rename to t/utf8.eml index cebaf9b0..9bf1002c 100644 --- a/t/utf8.mbox +++ b/t/utf8.eml @@ -1,4 +1,3 @@ -From e@yhbt.net Thu Jan 01 00:00:00 1970 Date: Thu, 01 Jan 1970 00:00:00 +0000 To: =?utf-8?Q?El=C3=A9anor?= From: =?utf-8?Q?El=C3=A9anor?= diff --git a/t/v1-add-remove-add.t b/t/v1-add-remove-add.t index 2867bb94..fdf06a96 100644 --- a/t/v1-add-remove-add.t +++ b/t/v1-add-remove-add.t @@ -9,7 +9,6 @@ use PublicInbox::TestCommon; require_mods(qw(DBD::SQLite Search::Xapian)); require PublicInbox::SearchIdx; my ($inboxdir, $for_destroy) = tmpdir(); -is(system(qw(git init --bare -q), $inboxdir), 0); my $ibx = { inboxdir => $inboxdir, name => 'test-add-remove-add', @@ -27,8 +26,9 @@ my $mime = PublicInbox::MIME->create( body => "hello world\n", ); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); +$im->init_bare; ok($im->add($mime), 'message added'); -ok($im->remove($mime), 'message added'); +ok($im->remove($mime), 'message removed'); ok($im->add($mime), 'message added again'); $im->done; my $rw = PublicInbox::SearchIdx->new($ibx, 1); diff --git a/t/v1reindex.t b/t/v1reindex.t index 240e28f9..378c8efb 100644 --- a/t/v1reindex.t +++ b/t/v1reindex.t @@ -12,7 +12,6 @@ 
require_mods(qw(DBD::SQLite Search::Xapian)); use_ok 'PublicInbox::SearchIdx'; use_ok 'PublicInbox::Import'; my ($inboxdir, $for_destroy) = tmpdir(); -is(system(qw(git init -q --bare), $inboxdir), 0); my $ibx_config = { inboxdir => $inboxdir, name => 'test-v1reindex', @@ -35,6 +34,7 @@ my ($mark1, $mark2, $mark3, $mark4); my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); + $im->init_bare; foreach my $i (1..10) { $mime->header_set('Message-Id', "<$i\@example.com>"); ok($im->add($mime), "message $i added"); diff --git a/t/v2-add-remove-add.t b/t/v2-add-remove-add.t index c427de6e..328f5326 100644 --- a/t/v2-add-remove-add.t +++ b/t/v2-add-remove-add.t @@ -29,7 +29,7 @@ my $mime = PublicInbox::MIME->create( my $im = PublicInbox::V2Writable->new($ibx, 1); $im->{parallel} = 0; ok($im->add($mime), 'message added'); -ok($im->remove($mime), 'message added'); +ok($im->remove($mime), 'message removed'); ok($im->add($mime), 'message added again'); $im->done; my $msgs = $ibx->recent({limit => 1000}); diff --git a/t/v2mda.t b/t/v2mda.t index c2118a89..e9dcdf44 100644 --- a/t/v2mda.t +++ b/t/v2mda.t @@ -72,7 +72,7 @@ is($saved->{mime}->as_string, $mime->as_string, 'injected message'); my $config = "$ENV{PI_DIR}/config"; ok(-f $config, 'config exists'); my $k = 'publicinboxmda.spamcheck'; - is(system('git', 'config', "--file=$config", $k, 'none'), 0, + is(xsys('git', 'config', "--file=$config", $k, 'none'), 0, 'disabled spamcheck for mda'); ok(run_script(['-mda'], undef, $rdr), 'mda did not die'); diff --git a/t/v2mirror.t b/t/v2mirror.t index 2e23e763..ecf96891 100644 --- a/t/v2mirror.t +++ b/t/v2mirror.t @@ -76,7 +76,7 @@ foreach my $i (0..$epoch_max) { "http://$host:$port/v2/$i$sfx", "$tmpdir/m/git/$i.git"); - is(system(@cmd), 0, "cloned $i.git"); + is(xsys(@cmd), 0, "cloned $i.git"); ok(-d "$tmpdir/m/git/$i.git", "mirror $i OK"); } @@ -102,7 +102,7 @@ $ibx->cleanup; my $fetch_each_epoch = 
sub { foreach my $i (0..$epoch_max) { my $dir = "$tmpdir/m/git/$i.git"; - is(system('git', "--git-dir=$dir", 'fetch', '-q'), 0, + is(xsys('git', "--git-dir=$dir", 'fetch', '-q'), 0, 'fetch successful'); } }; @@ -187,6 +187,37 @@ is($mibx->git->check($to_purge), undef, 'unindex+prune successful in mirror'); is(scalar($mset->items), 0, '1@example.com no longer visible in mirror'); } +if ('max size') { + $mime->header_set('Message-ID', '<2big@a>'); + my $max = '2k'; + $mime->body_str_set("z\n" x 1024); + ok($v2w->add($mime), "add big message"); + $v2w->done; + $ibx->cleanup; + $fetch_each_epoch->(); + PublicInbox::InboxWritable::cleanup($mibx); + my $cmd = ['-index', "$tmpdir/m", "--max-size=$max" ]; + my $opt = { 2 => \(my $err) }; + ok(run_script($cmd, undef, $opt), 'indexed with --max-size'); + like($err, qr/skipping [a-f0-9]{40,}/, 'warned about skipping message'); + $mset = $mibx->search->reopen->query('m:2big@a', {mset =>1}); + is(scalar($mset->items), 0, 'large message not indexed'); + + { + open my $fh, '>>', $pi_config or die; + print $fh <search->reopen->query('m:2big@a', {mset =>1}); + is(scalar($mset->items), 0, 'large message not re-indexed'); +} + ok($td->kill, 'killed httpd'); $td->join; diff --git a/t/v2reindex.t b/t/v2reindex.t index 7c14117a..b6164ff8 100644 --- a/t/v2reindex.t +++ b/t/v2reindex.t @@ -18,12 +18,11 @@ my $ibx_config = { -primary_address => 'test@example.com', indexlevel => 'full', }; -my $agpl = eval { +my $agpl = do { open my $fh, '<', 'COPYING' or die "can't open COPYING: $!"; local $/; <$fh>; }; -$agpl or die "AGPL or die :P\n"; my $phrase = q("defending all users' freedom"); my $mime = PublicInbox::MIME->create( header => [ diff --git a/t/v2writable.t b/t/v2writable.t index 66d5663e..8897062a 100644 --- a/t/v2writable.t +++ b/t/v2writable.t @@ -48,7 +48,7 @@ my $git0; if ('ensure git configs are correct') { my @cmd = (qw(git config), "--file=$inboxdir/all.git/config", qw(core.sharedRepository 0644)); - is(system(@cmd), 0, "set 
sharedRepository in all.git"); + is(xsys(@cmd), 0, "set sharedRepository in all.git"); $git0 = PublicInbox::Git->new("$inboxdir/git/0.git"); chomp(my $v = $git0->qx(qw(config core.sharedRepository))); is($v, '0644', 'child repo inherited core.sharedRepository'); diff --git a/t/watch_maildir.t b/t/watch_maildir.t index 5fb908fb..9ed04cab 100644 --- a/t/watch_maildir.t +++ b/t/watch_maildir.t @@ -6,6 +6,7 @@ use Email::MIME; use Cwd; use PublicInbox::Config; use PublicInbox::TestCommon; +use PublicInbox::Import; require_mods(qw(Filesys::Notify::Simple)); my ($tmpdir, $for_destroy) = tmpdir(); my $git_dir = "$tmpdir/test.git"; @@ -15,7 +16,7 @@ use_ok 'PublicInbox::WatchMaildir'; use_ok 'PublicInbox::Emergency'; my $cfgpfx = "publicinbox.test"; my $addr = 'test-public@example.com'; -is(system(qw(git init -q --bare), $git_dir), 0, 'initialized git dir'); +PublicInbox::Import::init_bare($git_dir); my $msg = < }; + $msg = do { local $/; <$fh> }; PublicInbox::Emergency->new($maildir)->prepare(\$msg); PublicInbox::WatchMaildir->new($config)->scan('full'); my ($nr, $msgs) = $srch->reopen->query('dfpost:6e006fd7'); @@ -141,7 +142,7 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n); my $v1repo = "$tmpdir/v1"; my $v1pfx = "publicinbox.v1"; my $v1addr = 'v1-public@example.com'; - is(system(qw(git init -q --bare), $v1repo), 0, 'v1 init OK'); + PublicInbox::Import::init_bare($v1repo); my $cfg2 = < +# License: AGPL-3.0+ +use strict; +use Test::More; +use PublicInbox::Config; +use PublicInbox::TestCommon; +require_git(2.6); +require_mods(qw(Search::Xapian DBD::SQLite Filesys::Notify::Simple)); +my ($tmpdir, $for_destroy) = tmpdir(); +my $inboxdir = "$tmpdir/v2"; +my $maildir = "$tmpdir/md"; +use_ok 'PublicInbox::WatchMaildir'; +use_ok 'PublicInbox::Emergency'; +my $cfgpfx = "publicinbox.test"; +my $addr = 'test-public@example.com'; +my @cmd = ('-init', '-V2', 'test', $inboxdir, + 'http://example.com/list', $addr); +local $ENV{PI_CONFIG} = "$tmpdir/pi_config"; 
+ok(run_script(\@cmd), 'public-inbox init OK'); + +my $msg_to = < +Date: Sat, 18 Apr 2020 00:00:00 +0000 + +content1 +EOF + +my $msg_cc = < +Date: Sat, 18 Apr 2020 00:01:00 +0000 + +content2 +EOF + +my $msg_none = < +Date: Sat, 18 Apr 2020 00:02:00 +0000 + +content3 +EOF + +PublicInbox::Emergency->new($maildir)->prepare(\$msg_to); +PublicInbox::Emergency->new($maildir)->prepare(\$msg_cc); +PublicInbox::Emergency->new($maildir)->prepare(\$msg_none); + +my $cfg = <new(\$cfg); +PublicInbox::WatchMaildir->new($config)->scan('full'); +my $ibx = $config->lookup_name('test'); +ok($ibx, 'found inbox by name'); + +my $num = $ibx->mm->num_for('to@a.com'); +ok(defined $num, 'Matched for address in To:'); +$num = $ibx->mm->num_for('cc@a.com'); +ok(defined $num, 'Matched for address in Cc:'); +$num = $ibx->mm->num_for('none@a.com'); +is($num, undef, 'No match without address in To: or Cc:'); + +done_testing; diff --git a/t/www_listing.t b/t/www_listing.t index 9230329c..31d76356 100644 --- a/t/www_listing.t +++ b/t/www_listing.t @@ -6,6 +6,7 @@ use warnings; use Test::More; use PublicInbox::Spawn qw(which); use PublicInbox::TestCommon; +use PublicInbox::Import; require_mods(qw(URI::Escape Plack::Builder Digest::SHA IO::Compress::Gzip IO::Uncompress::Gunzip HTTP::Tiny)); require PublicInbox::WwwListing; @@ -18,13 +19,15 @@ use_ok 'PublicInbox::Git'; my ($tmpdir, $for_destroy) = tmpdir(); my $bare = PublicInbox::Git->new("$tmpdir/bare.git"); -is(system(qw(git init -q --bare), $bare->{git_dir}), 0, 'git init --bare'); +PublicInbox::Import::init_bare($bare->{git_dir}); is(PublicInbox::WwwListing::fingerprint($bare), undef, 'empty repo has no fingerprint'); { my $fi_data = './t/git.fast-import-data'; - local $ENV{GIT_DIR} = $bare->{git_dir}; - is(system("git fast-import --quiet <$fi_data"), 0, 'fast-import'); + open my $fh, '<', $fi_data or die "open $fi_data: $!"; + my $env = { GIT_DIR => $bare->{git_dir} }; + is(xsys([qw(git fast-import --quiet)], $env, { 0 => $fh }), 0, + 
'fast-import'); } like(PublicInbox::WwwListing::fingerprint($bare), qr/\A[a-f0-9]{40}\z/, @@ -75,17 +78,17 @@ SKIP: { ok($sock, 'sock created'); my ($host, $port) = ($sock->sockhost, $sock->sockport); my @clone = qw(git clone -q -s --bare); - is(system(@clone, $bare->{git_dir}, $alt), 0, 'clone shared repo'); + is(xsys(@clone, $bare->{git_dir}, $alt), 0, 'clone shared repo'); - system(qw(git init --bare -q), "$v2/all.git") == 0 or die; + PublicInbox::Import::init_bare("$v2/all.git"); for my $i (0..2) { - is(system(@clone, $alt, "$v2/git/$i.git"), 0, "clone epoch $i"); + is(xsys(@clone, $alt, "$v2/git/$i.git"), 0, "clone epoch $i") } ok(open(my $fh, '>', "$v2/inbox.lock"), 'mock a v2 inbox'); open $fh, '>', "$alt/description" or die; print $fh "we're all clones\n" or die; close $fh or die; - is(system('git', "--git-dir=$alt", qw(config gitweb.owner lorelei)), 0, + is(xsys('git', "--git-dir=$alt", qw(config gitweb.owner lorelei)), 0, 'set gitweb user'); ok(unlink("$bare->{git_dir}/description"), 'removed bare/description'); open $fh, '>', $cfgfile or die; @@ -113,7 +116,8 @@ SKIP: { tiny_test($json, $host, $port); - skip 'skipping grok-pull integration test', 2 if !which('grok-pull'); + my $grok_pull = which('grok-pull') or + skip('skipping grok-pull integration test', 2); ok(mkdir("$tmpdir/mirror"), 'prepare grok mirror dest'); open $fh, '>', "$tmpdir/repos.conf" or die; @@ -128,7 +132,7 @@ mymanifest = $tmpdir/local-manifest.js.gz close $fh or die; - system(qw(grok-pull -c), "$tmpdir/repos.conf"); + xsys($grok_pull, '-c', "$tmpdir/repos.conf"); is($? >> 8, 127, 'grok-pull exit code as expected'); for (qw(alt bare v2/git/0.git v2/git/1.git v2/git/2.git)) { ok(-d "$tmpdir/mirror/$_", "grok-pull created $_"); @@ -148,7 +152,7 @@ mymanifest = $tmpdir/per-inbox-manifest.js.gz close $fh or die; ok(mkdir("$tmpdir/per-inbox"), 'prepare single-v2-inbox mirror'); - system(qw(grok-pull -c), "$tmpdir/per-inbox.conf"); + xsys($grok_pull, '-c', "$tmpdir/per-inbox.conf"); is($? 
>> 8, 127, 'grok-pull exit code as expected'); for (qw(v2/git/0.git v2/git/1.git v2/git/2.git)) { ok(-d "$tmpdir/per-inbox/$_", "grok-pull created $_"); diff --git a/xt/git-http-backend.t b/xt/git-http-backend.t index f2ae44fe..2f02725a 100644 --- a/xt/git-http-backend.t +++ b/xt/git-http-backend.t @@ -83,13 +83,13 @@ SKIP: { # make sure Last-Modified + If-Modified-Since works with curl skip 'curl(1) not found', $nr unless $curl; my $url = "http://$host:$port/description"; my $dst = "$tmpdir/desc"; - is(system($curl, qw(-RsSf), '-o', $dst, $url), 0, 'curl -R'); + is(xsys($curl, qw(-RsSf), '-o', $dst, $url), 0, 'curl -R'); is((stat($dst))[9], $mtime, 'curl used remote mtime'); - is(system($curl, qw(-sSf), '-z', $dst, '-o', "$dst.2", $url), 0, + is(xsys($curl, qw(-sSf), '-z', $dst, '-o', "$dst.2", $url), 0, 'curl -z noop'); ok(!-e "$dst.2", 'no modification, nothing retrieved'); utime(0, 0, $dst) or die "utime failed: $!"; - is(system($curl, qw(-sSfR), '-z', $dst, '-o', "$dst.2", $url), 0, + is(xsys($curl, qw(-sSfR), '-z', $dst, '-o', "$dst.2", $url), 0, 'curl -z updates'); ok(-e "$dst.2", 'faked modification, got new file retrieved'); }