+Date: Fri, 17 Apr 2020 08:48:59 +0000
From: Eric Wong <e@yhbt.net>
To: meta@public-inbox.org
Subject: [ANNOUNCE] public-inbox 1.4.0
-Message-Id: <20200417084800.public-inbox-1.4.0-rele@sed>
+Message-ID: <20200417084800.public-inbox-1.4.0-rele@sed>
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8
+Content-Disposition: inline
This release focuses on reproducibility improvements and
bugfixes for corner-cases. Busy instances of PublicInbox::WWW
--- /dev/null
+From: Eric Wong <e@yhbt.net>
+To: meta@public-inbox.org
+Subject: [WIP] public-inbox 1.5.0
+MIME-Version: 1.0
+Content-Type: text/plain; charset=utf-8
+Content-Disposition: inline
+
+TBD
+
+Please report bugs via plain-text mail to: meta@public-inbox.org
+
+See archives at https://public-inbox.org/meta/ for all history.
+See https://public-inbox.org/TODO for what the future holds.
require PublicInbox::WwwAtomStream;
# WwwAtomStream stats this dir for mtime
my $astream = PublicInbox::WwwAtomStream->new($ctx);
- delete $ctx->{emit_header};
+ delete $astream->{emit_header};
my $ibx = $ctx->{-inbox};
my $title = PublicInbox::WwwAtomStream::title_tag($ibx->description);
my $updated = PublicInbox::WwwAtomStream::feed_updated(gmtime($mtime));
watchheader = List-Id:<test.example.com>
If specified, L<public-inbox-watch(1)> will only process mail matching
-the given header. Multiple values are not currently supported.
+the given header. If specified multiple times, mail will be processed
+if it matches any of the values.
Default: none; only for L<public-inbox-watch(1)> users
See L<public-inbox-edit(1)>
+=item publicinbox.indexMaxSize
+
+See L<public-inbox-index(1)>
+
=item publicinbox.wwwlisting
Enable a HTML listing style when the root path of the URL '/' is accessed.
L<public-inbox-edit(1)> or L<public-inbox-purge(1)> to ensure data
is expunged from mirrors.
+=item --max-size SIZE
+
+Sets or overrides L</publicinbox.indexMaxSize> on a
+per-invocation basis. See L</publicinbox.indexMaxSize>
+below.
+
=back
=head1 FILES
v2 inboxes are described in L<public-inbox-v2-format>.
+=head1 CONFIGURATION
+
+=over 8
+
+=item publicinbox.indexMaxSize
+
+Prevents indexing of messages larger than the specified size
+value. A single suffix modifier of C<k>, C<m> or C<g> is
+supported, thus the value of C<1m> prevents indexing of
+messages larger than one megabyte.
+
+This is useful for avoiding memory exhaustion in mirrors.
+
+Default: none
+
+=back
+
=head1 ENVIRONMENT
=over 8
There may be hundreds or thousands of these objects in memory
at-a-time, so fields are pruned if unneeded.
-* PublicInbox::SearchThread::Msg - container for message threading
+* PublicInbox::SearchThread::Msg - subclass of Smsg
Common abbreviation: $cont or $node
Used by: PublicInbox::WWW
- The container we use for a non-recursive[1] variant of
+ The structure we use for a non-recursive[1] variant of
JWZ's algorithm: <https://www.jwz.org/doc/threading.html>.
- This holds a $smsg and is only used for message threading.
- This wrapper class may go away in the future and handled
- directly by PublicInbox::Smsg to save memory.
+ Nowadays, this is a re-blessed $smsg with additional fields.
As with $smsg objects, there may be hundreds or thousands
of these objects in memory at-a-time.
--- /dev/null
+semi-automatic memory management in public-inbox
+------------------------------------------------
+
+The majority of public-inbox is implemented in Perl 5, a
+language and interpreter not particularly known for being
+memory-efficient.
+
+We strive to keep processes small to improve locality, allow
+the kernel to cache more files, and to be a good neighbor to
+other processes running on the machine. Taking advantage of
+automatic reference counting (ARC) in Perl allows us to
+deterministically release memory back to the heap.
+
+We start with a simple data model with few circular
+references. This both eases human understanding and reduces
+the likelihood of bugs.
+
+Knowing the relative sizes and quantities of our data
+structures, we limit the scope of allocations as much as
+possible and keep large allocations shortest-lived. This
+minimizes both the cognitive overhead on humans and the
+memory pressure on the machine.
+
+Short-lived non-immortal closures (aka "anonymous subs") are
+avoided in long-running daemons unless required for
+compatibility with PSGI. Closures are memory-intensive and
+may make allocation lifetimes less obvious to humans. They
+are also the source of memory leaks in older versions of
+Perl, including 5.16.3 found in enterprise distros.
+
+We also use Perl's `delete' and `undef' built-ins to drop
+reference counts sooner than scope allows. These functions
+are required to break the few reference cycles we have that
+would otherwise lead to leaks.
+
+Of note, `undef' may be used in two ways:
+
+1. to free(3) the underlying buffer:
+
+ undef $scalar;
+
+2. to reset a buffer but reduce realloc(3) on subsequent growth:
+
+ $scalar = ""; # useful when repeated appending
+ $scalar = undef; # usually not needed
+
+In the future, our internal data model will be further
+flattened and simplified to reduce the overhead imposed by
+small objects. Large allocations may also be avoided by
+optionally using Inline::C.
$xurls{$_} = ".$n.1.html"
}
-for (qw[flock(2) setrlimit(2) vfork(2)]) {
+for (qw[make(1) flock(2) setrlimit(2) vfork(2) tmpfs(5)]) {
my ($n, $s) = (/([\w\-]+)\((\d)\)/);
- $xurls{$_} = "http://www.man7.org/linux/man-pages/man2/$n.$s.html";
+ $xurls{$_} = "http://www.man7.org/linux/man-pages/man$s/$n.$s.html";
}
for (qw[git(1)
'https://git.kernel.org/pub/scm/utils/grokmirror/grokmirror.git' .
'/tree/man/grok-pull.1.rst';
$xurls{'git-filter-repo(1)'} = 'https://github.com/newren/git-filter-repo'.
- './blob/master/Documentation/git-filter-repo.txt';
+ '/blob/master/Documentation/git-filter-repo.txt';
$xurls{'ssoma(1)'} = 'https://ssoma.public-inbox.org/ssoma.txt';
$xurls{'cgitrc(5)'} = 'https://git.zx2c4.com/cgit/tree/cgitrc.5.txt';
+$xurls{'prove(1)'} = 'https://perldoc.perl.org/prove.html';
my $str = do { local $/; <STDIN> };
my ($title) = ($str =~ /\A([^\n]+)/);
See Documentation/technical/ in the source tree for more details
on specific topics, in particular data_structures.txt
+Faster tests
+------------
+
+The `make test' target provided by MakeMaker does not run in
+parallel. Our `make check' target supports parallel runs, and
+it also creates a `.prove' file to optimize `make check-run'.
+
+The prove(1) command (distributed with Perl) may also be used
+for finer-grained testing: prove -bvw t/foo.t
+
+If using a make(1) (e.g. GNU make) with `include' support, the
+`config.mak' Makefile snippet can be used to set environment
+variables such as PERL_INLINE_DIRECTORY and TMPDIR.
+
+With PERL_INLINE_DIRECTORY set to enable Inline::C support and
+TMPDIR pointed to a tmpfs(5) mount, `make check-run' takes 6-10s
+(load-dependent) on a busy workstation built in 2010.
+
Perl notes
----------
perl Makefile.PL
make
- make test
+ make test # see HACKING for faster tests for hackers
make install # root permissions may be needed
When installing Search::Xapian, make sure the underlying Xapian
Documentation/RelNotes/v1.2.0.eml
Documentation/RelNotes/v1.3.0.eml
Documentation/RelNotes/v1.4.0.eml
+Documentation/RelNotes/v1.5.0.eml
Documentation/dc-dlvr-spam-flow.txt
Documentation/design_notes.txt
Documentation/design_www.txt
Documentation/standards.perl
Documentation/technical/data_structures.txt
Documentation/technical/ds.txt
+Documentation/technical/memory.txt
Documentation/technical/whyperl.txt
Documentation/txt2pre
HACKING
t/indexlevels-mirror-v1.t
t/indexlevels-mirror.t
t/init.t
-t/iso-2202-jp.mbox
+t/iso-2202-jp.eml
t/linkify.t
t/main-bin/spamc
t/mda.t
t/spawn.t
t/thread-cycle.t
t/time.t
-t/utf8.mbox
+t/utf8.eml
t/v1-add-remove-add.t
t/v1reindex.t
t/v2-add-remove-add.t
t/watch_filter_rubylang.t
t/watch_maildir.t
t/watch_maildir_v2.t
+t/watch_multiple_headers.t
t/www_altid.t
t/www_listing.t
t/www_static.t
my ($argv, $opt, $cfg) = @_;
$opt ||= {};
- $cfg //= eval { PublicInbox::Config->new };
+ $cfg //= PublicInbox::Config->new;
if ($opt->{all}) {
my $cfgfile = PublicInbox::Config::default_file();
$cfg or die "--all specified, but $cfgfile not readable\n";
}
}
+# same unit factors as git: optional k/m/g suffix (case-insensitive), powers of 1024
+sub parse_unsigned ($) { # takes a scalar ref and rewrites the referenced value in place
+ my ($max_size) = @_;
+
+ $$max_size =~ /\A([0-9]+)([kmg])?\z/i or return; # malformed: value left untouched
+ my ($n, $unit_factor) = ($1, $2 // '');
+ my %u = ( k => 1024, m => 1024**2, g => 1024**3 );
+ $$max_size = $n * ($u{lc($unit_factor)} // 1); # no suffix => factor of 1
+ 1; # true on success
+}
+
1;
my $ibx = {};
foreach my $k (qw(inboxdir filter newsgroup
- watch watchheader httpbackendmax
+ watch httpbackendmax
replyto feedmax nntpserver indexlevel)) {
my $v = $self->{"$pfx.$k"};
$ibx->{$k} = $v if defined $v;
# TODO: more arrays, we should support multi-value for
# more things to encourage decentralization
foreach my $k (qw(address altid nntpmirror coderepo hide listid url
- infourl)) {
+ infourl watchheader)) {
if (defined(my $v = $self->{"$pfx.$k"})) {
$ibx->{$k} = _array($v);
}
$? == 0 or die join(' ', @$cmd) . " failed: $?\n";
}
+my @INIT_FILES = ('HEAD' => "ref: refs/heads/master\n",
+ 'description' => <<EOD,
+Unnamed repository; edit this file 'description' to name the repository.
+EOD
+ 'config' => <<EOC);
+[core]
+ repositoryFormatVersion = 0
+ filemode = true
+ bare = true
+[repack]
+ writeBitmaps = true
+EOC
+
sub init_bare {
- my ($dir) = @_;
- my @cmd = (qw(git init --bare -q), $dir);
- run_die(\@cmd);
- # set a reasonable default:
- @cmd = (qw/git config/, "--file=$dir/config",
- 'repack.writeBitmaps', 'true');
- run_die(\@cmd);
+ my ($dir) = @_; # or self
+ $dir = $dir->{git}->{git_dir} if ref($dir);
+ require File::Path;
+ File::Path::mkpath([ map { "$dir/$_" } qw(objects/info refs/heads) ]);
+ for (my $i = 0; $i < @INIT_FILES; $i++) {
+ my $f = $dir.'/'.$INIT_FILES[$i++];
+ next if -f $f;
+ open my $fh, '>', $f or die "open $f: $!";
+ print $fh $INIT_FILES[$i] or die "print $f: $!";
+ close $fh or die "close $f: $!";
+ }
}
sub done {
sub _set_limiter ($$$) {
my ($self, $pi_config, $pfx) = @_;
my $lkey = "-${pfx}_limiter";
- $self->{$lkey} ||= eval {
+ $self->{$lkey} ||= do {
# full key is: publicinbox.$NAME.httpbackendmax
my $mkey = $pfx.'max';
my $val = $self->{$mkey} or return;
sub git_epoch {
my ($self, $epoch) = @_;
$self->version == 2 or return;
- $self->{"$epoch.git"} ||= eval {
+ $self->{"$epoch.git"} ||= do {
my $git_dir = "$self->{inboxdir}/git/$epoch.git";
my $g = PublicInbox::Git->new($git_dir);
$g->{-httpbackend_limiter} = $self->{-httpbackend_limiter};
sub git {
my ($self) = @_;
- $self->{git} ||= eval {
+ $self->{git} ||= do {
my $git_dir = $self->{inboxdir};
$git_dir .= '/all.git' if $self->version == 2;
my $g = PublicInbox::Git->new($git_dir);
sub description {
my ($self) = @_;
- $self->{description} //= do {
+ ($self->{description} //= do {
my $desc = try_cat("$self->{inboxdir}/description");
local $/ = "\n";
chomp $desc;
$desc =~ s/\s+/ /smg;
- $desc eq '' ? '($INBOX_DIR/description missing)' : $desc;
- };
+ $desc eq '' ? undef : $desc;
+ }) // '($INBOX_DIR/description missing)';
}
sub cloneurl {
my ($self) = @_;
- $self->{cloneurl} //=
- [ split(/\s+/s, try_cat("$self->{inboxdir}/cloneurl")) ];
+ ($self->{cloneurl} //= do {
+ my $s = try_cat("$self->{inboxdir}/cloneurl");
+ my @urls = split(/\s+/s, $s);
+ scalar(@urls) ? \@urls : undef
+ }) // [];
}
sub base_url {
# for v1 users w/o SQLite only
sub msg_by_path ($$;$) {
my ($self, $path, $ref) = @_;
- my $str = git($self)->cat_file('HEAD:'.$path, $ref);
- $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s if $str;
- $str;
+ git($self)->cat_file('HEAD:'.$path, $ref);
}
sub msg_by_smsg ($$;$) {
return unless defined $smsg;
defined(my $blob = $smsg->{blob}) or return;
- my $str = git($self)->cat_file($blob, $ref);
- $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s if $str;
- $str;
+ git($self)->cat_file($blob, $ref);
}
sub smsg_mime {
(is_maildir_basename($p[-1]) && -f $path) ? 1 : 0;
}
-sub maildir_path_load ($) {
+sub mime_from_path ($) {
my ($path) = @_;
if (open my $fh, '<', $path) {
local $/;
opendir my $dh, "$dir/$sub" or die "opendir $dir/$sub: $!\n";
while (defined(my $fn = readdir($dh))) {
next unless is_maildir_basename($fn);
- my $mime = maildir_path_load("$dir/$fn") or next;
+ my $mime = mime_from_path("$dir/$fn") or next;
if (my $filter = $self->filter($im)) {
my $ret = $filter->scrub($mime) or return;
'List-Post', "<mailto:$ibx->{-primary_address}>",
);
my $crlf = $header_obj->crlf;
- my $buf = "From mboxrd\@z Thu Jan 1 00:00:00 1970\n" .
- $header_obj->as_string;
+ my $buf = $header_obj->as_string;
+ # fixup old bug from import (pre-a0c07cba0e5d8b6a)
+ $buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
+ $buf = "From mboxrd\@z Thu Jan 1 00:00:00 1970" . $crlf . $buf;
+
for (my $i = 0; $i < @append; $i += 2) {
my $k = $append[$i];
my $v = $append[$i + 1];
sub msg_hdr_write ($$$) {
my ($self, $hdr, $body_follows) = @_;
$hdr = $hdr->as_string;
+ # fixup old bug from import (pre-a0c07cba0e5d8b6a)
+ $hdr =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
utf8::encode($hdr);
$hdr =~ s/(?<!\r)\n/\r\n/sg; # Alpine barfs without this
${$self->{hdr_buf}}, $filter);
$wcb->($r);
}
-
- # Workaround a leak under Perl 5.16.3 when combined with
- # Plack::Middleware::Deflater:
- $wcb = undef;
}
sub psgi_return_start { # may run later, much later...
$self->{lock_path} = "$inboxdir/ssoma.lock";
my $dir = $self->xdir;
$self->{over} = PublicInbox::OverIdx->new("$dir/over.sqlite3");
+ $self->{index_max_size} = $ibx->{index_max_size};
} elsif ($version == 2) {
defined $shard or die "shard is required for v2\n";
# shard is a number
sub do_cat_mail {
my ($git, $blob, $sizeref) = @_;
- my $mime = eval {
- my $str = $git->cat_file($blob, $sizeref);
- # fixup bugs from import:
- $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
- PublicInbox::MIME->new($str);
- };
- $@ ? undef : $mime;
+ my $str = $git->cat_file($blob, $sizeref) or
+ die "BUG: $blob not found in $git->{git_dir}";
+ PublicInbox::MIME->new($str);
}
# called by public-inbox-index
}
}
+sub too_big ($$$) { # returns 1 (after warning) when $oid is larger than {index_max_size}
+ my ($self, $git, $oid) = @_;
+ my $max_size = $self->{index_max_size} or return; # unset or 0: no limit, nothing is too big
+ my (undef, undef, $size) = $git->check($oid); # third element of check() is used as the size
+ die "E: bad $oid in $git->{git_dir}\n" if !defined($size);
+ return if $size <= $max_size; # within the limit
+ warn "W: skipping $oid ($size > $max_size)\n";
+ 1;
+}
+
# only for v1
sub read_log {
my ($self, $log, $add_cb, $del_cb, $batch_cb) = @_;
}
next;
}
- my $mime = do_cat_mail($git, $blob, \$bytes) or next;
+ next if too_big($self, $git, $blob);
+ my $mime = do_cat_mail($git, $blob, \$bytes);
my $smsg = bless {}, 'PublicInbox::Smsg';
batch_adjust(\$max, $bytes, $batch_cb, $latest, ++$nr);
$smsg->{blob} = $blob;
$add_cb->($self, $mime, $smsg);
} elsif ($line =~ /$delmsg/o) {
my $blob = $1;
- $D{$blob} = 1;
+ $D{$blob} = 1 unless too_big($self, $git, $blob);
} elsif ($line =~ /^commit ($h40)/o) {
$latest = $1;
$newest ||= $latest;
close($log) or die "git log failed: \$?=$?";
# get the leftovers
foreach my $blob (keys %D) {
- my $mime = do_cat_mail($git, $blob, \$bytes) or next;
+ my $mime = do_cat_mail($git, $blob, \$bytes);
$del_cb->($self, $mime);
}
$batch_cb->($nr, $latest, $newest);
sub thread {
my ($msgs, $ordersub, $ctx) = @_;
- my $id_table = {};
+
+ # A. put all current $msgs (non-ghosts) into %id_table
+ my %id_table = map {;
+ # this delete saves around 4K across 1K messages
+ # TODO: move this to a more appropriate place, breaks tests
+ # if we do it during psgi_cull
+ delete $_->{num};
+
+ $_->{mid} => PublicInbox::SearchThread::Msg::cast($_);
+ } @$msgs;
# Sadly, we sort here anyways since the fill-in-the-blanks References:
# can be shakier if somebody used In-Reply-To with multiple, disparate
# always determine ordering when somebody uses multiple In-Reply-To.
# We'll trust the client Date: header here instead of the Received:
# time since this is for display (and not retrieval)
- _add_message($id_table, $_) for sort { $a->{ds} <=> $b->{ds} } @$msgs;
+ _set_parent(\%id_table, $_) for sort { $a->{ds} <=> $b->{ds} } @$msgs;
my $ibx = $ctx->{-inbox};
my $rootset = [ grep {
!delete($_->{parent}) && $_->visible($ibx)
- } values %$id_table ];
- $id_table = undef;
+ } values %id_table ];
$rootset = $ordersub->($rootset);
$_->order_children($ordersub, $ctx) for @$rootset;
$rootset;
}
-sub _get_cont_for_id ($$) {
- my ($id_table, $mid) = @_;
- $id_table->{$mid} ||= PublicInbox::SearchThread::Msg->new($mid);
-}
-
-sub _add_message ($$) {
- my ($id_table, $smsg) = @_;
-
- # A. if id_table...
- my $this = _get_cont_for_id($id_table, $smsg->{mid});
- $this->{smsg} = $smsg;
-
- # saves around 4K across 1K messages
- # TODO: move this to a more appropriate place, breaks tests
- # if we do it during psgi_cull
- delete $smsg->{num};
+sub _set_parent ($$) {
+ my ($id_table, $this) = @_;
# B. For each element in the message's References field:
- defined(my $refs = $smsg->{references}) or return;
+ defined(my $refs = $this->{references}) or return;
# This loop exists to help fill in gaps left from missing
# messages. It is not needed in a perfect world where
my $prev;
foreach my $ref ($refs =~ m/$MID_EXTRACT/go) {
# Find a Container object for the given Message-ID
- my $cont = _get_cont_for_id($id_table, $ref);
+ my $cont = $id_table->{$ref} //=
+ PublicInbox::SearchThread::Msg::ghost($ref);
# Link the References field's Containers together in
# the order implied by the References header
}
package PublicInbox::SearchThread::Msg;
+use base qw(PublicInbox::Smsg);
use strict;
use warnings;
use Carp qw(croak);
-sub new {
+# declare a ghost smsg (determined by absence of {blob})
+sub ghost {
bless {
- id => $_[1],
+ mid => $_[0],
children => {}, # becomes an array when sorted by ->order(...)
- }, $_[0];
+ }, __PACKAGE__;
+}
+
+# give an existing smsg the methods of this class
+sub cast {
+ my ($smsg) = @_;
+ $smsg->{children} = {};
+ bless $smsg, __PACKAGE__;
}
sub topmost {
my ($self) = @_;
my @q = ($self);
while (my $cont = shift @q) {
- return $cont if $cont->{smsg};
+ return $cont if $cont->{blob};
push @q, values %{$cont->{children}};
}
undef;
croak "Cowardly refusing to become my own parent: $self"
if $self == $child;
- my $cid = $child->{id};
+ my $cid = $child->{mid};
# reparenting:
if (defined(my $parent = $child->{parent})) {
# being folded/mangled by a MUA, and not a missing message.
sub visible ($$) {
my ($self, $ibx) = @_;
- ($self->{smsg} ||= eval { $ibx->smsg_by_mid($self->{id}) }) ||
- (scalar values %{$self->{children}});
+ return 1 if $self->{blob};
+ if (my $by_mid = $ibx->smsg_by_mid($self->{mid})) {
+ %$self = (%$self, %$by_mid);
+ 1;
+ } else {
+ (scalar values %{$self->{children}});
+ }
}
sub order_children {
sub sort_relevance {
[ sort {
- (eval { $b->topmost->{smsg}->{pct} } // 0) <=>
- (eval { $a->topmost->{smsg}->{pct} } // 0)
+ (eval { $b->topmost->{pct} } // 0) <=>
+ (eval { $a->topmost->{pct} } // 0)
} @{$_[0]} ]
}
use POSIX qw(dup2);
use IO::Socket::INET;
our @EXPORT = qw(tmpdir tcp_server tcp_connect require_git require_mods
- run_script start_script key2sub);
+ run_script start_script key2sub xsys xqx);
sub tmpdir (;$) {
my ($base) = @_;
sub key2script ($) {
my ($key) = @_;
- return $key if (index($key, '/') >= 0);
+ return $key if ($key eq 'git' || index($key, '/') >= 0);
# n.b. we may have scripts which don't start with "public-inbox" in
# the future:
$key =~ s/\A([-\.])/public-inbox$1/;
sub wait_for_tail () { sleep(2) }
+# like system() built-in, but uses spawn() for env/rdr + vfork
+sub xsys {
+ my ($cmd, $env, $rdr) = @_;
+ if (ref($cmd)) { # called as xsys(\@cmd, $env, $rdr)
+ $rdr ||= {};
+ } else { # called with a flat argument list, like system(LIST)
+ $cmd = [ @_ ];
+ $env = undef;
+ $rdr = {};
+ }
+ run_script($cmd, $env, { %$rdr, run_mode => 0 });
+ $? >> 8 # return value is the high byte of $?
+}
+
+# like `backtick` or qx{} op, but uses spawn() for env/rdr + vfork
+sub xqx {
+ my ($cmd, $env, $rdr) = @_;
+ $rdr //= {}; # caller's $rdr is copied below, never modified
+ run_script($cmd, $env, { %$rdr, run_mode => 0, 1 => \(my $out) }); # key 1 => ref to $out; presumably captures stdout -- confirm in run_script
+ wantarray ? split(/^/m, $out) : $out; # list context: split at line starts; scalar context: whole string
+}
+
sub start_script {
my ($cmd, $env, $opt) = @_;
my ($key, @argv) = @$cmd;
last_commit => [], # git repo -> commit
};
$self->{shards} = count_shards($self) || nproc_shards($creat);
+ $self->{index_max_size} = $v2ibx->{index_max_size};
bless $self, $class;
}
sub git_init {
my ($self, $epoch) = @_;
my $git_dir = "$self->{-inbox}->{inboxdir}/git/$epoch.git";
- my @cmd = (qw(git init --bare -q), $git_dir);
- PublicInbox::Import::run_die(\@cmd);
- @cmd = (qw/git config/, "--file=$git_dir/config",
+ PublicInbox::Import::init_bare($git_dir);
+ my @cmd = (qw/git config/, "--file=$git_dir/config",
'include.path', '../../all.git/config');
PublicInbox::Import::run_die(\@cmd);
fill_alternates($self, $epoch);
sub mark_deleted ($$$$) {
my ($self, $sync, $git, $oid) = @_;
+ return if PublicInbox::SearchIdx::too_big($self, $git, $oid);
my $msgref = $git->cat_file($oid);
my $mime = PublicInbox::MIME->new($$msgref);
my $mids = mids($mime->header_obj);
}
}
-# reuse Msgmap to store num => oid mapping (rather than num => mid)
-sub multi_mid_q_new () {
- my ($fh, $fn) = tempfile('multi_mid-XXXXXXX', EXLOCK => 0, TMPDIR => 1);
- my $multi_mid = PublicInbox::Msgmap->new_file($fn, 1);
- $multi_mid->{dbh}->do('PRAGMA synchronous = OFF');
- # for Msgmap->DESTROY:
- $multi_mid->{tmp_name} = $fn;
- $multi_mid->{pid} = $$;
- close $fh or die "failed to close $fn: $!";
- $multi_mid
-}
-
sub multi_mid_q_push ($$$) {
my ($self, $sync, $oid) = @_;
my $multi_mid = $sync->{multi_mid} //= PublicInbox::MultiMidQueue->new;
sub reindex_oid ($$$$) {
my ($self, $sync, $git, $oid) = @_;
+ return if PublicInbox::SearchIdx::too_big($self, $git, $oid);
my ($num, $mid0, $len);
my $msgref = $git->cat_file($oid, \$len);
return if $len == 0; # purged
my $nr_c = scalar @$children;
my $nr_s = 0;
my $siblings;
- if (my $smsg = $node->{smsg}) {
- # delete saves about 200KB on a 1K message thread
- if (my $refs = delete $smsg->{references}) {
- ($$irt) = ($refs =~ m/$MID_EXTRACT\z/o);
- }
+ # delete saves about 200KB on a 1K message thread
+ if (my $refs = delete $node->{references}) {
+ ($$irt) = ($refs =~ m/$MID_EXTRACT\z/o);
}
my $irt_map = $mapping->{$$irt} if defined $$irt;
if (defined $irt_map) {
$rv .= $pad . $irt_map->[0];
if ($idx > 0) {
my $prev = $siblings->[$idx - 1];
- my $pmid = $prev->{id};
+ my $pmid = $prev->{mid};
if ($idx > 2) {
my $s = ($idx - 1). ' preceding siblings ...';
$rv .= pad_link($pmid, $level, $s);
} elsif ($idx == 2) {
- my $ppmid = $siblings->[0]->{id};
+ my $ppmid = $siblings->[0]->{mid};
$rv .= $pad . $mapping->{$ppmid}->[0];
}
$rv .= $pad . $mapping->{$pmid}->[0];
$attr =~ s!<a\nhref=[^>]+>([^<]+)</a>!$1!s; # no point linking to self
$rv .= "<b>@ $attr";
if ($nr_c) {
- my $cmid = $children->[0]->{id};
+ my $cmid = $children->[0]->{mid};
$rv .= $pad . $mapping->{$cmid}->[0];
if ($nr_c > 2) {
my $s = ($nr_c - 1). ' more replies';
$rv .= pad_link($cmid, $level + 1, $s);
} elsif (my $cn = $children->[1]) {
- $rv .= $pad . $mapping->{$cn->{id}}->[0];
+ $rv .= $pad . $mapping->{$cn->{mid}}->[0];
}
}
my $next = $siblings->[$idx+1] if $siblings && $idx >= 0;
if ($next) {
- my $nmid = $next->{id};
+ my $nmid = $next->{mid};
$rv .= $pad . $mapping->{$nmid}->[0];
my $nnext = $nr_s - $idx;
if ($nnext > 2) {
my $s = ($nnext - 1).' subsequent siblings';
$rv .= pad_link($nmid, $level, $s);
} elsif (my $nn = $siblings->[$idx + 2]) {
- $rv .= $pad . $mapping->{$nn->{id}}->[0];
+ $rv .= $pad . $mapping->{$nn->{mid}}->[0];
}
}
$rv .= $pad ."<a\nhref=#r$id>$s_s, $s_c; $ctx->{s_nr}</a>\n";
sub pre_thread { # walk_thread callback
my ($ctx, $level, $node, $idx) = @_;
- $ctx->{mapping}->{$node->{id}} = [ '', $node, $idx, $level ];
+ $ctx->{mapping}->{$node->{mid}} = [ '', $node, $idx, $level ];
skel_dump($ctx, $level, $node);
}
my $node = shift @$q or next;
my $cl = $level + 1;
unshift @$q, map { ($cl, $_) } @{$node->{children}};
- if (my $smsg = $ctx->{-inbox}->smsg_mime($node->{smsg})) {
- return thread_index_entry($ctx, $level, $smsg);
+ if ($ctx->{-inbox}->smsg_mime($node)) {
+ return thread_index_entry($ctx, $level, $node);
} else {
return ghost_index_entry($ctx, $level, $node);
}
my $node = shift @q or next;
my $cl = $level + 1;
unshift @q, map { ($cl, $_) } @{$node->{children}};
- $smsg = $ibx->smsg_mime($node->{smsg}) and last;
+ $smsg = $ibx->smsg_mime($node) and last;
}
return missing_thread($ctx) unless $smsg;
sub find_mid_root {
my ($ctx, $level, $node, $idx) = @_;
++$ctx->{root_idx} if $level == 0;
- if ($node->{id} eq $ctx->{mid}) {
+ if ($node->{mid} eq $ctx->{mid}) {
$ctx->{found_mid_at} = $ctx->{root_idx};
return 0;
}
}
sub skel_dump { # walk_thread callback
- my ($ctx, $level, $node) = @_;
- my $smsg = $node->{smsg} or return _skel_ghost($ctx, $level, $node);
+ my ($ctx, $level, $smsg) = @_;
+ $smsg->{blob} or return _skel_ghost($ctx, $level, $smsg);
my $skel = $ctx->{skel};
my $cur = $ctx->{cur};
sub _skel_ghost {
my ($ctx, $level, $node) = @_;
- my $mid = $node->{id};
+ my $mid = $node->{mid};
my $d = ' [not found] ';
$d .= ' ' if exists $ctx->{searchview};
$d .= indent_for($level) . th_pfx($level);
sub sort_ds {
[ sort {
- (eval { $a->topmost->{smsg}->{ds} } || 0) <=>
- (eval { $b->topmost->{smsg}->{ds} } || 0)
+ (eval { $a->topmost->{ds} } || 0) <=>
+ (eval { $b->topmost->{ds} } || 0)
} @{$_[0]} ];
}
# accumulate recent topics if search is supported
# returns 200 if done, 404 if not
sub acc_topic { # walk_thread callback
- my ($ctx, $level, $node) = @_;
- my $mid = $node->{id};
- my $smsg = $node->{smsg} // $ctx->{-inbox}->smsg_by_mid($mid);
- if ($smsg) {
+ my ($ctx, $level, $smsg) = @_;
+ my $mid = $smsg->{mid};
+ my $has_blob = $smsg->{blob} // do {
+ if (my $by_mid = $ctx->{-inbox}->smsg_by_mid($mid)) {
+ %$smsg = (%$smsg, %$by_mid);
+ 1;
+ }
+ };
+ if ($has_blob) {
my $subj = subject_normalized($smsg->{subject});
$subj = '(no subject)' if $subj eq '';
my $ds = $smsg->{ds};
sub ghost_index_entry {
my ($ctx, $level, $node) = @_;
my ($beg, $end) = thread_adj_level($ctx, $level);
- $beg . '<pre>'. ghost_parent($ctx->{-upfx}, $node->{id})
+ $beg . '<pre>'. ghost_parent($ctx->{-upfx}, $node->{mid})
. '</pre>' . $end;
}
use File::Temp 0.19 (); # 0.19 for ->newdir
use PublicInbox::Filter::Base qw(REJECT);
use PublicInbox::Spamcheck;
-*maildir_path_load = *PublicInbox::InboxWritable::maildir_path_load;
+*mime_from_path = \&PublicInbox::InboxWritable::mime_from_path;
sub new {
my ($class, $config) = @_;
my $watch = $ibx->{watch} or return;
if (is_maildir($watch)) {
my $watch_hdrs = [];
- if (my $wh = $ibx->{watchheader}) {
- my ($k, $v) = split(/:/, $wh, 2);
- push @$watch_hdrs, [ $k, qr/\Q$v\E/ ];
+ if (my $whs = $ibx->{watchheader}) {
+ for (@$whs) {
+ my ($k, $v) = split(/:/, $_, 2);
+ push @$watch_hdrs, [ $k, qr/\Q$v\E/ ];
+ }
}
if (my $list_ids = $ibx->{listid}) {
for (@$list_ids) {
my ($self, $path) = @_;
# path must be marked as (S)een
$path =~ /:2,[A-R]*S[T-Za-z]*\z/ or return;
- my $mime = maildir_path_load($path) or return;
+ my $mime = mime_from_path($path) or return;
$self->{config}->each_inbox(sub {
my ($ibx) = @_;
eval {
$warn_cb->(@_);
};
foreach my $ibx (@$inboxes) {
- my $mime = maildir_path_load($path) or next;
+ my $mime = mime_from_path($path) or next;
my $im = _importer_for($self, $ibx);
# any header match means it's eligible for the inbox:
sub new {
my ($class, $ctx, $cb) = @_;
- $ctx->{emit_header} = 1;
$ctx->{feed_base_url} = $ctx->{-inbox}->base_url($ctx->{env});
- bless { cb => $cb || \&close, ctx => $ctx }, $class;
+ bless { cb => $cb || \&close, ctx => $ctx, emit_header => 1 }, $class;
}
sub response {
$email = ascii_html($email);
my $s = '';
- if (delete $ctx->{emit_header}) {
+ if (delete $self->{emit_header}) {
$s .= atom_header($ctx, $title);
}
$s .= "<entry><author><name>$name</name><email>$email</email>" .
url = https://example.com/$name/
url = http://example.onion/$name/
EOS
- for my $k (qw(address listid infourl)) {
+ for my $k (qw(address listid infourl watchheader)) {
defined(my $v = $ibx->{$k}) or next;
$$txt .= "\t$k = $_\n" for @$v;
}
}
}
- for my $k (qw(filter newsgroup obfuscate replyto watchheader)) {
+ for my $k (qw(filter newsgroup obfuscate replyto)) {
defined(my $v = $ibx->{$k}) or next;
$$txt .= "\t$k = $v\n";
}
my $new_dir = shift(@ARGV) or die $usage;
die "$new_dir exists\n" if -d $new_dir;
die "$old_dir not a directory\n" unless -d $old_dir;
-my $config = eval { PublicInbox::Config->new };
+my $config = PublicInbox::Config->new;
$old_dir = abs_path($old_dir);
my $old;
if ($config) {
GetOptions($opt, @PublicInbox::AdminEdit::OPT, @opt) or
die "bad command-line args\n$usage\n";
-my $cfg = eval { PublicInbox::Config->new };
+my $cfg = PublicInbox::Config->new;
my $editor = $ENV{MAIL_EDITOR}; # e.g. "mutt -f"
unless (defined $editor) {
my $k = 'publicinbox.mailEditor';
warn "Will edit all of them\n";
}
} else {
- open my $fh, '<', $file or die "open($file) failed: $!";
- my $orig = do { local $/; <$fh> };
- my $mime = PublicInbox::MIME->new(\$orig);
+ my $mime = PublicInbox::InboxWritable::mime_from_path($file) or
+ die "open($file) failed: $!";
my $mids = mids($mime->header_obj);
find_mid($found, $_, \@ibxs) for (@$mids); # populates $found
my $cid = content_id($mime);
use PublicInbox::Xapcmd;
my $compact_opt;
-my $opt = { quiet => -1, compact => 0 };
-GetOptions($opt, qw(verbose|v+ reindex compact|c+ jobs|j=i prune indexlevel|L=s))
+my $opt = { quiet => -1, compact => 0, maxsize => undef };
+GetOptions($opt, qw(verbose|v+ reindex compact|c+ jobs|j=i prune
+ indexlevel|L=s maxsize|max-size=s))
or die "bad command-line args\n$usage";
die "--jobs must be positive\n" if defined $opt->{jobs} && $opt->{jobs} <= 0;
$compact_opt = { -coarse_lock => 1, compact => 1 };
}
-my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV);
+my $cfg = PublicInbox::Config->new;
+my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, undef, $cfg);
PublicInbox::Admin::require_or_die('-index');
unless (@ibxs) { print STDERR "Usage: $usage\n"; exit 1 }
my $mods = {};
+my $max_size = $opt->{maxsize} // $cfg->{lc('publicInbox.indexMaxSize')};
+if (defined $max_size) {
+ PublicInbox::Admin::parse_unsigned(\$max_size) or
+ die "`publicInbox.indexMaxSize=$max_size' not parsed\n";
+}
+
foreach my $ibx (@ibxs) {
# XXX: users can shoot themselves in the foot, with opt->{indexlevel}
$ibx->{indexlevel} //= $opt->{indexlevel} //
PublicInbox::Admin::detect_indexlevel($ibx);
+ $ibx->{index_max_size} = $max_size;
PublicInbox::Admin::scan_ibx_modules($mods, $ibx);
}
my $spamc = PublicInbox::Spamcheck::Spamc->new;
my $pi_config = PublicInbox::Config->new;
my $err;
-my $mime = PublicInbox::MIME->new(eval {
+my $mime = PublicInbox::MIME->new(do{
local $/;
- my $data = scalar <STDIN>;
+ my $data = <STDIN>;
$data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
if ($train ne 'rm') {
};
$err = $@;
}
- $data
+ \$data
});
sub remove_or_add ($$$$) {
# in case there's bugs in our code or user error.
my $emergency = $ENV{PI_EMERGENCY} || "$ENV{HOME}/.public-inbox/emergency/";
$ems = PublicInbox::Emergency->new($emergency);
-my $str = eval { local $/; <STDIN> };
+my $str = do { local $/; <STDIN> };
$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
$ems->prepare(\$str);
my $simple = Email::Simple->new(\$str);
my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt);
PublicInbox::AdminEdit::check_editable(\@ibxs);
-my $data = do { local $/; scalar <STDIN> };
+my $data = do { local $/; <STDIN> };
$data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
my $n_purged = 0;
foreach my $sub (qw(cur new)) {
foreach my $fn (glob("$dir/$sub/*")) {
open my $fh, '<', $fn or next;
- my $s = Email::Simple->new(eval { local $/; <$fh> });
+ my $s = Email::Simple->new(do { local $/; <$fh> });
my $date = $s->header('Date');
my $t = eval { str2time($date) };
defined $t or next;
while (my $ary = pop @msgs) {
my $fn = "$dir/$ary->[1]";
open my $fh, '<', $fn or next;
- my $mime = PublicInbox::MIME->new(eval { local $/; <$fh> });
+ my $mime = PublicInbox::MIME->new(do { local $/; <$fh> });
$im->add($mime);
}
$im->done;
$max = $n + $max_gap;
print STDERR $fn, "\n";
- my $mime = PublicInbox::MIME->new(eval { local $/; <$fh> });
+ my $mime = PublicInbox::MIME->new(do { local $/; <$fh> });
$filter->scrub($mime);
$im->add($mime);
foreach my $n (grep(/\d+\z/, glob("$spool/*"))) {
if (open my $fh, '<', $n) {
- my $f = Email::Filter->new(data => eval { local $/; <$fh> });
+ my $f = Email::Filter->new(data => do { local $/; <$fh> });
my $s = $f->simple;
# gmane rewrites Received headers, which increases spamminess
use URI::Escape qw/uri_escape_utf8/;
use File::Temp qw/tempfile/;
my ($fh, $filename) = tempfile('ssoma-replay-XXXXXXXX', TMPDIR => 1);
-my $msg = eval {
- local $/;
- Email::Simple->new(<STDIN>);
-};
+my $msg = Email::Simple->new(do { local $/; <STDIN> });
select $fh;
# Note: the archive URL makes assumptions about where the
use warnings;
use Test::More;
use PublicInbox::TestCommon;
+use PublicInbox::Import;
use_ok 'PublicInbox::Admin', qw(resolve_repo_dir);
my ($tmpdir, $for_destroy) = tmpdir();
my $git_dir = "$tmpdir/v1";
my $v2_dir = "$tmpdir/v2";
my ($res, $err, $v);
-is(0, system(qw(git init -q --bare), $git_dir), 'git init v1');
+PublicInbox::Import::init_bare($git_dir);
# v1
is(resolve_repo_dir($git_dir), $git_dir, 'top-level GIT_DIR resolved');
}
chdir '/';
+
+my @pairs = (
+ '1g' => 1024 ** 3,
+ 666 => 666,
+ '1500K' => 1500 * 1024,
+ '15m' => 15 * (1024 ** 2),
+);
+
+while (@pairs) {
+ my ($in, $out) = splice(@pairs, 0, 2);
+ my $orig = $in;
+ ok(PublicInbox::Admin::parse_unsigned(\$in), "parse_unsigned $orig");
+ is($in, $out, "got $orig => ($in == $out)");
+}
+
+for my $v ('', 'bogus', '1p', '1gig') {
+ ok(!PublicInbox::Admin::parse_unsigned(\$v),
+ "parse_unsigned rejects $v");
+}
+
done_testing();
}
{
- is(system(qw(git init -q --bare), $git_dir), 0, 'git init ok');
my $git = PublicInbox::Git->new($git_dir);
my $im = PublicInbox::Import->new($git, 'testbox', 'test@example');
+ $im->init_bare;
$im->add(Email::MIME->create(
header => [
From => 'a@example.com',
use Test::More;
use Email::MIME;
use PublicInbox::TestCommon;
+use PublicInbox::Import;
require_mods(qw(Plack::Handler::CGI Plack::Util));
my ($tmpdir, $for_destroy) = tmpdir();
my $home = "$tmpdir/pi-home";
{
is(1, mkdir($home, 0755), "setup ~/ for testing");
is(1, mkdir($pi_home, 0755), "setup ~/.public-inbox");
- is(0, system(qw(git init -q --bare), $maindir), "git init (main)");
+ PublicInbox::Import::init_bare($maindir);
open my $fh, '>', "$maindir/description" or die "open: $!\n";
print $fh "test for public-inbox\n";
zzzzzz
EOF
- $im->add($mime);
+ ok($im->add($mime), 'added initial message');
+
+ $mime->header_set('Message-ID', '<toobig@example.com>');
+ $mime->body_str_set("z\n" x 1024);
+ ok($im->add($mime), 'added big message');
# deliver a reply, too
- my $reply = Email::MIME->new(<<EOF);
+ $mime = Email::MIME->new(<<EOF);
From: You <you\@example.com>
To: Me <me\@example.com>
Cc: $addr
what?
EOF
- $im->add($reply);
+ ok($im->add($mime), 'added reply');
my $slashy_mid = 'slashy/asdf@example.com';
my $slashy = Email::MIME->new(<<EOF);
slashy
EOF
- $im->add($slashy);
+ ok($im->add($slashy), 'added slash');
$im->done;
my $res = cgi_run("/test/slashy/asdf\@example.com/raw");
my $path = "/test/blahblah\@example.com/t.mbox.gz";
my $res = cgi_run($path);
like($res->{head}, qr/^Status: 501 /, "search not-yet-enabled");
- my $indexed;
- eval {
- require DBD::SQLite;
- require PublicInbox::SearchIdx;
- my $s = PublicInbox::SearchIdx->new($ibx, 1);
- $s->index_sync;
- $indexed = 1;
- };
+ my $cmd = ['-index', $ibx->{inboxdir}, '--max-size=2k'];
+ my $opt = { 2 => \(my $err) };
+ my $indexed = run_script($cmd, undef, $opt);
if ($indexed) {
$res = cgi_run($path);
like($res->{head}, qr/^Status: 200 /, "search returned mbox");
IO::Uncompress::Gunzip::gunzip(\$in => \$out);
like($out, qr/^From /m, "From lines in mbox");
};
+ $res = cgi_run('/test/toobig@example.com/');
+ like($res->{head}, qr/^Status: 300 /,
+ 'did not index or return >max-size message');
+ like($err, qr/skipping [a-f0-9]{40,}/,
+ 'warned about skipping large OID');
} else {
like($res->{head}, qr/^Status: 501 /, "search not available");
- SKIP: { skip 'DBD::SQLite not available', 2 };
+ SKIP: { skip 'DBD::SQLite not available', 4 };
}
my $have_xml_treepp = eval { require XML::TreePP; 1 } if $indexed;
use Test::More;
use PublicInbox::Config;
use PublicInbox::TestCommon;
+use PublicInbox::Import;
my ($tmpdir, $for_destroy) = tmpdir();
{
- is(system(qw(git init -q --bare), $tmpdir), 0, "git init successful");
+ PublicInbox::Import::init_bare($tmpdir);
my @cmd = ('git', "--git-dir=$tmpdir", qw(config foo.bar), "hi\nhi");
- is(system(@cmd), 0, "set config");
+ is(xsys(@cmd), 0, "set config");
my $tmp = PublicInbox::Config->new("$tmpdir/config");
-primary_address => 'test@example.com',
};
-ok(PublicInbox::Import::run_die([qw(git init --bare -q), $ibx->{inboxdir}]),
- 'initialized v1 repo');
+PublicInbox::Import::init_bare($ibx->{inboxdir});
ok(umask(077), 'set restrictive umask');
ok(PublicInbox::Import::run_die([qw(git) , "--git-dir=$ibx->{inboxdir}",
qw(config core.sharedRepository 0644)]), 'set sharedRepository');
use strict;
use warnings;
use Test::More;
+use PublicInbox::TestCommon;
use_ok 'PublicInbox::DS';
if ('close-on-exec for epoll and kqueue') {
- use PublicInbox::Spawn qw(spawn);
+ use PublicInbox::Spawn qw(spawn which);
my $pid;
my $evfd_re = qr/(?:kqueue|eventpoll)/i;
my $l = <$r>;
is($l, undef, 'cloexec works and sleep(1) is running');
- my @of = grep(/$evfd_re/, `lsof -p $pid 2>/dev/null`);
- my $err = $?;
SKIP: {
- skip "lsof missing? (\$?=$err)", 1 if $err;
+ my $lsof = which('lsof') or skip 'lsof missing', 1;
+ my $rdr = { 2 => \(my $null) };
+ my @of = grep(/$evfd_re/, xqx([$lsof, '-p', $pid], {}, $rdr));
+ my $err = $?;
+ skip "lsof broken ? (\$?=$err)", 1 if $err;
is_deeply(\@of, [], 'no FDs leaked to subprocess');
};
if (defined $pid) {
}
SKIP: {
- # not bothering with BSD::Resource
- chomp(my $n = `/bin/sh -c 'ulimit -n'`);
+ require_mods('BSD::Resource', 1);
+ my $rlim = BSD::Resource::RLIMIT_NOFILE();
+ my ($n,undef) = BSD::Resource::getrlimit($rlim);
# FreeBSD 11.2 with 2GB RAM gives RLIMIT_NOFILE=57987!
if ($n > 1024 && !$ENV{TEST_EXPENSIVE}) {
$t = 'mailEditor set in config'; {
$in = $out = $err = '';
- my $rc = system(qw(git config), "--file=$cfgfile",
+ my $rc = xsys(qw(git config), "--file=$cfgfile",
'publicinbox.maileditor',
"$^X -i -p -e 's/boolean prefix/bool pfx/'");
is($rc, 0, 'set publicinbox.mailEditor');
my $im = PublicInbox::Import->new($git, $ibx->{name}, 'test@example');
{
- is(0, system(qw(git init -q --bare), $git_dir), "git init");
+ $im->init_bare;
local $ENV{GIT_DIR} = $git_dir;
foreach my $i (1..6) {
use PublicInbox::TestCommon;
my ($dir, $for_destroy) = tmpdir();
use PublicInbox::Spawn qw(popen_rd);
+use PublicInbox::Import;
use_ok 'PublicInbox::Git';
{
- is(system(qw(git init -q --bare), $dir), 0, 'created git directory');
+ PublicInbox::Import::init_bare($dir);
my $fi_data = './t/git.fast-import-data';
- ok(-r $fi_data, "fast-import data readable (or run test at top level)");
- local $ENV{GIT_DIR} = $dir;
- system("git fast-import --quiet <$fi_data");
+ open my $fh, '<', $fi_data or die
+ "fast-import data readable (or run test at top level: $!";
+ my $rdr = { 0 => $fh };
+ xsys([qw(git fast-import --quiet)], { GIT_DIR => $dir }, $rdr);
is($?, 0, 'fast-import succeeded');
}
}
if (1) {
- my $cmd = [ 'git', "--git-dir=$dir", qw(hash-object -w --stdin) ];
-
# need a big file, use the AGPL-3.0 :p
my $big_data = './COPYING';
ok(-r $big_data, 'COPYING readable');
my $size = -s $big_data;
ok($size > 8192, 'file is big enough');
-
- my $buf = do {
- local $ENV{GIT_DIR} = $dir;
- `git hash-object -w --stdin <$big_data`;
- };
+ open my $fh, '<', $big_data or die;
+ my $cmd = [ 'git', "--git-dir=$dir", qw(hash-object -w --stdin) ];
+ my $buf = xqx($cmd, { GIT_DIR => $dir }, { 0 => $fh });
is(0, $?, 'hashed object successfully');
chomp $buf;
if ('alternates reloaded') {
my ($alt, $alt_obj) = tmpdir();
my @cmd = ('git', "--git-dir=$alt", qw(hash-object -w --stdin));
- is(system(qw(git init -q --bare), $alt), 0, 'create alt directory');
+ PublicInbox::Import::init_bare($alt);
open my $fh, '<', "$alt/config" or die "open failed: $!\n";
my $rd = popen_rd(\@cmd, {}, { 0 => $fh } );
close $fh or die "close failed: $!";
use strict;
use warnings;
use Test::More;
-use PublicInbox::Spawn qw(which spawn);
+use PublicInbox::Spawn qw(which);
+use PublicInbox::TestCommon;
use IO::Handle; # ->autoflush
use Fcntl qw(:seek);
eval { require highlight } or
is($$ref, $$lref, 'do_hl_lang matches do_hl');
SKIP: {
- which('w3m') or skip 'w3m(1) missing to check output', 1;
- my $cmd = [ qw(w3m -T text/html -dump -config /dev/null) ];
- open my $in, '+>', undef or die;
- open my $out, '+>', undef or die;
- my $rdr = { 0 => fileno($in), 1 => fileno($out) };
- $in->autoflush(1);
- print $in '<pre>', $$ref, '</pre>' or die;
- $in->seek(0, SEEK_SET) or die;
- my $pid = spawn($cmd, undef, $rdr);
- waitpid($pid, 0);
+ my $w3m = which('w3m') or
+ skip('w3m(1) missing to check output', 1);
+ my $cmd = [ $w3m, qw(-T text/html -dump -config /dev/null) ];
+ my $in = '<pre>' . $$ref . '</pre>';
+ my $out = xqx($cmd, undef, { 0 => \$in });
# expand tabs and normalize whitespace,
# w3m doesn't preserve tabs
$orig =~ s/\t/ /gs;
- $out->seek(0, SEEK_SET) or die;
- $out = do { local $/; <$out> };
$out =~ s/\s*\z//sg;
$orig =~ s/\s*\z//sg;
is($out, $orig, 'w3m output matches');
# setup
{
- is(0, system(qw(git init -q --bare), $git_dir), "git init");
+ $im->init_bare;
my $prev = "";
foreach my $i (1..6) {
use warnings;
use Test::More;
use Time::HiRes qw(gettimeofday tv_interval);
-use PublicInbox::Spawn qw(which spawn);
+use PublicInbox::Spawn qw(which spawn popen_rd);
use PublicInbox::TestCommon;
require_mods(qw(Plack::Util Plack::Builder HTTP::Date HTTP::Status));
use Digest::SHA qw(sha1_hex);
my $sock = tcp_server() or die;
my @zmods = qw(PublicInbox::GzipFilter IO::Uncompress::Gunzip);
-# make sure stdin is not a pipe for lsof test to check for leaking pipes
-open(STDIN, '<', '/dev/null') or die 'no /dev/null: $!';
-
# Make sure we don't clobber socket options set by systemd or similar
# using socket activation:
my ($defer_accept_val, $accf_arg, $TCP_DEFER_ACCEPT);
};
SKIP: {
- which('curl') or skip('curl(1) missing', 4);
+ my $curl = which('curl') or skip('curl(1) missing', 4);
my $base = 'http://' . $sock->sockhost . ':' . $sock->sockport;
my $url = "$base/sha1";
my ($r, $w);
pipe($r, $w) or die "pipe: $!";
- my $cmd = [qw(curl --tcp-nodelay --no-buffer -T- -HExpect: -sS), $url];
+ my $cmd = [$curl, qw(--tcp-nodelay -T- -HExpect: -sSN), $url];
open my $cout, '+>', undef or die;
open my $cerr, '>', undef or die;
my $rdr = { 0 => $r, 1 => $cout, 2 => $cerr };
seek($cout, 0, SEEK_SET);
is(<$cout>, sha1_hex($str), 'read expected body');
- open my $fh, '-|', qw(curl -sS), "$base/async-big" or die $!;
+ my $fh = popen_rd([$curl, '-sS', "$base/async-big"]);
my $n = 0;
my $non_zero = 0;
while (1) {
$n += $r;
$buf =~ /\A\0+\z/ or $non_zero++;
}
- close $fh or die "curl errored out \$?=$?";
+ close $fh or die "close curl pipe: $!";
+ is($?, 0, 'curl succesful');
is($n, 30 * 1024 * 1024, 'got expected output from curl');
is($non_zero, 0, 'read all zeros');
- require_mods(@zmods, 1);
- open $fh, '-|', qw(curl -sS), "$base/psgi-return-gzip" or die;
- binmode $fh;
- my $buf = do { local $/; <$fh> };
- close $fh or die "curl errored out \$?=$?";
+ require_mods(@zmods, 2);
+ my $buf = xqx([$curl, '-sS', "$base/psgi-return-gzip"]);
+ is($?, 0, 'curl succesful');
IO::Uncompress::Gunzip::gunzip(\$buf => \(my $out));
is($out, "hello world\n");
}
SKIP: {
skip 'only testing lsof(8) output on Linux', 1 if $^O ne 'linux';
- skip 'no lsof in PATH', 1 unless which('lsof');
- my @lsof = `lsof -p $td->{pid}`;
+ my $lsof = which('lsof') or skip 'no lsof in PATH', 1;
+ my $null_in = '';
+ my $rdr = { 2 => \(my $null_err), 0 => \$null_in };
+ my @lsof = xqx([$lsof, '-p', $td->{pid}], undef, $rdr);
is_deeply([grep(/\bdeleted\b/, @lsof)], [], 'no lingering deleted inputs');
# filter out pipes inherited from the parent
- my @this = `lsof -p $$`;
+ my @this = xqx([$lsof, '-p', $$], undef, $rdr);
my $bad;
my $extract_inodes = sub {
map {;
is($conn->read($buf, 1), 0, "EOF");
}
- is(system(qw(git clone -q --mirror),
+ is(xsys(qw(git clone -q --mirror),
"http://$host:$port/$group", "$tmpdir/clone.git"),
0, 'smart clone successful');
# ensure dumb cloning works, too:
- is(system('git', "--git-dir=$maindir",
+ is(xsys('git', "--git-dir=$maindir",
qw(config http.uploadpack false)),
0, 'disable http.uploadpack');
- is(system(qw(git clone -q --mirror),
+ is(xsys(qw(git clone -q --mirror),
"http://$host:$port/$group", "$tmpdir/dumb.git"),
0, 'clone successful');
ok($td->kill, 'killed httpd');
$td->join;
- is(system('git', "--git-dir=$tmpdir/clone.git",
+ is(xsys('git', "--git-dir=$tmpdir/clone.git",
qw(fsck --no-verbose)), 0,
'fsck on cloned directory successful');
}
use PublicInbox::TestCommon;
my ($dir, $for_destroy) = tmpdir();
-is(system(qw(git init -q --bare), $dir), 0, 'git init successful');
my $git = PublicInbox::Git->new($dir);
-
my $im = PublicInbox::Import->new($git, 'testbox', 'test@example');
+$im->init_bare;
my $mime = PublicInbox::MIME->create(
header => [
From => 'a@example.com',
use warnings;
use Test::More;
use_ok 'PublicInbox::Inbox';
+use File::Temp 0.19 ();
my $x = PublicInbox::Inbox->new({url => [ '//example.com/test/' ]});
is($x->base_url, 'https://example.com/test/', 'expanded protocol-relative');
$x = PublicInbox::Inbox->new({url => [ 'http://example.com/test' ]});
is($x->base_url, 'http://example.com/test/', 'added trailing slash');
$x = PublicInbox::Inbox->new({});
+
is($x->base_url, undef, 'undef base_url allowed');
+my $tmpdir = File::Temp->newdir('pi-inbox-XXXXXX', TMPDIR => 1);
+$x->{inboxdir} = $tmpdir->dirname;
+is_deeply($x->cloneurl, [], 'no cloneurls');
+is($x->description, '($INBOX_DIR/description missing)', 'default description');
+{
+ open my $fh, '>', "$x->{inboxdir}/cloneurl" or die;
+ print $fh "https://example.com/inbox\n" or die;
+ close $fh or die;
+ open $fh, '>', "$x->{inboxdir}/description" or die;
+ print $fh "blah\n" or die;
+ close $fh or die;
+}
+is_deeply($x->cloneurl, ['https://example.com/inbox'], 'cloneurls update');
+is($x->description, 'blah', 'description updated');
+is(unlink(glob("$x->{inboxdir}/*")), 2, 'unlinked cloneurl & description');
+is_deeply($x->cloneurl, ['https://example.com/inbox'], 'cloneurls memoized');
+is($x->description, 'blah', 'description memoized');
done_testing();
push @cmd, "$ibx->{inboxdir}/git/0.git", "$mirror/git/0.git";
}
my $fetch_dir = $cmd[-1];
- is(system(@cmd), 0, "v$v clone OK");
+ is(xsys(@cmd), 0, "v$v clone OK");
# inbox init
local $ENV{PI_CONFIG} = "$tmpdir/.picfg";
$im->done;
# mirror updates
- is(system('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK');
+ is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK');
ok(run_script(['-index', $mirror]), "v$v index mirror again OK");
($nr, $msgs) = $ro_mirror->recent;
is($nr, 2, '2nd message seen in mirror');
}
# sync the mirror
- is(system('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK');
+ is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK');
ok(run_script(['-index', $mirror]), "v$v index mirror again OK");
($nr, $msgs) = $ro_mirror->recent;
is($nr, 1, '2nd message gone from mirror');
push @expect, $i;
}
$im->done;
- is(system('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK');
+ is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK');
ok(run_script(['-index', '--reindex', $mirror]),
"v$v index --reindex mirror OK");
@ro_nums = map { $_->{num} } @{$ro_mirror->over->query_ts(0, 0)};
sub read_indexlevel {
my ($inbox) = @_;
- local $ENV{GIT_CONFIG} = "$ENV{PI_DIR}/config";
- chomp(my $lvl = `git config publicinbox.$inbox.indexlevel`);
+ my $cmd = [ qw(git config), "publicinbox.$inbox.indexlevel" ];
+ my $env = { GIT_CONFIG => "$ENV{PI_DIR}/config" };
+ chomp(my $lvl = xqx($cmd, $env));
$lvl;
}
-From historical@ruby-dev Thu Jan 1 00:00:00 1970
Message-Id: <199707281508.AAA24167@hoyogw.example>
Date: Tue, 29 Jul 97 00:08:29 +0900
From: matz@example.com
use Cwd qw(getcwd);
use PublicInbox::MID qw(mid2path);
use PublicInbox::Git;
+use PublicInbox::InboxWritable;
use PublicInbox::TestCommon;
+use PublicInbox::Import;
my ($tmpdir, $for_destroy) = tmpdir();
my $home = "$tmpdir/pi-home";
my $pi_home = "$home/.public-inbox";
"spamc mock found (run in top of source tree");
is(1, mkdir($home, 0755), "setup ~/ for testing");
is(1, mkdir($pi_home, 0755), "setup ~/.public-inbox");
- is(0, system(qw(git init -q --bare), $maindir), "git init (main)");
+ PublicInbox::Import::init_bare($maindir);
open my $fh, '>>', $pi_config or die;
print $fh <<EOF or die;
use PublicInbox::MDA;
use PublicInbox::Address;
use Encode qw/encode/;
- my $mbox = 't/utf8.mbox';
- open(my $fh, '<', $mbox) or die "failed to open mbox: $mbox\n";
- my $str = eval { local $/; <$fh> };
- close $fh;
- my $msg = Email::MIME->new($str);
-
+ my $eml = 't/utf8.eml';
+ my $msg = PublicInbox::InboxWritable::mime_from_path($eml) or
+ die "failed to open $eml: $!";
my $from = $msg->header('From');
my ($author) = PublicInbox::Address::names($from);
my ($email) = PublicInbox::Address::emails($from);
Date: Thu, 01 Jan 1970 00:00:00 +0000
EOF
- system(qw(git config --file), $pi_config, "$cfgpfx.listid", $list_id);
+ xsys(qw(git config --file), $pi_config, "$cfgpfx.listid", $list_id);
$? == 0 or die "failed to set listid $?";
my $in = $simple->as_string;
ok(run_script(['-mda'], undef, { 0 => \$in }),
local $ENV{PI_CONFIG} = $pi_config;
local $ENV{PI_EMERGENCY} = "$tmpdir/emergency";
my @cfg = ('git', 'config', "--file=$pi_config");
-is(system(@cfg, 'publicinboxmda.spamcheck', 'none'), 0);
+is(xsys(@cfg, 'publicinboxmda.spamcheck', 'none'), 0);
for my $v (qw(V1 V2)) {
my @warn;
"http://example.com/$v", $addr ];
ok(run_script($cmd), 'public-inbox-init');
ok(run_script(['-index', $inboxdir]), 'public-inbox-index');
- is(system(@cfg, "$cfgpfx.filter", 'PublicInbox::Filter::RubyLang'), 0);
- is(system(@cfg, "$cfgpfx.altid",
+ is(xsys(@cfg, "$cfgpfx.filter", 'PublicInbox::Filter::RubyLang'), 0);
+ is(xsys(@cfg, "$cfgpfx.altid",
'serial:alerts:file=msgmap.sqlite3'), 0);
for my $i (1..2) {
use Test::More;
use Email::MIME;
use PublicInbox::Hval qw(ascii_html);
+use PublicInbox::InboxWritable;
use_ok('PublicInbox::MsgIter');
{
}
{
- my $f = 't/iso-2202-jp.mbox';
- my $mime = Email::MIME->new(do {
- open my $fh, '<', $f or die "open($f): $!";
- local $/;
- <$fh>;
- });
+ my $f = 't/iso-2202-jp.eml';
+ my $mime = PublicInbox::InboxWritable::mime_from_path($f) or
+ die "open $f: $!";
my $raw = '';
msg_iter($mime, sub {
my ($part, $level, @ex) = @{$_[0]};
{
my $f = 't/x-unknown-alpine.eml';
- my $mime = Email::MIME->new(do {
- open my $fh, '<', $f or die "open($f): $!";
- local $/;
- binmode $fh;
- <$fh>;
- });
+ my $mime = PublicInbox::InboxWritable::mime_from_path($f) or
+ die "open $f: $!";
my $raw = '';
msg_iter($mime, sub {
my ($part, $level, @ex) = @{$_[0]};
my @v2 = ($ibx->over->get_art(1), $ibx->over->get_art(2));
is_deeply(\@v2, \@old, 'v2 conversion times match');
- system(qw(git clone -sq --mirror), "$tmpdir/v2/git/0.git",
+ xsys(qw(git clone -sq --mirror), "$tmpdir/v2/git/0.git",
"$tmpdir/v2-clone/git/0.git") == 0 or die "clone: $?";
$cmd = [ '-init', '-Lbasic', '-V2', 'v2c', "$tmpdir/v2-clone",
'http://example.com/v2c', 'v2c@example.com' ];
{
my $im = $ibx->importer(0);
- my $mime = PublicInbox::MIME->new(do {
- open my $fh, '<', 't/data/0001.patch' or die;
- local $/;
- <$fh>
- });
+ my $eml = 't/data/0001.patch';
+ my $mime = PublicInbox::InboxWritable::mime_from_path($eml) or
+ die "open $eml: $!";
ok($im->add($mime), 'message added');
$im->done;
if ($version == 1) {
use warnings;
use Test::More;
use PublicInbox::TestCommon;
+use PublicInbox::Spawn qw(which);
require_mods(qw(DBD::SQLite));
-require PublicInbox::SearchIdx;
-require PublicInbox::Msgmap;
require PublicInbox::InboxWritable;
use Email::Simple;
use IO::Socket;
my @cmd = ('-init', $group, $inboxdir, 'http://example.com/', $addr);
push @cmd, "-V$version", '-Lbasic';
ok(run_script(\@cmd), 'init OK');
- is(system(qw(git config), "--file=$home/.public-inbox/config",
+ is(xsys(qw(git config), "--file=$home/.public-inbox/config",
"publicinbox.$group.newsgroup", $group),
0, 'enabled newsgroup');
my $len;
$im->add($mime);
$im->done;
if ($version == 1) {
- my $s = PublicInbox::SearchIdx->new($ibx, 1);
- $s->index_sync;
+ ok(run_script(['-index', $ibx->{inboxdir}]),
+ 'indexed v1');
}
}
$im->add($for_leafnode);
$im->done;
if ($version == 1) {
- my $s = PublicInbox::SearchIdx->new($ibx, 1);
- $s->index_sync;
+ ok(run_script(['-index', $ibx->{inboxdir}]),
+ 'indexed v1');
}
my $hdr = $n->head("<$long_hdr>");
my $expect = qr/\AMessage-ID: /i . qr/\Q<$long_hdr>\E/;
if ($INC{'Search/Xapian.pm'} && ($ENV{TEST_RUN_MODE}//2)) {
skip 'Search/Xapian.pm pre-loaded (by t/run.perl?)', 1;
}
- my @of = `lsof -p $td->{pid} 2>/dev/null`;
+ my $lsof = which('lsof') or skip 'lsof missing', 1;
+ my $rdr = { 2 => \(my $null) };
+ my @of = xqx([$lsof, '-p', $td->{pid}], undef, $rdr);
skip('lsof broken', 1) if (!scalar(@of) || $?);
my @xap = grep m!Search/Xapian!, @of;
is_deeply(\@xap, [], 'Xapian not loaded in nntpd');
$n = $s = undef;
$td->join;
- my $eout = eval {
- local $/;
+ is($?, 0, 'no error in exited process');
+ my $eout = do {
open my $fh, '<', $err or die "open $err failed: $!";
+ local $/;
<$fh>;
};
- is($?, 0, 'no error in exited process');
unlike($eout, qr/wide/i, 'no Wide character warnings');
}
my $git_dir = "$tmpdir/a.git";
{
- is(system(qw(git init -q --bare), $git_dir), 0, 'git init ok');
my $git = PublicInbox::Git->new($git_dir);
my $im = PublicInbox::Import->new($git, 'testbox', 'test@example');
+ $im->init_bare;
$im->add(Email::MIME->create(
header => [
From => 'a@example.com',
body => "hello world\n",
));
$im->done;
- is(system(qw(git --git-dir), $git_dir, 'fsck', '--strict'), 0, 'git fsck ok');
+ is(xsys(qw(git --git-dir), $git_dir, 'fsck', '--strict'), 0,
+ 'git fsck ok');
}
done_testing();
$cfgpfx.address=$addr
$cfgpfx.inboxdir=$maindir
EOF
-is(0, system(qw(git init -q --bare), $maindir), "git init (main)");
my $git = PublicInbox::Git->new($maindir);
my $im = PublicInbox::Import->new($git, 'test', $addr);
+$im->init_bare;
{
open my $fh, '<', '/dev/urandom' or die "unable to open urandom: $!\n";
$cfgpfx.address=$addr
$cfgpfx.inboxdir=$maindir
EOF
-is(0, system(qw(git init -q --bare), $maindir), "git init (main)");
my $git = PublicInbox::Git->new($maindir);
my $im = PublicInbox::Import->new($git, 'test', $addr);
+$im->init_bare;
{
my $mime = Email::MIME->new(<<EOF);
From: Me <me\@example.com>
$cfgpfx.address=$addr
$cfgpfx.inboxdir=$maindir
EOF
-is(0, system(qw(git init -q --bare), $maindir), "git init (main)");
+PublicInbox::Import::init_bare($maindir);
my $www = PublicInbox::WWW->new($config);
test_psgi(sub { $www->call(@_) }, sub {
my $im = PublicInbox::V2Writable->new($ibx, 1);
$im->{parallel} = 0;
-my $mime = PublicInbox::MIME->create(
- header => [
- From => 'a@example.com',
- To => 'test@example.com',
- Subject => 'this is a subject',
- 'Message-ID' => '<a-mid@b>',
- Date => 'Fri, 02 Oct 1993 00:00:00 +0000',
- ],
- body => "hello world\n",
-);
+my $mime = PublicInbox::MIME->new(<<'EOF');
+From oldbug-pre-a0c07cba0e5d8b6a Fri Oct 2 00:00:00 1993
+From: a@example.com
+To: test@example.com
+Subject: this is a subject
+Message-ID: <a-mid@b>
+Date: Fri, 02 Oct 1993 00:00:00 +0000
+
+hello world
+EOF
ok($im->add($mime), 'added one message');
$mime->body_set("hello world!\n");
$new_mid = $mids->[1];
$im->done;
+my $msg = $ibx->msg_by_mid('a-mid@b');
+like($$msg, qr/\AFrom oldbug/s,
+ '"From_" line stored to test old bug workaround');
+
my $cfgpfx = "publicinbox.v2test";
my $cfg = <<EOF;
$cfgpfx.address=$ibx->{-primary_address}
'got v2 description missing message');
$res = $cb->(GET('/v2test/a-mid@b/raw'));
$raw = $res->content;
+ unlike($raw, qr/^From oldbug/sm, 'buggy "From_" line omitted');
like($raw, qr/^hello world$/m, 'got first message');
like($raw, qr/^hello world!$/m, 'got second message');
@from_ = ($raw =~ m/^From /mg);
my $out;
my $in = $res->content;
my $status = IO::Uncompress::Gunzip::gunzip(\$in => \$out);
+ unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted');
like($out, qr/^hello world$/m, 'got first in t.mbox.gz');
like($out, qr/^hello world!$/m, 'got second in t.mbox.gz');
like($out, qr/^hello ghosts$/m, 'got third in t.mbox.gz');
$res = $cb->(POST('/v2test/?q=m:a-mid@b&x=m'));
$in = $res->content;
$status = IO::Uncompress::Gunzip::gunzip(\$in => \$out);
+ unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted');
like($out, qr/^hello world$/m, 'got first in mbox POST');
like($out, qr/^hello world!$/m, 'got second in mbox POST');
like($out, qr/^hello ghosts$/m, 'got third in mbox POST');
$res = $cb->(GET('/v2test/all.mbox.gz'));
$in = $res->content;
$status = IO::Uncompress::Gunzip::gunzip(\$in => \$out);
+ unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted');
like($out, qr/^hello world$/m, 'got first in all.mbox');
like($out, qr/^hello world!$/m, 'got second in all.mbox');
like($out, qr/^hello ghosts$/m, 'got third in all.mbox');
for my $dir (glob("$ibx->{inboxdir}/git/*.git")) {
my ($bn) = ($dir =~ m!([^/]+)\z!);
- is(system(qw(git --git-dir), $dir,
+ is(xsys(qw(git --git-dir), $dir,
qw(fsck --strict --no-progress)),
0, "git fsck is clean in epoch $bn");
}
require PublicInbox::SearchIdx;
require PublicInbox::Smsg;
require PublicInbox::Inbox;
+use PublicInbox::Import;
my ($tmpdir, $for_destroy) = tmpdir();
my $git_dir = "$tmpdir/a.git";
-is(0, system(qw(git init -q --bare), $git_dir), "git init (main)");
+PublicInbox::Import::init_bare($git_dir);
my $ibx = PublicInbox::Inbox->new({inboxdir => $git_dir});
my $rw = PublicInbox::SearchIdx->new($ibx, 1);
ok($rw, "search indexer created");
require_mods(qw(DBD::SQLite Search::Xapian));
require PublicInbox::SearchIdx;
require PublicInbox::Inbox;
+require PublicInbox::InboxWritable;
use Email::MIME;
my ($tmpdir, $for_destroy) = tmpdir();
my $git_dir = "$tmpdir/a.git";
my $ibx = PublicInbox::Inbox->new({ inboxdir => $git_dir });
my ($root_id, $last_id);
-is(0, system(qw(git init --shared -q --bare), $git_dir), "git init (main)")
+is(0, xsys(qw(git init --shared -q --bare), $git_dir), "git init (main)")
or BAIL_OUT("`git init --shared' failed, weird FS or seccomp?");
eval { PublicInbox::Search->new($ibx)->xdb };
ok($@, "exception raised on non-existent DB");
});
$ibx->with_umask(sub {
- my $str = eval {
- my $mbox = 't/utf8.mbox';
- open(my $fh, '<', $mbox) or die "failed to open mbox: $mbox\n";
- local $/;
- <$fh>
- };
- $str =~ s/\AFrom [^\n]+\n//s;
- my $mime = Email::MIME->new($str);
+ my $eml = 't/utf8.eml';
+ my $mime = PublicInbox::InboxWritable::mime_from_path($eml) or
+ die "open $eml: $!";
my $doc_id = $rw->add_message($mime);
ok($doc_id > 0, 'message indexed doc_id with UTF-8');
my $msg = $rw->query('m:testmessage@example.com', {limit => 1})->[0];
require_git(2.6);
use PublicInbox::Spawn qw(popen_rd);
require_mods(qw(DBD::SQLite Search::Xapian Plack::Util));
-chomp(my $git_dir = `git rev-parse --git-dir 2>/dev/null`);
-plan skip_all => "$0 must be run from a git working tree" if $?;
+my $git_dir = xqx([qw(git rev-parse --git-dir)], undef, {2 => \(my $null)});
+$? == 0 or plan skip_all => "$0 must be run from a git working tree";
+chomp $git_dir;
# needed for alternates, and --absolute-git-dir is only in git 2.13+
$git_dir = abs_path($git_dir);
$im->{parallel} = 0;
my $deliver_patch = sub ($) {
- open my $fh, '<', $_[0] or die "open: $!";
- my $mime = PublicInbox::MIME->new(do { local $/; <$fh> });
+ my $mime = PublicInbox::InboxWritable::mime_from_path($_[0]) or
+ die "open $_[0]: $!";
$im->add($mime);
$im->done;
};
require_mods(@psgi, 7 + scalar(@psgi));
use_ok($_) for @psgi;
my $binfoo = "$inboxdir/binfoo.git";
- system(qw(git init --bare -q), $binfoo) == 0 or die "git init: $?";
+ require PublicInbox::Import;
+ PublicInbox::Import::init_bare($binfoo);
require_ok 'PublicInbox::ViewVCS';
my $big_size = do {
no warnings 'once';
my $msg = $_;
$msg->{ds} ||= ++$n;
$msg->{references} =~ s/\s+/ /sg if $msg->{references};
+ $msg->{blob} = '0'x40; # any dummy value will do, here
my $simple = Email::Simple->create(header => [
'Message-ID' => "<$msg->{mid}>",
'References' => $msg->{references},
sub thread_to_s {
my ($msgs) = @_;
my $rootset = PublicInbox::SearchThread::thread($msgs, sub {
- [ sort { $a->{id} cmp $b->{id} } @{$_[0]} ] });
+ [ sort { $a->{mid} cmp $b->{mid} } @{$_[0]} ] });
my $st = '';
my @q = map { (0, $_) } @$rootset;
while (@q) {
my $level = shift @q;
my $node = shift @q or next;
- $st .= (" "x$level). "$node->{id}\n";
+ $st .= (" "x$level). "$node->{mid}\n";
my $cl = $level + 1;
unshift @q, map { ($cl, $_) } @{$node->{children}};
}
-From e@yhbt.net Thu Jan 01 00:00:00 1970
Date: Thu, 01 Jan 1970 00:00:00 +0000
To: =?utf-8?Q?El=C3=A9anor?= <e@example.com>
From: =?utf-8?Q?El=C3=A9anor?= <e@example.com>
require_mods(qw(DBD::SQLite Search::Xapian));
require PublicInbox::SearchIdx;
my ($inboxdir, $for_destroy) = tmpdir();
-is(system(qw(git init --bare -q), $inboxdir), 0);
my $ibx = {
inboxdir => $inboxdir,
name => 'test-add-remove-add',
body => "hello world\n",
);
my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
+$im->init_bare;
ok($im->add($mime), 'message added');
-ok($im->remove($mime), 'message added');
+ok($im->remove($mime), 'message removed');
ok($im->add($mime), 'message added again');
$im->done;
my $rw = PublicInbox::SearchIdx->new($ibx, 1);
use_ok 'PublicInbox::SearchIdx';
use_ok 'PublicInbox::Import';
my ($inboxdir, $for_destroy) = tmpdir();
-is(system(qw(git init -q --bare), $inboxdir), 0);
my $ibx_config = {
inboxdir => $inboxdir,
name => 'test-v1reindex',
my %config = %$ibx_config;
my $ibx = PublicInbox::Inbox->new(\%config);
my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
+ $im->init_bare;
foreach my $i (1..10) {
$mime->header_set('Message-Id', "<$i\@example.com>");
ok($im->add($mime), "message $i added");
my $im = PublicInbox::V2Writable->new($ibx, 1);
$im->{parallel} = 0;
ok($im->add($mime), 'message added');
-ok($im->remove($mime), 'message added');
+ok($im->remove($mime), 'message removed');
ok($im->add($mime), 'message added again');
$im->done;
my $msgs = $ibx->recent({limit => 1000});
my $config = "$ENV{PI_DIR}/config";
ok(-f $config, 'config exists');
my $k = 'publicinboxmda.spamcheck';
- is(system('git', 'config', "--file=$config", $k, 'none'), 0,
+ is(xsys('git', 'config', "--file=$config", $k, 'none'), 0,
'disabled spamcheck for mda');
ok(run_script(['-mda'], undef, $rdr), 'mda did not die');
"http://$host:$port/v2/$i$sfx",
"$tmpdir/m/git/$i.git");
- is(system(@cmd), 0, "cloned $i.git");
+ is(xsys(@cmd), 0, "cloned $i.git");
ok(-d "$tmpdir/m/git/$i.git", "mirror $i OK");
}
my $fetch_each_epoch = sub {
foreach my $i (0..$epoch_max) {
my $dir = "$tmpdir/m/git/$i.git";
- is(system('git', "--git-dir=$dir", 'fetch', '-q'), 0,
+ is(xsys('git', "--git-dir=$dir", 'fetch', '-q'), 0,
'fetch successful');
}
};
is(scalar($mset->items), 0, '1@example.com no longer visible in mirror');
}
+if ('max size') {
+ $mime->header_set('Message-ID', '<2big@a>');
+ my $max = '2k';
+ $mime->body_str_set("z\n" x 1024);
+ ok($v2w->add($mime), "add big message");
+ $v2w->done;
+ $ibx->cleanup;
+ $fetch_each_epoch->();
+ PublicInbox::InboxWritable::cleanup($mibx);
+ my $cmd = ['-index', "$tmpdir/m", "--max-size=$max" ];
+ my $opt = { 2 => \(my $err) };
+ ok(run_script($cmd, undef, $opt), 'indexed with --max-size');
+ like($err, qr/skipping [a-f0-9]{40,}/, 'warned about skipping message');
+ $mset = $mibx->search->reopen->query('m:2big@a', {mset =>1});
+ is(scalar($mset->items), 0, 'large message not indexed');
+
+ {
+ open my $fh, '>>', $pi_config or die;
+ print $fh <<EOF or die;
+[publicinbox]
+ indexMaxSize = 2k
+EOF
+ close $fh or die;
+ }
+ $cmd = ['-index', "$tmpdir/m", "--reindex" ];
+ ok(run_script($cmd, undef, $opt), 'reindexed w/ indexMaxSize in file');
+ like($err, qr/skipping [a-f0-9]{40,}/, 'warned about skipping message');
+ $mset = $mibx->search->reopen->query('m:2big@a', {mset =>1});
+ is(scalar($mset->items), 0, 'large message not re-indexed');
+}
+
ok($td->kill, 'killed httpd');
$td->join;
-primary_address => 'test@example.com',
indexlevel => 'full',
};
-my $agpl = eval {
+my $agpl = do {
open my $fh, '<', 'COPYING' or die "can't open COPYING: $!";
local $/;
<$fh>;
};
-$agpl or die "AGPL or die :P\n";
my $phrase = q("defending all users' freedom");
my $mime = PublicInbox::MIME->create(
header => [
if ('ensure git configs are correct') {
my @cmd = (qw(git config), "--file=$inboxdir/all.git/config",
qw(core.sharedRepository 0644));
- is(system(@cmd), 0, "set sharedRepository in all.git");
+ is(xsys(@cmd), 0, "set sharedRepository in all.git");
$git0 = PublicInbox::Git->new("$inboxdir/git/0.git");
chomp(my $v = $git0->qx(qw(config core.sharedRepository)));
is($v, '0644', 'child repo inherited core.sharedRepository');
use Cwd;
use PublicInbox::Config;
use PublicInbox::TestCommon;
+use PublicInbox::Import;
require_mods(qw(Filesys::Notify::Simple));
my ($tmpdir, $for_destroy) = tmpdir();
my $git_dir = "$tmpdir/test.git";
use_ok 'PublicInbox::Emergency';
my $cfgpfx = "publicinbox.test";
my $addr = 'test-public@example.com';
-is(system(qw(git init -q --bare), $git_dir), 0, 'initialized git dir');
+PublicInbox::Import::init_bare($git_dir);
my $msg = <<EOF;
From: user\@example.com
use Cwd;
use PublicInbox::Config;
use PublicInbox::TestCommon;
+use PublicInbox::Import;
require_git(2.6);
require_mods(qw(Search::Xapian DBD::SQLite Filesys::Notify::Simple));
require PublicInbox::V2Writable;
{
my $patch = 't/data/0001.patch';
open my $fh, '<', $patch or die "failed to open $patch: $!\n";
- $msg = eval { local $/; <$fh> };
+ $msg = do { local $/; <$fh> };
PublicInbox::Emergency->new($maildir)->prepare(\$msg);
PublicInbox::WatchMaildir->new($config)->scan('full');
my ($nr, $msgs) = $srch->reopen->query('dfpost:6e006fd7');
my $v1repo = "$tmpdir/v1";
my $v1pfx = "publicinbox.v1";
my $v1addr = 'v1-public@example.com';
- is(system(qw(git init -q --bare), $v1repo), 0, 'v1 init OK');
+ PublicInbox::Import::init_bare($v1repo);
my $cfg2 = <<EOF;
$orig$v1pfx.address=$v1addr
$v1pfx.inboxdir=$v1repo
--- /dev/null
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use Test::More;
+use PublicInbox::Config;
+use PublicInbox::TestCommon;
+require_git(2.6);
+require_mods(qw(Search::Xapian DBD::SQLite Filesys::Notify::Simple));
+my ($tmpdir, $for_destroy) = tmpdir();
+my $inboxdir = "$tmpdir/v2";
+my $maildir = "$tmpdir/md";
+use_ok 'PublicInbox::WatchMaildir'; # exercised below with two watchheader values on one inbox
+use_ok 'PublicInbox::Emergency';
+my $cfgpfx = "publicinbox.test";
+my $addr = 'test-public@example.com';
+my @cmd = ('-init', '-V2', 'test', $inboxdir,
+ 'http://example.com/list', $addr);
+local $ENV{PI_CONFIG} = "$tmpdir/pi_config"; # config path kept under $tmpdir; presumably read by the -init run below
+ok(run_script(\@cmd), 'public-inbox init OK');
+# Three fixtures follow: inbox address in To:, inbox address only in Cc:, and in neither header.
+my $msg_to = <<EOF;
+From: user\@a.com
+To: $addr
+Subject: address is in to
+Message-Id: <to\@a.com>
+Date: Sat, 18 Apr 2020 00:00:00 +0000
+
+content1
+EOF
+
+my $msg_cc = <<EOF;
+From: user1\@a.com
+To: user2\@a.com
+Cc: $addr
+Subject: address is in cc
+Message-Id: <cc\@a.com>
+Date: Sat, 18 Apr 2020 00:01:00 +0000
+
+content2
+EOF
+
+my $msg_none = <<EOF;
+From: user1\@a.com
+To: user2\@a.com
+Cc: user3\@a.com
+Subject: address is not in to or cc
+Message-Id: <none\@a.com>
+Date: Sat, 18 Apr 2020 00:02:00 +0000
+
+content3
+EOF
+# Presumably stores each fixture in $maildir (the directory named by the watch config below) -- Emergency is defined elsewhere.
+PublicInbox::Emergency->new($maildir)->prepare(\$msg_to);
+PublicInbox::Emergency->new($maildir)->prepare(\$msg_cc);
+PublicInbox::Emergency->new($maildir)->prepare(\$msg_none);
+# watchheader is given twice for the same inbox; a message matching either value should be accepted.
+my $cfg = <<EOF;
+$cfgpfx.address=$addr
+$cfgpfx.inboxdir=$inboxdir
+$cfgpfx.watch=maildir:$maildir
+$cfgpfx.watchheader=To:$addr
+$cfgpfx.watchheader=Cc:$addr
+EOF
+my $config = PublicInbox::Config->new(\$cfg);
+PublicInbox::WatchMaildir->new($config)->scan('full');
+my $ibx = $config->lookup_name('test');
+ok($ibx, 'found inbox by name');
+# Look up each fixture by Message-ID; a defined num_for() result is treated here as proof of import.
+my $num = $ibx->mm->num_for('to@a.com');
+ok(defined $num, 'Matched for address in To:');
+$num = $ibx->mm->num_for('cc@a.com');
+ok(defined $num, 'Matched for address in Cc:');
+$num = $ibx->mm->num_for('none@a.com');
+is($num, undef, 'No match without address in To: or Cc:'); # neither watchheader value matches this fixture
+
+done_testing;
use Test::More;
use PublicInbox::Spawn qw(which);
use PublicInbox::TestCommon;
+use PublicInbox::Import;
require_mods(qw(URI::Escape Plack::Builder Digest::SHA
IO::Compress::Gzip IO::Uncompress::Gunzip HTTP::Tiny));
require PublicInbox::WwwListing;
my ($tmpdir, $for_destroy) = tmpdir();
my $bare = PublicInbox::Git->new("$tmpdir/bare.git");
-is(system(qw(git init -q --bare), $bare->{git_dir}), 0, 'git init --bare');
+PublicInbox::Import::init_bare($bare->{git_dir});
is(PublicInbox::WwwListing::fingerprint($bare), undef,
'empty repo has no fingerprint');
{
my $fi_data = './t/git.fast-import-data';
- local $ENV{GIT_DIR} = $bare->{git_dir};
- is(system("git fast-import --quiet <$fi_data"), 0, 'fast-import');
+ open my $fh, '<', $fi_data or die "open $fi_data: $!";
+ my $env = { GIT_DIR => $bare->{git_dir} };
+ is(xsys([qw(git fast-import --quiet)], $env, { 0 => $fh }), 0,
+ 'fast-import');
}
like(PublicInbox::WwwListing::fingerprint($bare), qr/\A[a-f0-9]{40}\z/,
ok($sock, 'sock created');
my ($host, $port) = ($sock->sockhost, $sock->sockport);
my @clone = qw(git clone -q -s --bare);
- is(system(@clone, $bare->{git_dir}, $alt), 0, 'clone shared repo');
+ is(xsys(@clone, $bare->{git_dir}, $alt), 0, 'clone shared repo');
- system(qw(git init --bare -q), "$v2/all.git") == 0 or die;
+ PublicInbox::Import::init_bare("$v2/all.git");
for my $i (0..2) {
- is(system(@clone, $alt, "$v2/git/$i.git"), 0, "clone epoch $i");
+ is(xsys(@clone, $alt, "$v2/git/$i.git"), 0, "clone epoch $i")
}
ok(open(my $fh, '>', "$v2/inbox.lock"), 'mock a v2 inbox');
open $fh, '>', "$alt/description" or die;
print $fh "we're all clones\n" or die;
close $fh or die;
- is(system('git', "--git-dir=$alt", qw(config gitweb.owner lorelei)), 0,
+ is(xsys('git', "--git-dir=$alt", qw(config gitweb.owner lorelei)), 0,
'set gitweb user');
ok(unlink("$bare->{git_dir}/description"), 'removed bare/description');
open $fh, '>', $cfgfile or die;
tiny_test($json, $host, $port);
- skip 'skipping grok-pull integration test', 2 if !which('grok-pull');
+ my $grok_pull = which('grok-pull') or
+ skip('skipping grok-pull integration test', 2);
ok(mkdir("$tmpdir/mirror"), 'prepare grok mirror dest');
open $fh, '>', "$tmpdir/repos.conf" or die;
close $fh or die;
- system(qw(grok-pull -c), "$tmpdir/repos.conf");
+ xsys($grok_pull, '-c', "$tmpdir/repos.conf");
is($? >> 8, 127, 'grok-pull exit code as expected');
for (qw(alt bare v2/git/0.git v2/git/1.git v2/git/2.git)) {
ok(-d "$tmpdir/mirror/$_", "grok-pull created $_");
close $fh or die;
ok(mkdir("$tmpdir/per-inbox"), 'prepare single-v2-inbox mirror');
- system(qw(grok-pull -c), "$tmpdir/per-inbox.conf");
+ xsys($grok_pull, '-c', "$tmpdir/per-inbox.conf");
is($? >> 8, 127, 'grok-pull exit code as expected');
for (qw(v2/git/0.git v2/git/1.git v2/git/2.git)) {
ok(-d "$tmpdir/per-inbox/$_", "grok-pull created $_");
skip 'curl(1) not found', $nr unless $curl;
my $url = "http://$host:$port/description";
my $dst = "$tmpdir/desc";
- is(system($curl, qw(-RsSf), '-o', $dst, $url), 0, 'curl -R');
+ is(xsys($curl, qw(-RsSf), '-o', $dst, $url), 0, 'curl -R');
is((stat($dst))[9], $mtime, 'curl used remote mtime');
- is(system($curl, qw(-sSf), '-z', $dst, '-o', "$dst.2", $url), 0,
+ is(xsys($curl, qw(-sSf), '-z', $dst, '-o', "$dst.2", $url), 0,
'curl -z noop');
ok(!-e "$dst.2", 'no modification, nothing retrieved');
utime(0, 0, $dst) or die "utime failed: $!";
- is(system($curl, qw(-sSfR), '-z', $dst, '-o', "$dst.2", $url), 0,
+ is(xsys($curl, qw(-sSfR), '-z', $dst, '-o', "$dst.2", $url), 0,
'curl -z updates');
ok(-e "$dst.2", 'faked modification, got new file retrieved');
}