# MakeMaker only seems to support manpage sections 1 and 3...
m1 =
m1 += public-inbox-compact
+m1 += public-inbox-convert
+m1 += public-inbox-edit
m1 += public-inbox-httpd
m1 += public-inbox-index
m1 += public-inbox-mda
$(RSYNC) --chmod=Fugo=r -av $(rsync_docs) $(rsync_xdocs) $(RSYNC_DEST)
clean-doc:
- $(RM) $(man1) $(man5) $(man7) $(gz_docs) $(docs_html) $(mantxt)
+ $(RM) $(man1) $(man5) $(man7) $(man8) $(gz_docs) $(docs_html) $(mantxt)
$(RM) $(gz_xdocs) $(xdocs_html) $(xdocs)
clean :: clean-doc
Default: basename of C<publicinbox.cgitbin>, /var/www/htdocs/cgit/
or /usr/share/cgit/
+=item publicinbox.mailEditor
+
+See L<public-inbox-edit(1)>
+
=item publicinbox.wwwlisting
Enable a HTML listing style when the root path of the URL '/' is accessed.
--- /dev/null
+=head1 NAME
+
+public-inbox-edit - edit messages in a public inbox
+
+=head1 SYNOPSIS
+
+ public-inbox-edit -m MESSAGE-ID --all|INBOX_DIR
+
+ public-inbox-edit -F RAW_FILE --all|INBOX_DIR [.. INBOX_DIR]
+
+=head1 DESCRIPTION
+
+public-inbox-edit allows editing messages in a given inbox
+to remove sensitive information. It is only intended as a
+last resort, as it will cause discontiguous git history and
+draw more attention to the sensitive data in mirrors.
+
+=head1 OPTIONS
+
+=over
+
+=item --all
+
+Edit the message in all inboxes configured in ~/.public-inbox/config.
+This is an alternative to specifying individual inboxes directories
+on the command-line.
+
+=item -m MESSAGE-ID
+
+Edits the message corresponding to the given C<MESSAGE-ID>.
+If the C<MESSAGE-ID> is ambiguous, C<--force> or using the
+C<--file> of the original will be required.
+
+=item -F FILE
+
+Edits the message corresponding to the Message-ID: header
+and content given in C<FILE>. This requires the unmodified
+raw message, and the contents of C<FILE> will not itself
+be modified. This is useful if a Message-ID is ambiguous
+due to filtering/munging rules or other edits.
+
+=item --force
+
+Forcibly perform the edit even if Message-ID is ambiguous.
+
+=item --raw
+
+Do not perform "From " line escaping. By default, this
+generates a mboxrd variant file to detect unpurged messages
+in the new mbox. This makes sense if your configured
+C<publicinbox.mailEditor> is a regular editor and not
+something like C<mutt -f>
+
+=back
+
+=head1 CONFIGURATION
+
+=over 8
+
+=item publicinbox.mailEditor
+
+The command to perform the edit with. An example of this would be
+C<mutt -f>, and the user would then use the facilities in L<mutt(1)>
+to edit the mail. This is useful for editing attachments or
+Base64-encoded emails which are more difficult to edit with a
+normal editor (configured via C<GIT_EDITOR>, C<VISUAL> or C<EDITOR>).
+
+Default: none
+
+=back
+
+=head1 ENVIRONMENT
+
+=over 8
+
+=for comment MAIL_EDITOR is undocumented (unstable, don't want naming conflicts)
+
+=item GIT_EDITOR / VISUAL / EDITOR
+
+public-inbox-edit will fall back to using one of these variables
+(in that order) if C<publicinbox.mailEditor> is unset.
+
+=item PI_CONFIG
+
+The default config file, normally "~/.public-inbox/config".
+See L<public-inbox-config(5)>
+
+=back
+
+=head1 LIMITATIONS
+
+Only L<v2|public-inbox-v2-format(5)> repositories are supported.
+
+=head1 CONTACT
+
+Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org>
+
+The mail archives are hosted at L<https://public-inbox.org/meta/>
+and L<http://hjrcffqmbrq6wope.onion/meta/>
+
+=head1 COPYRIGHT
+
+Copyright 2019 all contributors L<mailto:meta@public-inbox.org>
+
+License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
+
+=head1 SEE ALSO
+
+L<public-inbox-purge(1)|https://public-inbox.org/public-inbox-purge.html>
This does not touch the NNTP article number database.
+=item --prune
+
+Run L<git-gc(1)> to prune and expire reflogs if discontiguous history
+is detected. This is intended to be used in mirrors after running
+L<public-inbox-edit(1)> or L<public-inbox-purge(1)> to ensure data
+is expunged from mirrors.
+
=back
=head1 FILES
v2 repositories are described in L<public-inbox-v2-format>.
-=back
-
=head1 ENVIRONMENT
=over 8
pkg: p5-TimeDate
rpm: perl-TimeDate
+* Digest::SHA typically installed with Perl
+ rpm: perl-Digest-SHA
+
* Email::MIME deb: libemail-mime-perl
pkg: p5-Email-MIME
rpm: perl-Email-MIME
(for public-inbox-watch, pulled in by Plack)
- IO::Compress::Gzip deb: perl-modules (or libio-compress-perl)
- rpm: perl-PerlIO-gzip
pkg: perl5
+ rpm: perl-IO-Compress
(for gzipped mbox over HTTP)
Uncommonly needed modules:
pkg: p5-IPC-Run
rpm: perl-IPC-Run
+- Plack::Test deb: libplack-test-perl
+ pkg: p5-Plack
+ rpm: perl-Plack-Test
+
+- Test::Simple deb: perl-modules-5.$MINOR
+ pkg: perl5
+ rpm: perl-Test-Simple
+
- XML::Feed deb: libxml-feed-perl
pkg: p5-XML-Feed
rpm: perl-XML-Feed
-- Test::HTTP::Server::Simple deb: libtest-http-server-simple-perl
- pkg: p5-Test-HTTP-Server-Simple
- rpm: perl-Test-HTTP-Server-Simple
- (XXX is this really needed?)
-
standard MakeMaker installation (Perl)
--------------------------------------
+To use MakeMaker, you need to ensure ExtUtils::MakeMaker is available.
+This is typically installed with Perl, but RPM-based systems will likely
+need to install the `perl-ExtUtils-MakeMaker' package.
+
Once the dependencies are installed, you should be able to build and
install the system (into /usr/local) with:
Documentation/public-inbox-config.pod
Documentation/public-inbox-convert.pod
Documentation/public-inbox-daemon.pod
+Documentation/public-inbox-edit.pod
Documentation/public-inbox-httpd.pod
Documentation/public-inbox-index.pod
Documentation/public-inbox-mda.pod
examples/varnish-4.vcl
lib/PublicInbox/Address.pm
lib/PublicInbox/Admin.pm
+lib/PublicInbox/AdminEdit.pm
lib/PublicInbox/AltId.pm
lib/PublicInbox/Cgit.pm
lib/PublicInbox/Config.pm
sa_config/user/.spamassassin/user_prefs
script/public-inbox-compact
script/public-inbox-convert
+script/public-inbox-edit
script/public-inbox-httpd
script/public-inbox-index
script/public-inbox-init
t/convert-compact.t
t/data/0001.patch
t/ds-leak.t
+t/edit.t
t/emergency.t
t/fail-bin/spamc
t/feed.t
t/psgi_v2.t
t/purge.t
t/qspawn.t
+t/replace.t
t/reply.t
t/search-thr-index.t
t/search.t
# We also depend on git.
# Keep this sorted and synced to the INSTALL document
'Date::Parse' => 0,
+
+ # libperl$PERL_VERSION,
+ # `perl5' on FreeBSD
+ # perl-Digest-SHA on RH-based
+ 'Digest::SHA' => 0,
'Email::MIME' => 0,
# the following should be pulled in by Email::MIME:
# We have more test dependencies, but do not force
# users to install them. See INSTALL
+
+ # All Perl installs I know about have these, but RH-based
+ # distros make them separate even though 'perl' pulls them in
+ 'File::Path' => 0,
+ 'File::Temp' => 0,
+ 'Getopt::Long' => 0,
+ 'Exporter' => 0,
+ # ExtUtils::MakeMaker # this file won't run w/o it...
},
MAN3PODS => \%man3,
);
-include config.mak
-include Documentation/include.mk
SCRIPTS := scripts/ssoma-replay
-my_syntax := \$(addsuffix .syntax, $PM_FILES \$(EXE_FILES) \$(SCRIPTS))
-
+syn_files := $PM_FILES \$(EXE_FILES) \$(SCRIPTS)
+my_syntax := \$(addsuffix .syntax, \$(syn_files))
+changed = \$(shell git ls-files -m)
%.syntax ::
- @\$(PERL) -I lib -c \$(subst .syntax,,\$@)
+ @\$(PERL) -w -I lib -c \$(subst .syntax,,\$@)
syntax:: \$(my_syntax)
+dsyn :: \$(addsuffix .syntax, \$(filter \$(changed), \$(syn_files)))
+
check-manifest :: MANIFEST
if git ls-files >\$?.gen 2>&1; then diff -u \$? \$?.gen; fi
my $pkg_fmt = shift;
@ARGV or die $usage, "\n";
+my @test_essential = qw(Test::Simple Plack::Test);
+
# package profiles
my $profiles = {
- # the smallest possible profile
+ # the smallest possible profile for testing
# TODO: trim this, Plack pulls in Filesys::Notify::Simple,
# and we don't need that for mda-only installs
essential => [ qw(
perl
Date::Parse
Devel::Peek
+ Digest::SHA
Email::Simple
Email::MIME
Email::MIME::ContentType
Encode
+ ExtUtils::MakeMaker
Filesys::Notify::Simple
Plack
URI::Escape
- ) ],
+ ), @test_essential ],
# everything optional for normal use
optional => [ qw(
xapian-compact
) ],
- # developer stuff
+ # optional developer stuff
devtest => [ qw(
IPC::Run
- Test::HTTP::Server::Simple
XML::Feed
curl
w3m
deb => 'perl', # libperl5.XX, but the XX varies
pkg => 'perl5',
},
+ 'Digest::SHA' => {
+ deb => 'perl', # libperl5.XX, but the XX varies
+ pkg => 'perl5',
+ },
'Encode' => {
deb => 'perl', # libperl5.XX, but the XX varies
pkg => 'perl5',
rpm => 'perl-Encode',
},
+ 'ExtUtils::MakeMaker' => {
+ deb => 'perl', # perl-modules-5.xx
+ pkg => 'perl5',
+ rpm => 'perl-ExtUtils-MakeMaker',
+ },
'IO::Compress::Gzip' => {
deb => 'perl', # perl-modules-5.xx
pkg => 'perl5',
- rpm => 'perl-PerlIO-gzip',
+ rpm => 'perl-IO-Compress',
},
'DBD::SQLite' => { deb => 'libdbd-sqlite3-perl' },
+ 'Plack::Test' => {
+ deb => 'libplack-perl',
+ pkg => 'p5-Plack',
+ rpm => 'perl-Plack-Test',
+ },
'URI::Escape' => {
deb => 'liburi-perl',
pkg => 'p5-URI',
rpm => 'perl-URI',
},
+ 'Test::Simple' => {
+ deb => 'perl', # perl-modules-5.XX, but the XX varies
+ pkg => 'perl5',
+ rpm => 'perl-Test-Simple',
+ },
'highlight.pm' => {
deb => 'libhighlight-perl',
pkg => [],
use Cwd 'abs_path';
use base qw(Exporter);
our @EXPORT_OK = qw(resolve_repo_dir);
+my $CFG; # all the admin stuff is a singleton
+require PublicInbox::Config;
sub resolve_repo_dir {
my ($cd, $ver) = @_;
$l;
}
-sub resolve_inboxes {
- my ($argv, $warn_on_unconfigured) = @_;
- require PublicInbox::Config;
+sub unconfigured_ibx ($$) {
+ my ($dir, $i) = @_;
+ my $name = "unconfigured-$i";
+ PublicInbox::Inbox->new({
+ name => $name,
+ address => [ "$name\@example.com" ],
+ mainrepo => $dir,
+ # TODO: consumers may want to warn on this:
+ #-unconfigured => 1,
+ });
+}
+
+sub config () { $CFG //= eval { PublicInbox::Config->new } }
+
+sub resolve_inboxes ($;$) {
+ my ($argv, $opt) = @_;
require PublicInbox::Inbox;
+ $opt ||= {};
- my @ibxs = map { resolve_repo_dir($_) } @$argv;
- push(@ibxs, resolve_repo_dir()) unless @ibxs;
+ my $cfg = config();
+ if ($opt->{all}) {
+ my $cfgfile = PublicInbox::Config::default_file();
+ $cfg or die "--all specified, but $cfgfile not readable\n";
+ @$argv and die "--all specified, but directories specified\n";
+ }
+ my $min_ver = $opt->{-min_inbox_version} || 0;
+ my (@old, @ibxs);
my %dir2ibx;
- if (my $config = eval { PublicInbox::Config->new }) {
- $config->each_inbox(sub {
+ if ($cfg) {
+ $cfg->each_inbox(sub {
my ($ibx) = @_;
+ $ibx->{version} ||= 1;
$dir2ibx{abs_path($ibx->{mainrepo})} = $ibx;
});
- } elsif ($warn_on_unconfigured) {
- # do we really care about this? It's annoying...
- warn $warn_on_unconfigured, "\n";
}
- for my $i (0..$#ibxs) {
- my $dir = $ibxs[$i];
- $ibxs[$i] = $dir2ibx{$dir} ||= do {
- my $name = "unconfigured-$i";
- PublicInbox::Inbox->new({
- name => $name,
- address => [ "$name\@example.com" ],
- mainrepo => $dir,
- # TODO: consumers may want to warn on this:
- #-unconfigured => 1,
- });
- };
+ if ($opt->{all}) {
+ my @all = values %dir2ibx;
+ @all = grep { $_->{version} >= $min_ver } @all;
+ push @ibxs, @all;
+ } else { # directories specified on the command-line
+ my $i = 0;
+ my @dirs = @$argv;
+ push @dirs, '.' unless @dirs;
+ foreach (@dirs) {
+ my $v;
+ my $dir = resolve_repo_dir($_, \$v);
+ if ($v < $min_ver) {
+ push @old, $dir;
+ next;
+ }
+ my $ibx = $dir2ibx{$dir} ||= unconfigured_ibx($dir, $i);
+ $i++;
+ push @ibxs, $ibx;
+ }
+ }
+ if (@old) {
+ die "inboxes $min_ver inboxes not supported by $0\n\t",
+ join("\n\t", @old), "\n";
}
@ibxs;
}
--- /dev/null
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# common stuff between -edit, -purge (and maybe -learn in the future)
+package PublicInbox::AdminEdit;
+use strict;
+use warnings;
+use PublicInbox::Admin;
+our @OPT = qw(all force|f verbose|v!);
+
+sub check_editable ($) {
+ my ($ibxs) = @_;
+
+ foreach my $ibx (@$ibxs) {
+ my $lvl = $ibx->{indexlevel};
+ if (defined $lvl) {
+ PublicInbox::Admin::indexlevel_ok_or_die($lvl);
+ next;
+ }
+
+ # Undefined indexlevel, so `full'...
+ # Search::Xapian exists and the DB can be read, at least, fine
+ $ibx->search and next;
+
+ # it's possible for a Xapian directory to exist,
+ # but Search::Xapian to go missing/broken.
+ # Make sure it's purged in that case:
+ $ibx->over or die "no over.sqlite3 in $ibx->{mainrepo}\n";
+
+ # $ibx->{search} is populated by $ibx->over call
+ my $xdir_ro = $ibx->{search}->xdir(1);
+ my $npart = 0;
+ foreach my $part (<$xdir_ro/*>) {
+ if (-d $part && $part =~ m!/[0-9]+\z!) {
+ my $bytes = 0;
+ $bytes += -s $_ foreach glob("$part/*");
+ $npart++ if $bytes;
+ }
+ }
+ if ($npart) {
+ PublicInbox::Admin::require_or_die('-search');
+ } else {
+ # somebody could "rm -r" all the Xapian directories;
+ # let them purge the overview, at least
+ $ibx->{indexlevel} ||= 'basic';
+ }
+ }
+}
+
+# takes the output of V2Writable::purge and V2Writable::replace
+# $rewrites = [ array commits keyed by epoch ]
+sub show_rewrites ($$$) {
+ my ($fh, $ibx, $rewrites) = @_;
+ print $fh "$ibx->{mainrepo}:";
+ if (scalar @$rewrites) {
+ my $epoch = -1;
+ my @out = map {;
+ ++$epoch;
+ "$epoch.git: ".(defined($_) ? $_ : '(unchanged)')
+ } @$rewrites;
+ print $fh join("\n\t", '', @out), "\n";
+ } else {
+ print $fh " NONE\n";
+ }
+}
+
+1;
fail($self, "Unexpected result from git cat-file: $head");
my $size = $1;
- my $ref_type = $ref ? ref($ref) : '';
-
my $rv;
my $left = $size;
- $$ref = $size if ($ref_type eq 'SCALAR');
- my $cb_err;
-
- if ($ref_type eq 'CODE') {
- $rv = eval { $ref->($in, \$left) };
- $cb_err = $@;
- # drain the rest
- my $max = 8192;
- while ($left > 0) {
- my $r = read($in, my $x, $left > $max ? $max : $left);
- defined($r) or fail($self, "read failed: $!");
- $r == 0 and fail($self, 'exited unexpectedly');
- $left -= $r;
- }
- } else {
- my $offset = 0;
- my $buf = '';
- while ($left > 0) {
- my $r = read($in, $buf, $left, $offset);
- defined($r) or fail($self, "read failed: $!");
- $r == 0 and fail($self, 'exited unexpectedly');
- $left -= $r;
- $offset += $r;
- }
- $rv = \$buf;
+ $$ref = $size if $ref;
+
+ my $offset = 0;
+ my $buf = '';
+ while ($left > 0) {
+ my $r = read($in, $buf, $left, $offset);
+ defined($r) or fail($self, "read failed: $!");
+ $r == 0 and fail($self, 'exited unexpectedly');
+ $left -= $r;
+ $offset += $r;
}
+ $rv = \$buf;
- my $r = read($in, my $buf, 1);
+ my $r = read($in, my $lf, 1);
defined($r) or fail($self, "read failed: $!");
- fail($self, 'newline missing after blob') if ($r != 1 || $buf ne "\n");
- die $cb_err if $cb_err;
+ fail($self, 'newline missing after blob') if ($r != 1 || $lf ne "\n");
$rv;
}
"$ts $zone";
}
-sub extract_author_info ($) {
+sub extract_cmt_info ($) {
my ($mime) = @_;
my $sender = '';
$name = '';
warn "no name in From: $from or Sender: $sender\n";
}
- ($name, $email);
+
+ my $hdr = $mime->header_obj;
+
+ my $subject = $hdr->header('Subject');
+ $subject = '(no subject)' unless defined $subject;
+ # Mime decoding can create nulls replace them with spaces to protect git
+ $subject =~ tr/\0/ /;
+ utf8::encode($subject);
+ my $at = git_timestamp(my @at = msg_datestamp($hdr));
+ my $ct = git_timestamp(my @ct = msg_timestamp($hdr));
+ ($name, $email, $at, $ct, $subject);
}
# kill potentially confusing/misleading headers
sub add {
my ($self, $mime, $check_cb) = @_; # mime = Email::MIME
- my ($name, $email) = extract_author_info($mime);
- my $hdr = $mime->header_obj;
- my @at = msg_datestamp($hdr);
- my @ct = msg_timestamp($hdr);
- my $author_time_raw = git_timestamp(@at);
- my $commit_time_raw = git_timestamp(@ct);
-
- my $subject = $mime->header('Subject');
- $subject = '(no subject)' unless defined $subject;
- # Mime decoding can create nulls replace them with spaces to protect git
- $subject =~ tr/\0/ /;
- utf8::encode($subject);
-
+ my ($name, $email, $at, $ct, $subject) = extract_cmt_info($mime);
my $path_type = $self->{path_type};
my $path;
if ($path_type eq '2/38') {
}
print $w "commit $ref\nmark :$commit\n",
- "author $name <$email> $author_time_raw\n",
- "committer $self->{ident} $commit_time_raw\n" or wfail;
+ "author $name <$email> $at\n",
+ "committer $self->{ident} $ct\n" or wfail;
print $w "data ", (length($subject) + 1), "\n",
$subject, "\n\n" or wfail;
if ($tip ne '') {
"$dt.$b64" . '@z';
}
-sub clean_purge_buffer {
- my ($oids, $buf) = @_;
- my $cmt_msg = 'purged '.join(' ',@$oids)."\n";
+sub rewrite_commit ($$$$) {
+ my ($self, $oids, $buf, $mime) = @_;
+ my ($name, $email, $at, $ct, $subject);
+ if ($mime) {
+ ($name, $email, $at, $ct, $subject) = extract_cmt_info($mime);
+ } else {
+ $name = $email = '';
+ $subject = 'purged '.join(' ', @$oids);
+ }
@$oids = ();
-
+ $subject .= "\n";
foreach my $i (0..$#$buf) {
my $l = $buf->[$i];
if ($l =~ /^author .* ([0-9]+ [\+-]?[0-9]+)$/) {
- $buf->[$i] = "author <> $1\n";
+ $at //= $1;
+ $buf->[$i] = "author $name <$email> $at\n";
+ } elsif ($l =~ /^committer .* ([0-9]+ [\+-]?[0-9]+)$/) {
+ $ct //= $1;
+ $buf->[$i] = "committer $self->{ident} $ct\n";
} elsif ($l =~ /^data ([0-9]+)/) {
- $buf->[$i++] = "data " . length($cmt_msg) . "\n";
- $buf->[$i] = $cmt_msg;
+ $buf->[$i++] = "data " . length($subject) . "\n";
+ $buf->[$i] = $subject;
last;
}
}
}
-sub purge_oids {
- my ($self, $purge) = @_;
- my $tmp = "refs/heads/purge-".((keys %$purge)[0]);
+# returns the new commit OID if a replacement was done
+# returns undef if nothing was done
+sub replace_oids {
+ my ($self, $mime, $replace_map) = @_; # oid => raw string
+ my $tmp = "refs/heads/replace-".((keys %$replace_map)[0]);
my $old = $self->{'ref'};
my $git = $self->{git};
my @export = (qw(fast-export --no-data --use-done-feature), $old);
my $rd = $git->popen(@export);
my ($r, $w) = $self->gfi_start;
my @buf;
- my $npurge = 0;
+ my $nreplace = 0;
my @oids;
my ($done, $mark);
my $tree = $self->{-tree};
} elsif (/^M 100644 ([a-f0-9]+) (\w+)/) {
my ($oid, $path) = ($1, $2);
$tree->{$path} = 1;
- if ($purge->{$oid}) {
+ my $sref = $replace_map->{$oid};
+ if (defined $sref) {
push @oids, $oid;
- my $cmd = "M 100644 inline $path\ndata 0\n\n";
- push @buf, $cmd;
+ my $n = length($$sref);
+ push @buf, "M 100644 inline $path\ndata $n\n";
+ push @buf, $$sref; # hope CoW works...
+ push @buf, "\n";
} else {
push @buf, $_;
}
push @buf, $_ if $tree->{$path};
} elsif ($_ eq "\n") {
if (@oids) {
- my $out = join('', @buf);
- $out =~ s/^/# /sgm;
- warn "purge rewriting\n", $out, "\n";
- clean_purge_buffer(\@oids, \@buf);
- $npurge++;
+ if (!$mime) {
+ my $out = join('', @buf);
+ $out =~ s/^/# /sgm;
+ warn "purge rewriting\n", $out, "\n";
+ }
+ rewrite_commit($self, \@oids, \@buf, $mime);
+ $nreplace++;
}
$w->print(@buf, "\n") or wfail;
@buf = ();
$w->print(@buf) or wfail;
}
die 'done\n not seen from fast-export' unless $done;
- chomp(my $cmt = $self->get_mark(":$mark")) if $npurge;
+ chomp(my $cmt = $self->get_mark(":$mark")) if $nreplace;
$self->{nchg} = 0; # prevent _update_git_info until update-ref:
$self->done;
my @git = ('git', "--git-dir=$git->{git_dir}");
- run_die([@git, qw(update-ref), $old, $tmp]) if $npurge;
+ run_die([@git, qw(update-ref), $old, $tmp]) if $nreplace;
run_die([@git, qw(update-ref -d), $tmp]);
- return if $npurge == 0;
+ return if $nreplace == 0;
run_die([@git, qw(-c gc.reflogExpire=now gc --prune=all)]);
+
+ # check that old OIDs are gone
my $err = 0;
- foreach my $oid (keys %$purge) {
+ foreach my $oid (keys %$replace_map) {
my @info = $git->check($oid);
if (@info) {
- warn "$oid not purged\n";
+ warn "$oid not replaced\n";
$err++;
}
}
_update_git_info($self, 0);
- die "Failed to purge $err object(s)\n" if $err;
+ die "Failed to replace $err object(s)\n" if $err;
$cmt;
}
sub set_nntp_headers ($$$$$) {
my ($self, $hdr, $ng, $n, $mid) = @_;
+ # why? leafnode requires a Path: header for some inexplicable
+ # reason. We'll fake the shortest one possible.
+ $hdr->header_set('Path', 'y');
+
+ # leafnode (and maybe other NNTP clients) have trouble dealing
+ # with v2 messages which have multiple Message-IDs (either due
+ # to our own content-based dedupe or buggy git-send-email versions).
+ my @mids = $hdr->header('Message-ID');
+ if (scalar(@mids) > 1) {
+ my $mid0 = "<$mid>";
+ $hdr->header_set('Message-ID', $mid0);
+ my @alt = $hdr->header('X-Alt-Message-ID');
+ my %seen = map { $_ => 1 } (@alt, $mid0);
+ foreach my $m (@mids) {
+ next if $seen{$m}++;
+ push @alt, $m;
+ }
+ $hdr->header_set('X-Alt-Message-ID', @alt);
+ }
+
# clobber some
my $xref = xref($self, $ng, $n, $mid);
$hdr->header_set('Xref', $xref);
my $hdr = $_[0]->header_obj->as_string;
utf8::encode($hdr);
$hdr =~ s/(?<!\r)\n/\r\n/sg;
+
+ # for leafnode compatibility, we need to ensure Message-ID headers
+ # are only a single line. We can't subclass Email::Simple::Header
+ # and override _default_fold_at in here, either; since that won't
+ # affect messages already in the archive.
+ $hdr =~ s/^(Message-ID:)[ \t]*\r\n[ \t]+([^\r]+)\r\n/$1 $2\r\n/igsm;
+
$hdr
}
}
}
return unless defined $flag;
- $self->{xdb} = Search::Xapian::WritableDatabase->new($dir, $flag);
+ my $xdb = eval { Search::Xapian::WritableDatabase->new($dir, $flag) };
+ if ($@) {
+ die "Failed opening $dir: ", $@;
+ }
+ $self->{xdb} = $xdb;
}
sub add_val ($$$) {
bless { mid => $mid }, $class;
}
-sub get {
- my ($class, $head, $db, $mid) = @_;
- my $doc_id = $head->get_docid;
- load_expand(wrap($class, $mid), $db->get_document($doc_id));
-}
-
sub get_val ($$) {
my ($doc, $col) = @_;
Search::Xapian::sortable_unserialise($doc->get_value($col));
use PublicInbox::MIME;
use PublicInbox::Git;
use PublicInbox::Import;
-use PublicInbox::MID qw(mids);
+use PublicInbox::MID qw(mids references);
use PublicInbox::ContentId qw(content_id content_digest);
use PublicInbox::Inbox;
use PublicInbox::OverIdx;
});
}
+# indexes a message, returns true if checkpointing is needed
+sub do_idx ($$$$$$$) {
+ my ($self, $msgref, $mime, $len, $num, $oid, $mid0) = @_;
+ $self->{over}->add_overview($mime, $len, $num, $oid, $mid0);
+ my $npart = $self->{partitions};
+ my $part = $num % $npart;
+ my $idx = idx_part($self, $part);
+ $idx->index_raw($len, $msgref, $num, $oid, $mid0, $mime);
+ my $n = $self->{transact_bytes} += $len;
+ $n >= (PublicInbox::SearchIdx::BATCH_BYTES * $npart);
+}
+
sub _add {
my ($self, $mime, $check_cb) = @_;
$self->{last_commit}->[$self->{epoch_max}] = $cmt;
my ($oid, $len, $msgref) = @{$im->{last_object}};
- $self->{over}->add_overview($mime, $len, $num, $oid, $mid0);
- my $nparts = $self->{partitions};
- my $part = $num % $nparts;
- my $idx = $self->idx_part($part);
- $idx->index_raw($len, $msgref, $num, $oid, $mid0, $mime);
- my $n = $self->{transact_bytes} += $len;
- if ($n > (PublicInbox::SearchIdx::BATCH_BYTES * $nparts)) {
+ if (do_idx($self, $msgref, $mime, $len, $num, $oid, $mid0)) {
$self->checkpoint;
}
});
}
-sub purge_oids ($$) {
- my ($self, $purge) = @_; # $purge = { $object_id => 1, ... }
+# returns an array mapping [ epoch => latest_commit ]
+# latest_commit may be undef if nothing was done to that epoch
+# $replace_map = { $object_id => $strref, ... }
+sub _replace_oids ($$$) {
+ my ($self, $mime, $replace_map) = @_;
$self->done;
my $pfx = "$self->{-inbox}->{mainrepo}/git";
- my $purges = [];
+ my $rewrites = []; # epoch => commit
my $max = $self->{epoch_max};
unless (defined($max)) {
defined(my $latest = git_dir_latest($self, \$max)) or return;
$self->{epoch_max} = $max;
}
+
foreach my $i (0..$max) {
my $git_dir = "$pfx/$i.git";
-d $git_dir or next;
my $git = PublicInbox::Git->new($git_dir);
my $im = $self->import_init($git, 0, 1);
- $purges->[$i] = $im->purge_oids($purge);
+ $rewrites->[$i] = $im->replace_oids($mime, $replace_map);
$im->done;
}
- $purges;
+ $rewrites;
}
sub content_ids ($) {
0
}
-sub remove_internal ($$$$) {
- my ($self, $mime, $cmt_msg, $purge) = @_;
+# used for removing or replacing (purging)
+sub rewrite_internal ($$;$$$) {
+ my ($self, $old_mime, $cmt_msg, $new_mime, $sref) = @_;
$self->idx_init;
- my $im = $self->importer unless $purge;
+ my ($im, $need_reindex, $replace_map);
+ if ($sref) {
+ $replace_map = {}; # oid => sref
+ $need_reindex = [] if $new_mime;
+ } else {
+ $im = $self->importer;
+ }
my $over = $self->{over};
- my $cids = content_ids($mime);
+ my $cids = content_ids($old_mime);
my $parts = $self->{idx_parts};
- my $mm = $self->{mm};
my $removed;
- my $mids = mids($mime->header_obj);
+ my $mids = mids($old_mime->header_obj);
# We avoid introducing new blobs into git since the raw content
# can be slightly different, so we do not need the user-supplied
# message now that we have the mids and content_id
- $mime = undef;
+ $old_mime = undef;
my $mark;
foreach my $mid (@$mids) {
- my %gone;
+ my %gone; # num => [ smsg, raw ]
my ($id, $prev);
while (my $smsg = $over->next_by_mid($mid, \$id, \$prev)) {
my $msg = get_blob($self, $smsg);
}
foreach my $num (keys %gone) {
my ($smsg, $orig) = @{$gone{$num}};
- $mm->num_delete($num);
# $removed should only be set once assuming
# no bugs in our deduplication code:
$removed = $smsg;
my $oid = $smsg->{blob};
- if ($purge) {
- $purge->{$oid} = 1;
+ if ($replace_map) {
+ $replace_map->{$oid} = $sref;
} else {
($mark, undef) = $im->remove($orig, $cmt_msg);
}
$orig = undef;
+ if ($need_reindex) { # ->replace
+ push @$need_reindex, $smsg;
+ } else { # ->purge or ->remove
+ $self->{mm}->num_delete($num);
+ }
unindex_oid_remote($self, $oid, $mid);
}
}
my $cmt = $im->get_mark($mark);
$self->{last_commit}->[$self->{epoch_max}] = $cmt;
}
- if ($purge && scalar keys %$purge) {
- return purge_oids($self, $purge);
+ if ($replace_map && scalar keys %$replace_map) {
+ my $rewrites = _replace_oids($self, $new_mime, $replace_map);
+ return { rewrites => $rewrites, need_reindex => $need_reindex };
}
$removed;
}
sub remove {
my ($self, $mime, $cmt_msg) = @_;
$self->{-inbox}->with_umask(sub {
- remove_internal($self, $mime, $cmt_msg, undef);
+ rewrite_internal($self, $mime, $cmt_msg);
});
}
+sub _replace ($$;$$) {
+ my ($self, $old_mime, $new_mime, $sref) = @_;
+ my $rewritten = $self->{-inbox}->with_umask(sub {
+ rewrite_internal($self, $old_mime, undef, $new_mime, $sref);
+ }) or return;
+
+ my $rewrites = $rewritten->{rewrites};
+ # ->done is called if there are rewrites since we gc+prune from git
+ $self->idx_init if @$rewrites;
+
+ for my $i (0..$#$rewrites) {
+ defined(my $cmt = $rewrites->[$i]) or next;
+ $self->{last_commit}->[$i] = $cmt;
+ }
+ $rewritten;
+}
+
# public
sub purge {
my ($self, $mime) = @_;
- my $purges = $self->{-inbox}->with_umask(sub {
- remove_internal($self, $mime, undef, {});
- }) or return;
- $self->idx_init if @$purges; # ->done is called on purges
- for my $i (0..$#$purges) {
- defined(my $cmt = $purges->[$i]) or next;
- $self->{last_commit}->[$i] = $cmt;
+ my $rewritten = _replace($self, $mime, undef, \'') or return;
+ $rewritten->{rewrites}
+}
+
+# returns the git object_id of $fh, does not write the object to FS
+sub git_hash_raw ($$) {
+ my ($self, $raw) = @_;
+ # grab the expected OID we have to reindex:
+ open my $tmp_fh, '+>', undef or die "failed to open tmp: $!";
+ $tmp_fh->autoflush(1);
+ print $tmp_fh $$raw or die "print \$tmp_fh: $!";
+ sysseek($tmp_fh, 0, 0) or die "seek failed: $!";
+
+ my ($r, $w);
+ pipe($r, $w) or die "failed to create pipe: $!";
+ my $rdr = { 0 => fileno($tmp_fh), 1 => fileno($w) };
+ my $git_dir = $self->{-inbox}->git->{git_dir};
+ my $cmd = ['git', "--git-dir=$git_dir", qw(hash-object --stdin)];
+ my $pid = spawn($cmd, undef, $rdr);
+ close $w;
+ local $/ = "\n";
+ chomp(my $oid = <$r>);
+ waitpid($pid, 0) == $pid or die "git hash-object did not finish";
+ die "git hash-object failed: $?" if $?;
+ $oid =~ /\A[a-f0-9]{40}\z/ or die "OID not expected: $oid";
+ $oid;
+}
+
+sub _check_mids_match ($$$) {
+ my ($old_list, $new_list, $hdrs) = @_;
+ my %old_mids = map { $_ => 1 } @$old_list;
+ my %new_mids = map { $_ => 1 } @$new_list;
+ my @old = keys %old_mids;
+ my @new = keys %new_mids;
+ my $err = "$hdrs may not be changed when replacing\n";
+ die $err if scalar(@old) != scalar(@new);
+ delete @new_mids{@old};
+ delete @old_mids{@new};
+ die $err if (scalar(keys %old_mids) || scalar(keys %new_mids));
+}
+
+# Changing Message-IDs or References with ->replace isn't supported.
+# The rules for dealing with messages with multiple or conflicting
+# Message-IDs are pretty complex and rethreading hasn't been fully
+# implemented, yet.
+sub check_mids_match ($$) {
+ my ($old_mime, $new_mime) = @_;
+ my $old = $old_mime->header_obj;
+ my $new = $new_mime->header_obj;
+ _check_mids_match(mids($old), mids($new), 'Message-ID(s)');
+ _check_mids_match(references($old), references($new),
+ 'References/In-Reply-To');
+}
+
+# public
+sub replace ($$$) {
+ my ($self, $old_mime, $new_mime) = @_;
+
+ check_mids_match($old_mime, $new_mime);
+
+ # mutt will always add Content-Length:, Status:, Lines: when editing
+ PublicInbox::Import::drop_unwanted_headers($new_mime);
+
+ my $raw = $new_mime->as_string;
+ my $expect_oid = git_hash_raw($self, \$raw);
+ my $rewritten = _replace($self, $old_mime, $new_mime, \$raw) or return;
+ my $need_reindex = $rewritten->{need_reindex};
+
+ # just in case we have bugs in deduplication code:
+ my $n = scalar(@$need_reindex);
+ if ($n > 1) {
+ my $list = join(', ', map {
+ "$_->{num}: <$_->{mid}>"
+ } @$need_reindex);
+ warn <<"";
+W: rewritten $n messages matching content of original message (expected: 1).
+W: possible bug in public-inbox, NNTP article IDs and Message-IDs follow:
+W: $list
+
+ }
+
+ # make sure we really got the OID:
+ my ($oid, $type, $len) = $self->{-inbox}->git->check($expect_oid);
+ $oid eq $expect_oid or die "BUG: $expect_oid not found after replace";
+
+ # don't leak FDs to Xapian:
+ $self->{-inbox}->git->cleanup;
+
+ # reindex modified messages:
+ for my $smsg (@$need_reindex) {
+ my $num = $smsg->{num};
+ my $mid0 = $smsg->{mid};
+ do_idx($self, \$raw, $new_mime, $len, $num, $oid, $mid0);
}
- $purges;
+ $rewritten->{rewrites};
}
sub last_commit_part ($$;$) {
}
$sync->{mm_tmp}->mid_delete($mid0) or
die "failed to delete <$mid0> for article #$num\n";
-
- $self->{over}->add_overview($mime, $len, $num, $oid, $mid0);
- my $nparts = $self->{partitions};
- my $part = $num % $nparts;
- my $idx = $self->idx_part($part);
- $idx->index_raw($len, $msgref, $num, $oid, $mid0, $mime);
- my $n = $self->{transact_bytes} += $len;
$sync->{nr}++;
- if ($n > (PublicInbox::SearchIdx::BATCH_BYTES * $nparts)) {
+ if (do_idx($self, $msgref, $mime, $len, $num, $oid, $mid0)) {
$git->cleanup;
$sync->{mm_tmp}->atfork_prepare;
$self->done; # release lock
--- /dev/null
+#!/usr/bin/perl -w
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# Used for editing messages in a public-inbox.
+# Supports v2 inboxes only, for now.
+use strict;
+use warnings;
+use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
+use PublicInbox::AdminEdit;
+use File::Temp qw(tempfile);
+use PublicInbox::ContentId qw(content_id);
+use PublicInbox::MID qw(mid_clean mids);
+PublicInbox::Admin::check_require('-index');
+require PublicInbox::MIME;
+require PublicInbox::InboxWritable;
+require PublicInbox::Import;
+
+my $usage = "$0 -m MESSAGE_ID [--all] [INBOX_DIRS]";
+my $opt = { verbose => 1, all => 0, -min_inbox_version => 2, raw => 0 };
+my @opt = qw(mid|m=s file|F=s raw);
+GetOptions($opt, @PublicInbox::AdminEdit::OPT, @opt) or
+ die "bad command-line args\n$usage\n";
+
+my $editor = $ENV{MAIL_EDITOR}; # e.g. "mutt -f"
+unless (defined $editor) {
+ my $k = 'publicinbox.mailEditor';
+ if (my $cfg = PublicInbox::Admin::config()) {
+ $editor = $cfg->{lc($k)};
+ }
+ unless (defined $editor) {
+ warn "\`$k' not configured, trying \`git var GIT_EDITOR'\n";
+ chomp($editor = `git var GIT_EDITOR`);
+ warn "Will use $editor to edit mail\n";
+ }
+}
+
+my $mid = $opt->{mid};
+my $file = $opt->{file};
+if (defined $mid && defined $file) {
+ die "the --mid and --file options are mutually exclusive\n";
+}
+
+my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt);
+PublicInbox::AdminEdit::check_editable(\@ibxs);
+
+my $found = {}; # cid => [ [ibx, smsg] [, [ibx, smsg] ] ]
+
+sub find_mid ($) {
+ my ($mid) = @_;
+ foreach my $ibx (@ibxs) {
+ my $over = $ibx->over;
+ my ($id, $prev);
+ while (my $smsg = $over->next_by_mid($mid, \$id, \$prev)) {
+ my $ref = $ibx->msg_by_smsg($smsg);
+ my $mime = PublicInbox::MIME->new($ref);
+ my $cid = content_id($mime);
+ my $tuple = [ $ibx, $smsg ];
+ push @{$found->{$cid} ||= []}, $tuple
+ }
+ delete @$ibx{qw(over mm git search)}; # cleanup
+ }
+ $found;
+}
+
+sub show_cmd ($$) {
+ my ($ibx, $smsg) = @_;
+ " GIT_DIR=$ibx->{mainrepo}/all.git \\\n git show $smsg->{blob}\n";
+}
+
+sub show_found () {
+ foreach my $to_edit (values %$found) {
+ foreach my $tuple (@$to_edit) {
+ my ($ibx, $smsg) = @$tuple;
+ warn show_cmd($ibx, $smsg);
+ }
+ }
+}
+
+if (defined($mid)) {
+ $mid = mid_clean($mid);
+ $found = find_mid($mid);
+ my $nr = scalar(keys %$found);
+ die "No message found for <$mid>\n" unless $nr;
+ if ($nr > 1) {
+ warn <<"";
+Multiple messages with different content found matching
+<$mid>:
+
+ show_found();
+ die "Use --force to edit all of them\n" if !$opt->{force};
+ warn "Will edit all of them\n";
+ }
+} else {
+ open my $fh, '<', $file or die "open($file) failed: $!";
+ my $orig = do { local $/; <$fh> };
+ my $mime = PublicInbox::MIME->new(\$orig);
+ my $mids = mids($mime->header_obj);
+ find_mid($_) for (@$mids); # populates $found
+ my $cid = content_id($mime);
+ my $to_edit = $found->{$cid};
+ unless ($to_edit) {
+ my $nr = scalar(keys %$found);
+ if ($nr > 0) {
+ warn <<"";
+$nr matches to Message-ID(s) in $file, but none matched content
+Partial matches below:
+
+ show_found();
+ } elsif ($nr == 0) {
+ $mids = join('', map { " <$_>\n" } @$mids);
+ warn <<"";
+No matching messages found matching Message-ID(s) in $file
+$mids
+
+ }
+ exit 1;
+ }
+ $found = { $cid => $to_edit };
+}
+
+my $tmpl = 'public-inbox-edit-XXXXXX';
+foreach my $to_edit (values %$found) {
+ my ($edit_fh, $edit_fn) = tempfile($tmpl, TMPDIR => 1, UNLINK => 1);
+ $edit_fh->autoflush(1);
+ my ($ibx, $smsg) = @{$to_edit->[0]};
+ my $old_raw = $ibx->msg_by_smsg($smsg);
+ delete @$ibx{qw(over mm git search)}; # cleanup
+
+ my $tmp = $$old_raw;
+ if (!$opt->{raw}) {
+ my $oid = $smsg->{blob};
+ print $edit_fh "From mboxrd\@$oid Thu Jan 1 00:00:00 1970\n";
+ $tmp =~ s/^(>*From )/>$1/gm;
+ }
+ print $edit_fh $tmp or
+ die "failed to write tempfile for editing: $!";
+
+ # run the editor, respecting spaces/quote
+retry_edit:
+ if (system(qw(sh -c), qq(eval "$editor" '"\$@"'), '--', $edit_fn)) {
+ if (!(-t STDIN) && !$opt->{force}) {
+ die "E: $editor failed: $?\n";
+ }
+ print STDERR "$editor failed, ";
+ print STDERR "continuing as forced\n" if $opt->{force};
+ while (!$opt->{force}) {
+ print STDERR "(r)etry, (c)ontinue, (q)uit?\n";
+ chomp(my $op = <STDIN> || '');
+ $op = lc($op);
+ goto retry_edit if $op eq 'r';
+ exit $? if $op eq 'q';
+ last if $op eq 'c'; # continuing
+ print STDERR "\`$op' not recognized\n";
+ }
+ }
+
+ # reread the edited file, not using $edit_fh since $EDITOR may
+ # rename/relink $edit_fn
+ open my $new_fh, '<', $edit_fn or
+ die "can't read edited file ($edit_fn): $!\n";
+ my $new_raw = do { local $/; <$new_fh> };
+
+ if (!$opt->{raw}) {
+ # get rid of the From we added
+ $new_raw =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
+
+ # check if user forgot to purge (in mutt) after editing
+ if ($new_raw =~ /^From /sm) {
+ if (-t STDIN) {
+ print STDERR <<'';
+Extra "From " lines detected in new mbox.
+Did you forget to purge the original message from the mbox after editing?
+
+ while (1) {
+ print STDERR <<"";
+(y)es to re-edit, (n)o to continue
+
+ chomp(my $op = <STDIN> || '');
+ $op = lc($op);
+ goto retry_edit if $op eq 'y';
+ last if $op eq 'n'; # continuing
+ print STDERR "\`$op' not recognized\n";
+ }
+ } else { # non-interactive path
+ # unlikely to happen, as extra From lines are
+ # only a common mistake (for me) with
+ # interactive use
+ warn <<"";
+W: possible message boundary splitting error
+
+ }
+ }
+ # unescape what we escaped:
+ $new_raw =~ s/^>(>*From )/$1/gm;
+ }
+
+ my $new_mime = PublicInbox::MIME->new(\$new_raw);
+ my $old_mime = PublicInbox::MIME->new($old_raw);
+
+ # make sure we don't compare unwanted headers, since mutt adds
+ # Content-Length, Status, and Lines headers:
+ PublicInbox::Import::drop_unwanted_headers($new_mime);
+ PublicInbox::Import::drop_unwanted_headers($old_mime);
+
+ # allow changing Received: and maybe other headers which can
+ # contain sensitive info.
+ my $nhdr = $new_mime->header_obj;
+ my $ohdr = $old_mime->header_obj;
+ if (($nhdr->as_string eq $ohdr->as_string) &&
+ (content_id($new_mime) eq content_id($old_mime))) {
+ warn "No change detected to:\n", show_cmd($ibx, $smsg);
+
+ next unless $opt->{verbose};
+ # should we consider this machine-parseable?
+ PublicInbox::AdminEdit::show_rewrites(\*STDOUT, $ibx, []);
+ next;
+ }
+
+ foreach my $tuple (@$to_edit) {
+ $ibx = PublicInbox::InboxWritable->new($tuple->[0]);
+ $smsg = $tuple->[1];
+ my $im = $ibx->importer(0);
+ my $commits = $im->replace($old_mime, $new_mime);
+ $im->done;
+ unless ($commits) {
+ warn "Failed to replace:\n", show_cmd($ibx, $smsg);
+ next;
+ }
+ next unless $opt->{verbose};
+ # should we consider this machine-parseable?
+ PublicInbox::AdminEdit::show_rewrites(\*STDOUT, $ibx, $commits);
+ }
+}
use strict;
use warnings;
use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
-use PublicInbox::Admin qw(resolve_repo_dir);
+use PublicInbox::AdminEdit;
PublicInbox::Admin::check_require('-index');
require PublicInbox::Filter::Base;
-require PublicInbox::Config;
require PublicInbox::MIME;
require PublicInbox::V2Writable;
{ no warnings 'once'; *REJECT = *PublicInbox::Filter::Base::REJECT }
my $usage = "$0 [--all] [INBOX_DIRS] </path/to/message";
-my $config = eval { PublicInbox::Config->new };
-my $cfgfile = PublicInbox::Config::default_file();
-my ($all, $force);
-my $verbose = 1;
-my %opts = (
- 'all' => \$all,
- 'force|f' => \$force,
- 'verbose|v!' => \$verbose,
-);
-GetOptions(%opts) or die "bad command-line args\n", $usage, "\n";
+my $opt = { verbose => 1, all => 0, -min_inbox_version => 2 };
+GetOptions($opt, @PublicInbox::AdminEdit::OPT) or
+ die "bad command-line args\n$usage\n";
-# TODO: clean this up and share code with -index via ::Admin
-my %dir2ibx; # ( path => Inbox object )
-my @inboxes;
-$config and $config->each_inbox(sub {
- my ($ibx) = @_;
- push @inboxes, $ibx if $all && $ibx->{version} != 1;
- $dir2ibx{$ibx->{mainrepo}} = $ibx;
-});
-
-if ($all) {
- $config or die "--all specified, but $cfgfile not readable\n";
- @ARGV and die "--all specified, but directories specified\n";
-} else {
- my @err;
- my @dirs = scalar(@ARGV) ? @ARGV : ('.');
- my $u = 0;
-
- foreach my $dir (@dirs) {
- my $v;
- my $dir = resolve_repo_dir($dir, \$v);
- if ($v == 1) {
- push @err, $dir;
- next;
- }
- my $ibx = $dir2ibx{$dir} ||= do {
- warn "$dir not configured in $cfgfile\n";
- $u++;
- my $name = "unconfigured-$u";
- PublicInbox::Inbox->new({
- version => 2,
- name => $name,
- -primary_address => "$name\@example.com",
- mainrepo => $dir,
- });
- };
- push @inboxes, $ibx;
- }
-
- if (@err) {
- die "v1 inboxes currently not supported by -purge\n\t",
- join("\n\t", @err), "\n";
- }
-}
-
-foreach my $ibx (@inboxes) {
- my $lvl = $ibx->{indexlevel};
- if (defined $lvl) {
- PublicInbox::Admin::indexlevel_ok_or_die($lvl);
- next;
- }
-
- # Undefined indexlevel, so `full'...
- # Search::Xapian exists and the DB can be read, at least, fine
- $ibx->search and next;
-
- # it's possible for a Xapian directory to exist, but Search::Xapian
- # to go missing/broken. Make sure it's purged in that case:
- $ibx->over or die "no over.sqlite3 in $ibx->{mainrepo}\n";
-
- # $ibx->{search} is populated by $ibx->over call
- my $xdir_ro = $ibx->{search}->xdir(1);
- my $npart = 0;
- foreach my $part (<$xdir_ro/*>) {
- if (-d $part && $part =~ m!/[0-9]+\z!) {
- my $bytes = 0;
- $bytes += -s $_ foreach glob("$part/*");
- $npart++ if $bytes;
- }
- }
- if ($npart) {
- PublicInbox::Admin::require_or_die('-search');
- } else {
- # somebody could "rm -r" all the Xapian directories;
- # let them purge the overview, at least
- $ibx->{indexlevel} ||= 'basic';
- }
-}
+my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt);
+PublicInbox::AdminEdit::check_editable(\@ibxs);
my $data = do { local $/; scalar <STDIN> };
$data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
my $n_purged = 0;
-foreach my $ibx (@inboxes) {
+foreach my $ibx (@ibxs) {
my $mime = PublicInbox::MIME->new($data);
my $v2w = PublicInbox::V2Writable->new($ibx, 0);
$v2w->done;
- if ($verbose) { # should we consider this machine-parseable?
- print "$ibx->{mainrepo}:";
- if (scalar @$commits) {
- print join("\n\t", '', @$commits), "\n";
- } else {
- print " NONE\n";
- }
+ if ($opt->{verbose}) { # should we consider this machine-parseable?
+ PublicInbox::AdminEdit::show_rewrites(\*STDOUT, $ibx, $commits);
}
$n_purged += scalar @$commits;
}
# behave like "rm -f"
-exit(0) if ($force || $n_purged);
+exit(0) if ($opt->{force} || $n_purged);
-warn "Not found\n" if $verbose;
+warn "Not found\n" if $opt->{verbose};
exit(1);
use Fcntl qw(FD_CLOEXEC F_SETFD F_GETFD);
use POSIX qw(dup2);
+use strict;
+use warnings;
sub stream_to_string {
my ($res) = @_;
my $cur_int = ($cur_maj << 24) | ($cur_min << 16);
if ($cur_int < $req_int) {
return 0 if $maybe;
- plan skip_all => "git $req+ required, have $git_ver";
+ plan skip_all => "git $req+ required, have $cur_maj.$cur_min";
}
1;
}
--- /dev/null
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+# edit frontend behavior test (t/replace.t for backend)
+use strict;
+use warnings;
+use Test::More;
+use File::Temp qw/tempdir/;
+require './t/common.perl';
+require_git(2.6);
+require PublicInbox::Inbox;
+require PublicInbox::InboxWritable;
+require PublicInbox::Config;
+use PublicInbox::MID qw(mid_clean);
+
+my @mods = qw(IPC::Run DBI DBD::SQLite);
+foreach my $mod (@mods) {
+ eval "require $mod";
+ plan skip_all => "missing $mod for $0" if $@;
+};
+IPC::Run->import(qw(run));
+
+my $cmd_pfx = 'blib/script/public-inbox';
+my $tmpdir = tempdir('pi-edit-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+my $mainrepo = "$tmpdir/v2";
+my $ibx = PublicInbox::Inbox->new({
+ mainrepo => $mainrepo,
+ name => 'test-v2edit',
+ version => 2,
+ -primary_address => 'test@example.com',
+ indexlevel => 'basic',
+});
+$ibx = PublicInbox::InboxWritable->new($ibx, {nproc=>1});
+my $cfgfile = "$tmpdir/config";
+local $ENV{PI_CONFIG} = $cfgfile;
+my $file = 't/data/0001.patch';
+open my $fh, '<', $file or die "open: $!";
+my $raw = do { local $/; <$fh> };
+my $im = $ibx->importer(0);
+my $mime = PublicInbox::MIME->new($raw);
+my $mid = mid_clean($mime->header('Message-Id'));
+ok($im->add($mime), 'add message to be edited');
+$im->done;
+my ($in, $out, $err, $cmd, $cur, $t);
+my $__git_dir = "--git-dir=$ibx->{mainrepo}/git/0.git";
+
+$t = '-F FILE'; {
+ $in = $out = $err = '';
+ local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/boolean prefix/bool pfx/'";
+ $cmd = [ "$cmd_pfx-edit", "-F$file", $mainrepo ];
+ ok(run($cmd, \$in, \$out, \$err), "$t edit OK");
+ $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid));
+ like($cur->header('Subject'), qr/bool pfx/, "$t message edited");
+ like($out, qr/[a-f0-9]{40}/, "$t shows commit on success");
+}
+
+$t = '-m MESSAGE_ID'; {
+ $in = $out = $err = '';
+ local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/bool pfx/boolean prefix/'";
+ $cmd = [ "$cmd_pfx-edit", "-m$mid", $mainrepo ];
+ ok(run($cmd, \$in, \$out, \$err), "$t edit OK");
+ $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid));
+ like($cur->header('Subject'), qr/boolean prefix/, "$t message edited");
+ like($out, qr/[a-f0-9]{40}/, "$t shows commit on success");
+}
+
+$t = 'no-op -m MESSAGE_ID'; {
+ $in = $out = $err = '';
+ my $before = `git $__git_dir rev-parse HEAD`;
+ local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/bool pfx/boolean prefix/'";
+ $cmd = [ "$cmd_pfx-edit", "-m$mid", $mainrepo ];
+ ok(run($cmd, \$in, \$out, \$err), "$t succeeds");
+ my $prev = $cur;
+ $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid));
+ is_deeply($cur, $prev, "$t makes no change");
+ like($cur->header('Subject'), qr/boolean prefix/,
+ "$t does not change message");
+ like($out, qr/NONE/, 'noop shows NONE');
+ my $after = `git $__git_dir rev-parse HEAD`;
+ is($after, $before, 'git head unchanged');
+}
+
+$t = 'no-op -m MESSAGE_ID w/Status: header'; { # because mutt does it
+ $in = $out = $err = '';
+ my $before = `git $__git_dir rev-parse HEAD`;
+ local $ENV{MAIL_EDITOR} =
+ "$^X -i -p -e 's/^Subject:.*/Status: RO\\n\$&/'";
+ $cmd = [ "$cmd_pfx-edit", "-m$mid", $mainrepo ];
+ ok(run($cmd, \$in, \$out, \$err), "$t succeeds");
+ my $prev = $cur;
+ $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid));
+ is_deeply($cur, $prev, "$t makes no change");
+ like($cur->header('Subject'), qr/boolean prefix/,
+ "$t does not change message");
+ is($cur->header('Status'), undef, 'Status header not added');
+ like($out, qr/NONE/, 'noop shows NONE');
+ my $after = `git $__git_dir rev-parse HEAD`;
+ is($after, $before, 'git head unchanged');
+}
+
+$t = '-m MESSAGE_ID can change Received: headers'; {
+ $in = $out = $err = '';
+ my $before = `git $__git_dir rev-parse HEAD`;
+ local $ENV{MAIL_EDITOR} =
+ "$^X -i -p -e 's/^Subject:.*/Received: x\\n\$&/'";
+ $cmd = [ "$cmd_pfx-edit", "-m$mid", $mainrepo ];
+ ok(run($cmd, \$in, \$out, \$err), "$t succeeds");
+ $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid));
+ like($cur->header('Subject'), qr/boolean prefix/,
+ "$t does not change Subject");
+ is($cur->header('Received'), 'x', 'added Received header');
+}
+
+$t = '-m miss'; {
+ $in = $out = $err = '';
+ local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/boolean/FAIL/'";
+ $cmd = [ "$cmd_pfx-edit", "-m$mid-miss", $mainrepo ];
+ ok(!run($cmd, \$in, \$out, \$err), "$t fails on invalid MID");
+ like($err, qr/No message found/, "$t shows error");
+}
+
+$t = 'non-interactive editor failure'; {
+ $in = $out = $err = '';
+ local $ENV{MAIL_EDITOR} = "$^X -i -p -e 'END { exit 1 }'";
+ $cmd = [ "$cmd_pfx-edit", "-m$mid", $mainrepo ];
+ ok(!run($cmd, \$in, \$out, \$err), "$t detected");
+ like($err, qr/END \{ exit 1 \}' failed:/, "$t shows error");
+}
+
+$t = 'mailEditor set in config'; {
+ $in = $out = $err = '';
+ my $rc = system(qw(git config), "--file=$cfgfile",
+ 'publicinbox.maileditor',
+ "$^X -i -p -e 's/boolean prefix/bool pfx/'");
+ is($rc, 0, 'set publicinbox.mailEditor');
+ local $ENV{MAIL_EDITOR};
+ local $ENV{GIT_EDITOR} = 'echo should not run';
+ $cmd = [ "$cmd_pfx-edit", "-m$mid", $mainrepo ];
+ ok(run($cmd, \$in, \$out, \$err), "$t edited message");
+ $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid));
+ like($cur->header('Subject'), qr/bool pfx/, "$t message edited");
+ unlike($out, qr/should not run/, 'did not run GIT_EDITOR');
+}
+
+$t = '--raw and mbox escaping'; {
+ $in = $out = $err = '';
+ local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/^\$/\\nFrom not mbox\\n/'";
+ $cmd = [ "$cmd_pfx-edit", "-m$mid", '--raw', $mainrepo ];
+ ok(run($cmd, \$in, \$out, \$err), "$t succeeds");
+ $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid));
+ like($cur->body, qr/^From not mbox/sm, 'put "From " line into body');
+
+ local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/^>From not/\$& an/'";
+ $cmd = [ "$cmd_pfx-edit", "-m$mid", $mainrepo ];
+ ok(run($cmd, \$in, \$out, \$err), "$t succeeds with mbox escaping");
+ $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid));
+ like($cur->body, qr/^From not an mbox/sm,
+ 'changed "From " line unescaped');
+
+ local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/^From not an mbox\\n//s'";
+ $cmd = [ "$cmd_pfx-edit", "-m$mid", '--raw', $mainrepo ];
+ ok(run($cmd, \$in, \$out, \$err), "$t succeeds again");
+ $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid));
+ unlike($cur->body, qr/^From not an mbox/sm, "$t restored body");
+}
+
+$t = 'reuse Message-ID'; {
+ my @warn;
+ local $SIG{__WARN__} = sub { push @warn, @_ };
+ ok($im->add($mime), "$t and re-add");
+ $im->done;
+ like($warn[0], qr/reused for mismatched content/, "$t got warning");
+}
+
+$t = 'edit ambiguous Message-ID with -m'; {
+ $in = $out = $err = '';
+ local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/bool pfx/boolean prefix/'";
+ $cmd = [ "$cmd_pfx-edit", "-m$mid", $mainrepo ];
+ ok(!run($cmd, \$in, \$out, \$err), "$t fails w/o --force");
+ like($err, qr/Multiple messages with different content found matching/,
+ "$t shows matches");
+ like($err, qr/GIT_DIR=.*git show/is, "$t shows git commands");
+}
+
+$t .= ' and --force'; {
+ $in = $out = $err = '';
+ local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/^Subject:.*/Subject:x/i'";
+ $cmd = [ "$cmd_pfx-edit", "-m$mid", '--force', $mainrepo ];
+ ok(run($cmd, \$in, \$out, \$err), "$t succeeds");
+ like($err, qr/Will edit all of them/, "$t notes all will be edited");
+ my @dump = `git $__git_dir cat-file --batch --batch-all-objects`;
+ chomp @dump;
+ is_deeply([grep(/^Subject:/i, @dump)], [qw(Subject:x Subject:x)],
+ "$t edited both messages");
+}
+
+done_testing();
my $raw = $gcf->cat_file($f);
is($x[2], length($$raw), 'length matches');
- {
- my $size;
- my $rv = $gcf->cat_file($f, sub {
- my ($in, $left) = @_;
- $size = $$left;
- 'nothing'
- });
- is($rv, 'nothing', 'returned from callback without reading');
- is($size, $x[2], 'set size for callback correctly');
- }
-
- eval { $gcf->cat_file($f, sub { die 'OMG' }) };
- like($@, qr/\bOMG\b/, 'died in callback propagated');
is(${$gcf->cat_file($f)}, $$raw, 'not broken after failures');
-
- {
- my ($buf, $r);
- my $rv = $gcf->cat_file($f, sub {
- my ($in, $left) = @_;
- $r = read($in, $buf, 2);
- $$left -= $r;
- 'blah'
- });
- is($r, 2, 'only read 2 bytes');
- is($buf, '--', 'partial read succeeded');
- is($rv, 'blah', 'return value propagated');
- }
is(${$gcf->cat_file($f)}, $$raw, 'not broken after partial read');
}
my $gcf = PublicInbox::Git->new($dir);
my $rsize;
- is($gcf->cat_file($buf, sub {
- $rsize = ${$_[1]};
- 'x';
- }), 'x', 'checked input');
- is($rsize, $size, 'got correct size on big file');
-
my $x = $gcf->cat_file($buf, \$rsize);
is($rsize, $size, 'got correct size ref on big file');
is(length($$x), $size, 'read correct number of bytes');
- my $rline;
- $gcf->cat_file($buf, sub {
- my ($in, $left) = @_;
- $rline = <$in>;
- $$left -= length($rline);
- });
- {
- open my $fh, '<', $big_data or die "open failed: $!\n";
- is($rline, <$fh>, 'first line matches');
- };
-
- my $all;
- $gcf->cat_file($buf, sub {
- my ($in, $left) = @_;
- my $x = read($in, $all, $$left);
- $$left -= $x;
- });
- {
- open my $fh, '<', $big_data or die "open failed: $!\n";
- local $/;
- is($all, <$fh>, 'entire read matches');
- };
-
my $ref = $gcf->qx(qw(cat-file blob), $buf);
- is($all, $ref, 'qx read giant single string');
-
my @ref = $gcf->qx(qw(cat-file blob), $buf);
- is($all, join('', @ref), 'qx returned array when wanted');
my $nl = scalar @ref;
ok($nl > 1, "qx returned array length of $nl");
ok($date >= $t0, 'valid date after start');
ok($date <= $t1, 'valid date before stop');
}
+ if ('leafnode interop') {
+ my $for_leafnode = PublicInbox::MIME->new(<<"");
+From: longheader\@example.com
+To: $addr
+Subject: none
+Date: Fri, 02 Oct 1993 00:00:00 +0000
+
+ my $long_hdr = 'for-leafnode-'.('y'x200).'@example.com';
+ $for_leafnode->header_set('Message-ID', "<$long_hdr>");
+ $im->add($for_leafnode);
+ $im->done;
+ if ($version == 1) {
+ my $s = PublicInbox::SearchIdx->new($mainrepo, 1);
+ $s->index_sync;
+ }
+ my $hdr = $n->head("<$long_hdr>");
+ my $expect = qr/\AMessage-ID: /i . qr/\Q<$long_hdr>\E/;
+ ok(scalar(grep(/$expect/, @$hdr)), 'Message-ID not folded');
+ ok(scalar(grep(/^Path:/, @$hdr)), 'Path: header found');
+
+ # it's possible for v2 messages to have 2+ Message-IDs,
+ # but leafnode can't handle it
+ if ($version != 1) {
+ my @mids = ("<$long_hdr>", '<2mid@wtf>');
+ $for_leafnode->header_set('Message-ID', @mids);
+ $for_leafnode->body_set('not-a-dupe');
+ my $warn = '';
+ $SIG{__WARN__} = sub { $warn .= join('', @_) };
+ $im->add($for_leafnode);
+ $im->done;
+ like($warn, qr/reused/, 'warned for reused MID');
+ $hdr = $n->head('<2mid@wtf>');
+ my @hmids = grep(/\AMessage-ID: /i, @$hdr);
+ is(scalar(@hmids), 1, 'Single Message-ID in header');
+ like($hmids[0], qr/: <2mid\@wtf>/, 'got expected mid');
+ }
+ }
# pipelined requests:
{
# a successful case:
ok(IPC::Run::run([$purge, $mainrepo], \$raw, \$out, \$err), 'match OK');
-like($out, qr/^\t[a-f0-9]{40,}/m, 'removed commit noted');
+like($out, qr/\b[a-f0-9]{40,}/m, 'removed commit noted');
# add (old) vger filter to config file
print $cfg_fh <<EOF or die "print $!";
ok(chdir('/'), "chdir / OK for --all test");
ok(IPC::Run::run([$purge, '--all'], \$pre_scrub, \$out, \$err),
'scrub purge OK');
-like($out, qr/^\t[a-f0-9]{40,}/m, 'removed commit noted');
+like($out, qr/\b[a-f0-9]{40,}/m, 'removed commit noted');
# diag "out: $out"; diag "err: $err";
$out = $err = '';
--- /dev/null
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use PublicInbox::MIME;
+use PublicInbox::InboxWritable;
+use File::Temp qw/tempdir/;
+require './t/common.perl';
+require_git(2.6); # replace is v2 only, for now...
+foreach my $mod (qw(DBD::SQLite)) {
+ eval "require $mod";
+ plan skip_all => "$mod missing for $0" if $@;
+}
+
+sub test_replace ($$$) {
+ my ($v, $level, $opt) = @_;
+ diag "v$v $level replace";
+ my $this = "pi-$v-$level-replace";
+ my $tmpdir = tempdir("$this-tmp-XXXXXX", TMPDIR => 1, CLEANUP => 1);
+ my $ibx = PublicInbox::Inbox->new({
+ mainrepo => "$tmpdir/testbox",
+ name => $this,
+ version => $v,
+ -primary_address => 'test@example.com',
+ indexlevel => $level,
+ });
+
+ my $orig = PublicInbox::MIME->new(<<'EOF');
+From: Barbra Streisand <effect@example.com>
+To: test@example.com
+Subject: confidential
+Message-ID: <replace@example.com>
+Date: Fri, 02 Oct 1993 00:00:00 +0000
+
+Top secret info about my house in Malibu...
+EOF
+ my $im = PublicInbox::InboxWritable->new($ibx, {nproc=>1})->importer;
+ # fake a bunch of epochs
+ $im->{rotate_bytes} = $opt->{rotate_bytes} if $opt->{rotate_bytes};
+
+ if ($opt->{pre}) {
+ $opt->{pre}->($im, 1, 2);
+ $orig->header_set('References', '<1@example.com>');
+ }
+ ok($im->add($orig), 'add message to be replaced');
+ if ($opt->{post}) {
+ $opt->{post}->($im, 3, { 4 => 'replace@example.com' });
+ }
+ $im->done;
+ my $thread_a = $ibx->over->get_thread('replace@example.com');
+
+ my %before = map {; delete($_->{blob}) => $_ } @{$ibx->recent};
+ my $reject = PublicInbox::MIME->new($orig->as_string);
+ foreach my $mid (['<replace@example.com>', '<extra@example.com>'],
+ [], ['<replaced@example.com>']) {
+ $reject->header_set('Message-ID', @$mid);
+ my $ok = eval { $im->replace($orig, $reject) };
+ like($@, qr/Message-ID.*may not be changed/,
+ '->replace died on Message-ID change');
+ ok(!$ok, 'no replacement happened');
+ }
+
+ # prepare the replacement
+ my $expect = "Move along, nothing to see here\n";
+ my $repl = PublicInbox::MIME->new($orig->as_string);
+ $repl->header_set('From', '<redactor@example.com>');
+ $repl->header_set('Subject', 'redacted');
+ $repl->header_set('Date', 'Sat, 02 Oct 2010 00:00:00 +0000');
+ $repl->body_str_set($expect);
+
+ my @warn;
+ local $SIG{__WARN__} = sub { push @warn, @_ };
+ ok(my $cmts = $im->replace($orig, $repl), 'replaced message');
+ my $changed_epochs = 0;
+ for my $tip (@$cmts) {
+ next if !defined $tip;
+ $changed_epochs++;
+ like($tip, qr/\A[a-f0-9]{40}\z/,
+ 'replace returned current commit');
+ }
+ is($changed_epochs, 1, 'only one epoch changed');
+
+ $im->done;
+ my $m = PublicInbox::MIME->new($ibx->msg_by_mid('replace@example.com'));
+ is($m->body, $expect, 'replaced message');
+ is_deeply(\@warn, [], 'no warnings on noop');
+
+ my @cat = qw(cat-file --buffer --batch --batch-all-objects);
+ my $git = $ibx->git;
+ my @all = $git->qx(@cat);
+ is_deeply([grep(/confidential/, @all)], [], 'nothing confidential');
+ is_deeply([grep(/Streisand/, @all)], [], 'Streisand who?');
+ is_deeply([grep(/\bOct 1993\b/, @all)], [], 'nothing from Oct 1993');
+ my $t19931002 = qr/ 749520000 /;
+ is_deeply([grep(/$t19931002/, @all)], [], "nothing matches $t19931002");
+
+ for my $dir (glob("$ibx->{mainrepo}/git/*.git")) {
+ my ($bn) = ($dir =~ m!([^/]+)\z!);
+ is(system(qw(git --git-dir), $dir, qw(fsck --strict)), 0,
+ "git fsck is clean in epoch $bn");
+ }
+
+ my $thread_b = $ibx->over->get_thread('replace@example.com');
+ is_deeply([sort map { $_->{mid} } @$thread_b],
+ [sort map { $_->{mid} } @$thread_a], 'threading preserved');
+
+ if (my $srch = $ibx->search) {
+ for my $q ('f:streisand', 's:confidential', 'malibu') {
+ my $msgs = $srch->query($q);
+ is_deeply($msgs, [], "no match for $q");
+ }
+ my @ok = ('f:redactor', 's:redacted', 'nothing to see');
+ if ($opt->{pre}) {
+ push @ok, 'm:1@example.com', 'm:2@example.com',
+ 's:message2', 's:message1';
+ }
+ if ($opt->{post}) {
+ push @ok, 'm:3@example.com', 'm:4@example.com',
+ 's:message3', 's:message4';
+ }
+ for my $q (@ok) {
+ my $msgs = $srch->query($q);
+ ok($msgs->[0], "got match for $q");
+ }
+ }
+
+ # check overview matches:
+ my %after = map {; delete($_->{blob}) => $_ } @{$ibx->recent};
+ my @before_blobs = keys %before;
+ foreach my $blob (@before_blobs) {
+ delete $before{$blob} if delete $after{$blob};
+ }
+
+ is(scalar keys %before, 1, 'one unique blob from before left');
+ is(scalar keys %after, 1, 'one unique blob from after left');
+ foreach my $blob (keys %before) {
+ is($git->check($blob), undef, 'old blob not found');
+ my $smsg = $before{$blob};
+ is($smsg->{subject}, 'confidential', 'before subject');
+ is($smsg->{mid}, 'replace@example.com', 'before MID');
+ }
+ foreach my $blob (keys %after) {
+ ok($git->check($blob), 'new blob found');
+ my $smsg = $after{$blob};
+ is($smsg->{subject}, 'redacted', 'after subject');
+ is($smsg->{mid}, 'replace@example.com', 'before MID');
+ }
+ @warn = ();
+ is($im->replace($orig, $repl), undef, 'no-op replace returns undef');
+ is($im->purge($orig), undef, 'no-op purge returns undef');
+ is_deeply(\@warn, [], 'no warnings on noop');
+}
+
+sub pad_msgs {
+ my ($im, @range) = @_;
+ for my $i (@range) {
+ my $irt;
+ if (ref($i) eq 'HASH') {
+ ($i, $irt) = each %$i;
+ }
+ my $sec = sprintf('%0d', $i);
+ my $mime = PublicInbox::MIME->new(<<EOF);
+From: foo\@example.com
+To: test\@example.com
+Message-ID: <$i\@example.com>
+Date: Fri, 02, Jan 1970 00:00:$sec +0000
+Subject: message$i
+
+message number$i
+EOF
+
+ if (defined($irt)) {
+ $mime->header_set('References', "<$irt>");
+ }
+
+ $im->add($mime);
+ }
+}
+
+my $opt = { pre => *pad_msgs };
+test_replace(2, 'basic', {});
+test_replace(2, 'basic', $opt);
+test_replace(2, 'basic', $opt = { %$opt, post => *pad_msgs });
+test_replace(2, 'basic', $opt = { %$opt, rotate_bytes => 1 });
+
+SKIP: if ('test xapian') {
+ require PublicInbox::Search;
+ PublicInbox::Search::load_xapian() or skip 'Search::Xapian missing', 8;
+ for my $l (qw(medium)) {
+ test_replace(2, $l, {});
+ $opt = { pre => *pad_msgs };
+ test_replace(2, $l, $opt);
+ test_replace(2, $l, $opt = { %$opt, post => *pad_msgs });
+ test_replace(2, $l, $opt = { %$opt, rotate_bytes => 1 });
+ }
+};
+
+done_testing();