From: Eric Wong Date: Fri, 14 Jun 2019 16:23:13 +0000 (+0000) Subject: Merge remote-tracking branch 'origin/manifest' into next X-Git-Tag: v1.2.0~178 X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=commitdiff_plain;h=3c30532aed6256a984c535530c6667552c2e6a84;hp=d3c94cf92e8a4693aa691f3464c94c00be543cfc Merge remote-tracking branch 'origin/manifest' into next * origin/manifest: git: ensure ->modified returns an integer www: support $INBOX/git/$EPOCH.git for v2 cloning www: wire up /$INBOX/manifest.js.gz, too wwwlisting: generate grokmirror-compatible manifest.js.gz wwwlisting: allow hiding entries from manifest --- diff --git a/Documentation/include.mk b/Documentation/include.mk index b064f295..8501adc8 100644 --- a/Documentation/include.mk +++ b/Documentation/include.mk @@ -32,6 +32,8 @@ podtext = $(PODTEXT) $(PODTEXT_OPTS) # MakeMaker only seems to support manpage sections 1 and 3... m1 = m1 += public-inbox-compact +m1 += public-inbox-convert +m1 += public-inbox-edit m1 += public-inbox-httpd m1 += public-inbox-index m1 += public-inbox-mda @@ -150,7 +152,7 @@ rsync-doc: $(RSYNC) --chmod=Fugo=r -av $(rsync_docs) $(rsync_xdocs) $(RSYNC_DEST) clean-doc: - $(RM) $(man1) $(man5) $(man7) $(gz_docs) $(docs_html) $(mantxt) + $(RM) $(man1) $(man5) $(man7) $(man8) $(gz_docs) $(docs_html) $(mantxt) $(RM) $(gz_xdocs) $(xdocs_html) $(xdocs) clean :: clean-doc diff --git a/Documentation/public-inbox-config.pod b/Documentation/public-inbox-config.pod index db81bf1c..a86132be 100644 --- a/Documentation/public-inbox-config.pod +++ b/Documentation/public-inbox-config.pod @@ -234,6 +234,10 @@ C, but may be overridden. Default: basename of C, /var/www/htdocs/cgit/ or /usr/share/cgit/ +=item publicinbox.mailEditor + +See L + =item publicinbox.wwwlisting Enable a HTML listing style when the root path of the URL '/' is accessed. diff --git a/Documentation/public-inbox-edit.pod b/Documentation/public-inbox-edit.pod new file mode 100644 index 00000000..97c7c92a --- /dev/null +++ b/Documentation/public-inbox-edit.pod @@ -0,0 +1,109 @@ +=head1 NAME + +public-inbox-edit - edit messages in a public inbox + +=head1 SYNOPSIS + + public-inbox-edit -m MESSAGE-ID --all|INBOX_DIR + + public-inbox-edit -F RAW_FILE --all|INBOX_DIR [.. INBOX_DIR] + +=head1 DESCRIPTION + +public-inbox-edit allows editing messages in a given inbox +to remove sensitive information. It is only intended as a +last resort, as it will cause discontiguous git history and +draw more attention to the sensitive data in mirrors. + +=head1 OPTIONS + +=over + +=item --all + +Edit the message in all inboxes configured in ~/.public-inbox/config. +This is an alternative to specifying individual inboxes directories +on the command-line. + +=item -m MESSAGE-ID + +Edits the message corresponding to the given C. +If the C is ambiguous, C<--force> or using the +C<--file> of the original will be required. + +=item -F FILE + +Edits the message corresponding to the Message-ID: header +and content given in C. This requires the unmodified +raw message, and the contents of C will not itself +be modified. This is useful if a Message-ID is ambiguous +due to filtering/munging rules or other edits. + +=item --force + +Forcibly perform the edit even if Message-ID is ambiguous. + +=item --raw + +Do not perform "From " line escaping. By default, this +generates a mboxrd variant file to detect unpurged messages +in the new mbox. This makes sense if your configured +C is a regular editor and not +something like C + +=back + +=head1 CONFIGURATION + +=over 8 + +=item publicinbox.mailEditor + +The command to perform the edit with. An example of this would be +C, and the user would then use the facilities in L +to edit the mail. This is useful for editing attachments or +Base64-encoded emails which are more difficult to edit with a +normal editor (configured via C, C or C). + +Default: none + +=back + +=head1 ENVIRONMENT + +=over 8 + +=for comment MAIL_EDITOR is undocumented (unstable, don't want naming conflicts) + +=item GIT_EDITOR / VISUAL / EDITOR + +public-inbox-edit will fall back to using one of these variables +(in that order) if C is unset. + +=item PI_CONFIG + +The default config file, normally "~/.public-inbox/config". +See L + +=back + +=head1 LIMITATIONS + +Only L repositories are supported. + +=head1 CONTACT + +Feedback welcome via plain-text mail to L + +The mail archives are hosted at L +and L + +=head1 COPYRIGHT + +Copyright 2019 all contributors L + +License: AGPL-3.0+ L + +=head1 SEE ALSO + +L diff --git a/Documentation/public-inbox-index.pod b/Documentation/public-inbox-index.pod index 6d2a4203..610dacbe 100644 --- a/Documentation/public-inbox-index.pod +++ b/Documentation/public-inbox-index.pod @@ -42,6 +42,13 @@ Xapian database. This does not touch the NNTP article number database. +=item --prune + +Run L to prune and expire reflogs if discontiguous history +is detected. This is intended to be used in mirrors after running +L or L to ensure data +is expunged from mirrors. + =back =head1 FILES @@ -52,8 +59,6 @@ C<$GIT_DIR/public-inbox/> directory. v2 repositories are described in L. -=back - =head1 ENVIRONMENT =over 8 diff --git a/INSTALL b/INSTALL index 0246299b..a661c776 100644 --- a/INSTALL +++ b/INSTALL @@ -36,6 +36,9 @@ Beyond that, there is a long list of Perl modules required, starting with: pkg: p5-TimeDate rpm: perl-TimeDate +* Digest::SHA typically installed with Perl + rpm: perl-Digest-SHA + * Email::MIME deb: libemail-mime-perl pkg: p5-Email-MIME rpm: perl-Email-MIME @@ -132,8 +135,8 @@ above, so there is no need to explicitly install them: (for public-inbox-watch, pulled in by Plack) - IO::Compress::Gzip deb: perl-modules (or libio-compress-perl) - rpm: perl-PerlIO-gzip pkg: perl5 + rpm: perl-IO-Compress (for gzipped mbox over HTTP) Uncommonly needed modules: @@ -156,18 +159,25 @@ Optional packages testing and development: pkg: p5-IPC-Run rpm: perl-IPC-Run +- Plack::Test deb: libplack-test-perl + pkg: p5-Plack + rpm: perl-Plack-Test + +- Test::Simple deb: perl-modules-5.$MINOR + pkg: perl5 + rpm: perl-Test-Simple + - XML::Feed deb: libxml-feed-perl pkg: p5-XML-Feed rpm: perl-XML-Feed -- Test::HTTP::Server::Simple deb: libtest-http-server-simple-perl - pkg: p5-Test-HTTP-Server-Simple - rpm: perl-Test-HTTP-Server-Simple - (XXX is this really needed?) - standard MakeMaker installation (Perl) -------------------------------------- +To use MakeMaker, you need to ensure ExtUtils::MakeMaker is available. +This is typically installed with Perl, but RPM-based systems will likely +need to install the `perl-ExtUtils-MakeMaker' package. + Once the dependencies are installed, you should be able to build and install the system (into /usr/local) with: diff --git a/MANIFEST b/MANIFEST index 9a88f135..ae637f24 100644 --- a/MANIFEST +++ b/MANIFEST @@ -13,6 +13,7 @@ Documentation/public-inbox-compact.pod Documentation/public-inbox-config.pod Documentation/public-inbox-convert.pod Documentation/public-inbox-daemon.pod +Documentation/public-inbox-edit.pod Documentation/public-inbox-httpd.pod Documentation/public-inbox-index.pod Documentation/public-inbox-mda.pod @@ -68,6 +69,7 @@ examples/unsubscribe.psgi examples/varnish-4.vcl lib/PublicInbox/Address.pm lib/PublicInbox/Admin.pm +lib/PublicInbox/AdminEdit.pm lib/PublicInbox/AltId.pm lib/PublicInbox/Cgit.pm lib/PublicInbox/Config.pm @@ -149,6 +151,7 @@ sa_config/root/etc/spamassassin/public-inbox.pre sa_config/user/.spamassassin/user_prefs script/public-inbox-compact script/public-inbox-convert +script/public-inbox-edit script/public-inbox-httpd script/public-inbox-index script/public-inbox-init @@ -184,6 +187,7 @@ t/content_id.t t/convert-compact.t t/data/0001.patch t/ds-leak.t +t/edit.t t/emergency.t t/fail-bin/spamc t/feed.t @@ -236,6 +240,7 @@ t/psgi_text.t t/psgi_v2.t t/purge.t t/qspawn.t +t/replace.t t/reply.t t/search-thr-index.t t/search.t diff --git a/Makefile.PL b/Makefile.PL index b1274ad1..113f8c77 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -28,6 +28,11 @@ WriteMakefile( # We also depend on git. # Keep this sorted and synced to the INSTALL document 'Date::Parse' => 0, + + # libperl$PERL_VERSION, + # `perl5' on FreeBSD + # perl-Digest-SHA on RH-based + 'Digest::SHA' => 0, 'Email::MIME' => 0, # the following should be pulled in by Email::MIME: @@ -44,6 +49,14 @@ WriteMakefile( # We have more test dependencies, but do not force # users to install them. See INSTALL + + # All Perl installs I know about have these, but RH-based + # distros make them separate even though 'perl' pulls them in + 'File::Path' => 0, + 'File::Temp' => 0, + 'Getopt::Long' => 0, + 'Exporter' => 0, + # ExtUtils::MakeMaker # this file won't run w/o it... }, MAN3PODS => \%man3, ); @@ -57,14 +70,17 @@ N = \$\$(( \$\$(nproc 2>/dev/null || gnproc 2>/dev/null || echo 2) + 1 )) -include config.mak -include Documentation/include.mk SCRIPTS := scripts/ssoma-replay -my_syntax := \$(addsuffix .syntax, $PM_FILES \$(EXE_FILES) \$(SCRIPTS)) - +syn_files := $PM_FILES \$(EXE_FILES) \$(SCRIPTS) +my_syntax := \$(addsuffix .syntax, \$(syn_files)) +changed = \$(shell git ls-files -m) %.syntax :: - @\$(PERL) -I lib -c \$(subst .syntax,,\$@) + @\$(PERL) -w -I lib -c \$(subst .syntax,,\$@) syntax:: \$(my_syntax) +dsyn :: \$(addsuffix .syntax, \$(filter \$(changed), \$(syn_files))) + check-manifest :: MANIFEST if git ls-files >\$?.gen 2>&1; then diff -u \$? \$?.gen; fi diff --git a/ci/deps.perl b/ci/deps.perl index faca4590..62870c1f 100755 --- a/ci/deps.perl +++ b/ci/deps.perl @@ -9,9 +9,11 @@ my $usage = "$0 PKG_FMT PROFILE [PROFILE_MOD]"; my $pkg_fmt = shift; @ARGV or die $usage, "\n"; +my @test_essential = qw(Test::Simple Plack::Test); + # package profiles my $profiles = { - # the smallest possible profile + # the smallest possible profile for testing # TODO: trim this, Plack pulls in Filesys::Notify::Simple, # and we don't need that for mda-only installs essential => [ qw( @@ -19,14 +21,16 @@ my $profiles = { perl Date::Parse Devel::Peek + Digest::SHA Email::Simple Email::MIME Email::MIME::ContentType Encode + ExtUtils::MakeMaker Filesys::Notify::Simple Plack URI::Escape - ) ], + ), @test_essential ], # everything optional for normal use optional => [ qw( @@ -44,10 +48,9 @@ my $profiles = { xapian-compact ) ], - # developer stuff + # optional developer stuff devtest => [ qw( IPC::Run - Test::HTTP::Server::Simple XML::Feed curl w3m @@ -86,22 +89,41 @@ my $non_auto = { deb => 'perl', # libperl5.XX, but the XX varies pkg => 'perl5', }, + 'Digest::SHA' => { + deb => 'perl', # libperl5.XX, but the XX varies + pkg => 'perl5', + }, 'Encode' => { deb => 'perl', # libperl5.XX, but the XX varies pkg => 'perl5', rpm => 'perl-Encode', }, + 'ExtUtils::MakeMaker' => { + deb => 'perl', # perl-modules-5.xx + pkg => 'perl5', + rpm => 'perl-ExtUtils-MakeMaker', + }, 'IO::Compress::Gzip' => { deb => 'perl', # perl-modules-5.xx pkg => 'perl5', - rpm => 'perl-PerlIO-gzip', + rpm => 'perl-IO-Compress', }, 'DBD::SQLite' => { deb => 'libdbd-sqlite3-perl' }, + 'Plack::Test' => { + deb => 'libplack-perl', + pkg => 'p5-Plack', + rpm => 'perl-Plack-Test', + }, 'URI::Escape' => { deb => 'liburi-perl', pkg => 'p5-URI', rpm => 'perl-URI', }, + 'Test::Simple' => { + deb => 'perl', # perl-modules-5.XX, but the XX varies + pkg => 'perl5', + rpm => 'perl-Test-Simple', + }, 'highlight.pm' => { deb => 'libhighlight-perl', pkg => [], diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm index 4a862c6d..8a2f2043 100644 --- a/lib/PublicInbox/Admin.pm +++ b/lib/PublicInbox/Admin.pm @@ -9,6 +9,8 @@ use warnings; use Cwd 'abs_path'; use base qw(Exporter); our @EXPORT_OK = qw(resolve_repo_dir); +my $CFG; # all the admin stuff is a singleton +require PublicInbox::Config; sub resolve_repo_dir { my ($cd, $ver) = @_; @@ -66,36 +68,65 @@ $ibx->{mainrepo} has unexpected indexlevel in Xapian: $m $l; } -sub resolve_inboxes { - my ($argv, $warn_on_unconfigured) = @_; - require PublicInbox::Config; +sub unconfigured_ibx ($$) { + my ($dir, $i) = @_; + my $name = "unconfigured-$i"; + PublicInbox::Inbox->new({ + name => $name, + address => [ "$name\@example.com" ], + mainrepo => $dir, + # TODO: consumers may want to warn on this: + #-unconfigured => 1, + }); +} + +sub config () { $CFG //= eval { PublicInbox::Config->new } } + +sub resolve_inboxes ($;$) { + my ($argv, $opt) = @_; require PublicInbox::Inbox; + $opt ||= {}; - my @ibxs = map { resolve_repo_dir($_) } @$argv; - push(@ibxs, resolve_repo_dir()) unless @ibxs; + my $cfg = config(); + if ($opt->{all}) { + my $cfgfile = PublicInbox::Config::default_file(); + $cfg or die "--all specified, but $cfgfile not readable\n"; + @$argv and die "--all specified, but directories specified\n"; + } + my $min_ver = $opt->{-min_inbox_version} || 0; + my (@old, @ibxs); my %dir2ibx; - if (my $config = eval { PublicInbox::Config->new }) { - $config->each_inbox(sub { + if ($cfg) { + $cfg->each_inbox(sub { my ($ibx) = @_; + $ibx->{version} ||= 1; $dir2ibx{abs_path($ibx->{mainrepo})} = $ibx; }); - } elsif ($warn_on_unconfigured) { - # do we really care about this? It's annoying... - warn $warn_on_unconfigured, "\n"; } - for my $i (0..$#ibxs) { - my $dir = $ibxs[$i]; - $ibxs[$i] = $dir2ibx{$dir} ||= do { - my $name = "unconfigured-$i"; - PublicInbox::Inbox->new({ - name => $name, - address => [ "$name\@example.com" ], - mainrepo => $dir, - # TODO: consumers may want to warn on this: - #-unconfigured => 1, - }); - }; + if ($opt->{all}) { + my @all = values %dir2ibx; + @all = grep { $_->{version} >= $min_ver } @all; + push @ibxs, @all; + } else { # directories specified on the command-line + my $i = 0; + my @dirs = @$argv; + push @dirs, '.' unless @dirs; + foreach (@dirs) { + my $v; + my $dir = resolve_repo_dir($_, \$v); + if ($v < $min_ver) { + push @old, $dir; + next; + } + my $ibx = $dir2ibx{$dir} ||= unconfigured_ibx($dir, $i); + $i++; + push @ibxs, $ibx; + } + } + if (@old) { + die "inboxes $min_ver inboxes not supported by $0\n\t", + join("\n\t", @old), "\n"; } @ibxs; } diff --git a/lib/PublicInbox/AdminEdit.pm b/lib/PublicInbox/AdminEdit.pm new file mode 100644 index 00000000..169feba0 --- /dev/null +++ b/lib/PublicInbox/AdminEdit.pm @@ -0,0 +1,67 @@ +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ + +# common stuff between -edit, -purge (and maybe -learn in the future) +package PublicInbox::AdminEdit; +use strict; +use warnings; +use PublicInbox::Admin; +our @OPT = qw(all force|f verbose|v!); + +sub check_editable ($) { + my ($ibxs) = @_; + + foreach my $ibx (@$ibxs) { + my $lvl = $ibx->{indexlevel}; + if (defined $lvl) { + PublicInbox::Admin::indexlevel_ok_or_die($lvl); + next; + } + + # Undefined indexlevel, so `full'... + # Search::Xapian exists and the DB can be read, at least, fine + $ibx->search and next; + + # it's possible for a Xapian directory to exist, + # but Search::Xapian to go missing/broken. + # Make sure it's purged in that case: + $ibx->over or die "no over.sqlite3 in $ibx->{mainrepo}\n"; + + # $ibx->{search} is populated by $ibx->over call + my $xdir_ro = $ibx->{search}->xdir(1); + my $npart = 0; + foreach my $part (<$xdir_ro/*>) { + if (-d $part && $part =~ m!/[0-9]+\z!) { + my $bytes = 0; + $bytes += -s $_ foreach glob("$part/*"); + $npart++ if $bytes; + } + } + if ($npart) { + PublicInbox::Admin::require_or_die('-search'); + } else { + # somebody could "rm -r" all the Xapian directories; + # let them purge the overview, at least + $ibx->{indexlevel} ||= 'basic'; + } + } +} + +# takes the output of V2Writable::purge and V2Writable::replace +# $rewrites = [ array commits keyed by epoch ] +sub show_rewrites ($$$) { + my ($fh, $ibx, $rewrites) = @_; + print $fh "$ibx->{mainrepo}:"; + if (scalar @$rewrites) { + my $epoch = -1; + my @out = map {; + ++$epoch; + "$epoch.git: ".(defined($_) ? $_ : '(unchanged)') + } @$rewrites; + print $fh join("\n\t", '', @out), "\n"; + } else { + print $fh " NONE\n"; + } +} + +1; diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index 82510b99..f5c7a95c 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -145,41 +145,24 @@ again: fail($self, "Unexpected result from git cat-file: $head"); my $size = $1; - my $ref_type = $ref ? ref($ref) : ''; - my $rv; my $left = $size; - $$ref = $size if ($ref_type eq 'SCALAR'); - my $cb_err; - - if ($ref_type eq 'CODE') { - $rv = eval { $ref->($in, \$left) }; - $cb_err = $@; - # drain the rest - my $max = 8192; - while ($left > 0) { - my $r = read($in, my $x, $left > $max ? $max : $left); - defined($r) or fail($self, "read failed: $!"); - $r == 0 and fail($self, 'exited unexpectedly'); - $left -= $r; - } - } else { - my $offset = 0; - my $buf = ''; - while ($left > 0) { - my $r = read($in, $buf, $left, $offset); - defined($r) or fail($self, "read failed: $!"); - $r == 0 and fail($self, 'exited unexpectedly'); - $left -= $r; - $offset += $r; - } - $rv = \$buf; + $$ref = $size if $ref; + + my $offset = 0; + my $buf = ''; + while ($left > 0) { + my $r = read($in, $buf, $left, $offset); + defined($r) or fail($self, "read failed: $!"); + $r == 0 and fail($self, 'exited unexpectedly'); + $left -= $r; + $offset += $r; } + $rv = \$buf; - my $r = read($in, my $buf, 1); + my $r = read($in, my $lf, 1); defined($r) or fail($self, "read failed: $!"); - fail($self, 'newline missing after blob') if ($r != 1 || $buf ne "\n"); - die $cb_err if $cb_err; + fail($self, 'newline missing after blob') if ($r != 1 || $lf ne "\n"); $rv; } diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 2c4bad92..137b2b78 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -277,7 +277,7 @@ sub git_timestamp { "$ts $zone"; } -sub extract_author_info ($) { +sub extract_cmt_info ($) { my ($mime) = @_; my $sender = ''; @@ -314,7 +314,17 @@ sub extract_author_info ($) { $name = ''; warn "no name in From: $from or Sender: $sender\n"; } - ($name, $email); + + my $hdr = $mime->header_obj; + + my $subject = $hdr->header('Subject'); + $subject = '(no subject)' unless defined $subject; + # Mime decoding can create nulls replace them with spaces to protect git + $subject =~ tr/\0/ /; + utf8::encode($subject); + my $at = git_timestamp(my @at = msg_datestamp($hdr)); + my $ct = git_timestamp(my @ct = msg_timestamp($hdr)); + ($name, $email, $at, $ct, $subject); } # kill potentially confusing/misleading headers @@ -361,19 +371,7 @@ sub clean_tree_v2 ($$$) { sub add { my ($self, $mime, $check_cb) = @_; # mime = Email::MIME - my ($name, $email) = extract_author_info($mime); - my $hdr = $mime->header_obj; - my @at = msg_datestamp($hdr); - my @ct = msg_timestamp($hdr); - my $author_time_raw = git_timestamp(@at); - my $commit_time_raw = git_timestamp(@ct); - - my $subject = $mime->header('Subject'); - $subject = '(no subject)' unless defined $subject; - # Mime decoding can create nulls replace them with spaces to protect git - $subject =~ tr/\0/ /; - utf8::encode($subject); - + my ($name, $email, $at, $ct, $subject) = extract_cmt_info($mime); my $path_type = $self->{path_type}; my $path; if ($path_type eq '2/38') { @@ -416,8 +414,8 @@ sub add { } print $w "commit $ref\nmark :$commit\n", - "author $name <$email> $author_time_raw\n", - "committer $self->{ident} $commit_time_raw\n" or wfail; + "author $name <$email> $at\n", + "committer $self->{ident} $ct\n" or wfail; print $w "data ", (length($subject) + 1), "\n", $subject, "\n\n" or wfail; if ($tip ne '') { @@ -486,33 +484,45 @@ sub digest2mid ($$) { "$dt.$b64" . '@z'; } -sub clean_purge_buffer { - my ($oids, $buf) = @_; - my $cmt_msg = 'purged '.join(' ',@$oids)."\n"; +sub rewrite_commit ($$$$) { + my ($self, $oids, $buf, $mime) = @_; + my ($name, $email, $at, $ct, $subject); + if ($mime) { + ($name, $email, $at, $ct, $subject) = extract_cmt_info($mime); + } else { + $name = $email = ''; + $subject = 'purged '.join(' ', @$oids); + } @$oids = (); - + $subject .= "\n"; foreach my $i (0..$#$buf) { my $l = $buf->[$i]; if ($l =~ /^author .* ([0-9]+ [\+-]?[0-9]+)$/) { - $buf->[$i] = "author <> $1\n"; + $at //= $1; + $buf->[$i] = "author $name <$email> $at\n"; + } elsif ($l =~ /^committer .* ([0-9]+ [\+-]?[0-9]+)$/) { + $ct //= $1; + $buf->[$i] = "committer $self->{ident} $ct\n"; } elsif ($l =~ /^data ([0-9]+)/) { - $buf->[$i++] = "data " . length($cmt_msg) . "\n"; - $buf->[$i] = $cmt_msg; + $buf->[$i++] = "data " . length($subject) . "\n"; + $buf->[$i] = $subject; last; } } } -sub purge_oids { - my ($self, $purge) = @_; - my $tmp = "refs/heads/purge-".((keys %$purge)[0]); +# returns the new commit OID if a replacement was done +# returns undef if nothing was done +sub replace_oids { + my ($self, $mime, $replace_map) = @_; # oid => raw string + my $tmp = "refs/heads/replace-".((keys %$replace_map)[0]); my $old = $self->{'ref'}; my $git = $self->{git}; my @export = (qw(fast-export --no-data --use-done-feature), $old); my $rd = $git->popen(@export); my ($r, $w) = $self->gfi_start; my @buf; - my $npurge = 0; + my $nreplace = 0; my @oids; my ($done, $mark); my $tree = $self->{-tree}; @@ -535,10 +545,13 @@ sub purge_oids { } elsif (/^M 100644 ([a-f0-9]+) (\w+)/) { my ($oid, $path) = ($1, $2); $tree->{$path} = 1; - if ($purge->{$oid}) { + my $sref = $replace_map->{$oid}; + if (defined $sref) { push @oids, $oid; - my $cmd = "M 100644 inline $path\ndata 0\n\n"; - push @buf, $cmd; + my $n = length($$sref); + push @buf, "M 100644 inline $path\ndata $n\n"; + push @buf, $$sref; # hope CoW works... + push @buf, "\n"; } else { push @buf, $_; } @@ -547,11 +560,13 @@ sub purge_oids { push @buf, $_ if $tree->{$path}; } elsif ($_ eq "\n") { if (@oids) { - my $out = join('', @buf); - $out =~ s/^/# /sgm; - warn "purge rewriting\n", $out, "\n"; - clean_purge_buffer(\@oids, \@buf); - $npurge++; + if (!$mime) { + my $out = join('', @buf); + $out =~ s/^/# /sgm; + warn "purge rewriting\n", $out, "\n"; + } + rewrite_commit($self, \@oids, \@buf, $mime); + $nreplace++; } $w->print(@buf, "\n") or wfail; @buf = (); @@ -569,28 +584,30 @@ sub purge_oids { $w->print(@buf) or wfail; } die 'done\n not seen from fast-export' unless $done; - chomp(my $cmt = $self->get_mark(":$mark")) if $npurge; + chomp(my $cmt = $self->get_mark(":$mark")) if $nreplace; $self->{nchg} = 0; # prevent _update_git_info until update-ref: $self->done; my @git = ('git', "--git-dir=$git->{git_dir}"); - run_die([@git, qw(update-ref), $old, $tmp]) if $npurge; + run_die([@git, qw(update-ref), $old, $tmp]) if $nreplace; run_die([@git, qw(update-ref -d), $tmp]); - return if $npurge == 0; + return if $nreplace == 0; run_die([@git, qw(-c gc.reflogExpire=now gc --prune=all)]); + + # check that old OIDs are gone my $err = 0; - foreach my $oid (keys %$purge) { + foreach my $oid (keys %$replace_map) { my @info = $git->check($oid); if (@info) { - warn "$oid not purged\n"; + warn "$oid not replaced\n"; $err++; } } _update_git_info($self, 0); - die "Failed to purge $err object(s)\n" if $err; + die "Failed to replace $err object(s)\n" if $err; $cmt; } diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index be80560f..8a31b910 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -434,6 +434,26 @@ sub xref ($$$$) { sub set_nntp_headers ($$$$$) { my ($self, $hdr, $ng, $n, $mid) = @_; + # why? leafnode requires a Path: header for some inexplicable + # reason. We'll fake the shortest one possible. + $hdr->header_set('Path', 'y'); + + # leafnode (and maybe other NNTP clients) have trouble dealing + # with v2 messages which have multiple Message-IDs (either due + # to our own content-based dedupe or buggy git-send-email versions). + my @mids = $hdr->header('Message-ID'); + if (scalar(@mids) > 1) { + my $mid0 = "<$mid>"; + $hdr->header_set('Message-ID', $mid0); + my @alt = $hdr->header('X-Alt-Message-ID'); + my %seen = map { $_ => 1 } (@alt, $mid0); + foreach my $m (@mids) { + next if $seen{$m}++; + push @alt, $m; + } + $hdr->header_set('X-Alt-Message-ID', @alt); + } + # clobber some my $xref = xref($self, $ng, $n, $mid); $hdr->header_set('Xref', $xref); @@ -515,6 +535,13 @@ sub _header ($) { my $hdr = $_[0]->header_obj->as_string; utf8::encode($hdr); $hdr =~ s/(?{xdb} = Search::Xapian::WritableDatabase->new($dir, $flag); + my $xdb = eval { Search::Xapian::WritableDatabase->new($dir, $flag) }; + if ($@) { + die "Failed opening $dir: ", $@; + } + $self->{xdb} = $xdb; } sub add_val ($$$) { diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm index 5f3c8af8..96a26b15 100644 --- a/lib/PublicInbox/SearchMsg.pm +++ b/lib/PublicInbox/SearchMsg.pm @@ -25,12 +25,6 @@ sub wrap { bless { mid => $mid }, $class; } -sub get { - my ($class, $head, $db, $mid) = @_; - my $doc_id = $head->get_docid; - load_expand(wrap($class, $mid), $db->get_document($doc_id)); -} - sub get_val ($$) { my ($doc, $col) = @_; Search::Xapian::sortable_unserialise($doc->get_value($col)); diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index a8c33ef4..09ed4e7b 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -11,7 +11,7 @@ use PublicInbox::SearchIdxPart; use PublicInbox::MIME; use PublicInbox::Git; use PublicInbox::Import; -use PublicInbox::MID qw(mids); +use PublicInbox::MID qw(mids references); use PublicInbox::ContentId qw(content_id content_digest); use PublicInbox::Inbox; use PublicInbox::OverIdx; @@ -116,6 +116,18 @@ sub add { }); } +# indexes a message, returns true if checkpointing is needed +sub do_idx ($$$$$$$) { + my ($self, $msgref, $mime, $len, $num, $oid, $mid0) = @_; + $self->{over}->add_overview($mime, $len, $num, $oid, $mid0); + my $npart = $self->{partitions}; + my $part = $num % $npart; + my $idx = idx_part($self, $part); + $idx->index_raw($len, $msgref, $num, $oid, $mid0, $mime); + my $n = $self->{transact_bytes} += $len; + $n >= (PublicInbox::SearchIdx::BATCH_BYTES * $npart); +} + sub _add { my ($self, $mime, $check_cb) = @_; @@ -141,13 +153,7 @@ sub _add { $self->{last_commit}->[$self->{epoch_max}] = $cmt; my ($oid, $len, $msgref) = @{$im->{last_object}}; - $self->{over}->add_overview($mime, $len, $num, $oid, $mid0); - my $nparts = $self->{partitions}; - my $part = $num % $nparts; - my $idx = $self->idx_part($part); - $idx->index_raw($len, $msgref, $num, $oid, $mid0, $mime); - my $n = $self->{transact_bytes} += $len; - if ($n > (PublicInbox::SearchIdx::BATCH_BYTES * $nparts)) { + if (do_idx($self, $msgref, $mime, $len, $num, $oid, $mid0)) { $self->checkpoint; } @@ -291,26 +297,30 @@ sub idx_init { }); } -sub purge_oids ($$) { - my ($self, $purge) = @_; # $purge = { $object_id => 1, ... } +# returns an array mapping [ epoch => latest_commit ] +# latest_commit may be undef if nothing was done to that epoch +# $replace_map = { $object_id => $strref, ... } +sub _replace_oids ($$$) { + my ($self, $mime, $replace_map) = @_; $self->done; my $pfx = "$self->{-inbox}->{mainrepo}/git"; - my $purges = []; + my $rewrites = []; # epoch => commit my $max = $self->{epoch_max}; unless (defined($max)) { defined(my $latest = git_dir_latest($self, \$max)) or return; $self->{epoch_max} = $max; } + foreach my $i (0..$max) { my $git_dir = "$pfx/$i.git"; -d $git_dir or next; my $git = PublicInbox::Git->new($git_dir); my $im = $self->import_init($git, 0, 1); - $purges->[$i] = $im->purge_oids($purge); + $rewrites->[$i] = $im->replace_oids($mime, $replace_map); $im->done; } - $purges; + $rewrites; } sub content_ids ($) { @@ -333,25 +343,31 @@ sub content_matches ($$) { 0 } -sub remove_internal ($$$$) { - my ($self, $mime, $cmt_msg, $purge) = @_; +# used for removing or replacing (purging) +sub rewrite_internal ($$;$$$) { + my ($self, $old_mime, $cmt_msg, $new_mime, $sref) = @_; $self->idx_init; - my $im = $self->importer unless $purge; + my ($im, $need_reindex, $replace_map); + if ($sref) { + $replace_map = {}; # oid => sref + $need_reindex = [] if $new_mime; + } else { + $im = $self->importer; + } my $over = $self->{over}; - my $cids = content_ids($mime); + my $cids = content_ids($old_mime); my $parts = $self->{idx_parts}; - my $mm = $self->{mm}; my $removed; - my $mids = mids($mime->header_obj); + my $mids = mids($old_mime->header_obj); # We avoid introducing new blobs into git since the raw content # can be slightly different, so we do not need the user-supplied # message now that we have the mids and content_id - $mime = undef; + $old_mime = undef; my $mark; foreach my $mid (@$mids) { - my %gone; + my %gone; # num => [ smsg, raw ] my ($id, $prev); while (my $smsg = $over->next_by_mid($mid, \$id, \$prev)) { my $msg = get_blob($self, $smsg); @@ -374,17 +390,21 @@ sub remove_internal ($$$$) { } foreach my $num (keys %gone) { my ($smsg, $orig) = @{$gone{$num}}; - $mm->num_delete($num); # $removed should only be set once assuming # no bugs in our deduplication code: $removed = $smsg; my $oid = $smsg->{blob}; - if ($purge) { - $purge->{$oid} = 1; + if ($replace_map) { + $replace_map->{$oid} = $sref; } else { ($mark, undef) = $im->remove($orig, $cmt_msg); } $orig = undef; + if ($need_reindex) { # ->replace + push @$need_reindex, $smsg; + } else { # ->purge or ->remove + $self->{mm}->num_delete($num); + } unindex_oid_remote($self, $oid, $mid); } } @@ -393,8 +413,9 @@ sub remove_internal ($$$$) { my $cmt = $im->get_mark($mark); $self->{last_commit}->[$self->{epoch_max}] = $cmt; } - if ($purge && scalar keys %$purge) { - return purge_oids($self, $purge); + if ($replace_map && scalar keys %$replace_map) { + my $rewrites = _replace_oids($self, $new_mime, $replace_map); + return { rewrites => $rewrites, need_reindex => $need_reindex }; } $removed; } @@ -403,22 +424,125 @@ sub remove_internal ($$$$) { sub remove { my ($self, $mime, $cmt_msg) = @_; $self->{-inbox}->with_umask(sub { - remove_internal($self, $mime, $cmt_msg, undef); + rewrite_internal($self, $mime, $cmt_msg); }); } +sub _replace ($$;$$) { + my ($self, $old_mime, $new_mime, $sref) = @_; + my $rewritten = $self->{-inbox}->with_umask(sub { + rewrite_internal($self, $old_mime, undef, $new_mime, $sref); + }) or return; + + my $rewrites = $rewritten->{rewrites}; + # ->done is called if there are rewrites since we gc+prune from git + $self->idx_init if @$rewrites; + + for my $i (0..$#$rewrites) { + defined(my $cmt = $rewrites->[$i]) or next; + $self->{last_commit}->[$i] = $cmt; + } + $rewritten; +} + # public sub purge { my ($self, $mime) = @_; - my $purges = $self->{-inbox}->with_umask(sub { - remove_internal($self, $mime, undef, {}); - }) or return; - $self->idx_init if @$purges; # ->done is called on purges - for my $i (0..$#$purges) { - defined(my $cmt = $purges->[$i]) or next; - $self->{last_commit}->[$i] = $cmt; + my $rewritten = _replace($self, $mime, undef, \'') or return; + $rewritten->{rewrites} +} + +# returns the git object_id of $fh, does not write the object to FS +sub git_hash_raw ($$) { + my ($self, $raw) = @_; + # grab the expected OID we have to reindex: + open my $tmp_fh, '+>', undef or die "failed to open tmp: $!"; + $tmp_fh->autoflush(1); + print $tmp_fh $$raw or die "print \$tmp_fh: $!"; + sysseek($tmp_fh, 0, 0) or die "seek failed: $!"; + + my ($r, $w); + pipe($r, $w) or die "failed to create pipe: $!"; + my $rdr = { 0 => fileno($tmp_fh), 1 => fileno($w) }; + my $git_dir = $self->{-inbox}->git->{git_dir}; + my $cmd = ['git', "--git-dir=$git_dir", qw(hash-object --stdin)]; + my $pid = spawn($cmd, undef, $rdr); + close $w; + local $/ = "\n"; + chomp(my $oid = <$r>); + waitpid($pid, 0) == $pid or die "git hash-object did not finish"; + die "git hash-object failed: $?" if $?; + $oid =~ /\A[a-f0-9]{40}\z/ or die "OID not expected: $oid"; + $oid; +} + +sub _check_mids_match ($$$) { + my ($old_list, $new_list, $hdrs) = @_; + my %old_mids = map { $_ => 1 } @$old_list; + my %new_mids = map { $_ => 1 } @$new_list; + my @old = keys %old_mids; + my @new = keys %new_mids; + my $err = "$hdrs may not be changed when replacing\n"; + die $err if scalar(@old) != scalar(@new); + delete @new_mids{@old}; + delete @old_mids{@new}; + die $err if (scalar(keys %old_mids) || scalar(keys %new_mids)); +} + +# Changing Message-IDs or References with ->replace isn't supported. +# The rules for dealing with messages with multiple or conflicting +# Message-IDs are pretty complex and rethreading hasn't been fully +# implemented, yet. +sub check_mids_match ($$) { + my ($old_mime, $new_mime) = @_; + my $old = $old_mime->header_obj; + my $new = $new_mime->header_obj; + _check_mids_match(mids($old), mids($new), 'Message-ID(s)'); + _check_mids_match(references($old), references($new), + 'References/In-Reply-To'); +} + +# public +sub replace ($$$) { + my ($self, $old_mime, $new_mime) = @_; + + check_mids_match($old_mime, $new_mime); + + # mutt will always add Content-Length:, Status:, Lines: when editing + PublicInbox::Import::drop_unwanted_headers($new_mime); + + my $raw = $new_mime->as_string; + my $expect_oid = git_hash_raw($self, \$raw); + my $rewritten = _replace($self, $old_mime, $new_mime, \$raw) or return; + my $need_reindex = $rewritten->{need_reindex}; + + # just in case we have bugs in deduplication code: + my $n = scalar(@$need_reindex); + if ($n > 1) { + my $list = join(', ', map { + "$_->{num}: <$_->{mid}>" + } @$need_reindex); + warn <<""; +W: rewritten $n messages matching content of original message (expected: 1). +W: possible bug in public-inbox, NNTP article IDs and Message-IDs follow: +W: $list + + } + + # make sure we really got the OID: + my ($oid, $type, $len) = $self->{-inbox}->git->check($expect_oid); + $oid eq $expect_oid or die "BUG: $expect_oid not found after replace"; + + # don't leak FDs to Xapian: + $self->{-inbox}->git->cleanup; + + # reindex modified messages: + for my $smsg (@$need_reindex) { + my $num = $smsg->{num}; + my $mid0 = $smsg->{mid}; + do_idx($self, \$raw, $new_mime, $len, $num, $oid, $mid0); } - $purges; + $rewritten->{rewrites}; } sub last_commit_part ($$;$) { @@ -772,15 +896,8 @@ sub reindex_oid ($$$$) { } $sync->{mm_tmp}->mid_delete($mid0) or die "failed to delete <$mid0> for article #$num\n"; - - $self->{over}->add_overview($mime, $len, $num, $oid, $mid0); - my $nparts = $self->{partitions}; - my $part = $num % $nparts; - my $idx = $self->idx_part($part); - $idx->index_raw($len, $msgref, $num, $oid, $mid0, $mime); - my $n = $self->{transact_bytes} += $len; $sync->{nr}++; - if ($n > (PublicInbox::SearchIdx::BATCH_BYTES * $nparts)) { + if (do_idx($self, $msgref, $mime, $len, $num, $oid, $mid0)) { $git->cleanup; $sync->{mm_tmp}->atfork_prepare; $self->done; # release lock diff --git a/script/public-inbox-edit b/script/public-inbox-edit new file mode 100755 index 00000000..2e2c7616 --- /dev/null +++ b/script/public-inbox-edit @@ -0,0 +1,234 @@ +#!/usr/bin/perl -w +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ +# +# Used for editing messages in a public-inbox. +# Supports v2 inboxes only, for now. +use strict; +use warnings; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +use PublicInbox::AdminEdit; +use File::Temp qw(tempfile); +use PublicInbox::ContentId qw(content_id); +use PublicInbox::MID qw(mid_clean mids); +PublicInbox::Admin::check_require('-index'); +require PublicInbox::MIME; +require PublicInbox::InboxWritable; +require PublicInbox::Import; + +my $usage = "$0 -m MESSAGE_ID [--all] [INBOX_DIRS]"; +my $opt = { verbose => 1, all => 0, -min_inbox_version => 2, raw => 0 }; +my @opt = qw(mid|m=s file|F=s raw); +GetOptions($opt, @PublicInbox::AdminEdit::OPT, @opt) or + die "bad command-line args\n$usage\n"; + +my $editor = $ENV{MAIL_EDITOR}; # e.g. "mutt -f" +unless (defined $editor) { + my $k = 'publicinbox.mailEditor'; + if (my $cfg = PublicInbox::Admin::config()) { + $editor = $cfg->{lc($k)}; + } + unless (defined $editor) { + warn "\`$k' not configured, trying \`git var GIT_EDITOR'\n"; + chomp($editor = `git var GIT_EDITOR`); + warn "Will use $editor to edit mail\n"; + } +} + +my $mid = $opt->{mid}; +my $file = $opt->{file}; +if (defined $mid && defined $file) { + die "the --mid and --file options are mutually exclusive\n"; +} + +my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt); +PublicInbox::AdminEdit::check_editable(\@ibxs); + +my $found = {}; # cid => [ [ibx, smsg] [, [ibx, smsg] ] ] + +sub find_mid ($) { + my ($mid) = @_; + foreach my $ibx (@ibxs) { + my $over = $ibx->over; + my ($id, $prev); + while (my $smsg = $over->next_by_mid($mid, \$id, \$prev)) { + my $ref = $ibx->msg_by_smsg($smsg); + my $mime = PublicInbox::MIME->new($ref); + my $cid = content_id($mime); + my $tuple = [ $ibx, $smsg ]; + push @{$found->{$cid} ||= []}, $tuple + } + delete @$ibx{qw(over mm git search)}; # cleanup + } + $found; +} + +sub show_cmd ($$) { + my ($ibx, $smsg) = @_; + " GIT_DIR=$ibx->{mainrepo}/all.git \\\n git show $smsg->{blob}\n"; +} + +sub show_found () { + foreach my $to_edit (values %$found) { + foreach my $tuple (@$to_edit) { + my ($ibx, $smsg) = @$tuple; + warn show_cmd($ibx, $smsg); + } + } +} + +if (defined($mid)) { + $mid = mid_clean($mid); + $found = find_mid($mid); + my $nr = scalar(keys %$found); + die "No message found for <$mid>\n" unless $nr; + if ($nr > 1) { + warn <<""; +Multiple messages with different content found matching +<$mid>: + + show_found(); + die "Use --force to edit all of them\n" if !$opt->{force}; + warn "Will edit all of them\n"; + } +} else { + open my $fh, '<', $file or die "open($file) failed: $!"; + my $orig = do { local $/; <$fh> }; + my $mime = PublicInbox::MIME->new(\$orig); + my $mids = mids($mime->header_obj); + find_mid($_) for (@$mids); # populates $found + my $cid = content_id($mime); + my $to_edit = $found->{$cid}; + unless ($to_edit) { + my $nr = scalar(keys %$found); + if ($nr > 0) { + warn <<""; +$nr matches to Message-ID(s) in $file, but none matched content +Partial matches below: + + show_found(); + } elsif ($nr == 0) { + $mids = join('', map { " <$_>\n" } @$mids); + warn <<""; +No matching messages found matching Message-ID(s) in $file +$mids + + } + exit 1; + } + $found = { $cid => $to_edit }; +} + +my $tmpl = 'public-inbox-edit-XXXXXX'; +foreach my $to_edit (values %$found) { + my ($edit_fh, $edit_fn) = tempfile($tmpl, TMPDIR => 1, UNLINK => 1); + $edit_fh->autoflush(1); + my ($ibx, $smsg) = @{$to_edit->[0]}; + my $old_raw = $ibx->msg_by_smsg($smsg); + delete @$ibx{qw(over mm git search)}; # cleanup + + my $tmp = $$old_raw; + if (!$opt->{raw}) { + my $oid = $smsg->{blob}; + print $edit_fh "From mboxrd\@$oid Thu Jan 1 00:00:00 1970\n"; + $tmp =~ s/^(>*From )/>$1/gm; + } + print $edit_fh $tmp or + die "failed to write tempfile for editing: $!"; + + # run the editor, respecting spaces/quote +retry_edit: + if (system(qw(sh -c), qq(eval "$editor" '"\$@"'), '--', $edit_fn)) { + if (!(-t STDIN) && !$opt->{force}) { + die "E: $editor failed: $?\n"; + } + print STDERR "$editor failed, "; + print STDERR "continuing as forced\n" if $opt->{force}; + while (!$opt->{force}) { + print STDERR "(r)etry, (c)ontinue, (q)uit?\n"; + chomp(my $op = || ''); + $op = lc($op); + goto retry_edit if $op eq 'r'; + exit $? if $op eq 'q'; + last if $op eq 'c'; # continuing + print STDERR "\`$op' not recognized\n"; + } + } + + # reread the edited file, not using $edit_fh since $EDITOR may + # rename/relink $edit_fn + open my $new_fh, '<', $edit_fn or + die "can't read edited file ($edit_fn): $!\n"; + my $new_raw = do { local $/; <$new_fh> }; + + if (!$opt->{raw}) { + # get rid of the From we added + $new_raw =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; + + # check if user forgot to purge (in mutt) after editing + if ($new_raw =~ /^From /sm) { + if (-t STDIN) { + print STDERR <<''; +Extra "From " lines detected in new mbox. +Did you forget to purge the original message from the mbox after editing? + + while (1) { + print STDERR <<""; +(y)es to re-edit, (n)o to continue + + chomp(my $op = || ''); + $op = lc($op); + goto retry_edit if $op eq 'y'; + last if $op eq 'n'; # continuing + print STDERR "\`$op' not recognized\n"; + } + } else { # non-interactive path + # unlikely to happen, as extra From lines are + # only a common mistake (for me) with + # interactive use + warn <<""; +W: possible message boundary splitting error + + } + } + # unescape what we escaped: + $new_raw =~ s/^>(>*From )/$1/gm; + } + + my $new_mime = PublicInbox::MIME->new(\$new_raw); + my $old_mime = PublicInbox::MIME->new($old_raw); + + # make sure we don't compare unwanted headers, since mutt adds + # Content-Length, Status, and Lines headers: + PublicInbox::Import::drop_unwanted_headers($new_mime); + PublicInbox::Import::drop_unwanted_headers($old_mime); + + # allow changing Received: and maybe other headers which can + # contain sensitive info. + my $nhdr = $new_mime->header_obj; + my $ohdr = $old_mime->header_obj; + if (($nhdr->as_string eq $ohdr->as_string) && + (content_id($new_mime) eq content_id($old_mime))) { + warn "No change detected to:\n", show_cmd($ibx, $smsg); + + next unless $opt->{verbose}; + # should we consider this machine-parseable? + PublicInbox::AdminEdit::show_rewrites(\*STDOUT, $ibx, []); + next; + } + + foreach my $tuple (@$to_edit) { + $ibx = PublicInbox::InboxWritable->new($tuple->[0]); + $smsg = $tuple->[1]; + my $im = $ibx->importer(0); + my $commits = $im->replace($old_mime, $new_mime); + $im->done; + unless ($commits) { + warn "Failed to replace:\n", show_cmd($ibx, $smsg); + next; + } + next unless $opt->{verbose}; + # should we consider this machine-parseable? + PublicInbox::AdminEdit::show_rewrites(\*STDOUT, $ibx, $commits); + } +} diff --git a/script/public-inbox-purge b/script/public-inbox-purge index 25e6cc9b..0705d170 100755 --- a/script/public-inbox-purge +++ b/script/public-inbox-purge @@ -7,110 +7,27 @@ use strict; use warnings; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); -use PublicInbox::Admin qw(resolve_repo_dir); +use PublicInbox::AdminEdit; PublicInbox::Admin::check_require('-index'); require PublicInbox::Filter::Base; -require PublicInbox::Config; require PublicInbox::MIME; require PublicInbox::V2Writable; { no warnings 'once'; *REJECT = *PublicInbox::Filter::Base::REJECT } my $usage = "$0 [--all] [INBOX_DIRS] new }; -my $cfgfile = PublicInbox::Config::default_file(); -my ($all, $force); -my $verbose = 1; -my %opts = ( - 'all' => \$all, - 'force|f' => \$force, - 'verbose|v!' => \$verbose, -); -GetOptions(%opts) or die "bad command-line args\n", $usage, "\n"; +my $opt = { verbose => 1, all => 0, -min_inbox_version => 2 }; +GetOptions($opt, @PublicInbox::AdminEdit::OPT) or + die "bad command-line args\n$usage\n"; -# TODO: clean this up and share code with -index via ::Admin -my %dir2ibx; # ( path => Inbox object ) -my @inboxes; -$config and $config->each_inbox(sub { - my ($ibx) = @_; - push @inboxes, $ibx if $all && $ibx->{version} != 1; - $dir2ibx{$ibx->{mainrepo}} = $ibx; -}); - -if ($all) { - $config or die "--all specified, but $cfgfile not readable\n"; - @ARGV and die "--all specified, but directories specified\n"; -} else { - my @err; - my @dirs = scalar(@ARGV) ? @ARGV : ('.'); - my $u = 0; - - foreach my $dir (@dirs) { - my $v; - my $dir = resolve_repo_dir($dir, \$v); - if ($v == 1) { - push @err, $dir; - next; - } - my $ibx = $dir2ibx{$dir} ||= do { - warn "$dir not configured in $cfgfile\n"; - $u++; - my $name = "unconfigured-$u"; - PublicInbox::Inbox->new({ - version => 2, - name => $name, - -primary_address => "$name\@example.com", - mainrepo => $dir, - }); - }; - push @inboxes, $ibx; - } - - if (@err) { - die "v1 inboxes currently not supported by -purge\n\t", - join("\n\t", @err), "\n"; - } -} - -foreach my $ibx (@inboxes) { - my $lvl = $ibx->{indexlevel}; - if (defined $lvl) { - PublicInbox::Admin::indexlevel_ok_or_die($lvl); - next; - } - - # Undefined indexlevel, so `full'... - # Search::Xapian exists and the DB can be read, at least, fine - $ibx->search and next; - - # it's possible for a Xapian directory to exist, but Search::Xapian - # to go missing/broken. Make sure it's purged in that case: - $ibx->over or die "no over.sqlite3 in $ibx->{mainrepo}\n"; - - # $ibx->{search} is populated by $ibx->over call - my $xdir_ro = $ibx->{search}->xdir(1); - my $npart = 0; - foreach my $part (<$xdir_ro/*>) { - if (-d $part && $part =~ m!/[0-9]+\z!) { - my $bytes = 0; - $bytes += -s $_ foreach glob("$part/*"); - $npart++ if $bytes; - } - } - if ($npart) { - PublicInbox::Admin::require_or_die('-search'); - } else { - # somebody could "rm -r" all the Xapian directories; - # let them purge the overview, at least - $ibx->{indexlevel} ||= 'basic'; - } -} +my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt); +PublicInbox::AdminEdit::check_editable(\@ibxs); my $data = do { local $/; scalar }; $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; my $n_purged = 0; -foreach my $ibx (@inboxes) { +foreach my $ibx (@ibxs) { my $mime = PublicInbox::MIME->new($data); my $v2w = PublicInbox::V2Writable->new($ibx, 0); @@ -127,19 +44,14 @@ foreach my $ibx (@inboxes) { $v2w->done; - if ($verbose) { # should we consider this machine-parseable? - print "$ibx->{mainrepo}:"; - if (scalar @$commits) { - print join("\n\t", '', @$commits), "\n"; - } else { - print " NONE\n"; - } + if ($opt->{verbose}) { # should we consider this machine-parseable? + PublicInbox::AdminEdit::show_rewrites(\*STDOUT, $ibx, $commits); } $n_purged += scalar @$commits; } # behave like "rm -f" -exit(0) if ($force || $n_purged); +exit(0) if ($opt->{force} || $n_purged); -warn "Not found\n" if $verbose; +warn "Not found\n" if $opt->{verbose}; exit(1); diff --git a/t/common.perl b/t/common.perl index e49a5965..5a898e32 100644 --- a/t/common.perl +++ b/t/common.perl @@ -3,6 +3,8 @@ use Fcntl qw(FD_CLOEXEC F_SETFD F_GETFD); use POSIX qw(dup2); +use strict; +use warnings; sub stream_to_string { my ($res) = @_; @@ -48,7 +50,7 @@ sub require_git ($;$) { my $cur_int = ($cur_maj << 24) | ($cur_min << 16); if ($cur_int < $req_int) { return 0 if $maybe; - plan skip_all => "git $req+ required, have $git_ver"; + plan skip_all => "git $req+ required, have $cur_maj.$cur_min"; } 1; } diff --git a/t/edit.t b/t/edit.t new file mode 100644 index 00000000..6b4e35c3 --- /dev/null +++ b/t/edit.t @@ -0,0 +1,196 @@ +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ +# edit frontend behavior test (t/replace.t for backend) +use strict; +use warnings; +use Test::More; +use File::Temp qw/tempdir/; +require './t/common.perl'; +require_git(2.6); +require PublicInbox::Inbox; +require PublicInbox::InboxWritable; +require PublicInbox::Config; +use PublicInbox::MID qw(mid_clean); + +my @mods = qw(IPC::Run DBI DBD::SQLite); +foreach my $mod (@mods) { + eval "require $mod"; + plan skip_all => "missing $mod for $0" if $@; +}; +IPC::Run->import(qw(run)); + +my $cmd_pfx = 'blib/script/public-inbox'; +my $tmpdir = tempdir('pi-edit-XXXXXX', TMPDIR => 1, CLEANUP => 1); +my $mainrepo = "$tmpdir/v2"; +my $ibx = PublicInbox::Inbox->new({ + mainrepo => $mainrepo, + name => 'test-v2edit', + version => 2, + -primary_address => 'test@example.com', + indexlevel => 'basic', +}); +$ibx = PublicInbox::InboxWritable->new($ibx, {nproc=>1}); +my $cfgfile = "$tmpdir/config"; +local $ENV{PI_CONFIG} = $cfgfile; +my $file = 't/data/0001.patch'; +open my $fh, '<', $file or die "open: $!"; +my $raw = do { local $/; <$fh> }; +my $im = $ibx->importer(0); +my $mime = PublicInbox::MIME->new($raw); +my $mid = mid_clean($mime->header('Message-Id')); +ok($im->add($mime), 'add message to be edited'); +$im->done; +my ($in, $out, $err, $cmd, $cur, $t); +my $__git_dir = "--git-dir=$ibx->{mainrepo}/git/0.git"; + +$t = '-F FILE'; { + $in = $out = $err = ''; + local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/boolean prefix/bool pfx/'"; + $cmd = [ "$cmd_pfx-edit", "-F$file", $mainrepo ]; + ok(run($cmd, \$in, \$out, \$err), "$t edit OK"); + $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid)); + like($cur->header('Subject'), qr/bool pfx/, "$t message edited"); + like($out, qr/[a-f0-9]{40}/, "$t shows commit on success"); +} + +$t = '-m MESSAGE_ID'; { + $in = $out = $err = ''; + local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/bool pfx/boolean prefix/'"; + $cmd = [ "$cmd_pfx-edit", "-m$mid", $mainrepo ]; + ok(run($cmd, \$in, \$out, \$err), "$t edit OK"); + $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid)); + like($cur->header('Subject'), qr/boolean prefix/, "$t message edited"); + like($out, qr/[a-f0-9]{40}/, "$t shows commit on success"); +} + +$t = 'no-op -m MESSAGE_ID'; { + $in = $out = $err = ''; + my $before = `git $__git_dir rev-parse HEAD`; + local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/bool pfx/boolean prefix/'"; + $cmd = [ "$cmd_pfx-edit", "-m$mid", $mainrepo ]; + ok(run($cmd, \$in, \$out, \$err), "$t succeeds"); + my $prev = $cur; + $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid)); + is_deeply($cur, $prev, "$t makes no change"); + like($cur->header('Subject'), qr/boolean prefix/, + "$t does not change message"); + like($out, qr/NONE/, 'noop shows NONE'); + my $after = `git $__git_dir rev-parse HEAD`; + is($after, $before, 'git head unchanged'); +} + +$t = 'no-op -m MESSAGE_ID w/Status: header'; { # because mutt does it + $in = $out = $err = ''; + my $before = `git $__git_dir rev-parse HEAD`; + local $ENV{MAIL_EDITOR} = + "$^X -i -p -e 's/^Subject:.*/Status: RO\\n\$&/'"; + $cmd = [ "$cmd_pfx-edit", "-m$mid", $mainrepo ]; + ok(run($cmd, \$in, \$out, \$err), "$t succeeds"); + my $prev = $cur; + $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid)); + is_deeply($cur, $prev, "$t makes no change"); + like($cur->header('Subject'), qr/boolean prefix/, + "$t does not change message"); + is($cur->header('Status'), undef, 'Status header not added'); + like($out, qr/NONE/, 'noop shows NONE'); + my $after = `git $__git_dir rev-parse HEAD`; + is($after, $before, 'git head unchanged'); +} + +$t = '-m MESSAGE_ID can change Received: headers'; { + $in = $out = $err = ''; + my $before = `git $__git_dir rev-parse HEAD`; + local $ENV{MAIL_EDITOR} = + "$^X -i -p -e 's/^Subject:.*/Received: x\\n\$&/'"; + $cmd = [ "$cmd_pfx-edit", "-m$mid", $mainrepo ]; + ok(run($cmd, \$in, \$out, \$err), "$t succeeds"); + $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid)); + like($cur->header('Subject'), qr/boolean prefix/, + "$t does not change Subject"); + is($cur->header('Received'), 'x', 'added Received header'); +} + +$t = '-m miss'; { + $in = $out = $err = ''; + local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/boolean/FAIL/'"; + $cmd = [ "$cmd_pfx-edit", "-m$mid-miss", $mainrepo ]; + ok(!run($cmd, \$in, \$out, \$err), "$t fails on invalid MID"); + like($err, qr/No message found/, "$t shows error"); +} + +$t = 'non-interactive editor failure'; { + $in = $out = $err = ''; + local $ENV{MAIL_EDITOR} = "$^X -i -p -e 'END { exit 1 }'"; + $cmd = [ "$cmd_pfx-edit", "-m$mid", $mainrepo ]; + ok(!run($cmd, \$in, \$out, \$err), "$t detected"); + like($err, qr/END \{ exit 1 \}' failed:/, "$t shows error"); +} + +$t = 'mailEditor set in config'; { + $in = $out = $err = ''; + my $rc = system(qw(git config), "--file=$cfgfile", + 'publicinbox.maileditor', + "$^X -i -p -e 's/boolean prefix/bool pfx/'"); + is($rc, 0, 'set publicinbox.mailEditor'); + local $ENV{MAIL_EDITOR}; + local $ENV{GIT_EDITOR} = 'echo should not run'; + $cmd = [ "$cmd_pfx-edit", "-m$mid", $mainrepo ]; + ok(run($cmd, \$in, \$out, \$err), "$t edited message"); + $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid)); + like($cur->header('Subject'), qr/bool pfx/, "$t message edited"); + unlike($out, qr/should not run/, 'did not run GIT_EDITOR'); +} + +$t = '--raw and mbox escaping'; { + $in = $out = $err = ''; + local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/^\$/\\nFrom not mbox\\n/'"; + $cmd = [ "$cmd_pfx-edit", "-m$mid", '--raw', $mainrepo ]; + ok(run($cmd, \$in, \$out, \$err), "$t succeeds"); + $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid)); + like($cur->body, qr/^From not mbox/sm, 'put "From " line into body'); + + local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/^>From not/\$& an/'"; + $cmd = [ "$cmd_pfx-edit", "-m$mid", $mainrepo ]; + ok(run($cmd, \$in, \$out, \$err), "$t succeeds with mbox escaping"); + $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid)); + like($cur->body, qr/^From not an mbox/sm, + 'changed "From " line unescaped'); + + local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/^From not an mbox\\n//s'"; + $cmd = [ "$cmd_pfx-edit", "-m$mid", '--raw', $mainrepo ]; + ok(run($cmd, \$in, \$out, \$err), "$t succeeds again"); + $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid)); + unlike($cur->body, qr/^From not an mbox/sm, "$t restored body"); +} + +$t = 'reuse Message-ID'; { + my @warn; + local $SIG{__WARN__} = sub { push @warn, @_ }; + ok($im->add($mime), "$t and re-add"); + $im->done; + like($warn[0], qr/reused for mismatched content/, "$t got warning"); +} + +$t = 'edit ambiguous Message-ID with -m'; { + $in = $out = $err = ''; + local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/bool pfx/boolean prefix/'"; + $cmd = [ "$cmd_pfx-edit", "-m$mid", $mainrepo ]; + ok(!run($cmd, \$in, \$out, \$err), "$t fails w/o --force"); + like($err, qr/Multiple messages with different content found matching/, + "$t shows matches"); + like($err, qr/GIT_DIR=.*git show/is, "$t shows git commands"); +} + +$t .= ' and --force'; { + $in = $out = $err = ''; + local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/^Subject:.*/Subject:x/i'"; + $cmd = [ "$cmd_pfx-edit", "-m$mid", '--force', $mainrepo ]; + ok(run($cmd, \$in, \$out, \$err), "$t succeeds"); + like($err, qr/Will edit all of them/, "$t notes all will be edited"); + my @dump = `git $__git_dir cat-file --batch --batch-all-objects`; + chomp @dump; + is_deeply([grep(/^Subject:/i, @dump)], [qw(Subject:x Subject:x)], + "$t edited both messages"); +} + +done_testing(); diff --git a/t/git.t b/t/git.t index 913f6e5e..9bc8900c 100644 --- a/t/git.t +++ b/t/git.t @@ -33,33 +33,7 @@ use_ok 'PublicInbox::Git'; my $raw = $gcf->cat_file($f); is($x[2], length($$raw), 'length matches'); - { - my $size; - my $rv = $gcf->cat_file($f, sub { - my ($in, $left) = @_; - $size = $$left; - 'nothing' - }); - is($rv, 'nothing', 'returned from callback without reading'); - is($size, $x[2], 'set size for callback correctly'); - } - - eval { $gcf->cat_file($f, sub { die 'OMG' }) }; - like($@, qr/\bOMG\b/, 'died in callback propagated'); is(${$gcf->cat_file($f)}, $$raw, 'not broken after failures'); - - { - my ($buf, $r); - my $rv = $gcf->cat_file($f, sub { - my ($in, $left) = @_; - $r = read($in, $buf, 2); - $$left -= $r; - 'blah' - }); - is($r, 2, 'only read 2 bytes'); - is($buf, '--', 'partial read succeeded'); - is($rv, 'blah', 'return value propagated'); - } is(${$gcf->cat_file($f)}, $$raw, 'not broken after partial read'); } @@ -79,44 +53,12 @@ if (1) { my $gcf = PublicInbox::Git->new($dir); my $rsize; - is($gcf->cat_file($buf, sub { - $rsize = ${$_[1]}; - 'x'; - }), 'x', 'checked input'); - is($rsize, $size, 'got correct size on big file'); - my $x = $gcf->cat_file($buf, \$rsize); is($rsize, $size, 'got correct size ref on big file'); is(length($$x), $size, 'read correct number of bytes'); - my $rline; - $gcf->cat_file($buf, sub { - my ($in, $left) = @_; - $rline = <$in>; - $$left -= length($rline); - }); - { - open my $fh, '<', $big_data or die "open failed: $!\n"; - is($rline, <$fh>, 'first line matches'); - }; - - my $all; - $gcf->cat_file($buf, sub { - my ($in, $left) = @_; - my $x = read($in, $all, $$left); - $$left -= $x; - }); - { - open my $fh, '<', $big_data or die "open failed: $!\n"; - local $/; - is($all, <$fh>, 'entire read matches'); - }; - my $ref = $gcf->qx(qw(cat-file blob), $buf); - is($all, $ref, 'qx read giant single string'); - my @ref = $gcf->qx(qw(cat-file blob), $buf); - is($all, join('', @ref), 'qx returned array when wanted'); my $nl = scalar @ref; ok($nl > 1, "qx returned array length of $nl"); diff --git a/t/nntpd.t b/t/nntpd.t index c73cc122..a95fb6fc 100644 --- a/t/nntpd.t +++ b/t/nntpd.t @@ -231,6 +231,43 @@ EOF ok($date >= $t0, 'valid date after start'); ok($date <= $t1, 'valid date before stop'); } + if ('leafnode interop') { + my $for_leafnode = PublicInbox::MIME->new(<<""); +From: longheader\@example.com +To: $addr +Subject: none +Date: Fri, 02 Oct 1993 00:00:00 +0000 + + my $long_hdr = 'for-leafnode-'.('y'x200).'@example.com'; + $for_leafnode->header_set('Message-ID', "<$long_hdr>"); + $im->add($for_leafnode); + $im->done; + if ($version == 1) { + my $s = PublicInbox::SearchIdx->new($mainrepo, 1); + $s->index_sync; + } + my $hdr = $n->head("<$long_hdr>"); + my $expect = qr/\AMessage-ID: /i . qr/\Q<$long_hdr>\E/; + ok(scalar(grep(/$expect/, @$hdr)), 'Message-ID not folded'); + ok(scalar(grep(/^Path:/, @$hdr)), 'Path: header found'); + + # it's possible for v2 messages to have 2+ Message-IDs, + # but leafnode can't handle it + if ($version != 1) { + my @mids = ("<$long_hdr>", '<2mid@wtf>'); + $for_leafnode->header_set('Message-ID', @mids); + $for_leafnode->body_set('not-a-dupe'); + my $warn = ''; + $SIG{__WARN__} = sub { $warn .= join('', @_) }; + $im->add($for_leafnode); + $im->done; + like($warn, qr/reused/, 'warned for reused MID'); + $hdr = $n->head('<2mid@wtf>'); + my @hmids = grep(/\AMessage-ID: /i, @$hdr); + is(scalar(@hmids), 1, 'Single Message-ID in header'); + like($hmids[0], qr/: <2mid\@wtf>/, 'got expected mid'); + } + } # pipelined requests: { diff --git a/t/purge.t b/t/purge.t index c1e0e9a0..384f32a6 100644 --- a/t/purge.t +++ b/t/purge.t @@ -57,7 +57,7 @@ is($? >> 8, 1, 'missed purge exits with 1'); # a successful case: ok(IPC::Run::run([$purge, $mainrepo], \$raw, \$out, \$err), 'match OK'); -like($out, qr/^\t[a-f0-9]{40,}/m, 'removed commit noted'); +like($out, qr/\b[a-f0-9]{40,}/m, 'removed commit noted'); # add (old) vger filter to config file print $cfg_fh < +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use PublicInbox::MIME; +use PublicInbox::InboxWritable; +use File::Temp qw/tempdir/; +require './t/common.perl'; +require_git(2.6); # replace is v2 only, for now... +foreach my $mod (qw(DBD::SQLite)) { + eval "require $mod"; + plan skip_all => "$mod missing for $0" if $@; +} + +sub test_replace ($$$) { + my ($v, $level, $opt) = @_; + diag "v$v $level replace"; + my $this = "pi-$v-$level-replace"; + my $tmpdir = tempdir("$this-tmp-XXXXXX", TMPDIR => 1, CLEANUP => 1); + my $ibx = PublicInbox::Inbox->new({ + mainrepo => "$tmpdir/testbox", + name => $this, + version => $v, + -primary_address => 'test@example.com', + indexlevel => $level, + }); + + my $orig = PublicInbox::MIME->new(<<'EOF'); +From: Barbra Streisand +To: test@example.com +Subject: confidential +Message-ID: +Date: Fri, 02 Oct 1993 00:00:00 +0000 + +Top secret info about my house in Malibu... +EOF + my $im = PublicInbox::InboxWritable->new($ibx, {nproc=>1})->importer; + # fake a bunch of epochs + $im->{rotate_bytes} = $opt->{rotate_bytes} if $opt->{rotate_bytes}; + + if ($opt->{pre}) { + $opt->{pre}->($im, 1, 2); + $orig->header_set('References', '<1@example.com>'); + } + ok($im->add($orig), 'add message to be replaced'); + if ($opt->{post}) { + $opt->{post}->($im, 3, { 4 => 'replace@example.com' }); + } + $im->done; + my $thread_a = $ibx->over->get_thread('replace@example.com'); + + my %before = map {; delete($_->{blob}) => $_ } @{$ibx->recent}; + my $reject = PublicInbox::MIME->new($orig->as_string); + foreach my $mid (['', ''], + [], ['']) { + $reject->header_set('Message-ID', @$mid); + my $ok = eval { $im->replace($orig, $reject) }; + like($@, qr/Message-ID.*may not be changed/, + '->replace died on Message-ID change'); + ok(!$ok, 'no replacement happened'); + } + + # prepare the replacement + my $expect = "Move along, nothing to see here\n"; + my $repl = PublicInbox::MIME->new($orig->as_string); + $repl->header_set('From', ''); + $repl->header_set('Subject', 'redacted'); + $repl->header_set('Date', 'Sat, 02 Oct 2010 00:00:00 +0000'); + $repl->body_str_set($expect); + + my @warn; + local $SIG{__WARN__} = sub { push @warn, @_ }; + ok(my $cmts = $im->replace($orig, $repl), 'replaced message'); + my $changed_epochs = 0; + for my $tip (@$cmts) { + next if !defined $tip; + $changed_epochs++; + like($tip, qr/\A[a-f0-9]{40}\z/, + 'replace returned current commit'); + } + is($changed_epochs, 1, 'only one epoch changed'); + + $im->done; + my $m = PublicInbox::MIME->new($ibx->msg_by_mid('replace@example.com')); + is($m->body, $expect, 'replaced message'); + is_deeply(\@warn, [], 'no warnings on noop'); + + my @cat = qw(cat-file --buffer --batch --batch-all-objects); + my $git = $ibx->git; + my @all = $git->qx(@cat); + is_deeply([grep(/confidential/, @all)], [], 'nothing confidential'); + is_deeply([grep(/Streisand/, @all)], [], 'Streisand who?'); + is_deeply([grep(/\bOct 1993\b/, @all)], [], 'nothing from Oct 1993'); + my $t19931002 = qr/ 749520000 /; + is_deeply([grep(/$t19931002/, @all)], [], "nothing matches $t19931002"); + + for my $dir (glob("$ibx->{mainrepo}/git/*.git")) { + my ($bn) = ($dir =~ m!([^/]+)\z!); + is(system(qw(git --git-dir), $dir, qw(fsck --strict)), 0, + "git fsck is clean in epoch $bn"); + } + + my $thread_b = $ibx->over->get_thread('replace@example.com'); + is_deeply([sort map { $_->{mid} } @$thread_b], + [sort map { $_->{mid} } @$thread_a], 'threading preserved'); + + if (my $srch = $ibx->search) { + for my $q ('f:streisand', 's:confidential', 'malibu') { + my $msgs = $srch->query($q); + is_deeply($msgs, [], "no match for $q"); + } + my @ok = ('f:redactor', 's:redacted', 'nothing to see'); + if ($opt->{pre}) { + push @ok, 'm:1@example.com', 'm:2@example.com', + 's:message2', 's:message1'; + } + if ($opt->{post}) { + push @ok, 'm:3@example.com', 'm:4@example.com', + 's:message3', 's:message4'; + } + for my $q (@ok) { + my $msgs = $srch->query($q); + ok($msgs->[0], "got match for $q"); + } + } + + # check overview matches: + my %after = map {; delete($_->{blob}) => $_ } @{$ibx->recent}; + my @before_blobs = keys %before; + foreach my $blob (@before_blobs) { + delete $before{$blob} if delete $after{$blob}; + } + + is(scalar keys %before, 1, 'one unique blob from before left'); + is(scalar keys %after, 1, 'one unique blob from after left'); + foreach my $blob (keys %before) { + is($git->check($blob), undef, 'old blob not found'); + my $smsg = $before{$blob}; + is($smsg->{subject}, 'confidential', 'before subject'); + is($smsg->{mid}, 'replace@example.com', 'before MID'); + } + foreach my $blob (keys %after) { + ok($git->check($blob), 'new blob found'); + my $smsg = $after{$blob}; + is($smsg->{subject}, 'redacted', 'after subject'); + is($smsg->{mid}, 'replace@example.com', 'before MID'); + } + @warn = (); + is($im->replace($orig, $repl), undef, 'no-op replace returns undef'); + is($im->purge($orig), undef, 'no-op purge returns undef'); + is_deeply(\@warn, [], 'no warnings on noop'); +} + +sub pad_msgs { + my ($im, @range) = @_; + for my $i (@range) { + my $irt; + if (ref($i) eq 'HASH') { + ($i, $irt) = each %$i; + } + my $sec = sprintf('%0d', $i); + my $mime = PublicInbox::MIME->new(< +Date: Fri, 02, Jan 1970 00:00:$sec +0000 +Subject: message$i + +message number$i +EOF + + if (defined($irt)) { + $mime->header_set('References', "<$irt>"); + } + + $im->add($mime); + } +} + +my $opt = { pre => *pad_msgs }; +test_replace(2, 'basic', {}); +test_replace(2, 'basic', $opt); +test_replace(2, 'basic', $opt = { %$opt, post => *pad_msgs }); +test_replace(2, 'basic', $opt = { %$opt, rotate_bytes => 1 }); + +SKIP: if ('test xapian') { + require PublicInbox::Search; + PublicInbox::Search::load_xapian() or skip 'Search::Xapian missing', 8; + for my $l (qw(medium)) { + test_replace(2, $l, {}); + $opt = { pre => *pad_msgs }; + test_replace(2, $l, $opt); + test_replace(2, $l, $opt = { %$opt, post => *pad_msgs }); + test_replace(2, $l, $opt = { %$opt, rotate_bytes => 1 }); + } +}; + +done_testing();