From: Eric Wong Date: Thu, 31 Jan 2019 21:08:48 +0000 (+0000) Subject: Merge remote-tracking branch 'origin/purge' X-Git-Tag: v1.2.0~382 X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=commitdiff_plain;h=65323f060a3db731bb9fafa004336eeb4bbb8f00;hp=ce4fe8f5144f7555ddd42b6a94ec602e042c6e43 Merge remote-tracking branch 'origin/purge' * origin/purge: implement public-inbox-purge tool v2writable: read epoch on purge v2writable: cleanup processes when done v2writable: purge ignores non-existent git epoch directories v2writable: ->purge returns undef on no-op import: purge: reap fast-export process hoist out resolve_repo_dir from -index --- diff --git a/MANIFEST b/MANIFEST index e6272066..c4a9349f 100644 --- a/MANIFEST +++ b/MANIFEST @@ -55,6 +55,7 @@ examples/unsubscribe.milter examples/unsubscribe.psgi examples/varnish-4.vcl lib/PublicInbox/Address.pm +lib/PublicInbox/Admin.pm lib/PublicInbox/AltId.pm lib/PublicInbox/Config.pm lib/PublicInbox/ContentId.pm @@ -137,6 +138,7 @@ script/public-inbox-init script/public-inbox-learn script/public-inbox-mda script/public-inbox-nntpd +script/public-inbox-purge script/public-inbox-watch script/public-inbox.cgi scripts/dc-dlvr @@ -151,6 +153,7 @@ scripts/slrnspool2maildir scripts/ssoma-replay scripts/xhdr-num2mid t/address.t +t/admin.t t/altid.t t/altid_v2.t t/cgi.t @@ -208,6 +211,7 @@ t/psgi_multipart_not.t t/psgi_search.t t/psgi_text.t t/psgi_v2.t +t/purge.t t/qspawn.t t/reply.t t/search-thr-index.t diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm new file mode 100644 index 00000000..d0a8dd00 --- /dev/null +++ b/lib/PublicInbox/Admin.pm @@ -0,0 +1,44 @@ +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ + +# common stuff for administrative command-line tools +# Unstable internal API +package PublicInbox::Admin; +use strict; +use warnings; +use Cwd 'abs_path'; +use base qw(Exporter); +our @EXPORT_OK = qw(resolve_repo_dir); + +sub resolve_repo_dir { + my ($cd, $ver) = @_; + my $prefix = defined $cd ? $cd : './'; + if (-d $prefix && -f "$prefix/inbox.lock") { # v2 + $$ver = 2 if $ver; + return abs_path($prefix); + } + + my @cmd = qw(git rev-parse --git-dir); + my $cmd = join(' ', @cmd); + my $pid = open my $fh, '-|'; + defined $pid or die "forking $cmd failed: $!\n"; + if ($pid == 0) { + if (defined $cd) { + chdir $cd or die "chdir $cd failed: $!\n"; + } + exec @cmd; + die "Failed to exec $cmd: $!\n"; + } else { + my $dir = eval { + local $/; + <$fh>; + }; + close $fh or die "error in $cmd: $!\n"; + chomp $dir; + $$ver = 1 if $ver; + return abs_path($cd) if ($dir eq '.' && defined $cd); + abs_path($dir); + } +} + +1; diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index fd4255cf..7e596abc 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -495,7 +495,7 @@ sub purge_oids { my $old = $self->{'ref'}; my $git = $self->{git}; my @export = (qw(fast-export --no-data --use-done-feature), $old); - my ($rd, $pid) = $git->popen(@export); + my $rd = $git->popen(@export); my ($r, $w) = $self->gfi_start; my @buf; my $npurge = 0; @@ -550,6 +550,7 @@ sub purge_oids { push @buf, $_; } } + close $rd or die "close fast-export failed: $?"; if (@buf) { $w->print(@buf) or wfail; } diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 222df5c2..1f17fe21 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -285,10 +285,19 @@ sub purge_oids { $self->done; my $pfx = "$self->{-inbox}->{mainrepo}/git"; my $purges = []; - foreach my $i (0..$self->{epoch_max}) { - my $git = PublicInbox::Git->new("$pfx/$i.git"); + my $max = $self->{epoch_max}; + + unless (defined($max)) { + defined(my $latest = git_dir_latest($self, \$max)) or return; + $self->{epoch_max} = $max; + } + foreach my $i (0..$max) { + my $git_dir = "$pfx/$i.git"; + -d $git_dir or next; + my $git = PublicInbox::Git->new($git_dir); my $im = $self->import_init($git, 0, 1); $purges->[$i] = $im->purge_oids($purge); + $im->done; } $purges; } @@ -390,7 +399,7 @@ sub purge { my ($self, $mime) = @_; my $purges = $self->{-inbox}->with_umask(sub { remove_internal($self, $mime, undef, {}); - }); + }) or return; $self->idx_init if @$purges; # ->done is called on purges for my $i (0..$#$purges) { defined(my $cmt = $purges->[$i]) or next; @@ -497,6 +506,7 @@ sub done { delete $self->{bnote}; $self->{transact_bytes} = 0; $self->lock_release if $parts; + $self->{-inbox}->git->cleanup; } sub git_init { diff --git a/script/public-inbox-index b/script/public-inbox-index index 2ae92757..5adb6e74 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -9,9 +9,10 @@ use strict; use warnings; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); -use Cwd 'abs_path'; my $usage = "public-inbox-index REPO_DIR"; use PublicInbox::Config; +use PublicInbox::Admin qw(resolve_repo_dir); + my $config = eval { PublicInbox::Config->new } || eval { warn "public-inbox unconfigured for serving, indexing anyways...\n"; undef; @@ -35,35 +36,6 @@ die "--jobs must be positive\n" if defined $jobs && $jobs < 0; my @dirs; -sub resolve_repo_dir { - my ($cd) = @_; - my $prefix = defined $cd ? $cd : './'; - if (-d $prefix && -f "$prefix/inbox.lock") { # v2 - return abs_path($prefix); - } - - my @cmd = qw(git rev-parse --git-dir); - my $cmd = join(' ', @cmd); - my $pid = open my $fh, '-|'; - defined $pid or die "forking $cmd failed: $!\n"; - if ($pid == 0) { - if (defined $cd) { - chdir $cd or die "chdir $cd failed: $!\n"; - } - exec @cmd; - die "Failed to exec $cmd: $!\n"; - } else { - my $dir = eval { - local $/; - <$fh>; - }; - close $fh or die "error in $cmd: $!\n"; - chomp $dir; - return abs_path($cd) if ($dir eq '.' && defined $cd); - abs_path($dir); - } -} - if (@ARGV) { @dirs = map { resolve_repo_dir($_) } @ARGV; } else { diff --git a/script/public-inbox-purge b/script/public-inbox-purge new file mode 100755 index 00000000..688dd950 --- /dev/null +++ b/script/public-inbox-purge @@ -0,0 +1,111 @@ +#!/usr/bin/perl -w +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ +# +# Used for purging messages entirely from a public-inbox. Currently +# supports v2 inboxes only, for now. +use strict; +use warnings; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +use PublicInbox::Config; +use PublicInbox::MIME; +use PublicInbox::Admin qw(resolve_repo_dir); +use PublicInbox::Filter::Base; +*REJECT = *PublicInbox::Filter::Base::REJECT; + +my $usage = "$0 [--all] [INBOX_DIRS] new }; +my $cfgfile = PublicInbox::Config::default_file(); +my ($all, $force); +my $verbose = 1; +my %opts = ( + 'all' => \$all, + 'force|f' => \$force, + 'verbose|v!' => \$verbose, +); +GetOptions(%opts) or die "bad command-line args\n", $usage, "\n"; + +# TODO: clean this up and share code with -index via ::Admin +my %dir2ibx; # ( path => Inbox object ) +my @inboxes; +$config and $config->each_inbox(sub { + my ($ibx) = @_; + push @inboxes, $ibx if $all && $ibx->{version} != 1; + $dir2ibx{$ibx->{mainrepo}} = $ibx; +}); + +if ($all) { + $config or die "--all specified, but $cfgfile not readable\n"; + @ARGV and die "--all specified, but directories specified\n"; +} else { + my @err; + my @dirs = scalar(@ARGV) ? @ARGV : ('.'); + my $u = 0; + + foreach my $dir (@dirs) { + my $v; + my $dir = resolve_repo_dir($dir, \$v); + if ($v == 1) { + push @err, $dir; + next; + } + my $ibx = $dir2ibx{$dir} ||= do { + warn "$dir not configured in $cfgfile\n"; + $u++; + my $name = "unconfigured-$u"; + PublicInbox::Inbox->new({ + version => 2, + name => $name, + -primary_address => "$name\@example.com", + mainrepo => $dir, + }); + }; + push @inboxes, $ibx; + } + + if (@err) { + die "v1 inboxes currently not supported by -purge\n\t", + join("\n\t", @err), "\n"; + } +} + +my $data = do { local $/; scalar }; +$data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; +my $n_purged = 0; + +foreach my $ibx (@inboxes) { + my $mime = PublicInbox::MIME->new($data); + my $v2w = PublicInbox::V2Writable->new($ibx, 0); + + my $commits = $v2w->purge($mime) || []; + + if (my $scrub = $ibx->filter($v2w)) { + my $scrubbed = $scrub->scrub($mime, 1); + + if ($scrubbed && $scrubbed != REJECT()) { + my $scrub_commits = $v2w->purge($scrubbed); + push @$commits, @$scrub_commits if $scrub_commits; + } + } + + $v2w->done; + + if ($verbose) { # should we consider this machine-parseable? + print "$ibx->{mainrepo}:"; + if (scalar @$commits) { + print join("\n\t", '', @$commits), "\n"; + } else { + print " NONE\n"; + } + } + $n_purged += scalar @$commits; +} + +# behave like "rm -f" +exit(0) if ($force || $n_purged); + +warn "Not found\n" if $verbose; +exit(1); diff --git a/t/admin.t b/t/admin.t new file mode 100644 index 00000000..cc1e65d1 --- /dev/null +++ b/t/admin.t @@ -0,0 +1,81 @@ +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use File::Temp qw(tempdir); +# use Cwd qw(getcwd); +use_ok 'PublicInbox::Admin', qw(resolve_repo_dir); +my $tmpdir = tempdir('pi-admin.XXXXXX', TMPDIR => 1, CLEANUP => 1); +my $git_dir = "$tmpdir/v1"; +my $v2_dir = "$tmpdir/v2"; +my ($res, $err, $v); + +is(0, system(qw(git init -q --bare), $git_dir), 'git init v1'); + +# v1 +is(resolve_repo_dir($git_dir), $git_dir, 'top-level GIT_DIR resolved'); +is(resolve_repo_dir("$git_dir/objects"), $git_dir, 'GIT_DIR/objects resolved'); + +ok(chdir($git_dir), 'chdir GIT_DIR works'); +is(resolve_repo_dir(), $git_dir, 'resolve_repo_dir works in GIT_DIR'); + +ok(chdir("$git_dir/objects"), 'chdir GIT_DIR/objects works'); +is(resolve_repo_dir(), $git_dir, 'resolve_repo_dir works in GIT_DIR'); +$res = resolve_repo_dir(undef, \$v); +is($v, 1, 'version 1 detected'); +is($res, $git_dir, 'detects directory along with version'); + +# $tmpdir could be inside a git working, directory, so we test '/' +SKIP: { + my $no_vcs_dir = '/'; + # do people version-control "/"? + skip "$no_vcs_dir is version controlled by git", 4 if -d '/.git'; + open my $null, '>', '/dev/null' or die "open /dev/null: $!"; + open my $olderr, '>&', \*STDERR or die "dup stderr: $!"; + + ok(chdir($no_vcs_dir), 'chdir to a non-inbox'); + open STDERR, '>&', $null or die "redirect stderr to /dev/null: $!"; + $res = eval { resolve_repo_dir() }; + open STDERR, '>&', $olderr or die "restore stderr: $!"; + is($res, undef, 'fails inside non-version-controlled dir'); + + ok(chdir($tmpdir), 'back to test-specific $tmpdir'); + open STDERR, '>&', $null or die "redirect stderr to /dev/null: $!"; + $res = eval { resolve_repo_dir($no_vcs_dir) }; + $err = $@; + open STDERR, '>&', $olderr or die "restore stderr: $!"; + is($res, undef, 'fails on non-version-controlled dir'); + ok($err, '$@ set on failure'); +} + +# v2 +SKIP: { + for my $m (qw(DBD::SQLite Search::Xapian)) { + skip "$m missing", 5 unless eval "require $m"; + } + use_ok 'PublicInbox::V2Writable'; + use_ok 'PublicInbox::Inbox'; + my $ibx = PublicInbox::Inbox->new({ + mainrepo => $v2_dir, + name => 'test-v2writable', + version => 2, + -primary_address => 'test@example.com', + indexlevel => 'basic', + }); + PublicInbox::V2Writable->new($ibx, 1)->idx_init; + + ok(-e "$v2_dir/inbox.lock", 'exists'); + is(resolve_repo_dir($v2_dir), $v2_dir, + 'resolve_repo_dir works on v2_dir'); + ok(chdir($v2_dir), 'chdir v2_dir OK'); + is(resolve_repo_dir(), $v2_dir, 'resolve_repo_dir works inside v2_dir'); + $res = resolve_repo_dir(undef, \$v); + is($v, 2, 'version 2 detected'); + is($res, $v2_dir, 'detects directory along with version'); + + # TODO: should work from inside Xapian dirs, and git dirs, here... +} + +chdir '/'; +done_testing(); diff --git a/t/purge.t b/t/purge.t new file mode 100644 index 00000000..94060055 --- /dev/null +++ b/t/purge.t @@ -0,0 +1,97 @@ +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use File::Temp qw/tempdir/; +require './t/common.perl'; +require_git(2.6); +my @mods = qw(IPC::Run DBI DBD::SQLite Search::Xapian); +foreach my $mod (@mods) { + eval "require $mod"; + plan skip_all => "missing $_ for t/purge.t" if $@; +}; +use Cwd qw(abs_path); +my $purge = abs_path('blib/script/public-inbox-purge'); +my $tmpdir = tempdir('pi-purge-XXXXXX', TMPDIR => 1, CLEANUP => 1); +use_ok 'PublicInbox::V2Writable'; +my $mainrepo = "$tmpdir/v2"; +my $ibx = PublicInbox::Inbox->new({ + mainrepo => $mainrepo, + name => 'test-v2purge', + version => 2, + -primary_address => 'test@example.com', + indexlevel => 'basic', +}); + +my $raw = <<'EOF'; +From: a@example.com +To: test@example.com +Subject: this is a subject +Message-ID: +Date: Fri, 02 Oct 1993 00:00:00 +0000 + +Hello World + +EOF + +local $ENV{NPROC} = '1'; +my $cfgfile = "$tmpdir/config"; +local $ENV{PI_CONFIG} = $cfgfile; +open my $cfg_fh, '>', $cfgfile or die "open: $!"; + +my $v2w = PublicInbox::V2Writable->new($ibx, 1); +my $mime = PublicInbox::MIME->new($raw); +ok($v2w->add($mime), 'add message to be purged'); +$v2w->done; + +# failing cases, first: +my $in = "$raw\nMOAR\n"; +my ($out, $err) = ('', ''); +ok(IPC::Run::run([$purge, '-f', $mainrepo], \$in, \$out, \$err), + 'purge -f OK'); + +$out = $err = ''; +ok(!IPC::Run::run([$purge, $mainrepo], \$in, \$out, \$err), + 'mismatch fails without -f'); +is($? >> 8, 1, 'missed purge exits with 1'); + +# a successful case: +ok(IPC::Run::run([$purge, $mainrepo], \$raw, \$out, \$err), 'match OK'); +like($out, qr/^\t[a-f0-9]{40,}/m, 'removed commit noted'); + +# add (old) vger filter to config file +print $cfg_fh <add($mime), 'add vger-signatured message to be purged'); +$v2w->done; + +my $pre_scrub = $raw . <<'EOF'; + +-- +To unsubscribe from this list: send the line "unsubscribe linux-kernel" in +the body of a message to majordomo@vger.kernel.org +More majordomo info at http://vger.kernel.org/majordomo-info.html +Please read the FAQ at http://www.tux.org/lkml/ +EOF + +$out = $err = ''; +ok(chdir('/'), "chdir / OK for --all test"); +ok(IPC::Run::run([$purge, '--all'], \$pre_scrub, \$out, \$err), + 'scrub purge OK'); +like($out, qr/^\t[a-f0-9]{40,}/m, 'removed commit noted'); +# diag "out: $out"; diag "err: $err"; + +$out = $err = ''; +ok(!IPC::Run::run([$purge, '--all' ], \$pre_scrub, \$out, \$err), + 'scrub purge not idempotent without -f'); +# diag "out: $out"; diag "err: $err"; + +done_testing(); diff --git a/t/v2writable.t b/t/v2writable.t index ec9f56d9..f1714175 100644 --- a/t/v2writable.t +++ b/t/v2writable.t @@ -247,6 +247,9 @@ EOF ok(my $cmts = $im->purge($mime), 'purged message'); like($cmts->[0], qr/\A[a-f0-9]{40}\z/, 'purge returned current commit'); $im->done; + + # again + is($im->purge($mime), undef, 'no-op returns undef'); } {