From: Eric Wong Date: Fri, 14 Jun 2019 16:23:00 +0000 (+0000) Subject: Merge remote-tracking branch 'origin/edit' into next X-Git-Tag: v1.2.0~179 X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=commitdiff_plain;h=d9aad3a7988262b31ab2171f2c982daf04ac822c;hp=3fd2143efc1c96f0000af2ae7a6dee0990337fc4 Merge remote-tracking branch 'origin/edit' into next * origin/edit: edit: unlink temporary file when done v2writable: replace: kill git processes before reindexing edit: drop unwanted headers before noop check edit|purge: improve output on rewrites edit: new tool to perform edits doc: document the --prune option for -index admin: expose ->config AdminEdit: move editability checks from -purge admin: beef up resolve_inboxes to handle purge options purge: start moving common options to AdminEdit module admin: remove warning arg for unconfigured inboxes v2writable: implement ->replace call import: switch to "replace_oids" interface for purge import: extract_author_info becomes extract_commit_info v2writable: consolidate overview and indexing call --- diff --git a/Documentation/include.mk b/Documentation/include.mk index f5f46d0b..8501adc8 100644 --- a/Documentation/include.mk +++ b/Documentation/include.mk @@ -32,6 +32,7 @@ podtext = $(PODTEXT) $(PODTEXT_OPTS) # MakeMaker only seems to support manpage sections 1 and 3... m1 = m1 += public-inbox-compact +m1 += public-inbox-convert m1 += public-inbox-edit m1 += public-inbox-httpd m1 += public-inbox-index @@ -151,7 +152,7 @@ rsync-doc: $(RSYNC) --chmod=Fugo=r -av $(rsync_docs) $(rsync_xdocs) $(RSYNC_DEST) clean-doc: - $(RM) $(man1) $(man5) $(man7) $(gz_docs) $(docs_html) $(mantxt) + $(RM) $(man1) $(man5) $(man7) $(man8) $(gz_docs) $(docs_html) $(mantxt) $(RM) $(gz_xdocs) $(xdocs_html) $(xdocs) clean :: clean-doc diff --git a/Documentation/public-inbox-index.pod b/Documentation/public-inbox-index.pod index 7679376c..610dacbe 100644 --- a/Documentation/public-inbox-index.pod +++ b/Documentation/public-inbox-index.pod @@ -59,8 +59,6 @@ C<$GIT_DIR/public-inbox/> directory. v2 repositories are described in L. -=back - =head1 ENVIRONMENT =over 8 diff --git a/INSTALL b/INSTALL index 0246299b..a661c776 100644 --- a/INSTALL +++ b/INSTALL @@ -36,6 +36,9 @@ Beyond that, there is a long list of Perl modules required, starting with: pkg: p5-TimeDate rpm: perl-TimeDate +* Digest::SHA typically installed with Perl + rpm: perl-Digest-SHA + * Email::MIME deb: libemail-mime-perl pkg: p5-Email-MIME rpm: perl-Email-MIME @@ -132,8 +135,8 @@ above, so there is no need to explicitly install them: (for public-inbox-watch, pulled in by Plack) - IO::Compress::Gzip deb: perl-modules (or libio-compress-perl) - rpm: perl-PerlIO-gzip pkg: perl5 + rpm: perl-IO-Compress (for gzipped mbox over HTTP) Uncommonly needed modules: @@ -156,18 +159,25 @@ Optional packages testing and development: pkg: p5-IPC-Run rpm: perl-IPC-Run +- Plack::Test deb: libplack-test-perl + pkg: p5-Plack + rpm: perl-Plack-Test + +- Test::Simple deb: perl-modules-5.$MINOR + pkg: perl5 + rpm: perl-Test-Simple + - XML::Feed deb: libxml-feed-perl pkg: p5-XML-Feed rpm: perl-XML-Feed -- Test::HTTP::Server::Simple deb: libtest-http-server-simple-perl - pkg: p5-Test-HTTP-Server-Simple - rpm: perl-Test-HTTP-Server-Simple - (XXX is this really needed?) - standard MakeMaker installation (Perl) -------------------------------------- +To use MakeMaker, you need to ensure ExtUtils::MakeMaker is available. +This is typically installed with Perl, but RPM-based systems will likely +need to install the `perl-ExtUtils-MakeMaker' package. + Once the dependencies are installed, you should be able to build and install the system (into /usr/local) with: diff --git a/Makefile.PL b/Makefile.PL index b1274ad1..113f8c77 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -28,6 +28,11 @@ WriteMakefile( # We also depend on git. # Keep this sorted and synced to the INSTALL document 'Date::Parse' => 0, + + # libperl$PERL_VERSION, + # `perl5' on FreeBSD + # perl-Digest-SHA on RH-based + 'Digest::SHA' => 0, 'Email::MIME' => 0, # the following should be pulled in by Email::MIME: @@ -44,6 +49,14 @@ WriteMakefile( # We have more test dependencies, but do not force # users to install them. See INSTALL + + # All Perl installs I know about have these, but RH-based + # distros make them separate even though 'perl' pulls them in + 'File::Path' => 0, + 'File::Temp' => 0, + 'Getopt::Long' => 0, + 'Exporter' => 0, + # ExtUtils::MakeMaker # this file won't run w/o it... }, MAN3PODS => \%man3, ); @@ -57,14 +70,17 @@ N = \$\$(( \$\$(nproc 2>/dev/null || gnproc 2>/dev/null || echo 2) + 1 )) -include config.mak -include Documentation/include.mk SCRIPTS := scripts/ssoma-replay -my_syntax := \$(addsuffix .syntax, $PM_FILES \$(EXE_FILES) \$(SCRIPTS)) - +syn_files := $PM_FILES \$(EXE_FILES) \$(SCRIPTS) +my_syntax := \$(addsuffix .syntax, \$(syn_files)) +changed = \$(shell git ls-files -m) %.syntax :: - @\$(PERL) -I lib -c \$(subst .syntax,,\$@) + @\$(PERL) -w -I lib -c \$(subst .syntax,,\$@) syntax:: \$(my_syntax) +dsyn :: \$(addsuffix .syntax, \$(filter \$(changed), \$(syn_files))) + check-manifest :: MANIFEST if git ls-files >\$?.gen 2>&1; then diff -u \$? \$?.gen; fi diff --git a/ci/deps.perl b/ci/deps.perl index faca4590..62870c1f 100755 --- a/ci/deps.perl +++ b/ci/deps.perl @@ -9,9 +9,11 @@ my $usage = "$0 PKG_FMT PROFILE [PROFILE_MOD]"; my $pkg_fmt = shift; @ARGV or die $usage, "\n"; +my @test_essential = qw(Test::Simple Plack::Test); + # package profiles my $profiles = { - # the smallest possible profile + # the smallest possible profile for testing # TODO: trim this, Plack pulls in Filesys::Notify::Simple, # and we don't need that for mda-only installs essential => [ qw( @@ -19,14 +21,16 @@ my $profiles = { perl Date::Parse Devel::Peek + Digest::SHA Email::Simple Email::MIME Email::MIME::ContentType Encode + ExtUtils::MakeMaker Filesys::Notify::Simple Plack URI::Escape - ) ], + ), @test_essential ], # everything optional for normal use optional => [ qw( @@ -44,10 +48,9 @@ my $profiles = { xapian-compact ) ], - # developer stuff + # optional developer stuff devtest => [ qw( IPC::Run - Test::HTTP::Server::Simple XML::Feed curl w3m @@ -86,22 +89,41 @@ my $non_auto = { deb => 'perl', # libperl5.XX, but the XX varies pkg => 'perl5', }, + 'Digest::SHA' => { + deb => 'perl', # libperl5.XX, but the XX varies + pkg => 'perl5', + }, 'Encode' => { deb => 'perl', # libperl5.XX, but the XX varies pkg => 'perl5', rpm => 'perl-Encode', }, + 'ExtUtils::MakeMaker' => { + deb => 'perl', # perl-modules-5.xx + pkg => 'perl5', + rpm => 'perl-ExtUtils-MakeMaker', + }, 'IO::Compress::Gzip' => { deb => 'perl', # perl-modules-5.xx pkg => 'perl5', - rpm => 'perl-PerlIO-gzip', + rpm => 'perl-IO-Compress', }, 'DBD::SQLite' => { deb => 'libdbd-sqlite3-perl' }, + 'Plack::Test' => { + deb => 'libplack-perl', + pkg => 'p5-Plack', + rpm => 'perl-Plack-Test', + }, 'URI::Escape' => { deb => 'liburi-perl', pkg => 'p5-URI', rpm => 'perl-URI', }, + 'Test::Simple' => { + deb => 'perl', # perl-modules-5.XX, but the XX varies + pkg => 'perl5', + rpm => 'perl-Test-Simple', + }, 'highlight.pm' => { deb => 'libhighlight-perl', pkg => [], diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index 68445b3c..6a87661c 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -145,41 +145,24 @@ again: fail($self, "Unexpected result from git cat-file: $head"); my $size = $1; - my $ref_type = $ref ? ref($ref) : ''; - my $rv; my $left = $size; - $$ref = $size if ($ref_type eq 'SCALAR'); - my $cb_err; - - if ($ref_type eq 'CODE') { - $rv = eval { $ref->($in, \$left) }; - $cb_err = $@; - # drain the rest - my $max = 8192; - while ($left > 0) { - my $r = read($in, my $x, $left > $max ? $max : $left); - defined($r) or fail($self, "read failed: $!"); - $r == 0 and fail($self, 'exited unexpectedly'); - $left -= $r; - } - } else { - my $offset = 0; - my $buf = ''; - while ($left > 0) { - my $r = read($in, $buf, $left, $offset); - defined($r) or fail($self, "read failed: $!"); - $r == 0 and fail($self, 'exited unexpectedly'); - $left -= $r; - $offset += $r; - } - $rv = \$buf; + $$ref = $size if $ref; + + my $offset = 0; + my $buf = ''; + while ($left > 0) { + my $r = read($in, $buf, $left, $offset); + defined($r) or fail($self, "read failed: $!"); + $r == 0 and fail($self, 'exited unexpectedly'); + $left -= $r; + $offset += $r; } + $rv = \$buf; - my $r = read($in, my $buf, 1); + my $r = read($in, my $lf, 1); defined($r) or fail($self, "read failed: $!"); - fail($self, 'newline missing after blob') if ($r != 1 || $buf ne "\n"); - die $cb_err if $cb_err; + fail($self, 'newline missing after blob') if ($r != 1 || $lf ne "\n"); $rv; } diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index be80560f..8a31b910 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -434,6 +434,26 @@ sub xref ($$$$) { sub set_nntp_headers ($$$$$) { my ($self, $hdr, $ng, $n, $mid) = @_; + # why? leafnode requires a Path: header for some inexplicable + # reason. We'll fake the shortest one possible. + $hdr->header_set('Path', 'y'); + + # leafnode (and maybe other NNTP clients) have trouble dealing + # with v2 messages which have multiple Message-IDs (either due + # to our own content-based dedupe or buggy git-send-email versions). + my @mids = $hdr->header('Message-ID'); + if (scalar(@mids) > 1) { + my $mid0 = "<$mid>"; + $hdr->header_set('Message-ID', $mid0); + my @alt = $hdr->header('X-Alt-Message-ID'); + my %seen = map { $_ => 1 } (@alt, $mid0); + foreach my $m (@mids) { + next if $seen{$m}++; + push @alt, $m; + } + $hdr->header_set('X-Alt-Message-ID', @alt); + } + # clobber some my $xref = xref($self, $ng, $n, $mid); $hdr->header_set('Xref', $xref); @@ -515,6 +535,13 @@ sub _header ($) { my $hdr = $_[0]->header_obj->as_string; utf8::encode($hdr); $hdr =~ s/(?{xdb} = Search::Xapian::WritableDatabase->new($dir, $flag); + my $xdb = eval { Search::Xapian::WritableDatabase->new($dir, $flag) }; + if ($@) { + die "Failed opening $dir: ", $@; + } + $self->{xdb} = $xdb; } sub add_val ($$$) { diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm index 5f3c8af8..96a26b15 100644 --- a/lib/PublicInbox/SearchMsg.pm +++ b/lib/PublicInbox/SearchMsg.pm @@ -25,12 +25,6 @@ sub wrap { bless { mid => $mid }, $class; } -sub get { - my ($class, $head, $db, $mid) = @_; - my $doc_id = $head->get_docid; - load_expand(wrap($class, $mid), $db->get_document($doc_id)); -} - sub get_val ($$) { my ($doc, $col) = @_; Search::Xapian::sortable_unserialise($doc->get_value($col)); diff --git a/t/common.perl b/t/common.perl index e49a5965..5a898e32 100644 --- a/t/common.perl +++ b/t/common.perl @@ -3,6 +3,8 @@ use Fcntl qw(FD_CLOEXEC F_SETFD F_GETFD); use POSIX qw(dup2); +use strict; +use warnings; sub stream_to_string { my ($res) = @_; @@ -48,7 +50,7 @@ sub require_git ($;$) { my $cur_int = ($cur_maj << 24) | ($cur_min << 16); if ($cur_int < $req_int) { return 0 if $maybe; - plan skip_all => "git $req+ required, have $git_ver"; + plan skip_all => "git $req+ required, have $cur_maj.$cur_min"; } 1; } diff --git a/t/git.t b/t/git.t index 913f6e5e..9bc8900c 100644 --- a/t/git.t +++ b/t/git.t @@ -33,33 +33,7 @@ use_ok 'PublicInbox::Git'; my $raw = $gcf->cat_file($f); is($x[2], length($$raw), 'length matches'); - { - my $size; - my $rv = $gcf->cat_file($f, sub { - my ($in, $left) = @_; - $size = $$left; - 'nothing' - }); - is($rv, 'nothing', 'returned from callback without reading'); - is($size, $x[2], 'set size for callback correctly'); - } - - eval { $gcf->cat_file($f, sub { die 'OMG' }) }; - like($@, qr/\bOMG\b/, 'died in callback propagated'); is(${$gcf->cat_file($f)}, $$raw, 'not broken after failures'); - - { - my ($buf, $r); - my $rv = $gcf->cat_file($f, sub { - my ($in, $left) = @_; - $r = read($in, $buf, 2); - $$left -= $r; - 'blah' - }); - is($r, 2, 'only read 2 bytes'); - is($buf, '--', 'partial read succeeded'); - is($rv, 'blah', 'return value propagated'); - } is(${$gcf->cat_file($f)}, $$raw, 'not broken after partial read'); } @@ -79,44 +53,12 @@ if (1) { my $gcf = PublicInbox::Git->new($dir); my $rsize; - is($gcf->cat_file($buf, sub { - $rsize = ${$_[1]}; - 'x'; - }), 'x', 'checked input'); - is($rsize, $size, 'got correct size on big file'); - my $x = $gcf->cat_file($buf, \$rsize); is($rsize, $size, 'got correct size ref on big file'); is(length($$x), $size, 'read correct number of bytes'); - my $rline; - $gcf->cat_file($buf, sub { - my ($in, $left) = @_; - $rline = <$in>; - $$left -= length($rline); - }); - { - open my $fh, '<', $big_data or die "open failed: $!\n"; - is($rline, <$fh>, 'first line matches'); - }; - - my $all; - $gcf->cat_file($buf, sub { - my ($in, $left) = @_; - my $x = read($in, $all, $$left); - $$left -= $x; - }); - { - open my $fh, '<', $big_data or die "open failed: $!\n"; - local $/; - is($all, <$fh>, 'entire read matches'); - }; - my $ref = $gcf->qx(qw(cat-file blob), $buf); - is($all, $ref, 'qx read giant single string'); - my @ref = $gcf->qx(qw(cat-file blob), $buf); - is($all, join('', @ref), 'qx returned array when wanted'); my $nl = scalar @ref; ok($nl > 1, "qx returned array length of $nl"); diff --git a/t/nntpd.t b/t/nntpd.t index c73cc122..a95fb6fc 100644 --- a/t/nntpd.t +++ b/t/nntpd.t @@ -231,6 +231,43 @@ EOF ok($date >= $t0, 'valid date after start'); ok($date <= $t1, 'valid date before stop'); } + if ('leafnode interop') { + my $for_leafnode = PublicInbox::MIME->new(<<""); +From: longheader\@example.com +To: $addr +Subject: none +Date: Fri, 02 Oct 1993 00:00:00 +0000 + + my $long_hdr = 'for-leafnode-'.('y'x200).'@example.com'; + $for_leafnode->header_set('Message-ID', "<$long_hdr>"); + $im->add($for_leafnode); + $im->done; + if ($version == 1) { + my $s = PublicInbox::SearchIdx->new($mainrepo, 1); + $s->index_sync; + } + my $hdr = $n->head("<$long_hdr>"); + my $expect = qr/\AMessage-ID: /i . qr/\Q<$long_hdr>\E/; + ok(scalar(grep(/$expect/, @$hdr)), 'Message-ID not folded'); + ok(scalar(grep(/^Path:/, @$hdr)), 'Path: header found'); + + # it's possible for v2 messages to have 2+ Message-IDs, + # but leafnode can't handle it + if ($version != 1) { + my @mids = ("<$long_hdr>", '<2mid@wtf>'); + $for_leafnode->header_set('Message-ID', @mids); + $for_leafnode->body_set('not-a-dupe'); + my $warn = ''; + $SIG{__WARN__} = sub { $warn .= join('', @_) }; + $im->add($for_leafnode); + $im->done; + like($warn, qr/reused/, 'warned for reused MID'); + $hdr = $n->head('<2mid@wtf>'); + my @hmids = grep(/\AMessage-ID: /i, @$hdr); + is(scalar(@hmids), 1, 'Single Message-ID in header'); + like($hmids[0], qr/: <2mid\@wtf>/, 'got expected mid'); + } + } # pipelined requests: {