X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FImport.pm;h=fc4e72dc93ea0ce978a93f5224c74e5b8178acc3;hb=6e6f7999361925e4c944f308df4bc32a1842cd69;hp=c7a96e1eabcc1214e6cdc61572ba6ed70db64750;hpb=69329215485cf2ab9d8cd1fa7faf65d8ec42dc0b;p=public-inbox.git diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index c7a96e1e..fc4e72dc 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -1,9 +1,10 @@ -# Copyright (C) 2016-2018 all contributors +# Copyright (C) 2016-2019 all contributors # License: AGPL-3.0+ # # git fast-import-based ssoma-mda MDA replacement -# This is only ever run by public-inbox-mda and public-inbox-learn, -# not the WWW or NNTP code which only requires read-only access. +# This is only ever run by public-inbox-mda, public-inbox-learn +# and public-inbox-watch. Not the WWW or NNTP code which only +# requires read-only access. package PublicInbox::Import; use strict; use warnings; @@ -117,6 +118,9 @@ sub _cat_blob ($$$) { $n = read($r, my $lf, 1); defined($n) or die "read final byte of cat-blob failed: $!"; die "bad read on final byte: <$lf>" if $lf ne "\n"; + + # fixup some bugginess in old versions: + $buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; \$buf; } @@ -174,8 +178,8 @@ sub _update_git_info ($$) { run_die([@cmd, 'update-server-info'], undef); ($self->{path_type} eq '2/38') and eval { require PublicInbox::SearchIdx; - my $inbox = $self->{inbox} || $git_dir; - my $s = PublicInbox::SearchIdx->new($inbox); + my $ibx = $self->{inbox} || $git_dir; + my $s = PublicInbox::SearchIdx->new($ibx); $s->index_sync({ ref => $self->{ref} }); }; eval { run_die([@cmd, qw(gc --auto)], undef) } if $do_gc; @@ -275,10 +279,12 @@ sub extract_author_info ($) { my $sender = ''; my $from = $mime->header('From'); + $from ||= ''; my ($email) = PublicInbox::Address::emails($from); my ($name) = PublicInbox::Address::names($from); if (!defined($name) || !defined($email)) { $sender = $mime->header('Sender'); + $sender ||= ''; if (!defined($name)) { ($name) = PublicInbox::Address::names($sender); } @@ -402,6 +408,8 @@ sub add { print $w "reset $ref\n" or wfail; } + # Mime decoding can create nulls replace them with spaces to protect git + $subject =~ tr/\0/ /; utf8::encode($subject); print $w "commit $ref\nmark :$commit\n", "author $name <$email> $author_time_raw\n", @@ -437,11 +445,14 @@ sub done { _update_git_info($self, 1) if delete $self->{nchg}; $self->lock_release; + + $self->{git}->cleanup; } sub atfork_child { my ($self) = @_; foreach my $f (qw(in out)) { + next unless defined($self->{$f}); close $self->{$f} or die "failed to close import[$f]: $!\n"; } } @@ -484,7 +495,7 @@ sub purge_oids { my $old = $self->{'ref'}; my $git = $self->{git}; my @export = (qw(fast-export --no-data --use-done-feature), $old); - my ($rd, $pid) = $git->popen(@export); + my $rd = $git->popen(@export); my ($r, $w) = $self->gfi_start; my @buf; my $npurge = 0; @@ -509,11 +520,12 @@ sub purge_oids { push @buf, $buf; } elsif (/^M 100644 ([a-f0-9]+) (\w+)/) { my ($oid, $path) = ($1, $2); + $tree->{$path} = 1; if ($purge->{$oid}) { push @oids, $oid; - delete $tree->{$path}; + my $cmd = "M 100644 inline $path\ndata 0\n\n"; + push @buf, $cmd; } else { - $tree->{$path} = 1; push @buf, $_; } } elsif (/^D (\w+)/) { @@ -538,6 +550,7 @@ sub purge_oids { push @buf, $_; } } + close $rd or die "close fast-export failed: $?"; if (@buf) { $w->print(@buf) or wfail; } @@ -624,8 +637,8 @@ version 1.0 =head1 DESCRIPTION An importer and remover for public-inboxes which takes L -messages as input and stores them in a ssoma repository as -documented in L, +messages as input and stores them in a git repository as +documented in L, except it does not allow duplicate Message-IDs. It requires L and L to be installed.