X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FImport.pm;h=29c482f9b4339b09089421723f3991b24e57f553;hb=cf35d38e7f845393659dfce0249a76d529a2c92c;hp=b25427ee552628237bc094225f2daf24e98dcdac;hpb=cfb8d16578e7f2f2e300f9f436205e4a8fc7f322;p=public-inbox.git diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index b25427ee..29c482f9 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2016-2018 all contributors +# Copyright (C) 2016-2019 all contributors # License: AGPL-3.0+ # # git fast-import-based ssoma-mda MDA replacement @@ -278,10 +278,12 @@ sub extract_author_info ($) { my $sender = ''; my $from = $mime->header('From'); + $from ||= ''; my ($email) = PublicInbox::Address::emails($from); my ($name) = PublicInbox::Address::names($from); if (!defined($name) || !defined($email)) { $sender = $mime->header('Sender'); + $sender ||= ''; if (!defined($name)) { ($name) = PublicInbox::Address::names($sender); } @@ -405,6 +407,8 @@ sub add { print $w "reset $ref\n" or wfail; } + # Mime decoding can create nulls replace them with spaces to protect git + $subject =~ tr/\0/ /; utf8::encode($subject); print $w "commit $ref\nmark :$commit\n", "author $name <$email> $author_time_raw\n", @@ -440,11 +444,14 @@ sub done { _update_git_info($self, 1) if delete $self->{nchg}; $self->lock_release; + + $self->{git}->cleanup; } sub atfork_child { my ($self) = @_; foreach my $f (qw(in out)) { + next unless defined($self->{$f}); close $self->{$f} or die "failed to close import[$f]: $!\n"; } } @@ -512,11 +519,12 @@ sub purge_oids { push @buf, $buf; } elsif (/^M 100644 ([a-f0-9]+) (\w+)/) { my ($oid, $path) = ($1, $2); + $tree->{$path} = 1; if ($purge->{$oid}) { push @oids, $oid; - delete $tree->{$path}; + my $cmd = "M 100644 inline $path\ndata 0\n\n"; + push @buf, $cmd; } else { - $tree->{$path} = 1; push @buf, $_; } } elsif (/^D (\w+)/) { @@ -627,8 +635,8 @@ version 1.0 =head1 DESCRIPTION An 
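importer and remover for public-inboxes which takes L<Email::MIME> -messages as input and stores them in a ssoma repository as -documented in L<https://ssoma.public-inbox.org/ssoma_repository.txt>, +messages as input and stores them in a git repository as +documented in L<https://public-inbox.org/public-inbox-v1-format.txt>, except it does not allow duplicate Message-IDs. It requires L<git(1)> and L<git-fast-import(1)> to be installed.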