X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FImport.pm;h=fc4e72dc93ea0ce978a93f5224c74e5b8178acc3;hb=6e6f7999361925e4c944f308df4bc32a1842cd69;hp=c7a96e1eabcc1214e6cdc61572ba6ed70db64750;hpb=69329215485cf2ab9d8cd1fa7faf65d8ec42dc0b;p=public-inbox.git
diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index c7a96e1e..fc4e72dc 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -1,9 +1,10 @@
-# Copyright (C) 2016-2018 all contributors
+# Copyright (C) 2016-2019 all contributors
# License: AGPL-3.0+
#
# git fast-import-based ssoma-mda MDA replacement
-# This is only ever run by public-inbox-mda and public-inbox-learn,
-# not the WWW or NNTP code which only requires read-only access.
+# This is only ever run by public-inbox-mda, public-inbox-learn
+# and public-inbox-watch; never by the WWW or NNTP code, which
+# only requires read-only access.
package PublicInbox::Import;
use strict;
use warnings;
@@ -117,6 +118,9 @@ sub _cat_blob ($$$) {
$n = read($r, my $lf, 1);
defined($n) or die "read final byte of cat-blob failed: $!";
die "bad read on final byte: <$lf>" if $lf ne "\n";
+
+ # old versions could store a leading mbox "From " line; strip it:
+ $buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
\$buf;
}
@@ -174,8 +178,8 @@ sub _update_git_info ($$) {
run_die([@cmd, 'update-server-info'], undef);
($self->{path_type} eq '2/38') and eval {
require PublicInbox::SearchIdx;
- my $inbox = $self->{inbox} || $git_dir;
- my $s = PublicInbox::SearchIdx->new($inbox);
+ my $ibx = $self->{inbox} || $git_dir;
+ my $s = PublicInbox::SearchIdx->new($ibx);
$s->index_sync({ ref => $self->{ref} });
};
eval { run_die([@cmd, qw(gc --auto)], undef) } if $do_gc;
@@ -275,10 +279,12 @@ sub extract_author_info ($) {
my $sender = '';
my $from = $mime->header('From');
+ $from ||= '';
my ($email) = PublicInbox::Address::emails($from);
my ($name) = PublicInbox::Address::names($from);
if (!defined($name) || !defined($email)) {
$sender = $mime->header('Sender');
+ $sender ||= '';
if (!defined($name)) {
($name) = PublicInbox::Address::names($sender);
}
@@ -402,6 +408,8 @@ sub add {
print $w "reset $ref\n" or wfail;
}
+ # MIME decoding can create NUL bytes; replace them with spaces to protect git
+ $subject =~ tr/\0/ /;
utf8::encode($subject);
print $w "commit $ref\nmark :$commit\n",
"author $name <$email> $author_time_raw\n",
@@ -437,11 +445,14 @@ sub done {
_update_git_info($self, 1) if delete $self->{nchg};
$self->lock_release;
+
+ $self->{git}->cleanup;
}
sub atfork_child {
my ($self) = @_;
foreach my $f (qw(in out)) {
+ next unless defined($self->{$f});
close $self->{$f} or die "failed to close import[$f]: $!\n";
}
}
@@ -484,7 +495,7 @@ sub purge_oids {
my $old = $self->{'ref'};
my $git = $self->{git};
my @export = (qw(fast-export --no-data --use-done-feature), $old);
- my ($rd, $pid) = $git->popen(@export);
+ my $rd = $git->popen(@export);
my ($r, $w) = $self->gfi_start;
my @buf;
my $npurge = 0;
@@ -509,11 +520,12 @@ sub purge_oids {
push @buf, $buf;
} elsif (/^M 100644 ([a-f0-9]+) (\w+)/) {
my ($oid, $path) = ($1, $2);
+ $tree->{$path} = 1;
if ($purge->{$oid}) {
push @oids, $oid;
- delete $tree->{$path};
+ my $cmd = "M 100644 inline $path\ndata 0\n\n";
+ push @buf, $cmd;
} else {
- $tree->{$path} = 1;
push @buf, $_;
}
} elsif (/^D (\w+)/) {
@@ -538,6 +550,7 @@ sub purge_oids {
push @buf, $_;
}
}
+ close $rd or die "close fast-export failed: $?";
if (@buf) {
$w->print(@buf) or wfail;
}
@@ -624,8 +637,8 @@ version 1.0
=head1 DESCRIPTION
An importer and remover for public-inboxes which takes L<Email::MIME>
-messages as input and stores them in a ssoma repository as
-documented in L,
+messages as input and stores them in a git repository as
+documented in L,
except it does not allow duplicate Message-IDs.
It requires L<git(1)> and L<git-fast-import(1)> to be installed.