X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FImport.pm;h=079afc5f7861320a7bd44e3cb8577b4c86a53456;hb=0c8106d44f317175e122744b43407bf067183175;hp=07a4951871375674b442c1a0c5bd7710013e608f;hpb=0faddbbfecaa784c584d3a625628c288fe9316c7;p=public-inbox.git diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 07a49518..079afc5f 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -48,7 +48,7 @@ sub gfi_start { return ($self->{in}, $self->{out}) if $self->{pid}; - my (@ret, $out_r, $out_w); + my ($in_r, $pid, $out_r, $out_w); pipe($out_r, $out_w) or die "pipe failed: $!"; $self->lock_acquire; @@ -56,27 +56,28 @@ sub gfi_start { my ($git, $ref) = @$self{qw(git ref)}; local $/ = "\n"; chomp($self->{tip} = $git->qx(qw(rev-parse --revs-only), $ref)); + die "fatal: rev-parse --revs-only $ref: \$?=$?" if $?; if ($self->{path_type} ne '2/38' && $self->{tip}) { local $/ = "\0"; my @t = $git->qx(qw(ls-tree -r -z --name-only), $ref); + die "fatal: ls-tree -r -z --name-only $ref: \$?=$?" if $?; chomp @t; $self->{-tree} = { map { $_ => 1 } @t }; } my @cmd = ('git', "--git-dir=$git->{git_dir}", qw(fast-import --quiet --done --date-format=raw)); - my ($in_r, $pid) = popen_rd(\@cmd, undef, { 0 => $out_r }); + ($in_r, $pid) = popen_rd(\@cmd, undef, { 0 => $out_r }); $out_w->autoflush(1); $self->{in} = $in_r; $self->{out} = $out_w; $self->{pid} = $pid; $self->{nchg} = 0; - @ret = ($in_r, $out_w); }; if ($@) { $self->lock_release; die $@; } - @ret; + ($in_r, $out_w); } sub wfail () { die "write to fast-import failed: $!" } @@ -106,7 +107,7 @@ sub _cat_blob ($$$) { local $/ = "\n"; my $info = <$r>; defined $info or die "EOF from fast-import / cat-blob: $!"; - $info =~ /\A[a-f0-9]{40} blob ([0-9]+)\n\z/ or return; + $info =~ /\A[a-f0-9]{40,} blob ([0-9]+)\n\z/ or return; my $left = $1; my $offset = 0; my $buf = ''; @@ -137,7 +138,7 @@ sub check_remove_v1 { my ($r, $w, $tip, $path, $mime) = @_; my $info = _check_path($r, $w, $tip, $path) or return ('MISSING',undef); - $info =~ m!\A100644 blob ([a-f0-9]{40})\t!s or die "not blob: $info"; + $info =~ m!\A100644 blob ([a-f0-9]{40,})\t!s or die "not blob: $info"; my $oid = $1; my $msg = _cat_blob($r, $w, $oid) or die "BUG: cat-blob $1 failed"; my $cur = PublicInbox::Eml->new($msg); @@ -285,15 +286,14 @@ sub extract_cmt_info ($;$) { # $mime is PublicInbox::Eml, but remains Email::MIME-compatible $smsg //= bless {}, 'PublicInbox::Smsg'; - my $hdr = $mime->header_obj; - $smsg->populate($hdr); + $smsg->populate($mime); my $sender = ''; my $from = delete($smsg->{From}) // ''; my ($email) = PublicInbox::Address::emails($from); my ($name) = PublicInbox::Address::names($from); if (!defined($name) || !defined($email)) { - $sender = $hdr->header('Sender') // ''; + $sender = $mime->header('Sender') // ''; $name //= (PublicInbox::Address::names($sender))[0]; $email //= (PublicInbox::Address::emails($sender))[0]; } @@ -329,11 +329,13 @@ sub extract_cmt_info ($;$) { } # kill potentially confusing/misleading headers +our @UNWANTED_HEADERS = (qw(Bytes Lines Content-Length), + qw(Status X-Status)); sub drop_unwanted_headers ($) { - my ($mime) = @_; - - $mime->header_set($_) for qw(Bytes Lines Content-Length Status); - $mime->header_set($_) for @PublicInbox::MDA::BAD_HEADERS; + my ($eml) = @_; + for (@UNWANTED_HEADERS, @PublicInbox::MDA::BAD_HEADERS) { + $eml->header_set($_); + } } # used by V2Writable, too @@ -346,13 +348,12 @@ sub append_mid ($$) { } sub v1_mid0 ($) { - my ($mime) = @_; - my $hdr = $mime->header_obj; - my $mids = mids($hdr); + my ($eml) = @_; + my $mids = mids($eml); if (!scalar(@$mids)) { # spam often has no Message-ID - my $mid0 = digest2mid(content_digest($mime), $hdr); - append_mid($hdr, $mid0); + my $mid0 = digest2mid(content_digest($eml), $eml); + append_mid($eml, $mid0); return $mid0; } $mids->[0]; @@ -405,6 +406,10 @@ sub add { if ($smsg) { $smsg->{blob} = $self->get_mark(":$blob"); $smsg->{raw_bytes} = $n; + if (my $oidx = delete $smsg->{-oidx}) { # used by LeiStore + return if $oidx->blob_exists($smsg->{blob}); + } + # XXX do we need this? it's in git at this point $smsg->{-raw_email} = \$raw_email; } my $ref = $self->{ref}; @@ -463,6 +468,9 @@ sub init_bare { } } +# true if locked and active +sub active { !!$_[0]->{out} } + sub done { my ($self) = @_; my $w = delete $self->{out} or return; @@ -671,8 +679,7 @@ version 1.0 my $parsed = PublicInbox::Eml->new($message); my $ret = $im->add($parsed); if (!defined $ret) { - warn "duplicate: ", - $parsed->header_obj->header_raw('Message-ID'), "\n"; + warn "duplicate: ", $parsed->header_raw('Message-ID'), "\n"; } else { print "imported at mark $ret\n"; }