X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FImport.pm;h=2cb4896af5a83a08ca45cb7340f5c1e548ed1e42;hb=e5d32fe689dd0922eaf206e2371fc8040ad85247;hp=07a4951871375674b442c1a0c5bd7710013e608f;hpb=0faddbbfecaa784c584d3a625628c288fe9316c7;p=public-inbox.git diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 07a49518..2cb4896a 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -106,7 +106,7 @@ sub _cat_blob ($$$) { local $/ = "\n"; my $info = <$r>; defined $info or die "EOF from fast-import / cat-blob: $!"; - $info =~ /\A[a-f0-9]{40} blob ([0-9]+)\n\z/ or return; + $info =~ /\A[a-f0-9]{40,} blob ([0-9]+)\n\z/ or return; my $left = $1; my $offset = 0; my $buf = ''; @@ -137,7 +137,7 @@ sub check_remove_v1 { my ($r, $w, $tip, $path, $mime) = @_; my $info = _check_path($r, $w, $tip, $path) or return ('MISSING',undef); - $info =~ m!\A100644 blob ([a-f0-9]{40})\t!s or die "not blob: $info"; + $info =~ m!\A100644 blob ([a-f0-9]{40,})\t!s or die "not blob: $info"; my $oid = $1; my $msg = _cat_blob($r, $w, $oid) or die "BUG: cat-blob $1 failed"; my $cur = PublicInbox::Eml->new($msg); @@ -285,15 +285,14 @@ sub extract_cmt_info ($;$) { # $mime is PublicInbox::Eml, but remains Email::MIME-compatible $smsg //= bless {}, 'PublicInbox::Smsg'; - my $hdr = $mime->header_obj; - $smsg->populate($hdr); + $smsg->populate($mime); my $sender = ''; my $from = delete($smsg->{From}) // ''; my ($email) = PublicInbox::Address::emails($from); my ($name) = PublicInbox::Address::names($from); if (!defined($name) || !defined($email)) { - $sender = $hdr->header('Sender') // ''; + $sender = $mime->header('Sender') // ''; $name //= (PublicInbox::Address::names($sender))[0]; $email //= (PublicInbox::Address::emails($sender))[0]; } @@ -329,11 +328,13 @@ sub extract_cmt_info ($;$) { } # kill potentially confusing/misleading headers +our @UNWANTED_HEADERS = (qw(Bytes Lines Content-Length), + qw(Status X-Status)); sub drop_unwanted_headers ($) { - my ($mime) = @_; - - $mime->header_set($_) for qw(Bytes Lines Content-Length Status); - $mime->header_set($_) for @PublicInbox::MDA::BAD_HEADERS; + my ($eml) = @_; + for (@UNWANTED_HEADERS, @PublicInbox::MDA::BAD_HEADERS) { + $eml->header_set($_); + } } # used by V2Writable, too @@ -346,13 +347,12 @@ sub append_mid ($$) { } sub v1_mid0 ($) { - my ($mime) = @_; - my $hdr = $mime->header_obj; - my $mids = mids($hdr); + my ($eml) = @_; + my $mids = mids($eml); if (!scalar(@$mids)) { # spam often has no Message-ID - my $mid0 = digest2mid(content_digest($mime), $hdr); - append_mid($hdr, $mid0); + my $mid0 = digest2mid(content_digest($eml), $eml); + append_mid($eml, $mid0); return $mid0; } $mids->[0]; @@ -463,6 +463,9 @@ sub init_bare { } } +# true if locked and active +sub active { !!$_[0]->{out} } + sub done { my ($self) = @_; my $w = delete $self->{out} or return; @@ -671,8 +674,7 @@ version 1.0 my $parsed = PublicInbox::Eml->new($message); my $ret = $im->add($parsed); if (!defined $ret) { - warn "duplicate: ", - $parsed->header_obj->header_raw('Message-ID'), "\n"; + warn "duplicate: ", $parsed->header_raw('Message-ID'), "\n"; } else { print "imported at mark $ret\n"; }