X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FV2Writable.pm;h=e9fd502e02e9bd785e14731ce244d4478dc69d88;hb=f0ef0a56a8957d6f3095b1a24798e54b0b815d04;hp=1cc4b005f86525352bcd1cb81f7ee19139ec9ee5;hpb=3348ad4b3b1a0865ee58a902953165ea0f4aa4bd;p=public-inbox.git diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 1cc4b005..e9fd502e 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -22,8 +22,11 @@ use IO::Handle; my $PACKING_FACTOR = 0.4; # assume 2 cores if GNU nproc(1) is not available -sub nproc () { - int($ENV{NPROC} || `nproc 2>/dev/null` || 2); +sub nproc_parts () { + my $n = int($ENV{NPROC} || `nproc 2>/dev/null` || 2); + # subtract for the main process and git-fast-import + $n -= 1; + $n < 1 ? 1 : $n; } sub count_partitions ($) { @@ -73,7 +76,7 @@ sub new { rotate_bytes => int((1024 * 1024 * 1024) / $PACKING_FACTOR), last_commit => [], # git repo -> commit }; - $self->{partitions} = count_partitions($self) || nproc(); + $self->{partitions} = count_partitions($self) || nproc_parts(); bless $self, $class; } @@ -171,19 +174,19 @@ sub num_for_harder { my $hdr = $mime->header_obj; my $dig = content_digest($mime); - $$mid0 = PublicInbox::Import::digest2mid($dig); + $$mid0 = PublicInbox::Import::digest2mid($dig, $hdr); my $num = $self->{mm}->mid_insert($$mid0); unless (defined $num) { # it's hard to spoof the last Received: header my @recvd = $hdr->header_raw('Received'); $dig->add("Received: $_") foreach (@recvd); - $$mid0 = PublicInbox::Import::digest2mid($dig); + $$mid0 = PublicInbox::Import::digest2mid($dig, $hdr); $num = $self->{mm}->mid_insert($$mid0); # fall back to a random Message-ID and give up determinism: until (defined($num)) { $dig->add(rand); - $$mid0 = PublicInbox::Import::digest2mid($dig); + $$mid0 = PublicInbox::Import::digest2mid($dig, $hdr); warn "using random Message-ID <$$mid0> as fallback\n"; $num = $self->{mm}->mid_insert($$mid0); } @@ -256,12 +259,32 @@ sub purge_oids { $purges; } +sub content_ids ($) { + my ($mime) = @_; + my @cids = ( content_id($mime) ); + + # Email::MIME->as_string doesn't always round-trip, so we may + # use a second content_id + my $rt = content_id(PublicInbox::MIME->new(\($mime->as_string))); + push @cids, $rt if $cids[0] ne $rt; + \@cids; +} + +sub content_matches ($$) { + my ($cids, $existing) = @_; + my $cid = content_id($existing); + foreach (@$cids) { + return 1 if $_ eq $cid + } + 0 +} + sub remove_internal { my ($self, $mime, $cmt_msg, $purge) = @_; $self->idx_init; my $im = $self->importer unless $purge; my $over = $self->{over}; - my $cid = content_id($mime); + my $cids = content_ids($mime); my $parts = $self->{idx_parts}; my $mm = $self->{mm}; my $removed; @@ -284,7 +307,7 @@ sub remove_internal { } my $orig = $$msg; my $cur = PublicInbox::MIME->new($msg); - if (content_id($cur) eq $cid) { + if (content_matches($cids, $cur)) { $smsg->{mime} = $cur; $gone{$smsg->{num}} = [ $smsg, \$orig ]; } @@ -569,8 +592,7 @@ sub get_blob ($$) { sub lookup_content { my ($self, $mime, $mid) = @_; my $over = $self->{over}; - my $cid = content_id($mime); - my $found; + my $cids = content_ids($mime); my ($id, $prev); while (my $smsg = $over->next_by_mid($mid, \$id, \$prev)) { my $msg = get_blob($self, $smsg); @@ -579,16 +601,16 @@ sub lookup_content { next; } my $cur = PublicInbox::MIME->new($msg); - if (content_id($cur) eq $cid) { + if (content_matches($cids, $cur)) { $smsg->{mime} = $cur; - $found = $smsg; - last; + return $smsg; } + # XXX DEBUG_DIFF is experimental and may be removed diff($mid, $cur, $mime) if $ENV{DEBUG_DIFF}; } - $found; + undef; } sub atfork_child {