lib/PublicInbox/ContentHash.pm | 15 +++++++-------- lib/PublicInbox/Import.pm | 17 +++++++---------- lib/PublicInbox/OverIdx.pm | 9 ++++----- lib/PublicInbox/SearchIdx.pm | 14 ++++++-------- lib/PublicInbox/V2Writable.pm | 35 ++++++++++++++++------------------- lib/PublicInbox/View.pm | 35 ++++++++++++++++------------------- lib/PublicInbox/WatchMaildir.pm | 3 +-- lib/PublicInbox/WwwAtomStream.pm | 9 ++++----- script/public-inbox-edit | 8 ++++---- script/public-inbox-mda | 2 +- diff --git a/lib/PublicInbox/ContentHash.pm b/lib/PublicInbox/ContentHash.pm index 420dc5e7c92d28c05cfa183cf4059d5258afd786..1fe229559d2a584c6fc691e5851abde14f3ef59a 100644 --- a/lib/PublicInbox/ContentHash.pm +++ b/lib/PublicInbox/ContentHash.pm @@ -53,29 +53,28 @@ $dig->add($s); } sub content_digest ($) { - my ($mime) = @_; + my ($eml) = @_; my $dig = Digest::SHA->new(256); - my $hdr = $mime->header_obj; # References: and In-Reply-To: get used interchangeably # in some "duplicates" in LKML. We treat them the same # in SearchIdx, so treat them the same for this: # do NOT consider the Message-ID as part of the content_hash # if we got here, we've already got Message-ID reuse - my %seen = map { $_ => 1 } @{mids($hdr)}; - foreach my $mid (@{references($hdr)}) { + my %seen = map { $_ => 1 } @{mids($eml)}; + foreach my $mid (@{references($eml)}) { $dig->add("ref\0$mid\0") unless $seen{$mid}++; } # Only use Sender: if From is not present foreach my $h (qw(From Sender)) { - my @v = $hdr->header($h); + my @v = $eml->header($h); if (@v) { digest_addr($dig, $h, $_) foreach @v; } } foreach my $h (qw(Subject Date)) { - my @v = $hdr->header($h); + my @v = $eml->header($h); foreach my $v (@v) { utf8::encode($v); $dig->add("$h\0$v\0"); @@ -85,10 +84,10 @@ # Some mail processors will add " to unquoted names that were # not in the original message. For the purposes of deduplication, # do not take it into account: foreach my $h (qw(To Cc)) { - my @v = $hdr->header($h); + my @v = $eml->header($h); digest_addr($dig, $h, $_) foreach @v; } - msg_iter($mime, \&content_dig_i, $dig); + msg_iter($eml, \&content_dig_i, $dig); $dig; } diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 07a4951871375674b442c1a0c5bd7710013e608f..700b40262e36d6eae460d12639106b6c2529062c 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -285,15 +285,14 @@ my ($mime, $smsg) = @_; # $mime is PublicInbox::Eml, but remains Email::MIME-compatible $smsg //= bless {}, 'PublicInbox::Smsg'; - my $hdr = $mime->header_obj; - $smsg->populate($hdr); + $smsg->populate($mime); my $sender = ''; my $from = delete($smsg->{From}) // ''; my ($email) = PublicInbox::Address::emails($from); my ($name) = PublicInbox::Address::names($from); if (!defined($name) || !defined($email)) { - $sender = $hdr->header('Sender') // ''; + $sender = $mime->header('Sender') // ''; $name //= (PublicInbox::Address::names($sender))[0]; $email //= (PublicInbox::Address::emails($sender))[0]; } @@ -346,13 +345,12 @@ $hdr->header_set('Message-ID', @cur, "<$mid0>"); } sub v1_mid0 ($) { - my ($mime) = @_; - my $hdr = $mime->header_obj; - my $mids = mids($hdr); + my ($eml) = @_; + my $mids = mids($eml); if (!scalar(@$mids)) { # spam often has no Message-ID - my $mid0 = digest2mid(content_digest($mime), $hdr); - append_mid($hdr, $mid0); + my $mid0 = digest2mid(content_digest($eml), $eml); + append_mid($eml, $mid0); return $mid0; } $mids->[0]; @@ -671,8 +669,7 @@ "\ntest message"; my $parsed = PublicInbox::Eml->new($message); my $ret = $im->add($parsed); if (!defined $ret) { - warn "duplicate: ", - $parsed->header_obj->header_raw('Message-ID'), "\n"; + warn "duplicate: ", $parsed->header_raw('Message-ID'), "\n"; } else { print "imported at mark $ret\n"; } diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index 5821c562b814de635862db755db1c7a42f9954e8..c8f61e012e0977dc98b794357ee033b801dc8e0a 100644 --- a/lib/PublicInbox/OverIdx.pm +++ b/lib/PublicInbox/OverIdx.pm @@ -271,11 +271,10 @@ lc($subj); } sub add_overview { - my ($self, $mime, $smsg) = @_; - $smsg->{lines} = $mime->body_raw =~ tr!\n!\n!; - my $hdr = $mime->header_obj; - my $mids = mids_for_index($hdr); - my $refs = parse_references($smsg, $hdr, $mids); + my ($self, $eml, $smsg) = @_; + $smsg->{lines} = $eml->body_raw =~ tr!\n!\n!; + my $mids = mids_for_index($eml); + my $refs = parse_references($smsg, $eml, $mids); my $subj = $smsg->{subject}; my $xpath; if ($subj ne '') { diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index feb00de22e062a86c7042b2f5234496a9b606c61..a1baa65bd2945dcc8885fc70735a1d60ecd5853b 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -350,8 +350,7 @@ } } sub add_xapian ($$$$) { - my ($self, $mime, $smsg, $mids) = @_; - my $hdr = $mime->header_obj; + my ($self, $eml, $smsg, $mids) = @_; my $doc = $X->{Document}->new; add_val($doc, PublicInbox::Search::TS(), $smsg->{ts}); my @ds = gmtime($smsg->{ds}); @@ -366,10 +365,10 @@ my $tg = term_generator($self); $tg->set_document($doc); index_headers($self, $smsg); - msg_iter($mime, \&index_xapian, [ $self, $doc ]); - index_ids($self, $doc, $hdr, $mids); + msg_iter($eml, \&index_xapian, [ $self, $doc ]); + index_ids($self, $doc, $eml, $mids); $smsg->{to} = $smsg->{cc} = ''; # WWW doesn't need these, only NNTP - PublicInbox::OverIdx::parse_references($smsg, $hdr, $mids); + PublicInbox::OverIdx::parse_references($smsg, $eml, $mids); my $data = $smsg->to_doc_data; $doc->set_data($data); if (my $altid = $self->{-altid}) { @@ -398,8 +397,7 @@ sub add_message { # mime = PublicInbox::Eml or Email::MIME object my ($self, $mime, $smsg, $sync) = @_; - my $hdr = $mime->header_obj; - my $mids = mids_for_index($hdr); + my $mids = mids_for_index($mime); $smsg //= bless { blob => '' }, 'PublicInbox::Smsg'; # test-only compat $smsg->{mid} //= $mids->[0]; # v1 compatibility $smsg->{num} //= do { # v1 @@ -408,7 +406,7 @@ index_mm($self, $mime, $smsg->{blob}, $sync); }; # v1 and tests only: - $smsg->populate($hdr, $sync); + $smsg->populate($mime, $sync); $smsg->{bytes} //= length($mime->as_string); eval { diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index e1c9a393a36f0fa12b3a04e804984f6981c03223..344edbbad2010668fd8170f7af9b28c9a9e4fbfa 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -197,7 +197,7 @@ } sub v2_num_for { my ($self, $mime) = @_; - my $mids = mids($mime->header_obj); + my $mids = mids($mime); if (@$mids) { my $mid = $mids->[0]; my $num = $self->{mm}->mid_insert($mid); @@ -244,28 +244,27 @@ v2_num_for_harder($self, $mime); } sub v2_num_for_harder { - my ($self, $mime) = @_; + my ($self, $eml) = @_; - my $hdr = $mime->header_obj; - my $dig = content_digest($mime); - my $mid0 = PublicInbox::Import::digest2mid($dig, $hdr); + my $dig = content_digest($eml); + my $mid0 = PublicInbox::Import::digest2mid($dig, $eml); my $num = $self->{mm}->mid_insert($mid0); unless (defined $num) { # it's hard to spoof the last Received: header - my @recvd = $hdr->header_raw('Received'); + my @recvd = $eml->header_raw('Received'); $dig->add("Received: $_") foreach (@recvd); - $mid0 = PublicInbox::Import::digest2mid($dig, $hdr); + $mid0 = PublicInbox::Import::digest2mid($dig, $eml); $num = $self->{mm}->mid_insert($mid0); # fall back to a random Message-ID and give up determinism: until (defined($num)) { $dig->add(rand); - $mid0 = PublicInbox::Import::digest2mid($dig, $hdr); + $mid0 = PublicInbox::Import::digest2mid($dig, $eml); warn "using random Message-ID <$mid0> as fallback\n"; $num = $self->{mm}->mid_insert($mid0); } } - PublicInbox::Import::append_mid($hdr, $mid0); + PublicInbox::Import::append_mid($eml, $mid0); ($num, $mid0); } @@ -384,7 +383,7 @@ } my $over = $self->{over}; my $chashes = content_hashes($old_eml); my $removed = []; - my $mids = mids($old_eml->header_obj); + my $mids = mids($old_eml); # We avoid introducing new blobs into git since the raw content # can be slightly different, so we do not need the user-supplied @@ -514,9 +513,7 @@ # The rules for dealing with messages with multiple or conflicting # Message-IDs are pretty complex and rethreading hasn't been fully # implemented, yet. sub check_mids_match ($$) { - my ($old_mime, $new_mime) = @_; - my $old = $old_mime->header_obj; - my $new = $new_mime->header_obj; + my ($old, $new) = @_; _check_mids_match(mids($old), mids($new), 'Message-ID(s)'); _check_mids_match(references($old), references($new), 'References/In-Reply-To'); @@ -894,9 +891,9 @@ sub index_oid { # cat_async callback my ($bref, $oid, $type, $size, $arg) = @_; return if $size == 0; # purged my ($num, $mid0); - my $mime = PublicInbox::Eml->new($$bref); - my $mids = mids($mime->header_obj); - my $chash = content_hash($mime); + my $eml = PublicInbox::Eml->new($$bref); + my $mids = mids($eml); + my $chash = content_hash($eml); my $self = $arg->{v2w}; if (scalar(@$mids) == 0) { @@ -960,8 +957,8 @@ num => $num, blob => $oid, mid => $mid0, }, 'PublicInbox::Smsg'; - $smsg->populate($mime, $arg); - if (do_idx($self, $bref, $mime, $smsg)) { + $smsg->populate($eml, $arg); + if (do_idx($self, $bref, $eml, $smsg)) { ${$arg->{need_checkpoint}} = 1; } } @@ -1113,7 +1110,7 @@ my ($bref, $oid, $type, $size, $sync) = @_; my $self = $sync->{v2w}; my $unindexed = $sync->{in_unindex} ? $sync->{unindexed} : undef; my $mm = $self->{mm}; - my $mids = mids(PublicInbox::Eml->new($bref)->header_obj); + my $mids = mids(PublicInbox::Eml->new($bref)); undef $$bref; my $over = $self->{over}; foreach my $mid (@$mids) { diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index d7ec4eb0a1a63f2500b911eaa4638bd7565e21a3..4cb72bea849e6b28b4bfacc2684af1daf2689128 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -33,8 +33,7 @@ my $smsg = $ctx->{smsg}; $ctx->{smsg} = $ctx->{over}->next_by_mid(@{$ctx->{next_arg}}); $ctx->{mhref} = ($ctx->{nr} || $ctx->{smsg}) ? "../${\mid_href($smsg->{mid})}/" : ''; - my $hdr = $eml->header_obj; - my $obuf = $ctx->{obuf} = _msg_page_prepare_obuf($hdr, $ctx); + my $obuf = $ctx->{obuf} = _msg_page_prepare_obuf($eml, $ctx); multipart_text_as_html($eml, $ctx); delete $ctx->{obuf}; $$obuf .= '
';
} else {
- $ctx->{first_hdr} = $hdr;
+ $ctx->{first_hdr} = $eml->header_obj;
if ($ctx->{smsg}) {
$rv .=
"WARNING: multiple messages have this Message-ID\n
";
@@ -644,7 +641,7 @@ $rv .= ""; # anchor for body start
}
$ctx->{-upfx} = '../' if $over;
my @title; # (Subject[0], From[0])
- for my $v ($hdr->header('From')) {
+ for my $v ($eml->header('From')) {
my @n = PublicInbox::Address::names($v);
$v = ascii_html($v);
$title[1] //= ascii_html(join(', ', @n));
@@ -655,14 +652,14 @@ }
$rv .= "From: $v\n" if $v ne '';
}
foreach my $h (qw(To Cc)) {
- for my $v ($hdr->header($h)) {
+ for my $v ($eml->header($h)) {
fold_addresses($v);
$v = ascii_html($v);
obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
$rv .= "$h: $v\n" if $v ne '';
}
}
- my @subj = $hdr->header('Subject');
+ my @subj = $eml->header('Subject');
if (@subj) {
my $v = ascii_html(shift @subj);
obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
@@ -678,7 +675,7 @@ } else { # dummy anchor for thread skeleton at bottom of page
$rv .= qq() if $over;
$title[0] = '(no subject)';
}
- for my $v ($hdr->header('Date')) {
+ for my $v ($eml->header('Date')) {
$v = ascii_html($v);
obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx; # possible :P
$rv .= "Date: $v\n";
@@ -697,12 +694,12 @@ # public-inbox-nntpd because of multiple Message-ID headers.
my $lnk = PublicInbox::Linkify->new;
my $s = '';
for my $h (qw(Message-ID X-Alt-Message-ID)) {
- $s .= "$h: $_\n" for ($hdr->header_raw($h));
+ $s .= "$h: $_\n" for ($eml->header_raw($h));
}
$lnk->linkify_mids('..', \$s, 1);
$rv .= $s;
}
- $rv .= _parent_headers($hdr, $over);
+ $rv .= _parent_headers($eml, $over);
$rv .= "\n";
\$rv;
}
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index 814b455b2ac579819feec01b8e291575b493e2d6..7d4139a5dcf983461193854495359c45556a97af 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -163,9 +163,8 @@
# any header match means it's eligible for the inbox:
if (my $watch_hdrs = $ibx->{-watchheaders}) {
my $ok;
- my $hdr = $eml->header_obj;
for my $wh (@$watch_hdrs) {
- my @v = $hdr->header_raw($wh->[0]);
+ my @v = $eml->header_raw($wh->[0]);
$ok = grep(/$wh->[1]/, @v) and last;
}
return unless $ok;
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index 1ed806fd42543fcf4ad650577f8ad369732fbccb..388def1239f90af5a7758c2bcffe5045114dda64 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -116,9 +116,8 @@
# returns undef or string
sub feed_entry {
my ($ctx, $smsg, $eml) = @_;
- my $hdr = $eml->header_obj;
my $mid = $smsg->{mid};
- my $irt = PublicInbox::View::in_reply_to($hdr);
+ my $irt = PublicInbox::View::in_reply_to($eml);
my $uuid = to_uuid($mid);
my $base = $ctx->{feed_base_url};
if (defined $irt) {
@@ -130,13 +129,13 @@ } else {
$irt = '';
}
my $href = $base . mid_href($mid) . '/';
- my $updated = feed_updated(msg_timestamp($hdr));
+ my $updated = feed_updated(msg_timestamp($eml));
- my $title = $hdr->header('Subject');
+ my $title = $eml->header('Subject');
$title = '(no subject)' unless defined $title && $title ne '';
$title = title_tag($title);
- my $from = $hdr->header('From') // $hdr->header('Sender') //
+ my $from = $eml->header('From') // $eml->header('Sender') //
$ctx->{-inbox}->{-primary_address};
my ($email) = PublicInbox::Address::emails($from);
my $name = ascii_html(join(', ', PublicInbox::Address::names($from)));
diff --git a/script/public-inbox-edit b/script/public-inbox-edit
index 2d3c4af4d38827f1c3068190b38fa787c2aad5eb..240beb3a168ff3e6c6a35e4814b3cab144761642 100755
--- a/script/public-inbox-edit
+++ b/script/public-inbox-edit
@@ -93,7 +93,7 @@ warn "Will edit all of them\n";
}
} else {
my $eml = eml_from_path($file) or die "open($file) failed: $!";
- my $mids = mids($eml->header_obj);
+ my $mids = mids($eml);
find_mid($found, $_, \@ibxs) for (@$mids); # populates $found
my $chash = content_hash($eml);
my $to_edit = $found->{$chash};
@@ -214,9 +214,9 @@ PublicInbox::Import::drop_unwanted_headers($old_mime);
# allow changing Received: and maybe other headers which can
# contain sensitive info.
- my $nhdr = $new_mime->header_obj;
- my $ohdr = $old_mime->header_obj;
- if (($nhdr->as_string eq $ohdr->as_string) &&
+ my $nhdr = $new_mime->header_obj->as_string;
+ my $ohdr = $old_mime->header_obj->as_string;
+ if (($nhdr eq $ohdr) &&
(content_hash($new_mime) eq content_hash($old_mime))) {
warn "No change detected to:\n", show_cmd($ibx, $smsg);
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index 42d0e00cbb72687bbdaf5e67e1fb0c6c379ba9b1..02ca343163eaf00658b8081f025695f94b6c2e58 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -119,7 +119,7 @@ # ->abort is idempotent, no emergency if a single
# destination succeeds
$emm->abort;
} else { # v1-only
- my $mid = $mime->header_obj->header_raw('Message-ID');
+ my $mid = $mime->header_raw('Message-ID');
# this message is similar to what ssoma-mda shows:
print STDERR "CONFLICT: Message-ID: $mid exists\n";
}