}
sub content_digest ($) {
- my ($mime) = @_;
+ my ($eml) = @_;
my $dig = Digest::SHA->new(256);
- my $hdr = $mime->header_obj;
# References: and In-Reply-To: get used interchangeably
# in some "duplicates" in LKML. We treat them the same
# in SearchIdx, so treat them the same for this:
# do NOT consider the Message-ID as part of the content_hash
# if we got here, we've already got Message-ID reuse
- my %seen = map { $_ => 1 } @{mids($hdr)};
- foreach my $mid (@{references($hdr)}) {
+ my %seen = map { $_ => 1 } @{mids($eml)};
+ foreach my $mid (@{references($eml)}) {
$dig->add("ref\0$mid\0") unless $seen{$mid}++;
}
# Only use Sender: if From is not present
foreach my $h (qw(From Sender)) {
- my @v = $hdr->header($h);
+ my @v = $eml->header($h);
if (@v) {
digest_addr($dig, $h, $_) foreach @v;
}
}
foreach my $h (qw(Subject Date)) {
- my @v = $hdr->header($h);
+ my @v = $eml->header($h);
foreach my $v (@v) {
utf8::encode($v);
$dig->add("$h\0$v\0");
# not in the original message. For the purposes of deduplication,
# do not take it into account:
foreach my $h (qw(To Cc)) {
- my @v = $hdr->header($h);
+ my @v = $eml->header($h);
digest_addr($dig, $h, $_) foreach @v;
}
- msg_iter($mime, \&content_dig_i, $dig);
+ msg_iter($eml, \&content_dig_i, $dig);
$dig;
}
# $mime is PublicInbox::Eml, but remains Email::MIME-compatible
$smsg //= bless {}, 'PublicInbox::Smsg';
- my $hdr = $mime->header_obj;
- $smsg->populate($hdr);
+ $smsg->populate($mime);
my $sender = '';
my $from = delete($smsg->{From}) // '';
my ($email) = PublicInbox::Address::emails($from);
my ($name) = PublicInbox::Address::names($from);
if (!defined($name) || !defined($email)) {
- $sender = $hdr->header('Sender') // '';
+ $sender = $mime->header('Sender') // '';
$name //= (PublicInbox::Address::names($sender))[0];
$email //= (PublicInbox::Address::emails($sender))[0];
}
}
sub v1_mid0 ($) {
- my ($mime) = @_;
- my $hdr = $mime->header_obj;
- my $mids = mids($hdr);
+ my ($eml) = @_;
+ my $mids = mids($eml);
if (!scalar(@$mids)) { # spam often has no Message-ID
- my $mid0 = digest2mid(content_digest($mime), $hdr);
- append_mid($hdr, $mid0);
+ my $mid0 = digest2mid(content_digest($eml), $eml);
+ append_mid($eml, $mid0);
return $mid0;
}
$mids->[0];
my $parsed = PublicInbox::Eml->new($message);
my $ret = $im->add($parsed);
if (!defined $ret) {
- warn "duplicate: ",
- $parsed->header_obj->header_raw('Message-ID'), "\n";
+ warn "duplicate: ", $parsed->header_raw('Message-ID'), "\n";
} else {
print "imported at mark $ret\n";
}
}
sub add_overview {
- my ($self, $mime, $smsg) = @_;
- $smsg->{lines} = $mime->body_raw =~ tr!\n!\n!;
- my $hdr = $mime->header_obj;
- my $mids = mids_for_index($hdr);
- my $refs = parse_references($smsg, $hdr, $mids);
+ my ($self, $eml, $smsg) = @_;
+ $smsg->{lines} = $eml->body_raw =~ tr!\n!\n!;
+ my $mids = mids_for_index($eml);
+ my $refs = parse_references($smsg, $eml, $mids);
my $subj = $smsg->{subject};
my $xpath;
if ($subj ne '') {
}
sub add_xapian ($$$$) {
- my ($self, $mime, $smsg, $mids) = @_;
- my $hdr = $mime->header_obj;
+ my ($self, $eml, $smsg, $mids) = @_;
my $doc = $X->{Document}->new;
add_val($doc, PublicInbox::Search::TS(), $smsg->{ts});
my @ds = gmtime($smsg->{ds});
$tg->set_document($doc);
index_headers($self, $smsg);
- msg_iter($mime, \&index_xapian, [ $self, $doc ]);
- index_ids($self, $doc, $hdr, $mids);
+ msg_iter($eml, \&index_xapian, [ $self, $doc ]);
+ index_ids($self, $doc, $eml, $mids);
$smsg->{to} = $smsg->{cc} = ''; # WWW doesn't need these, only NNTP
- PublicInbox::OverIdx::parse_references($smsg, $hdr, $mids);
+ PublicInbox::OverIdx::parse_references($smsg, $eml, $mids);
my $data = $smsg->to_doc_data;
$doc->set_data($data);
if (my $altid = $self->{-altid}) {
sub add_message {
# mime = PublicInbox::Eml or Email::MIME object
my ($self, $mime, $smsg, $sync) = @_;
- my $hdr = $mime->header_obj;
- my $mids = mids_for_index($hdr);
+ my $mids = mids_for_index($mime);
$smsg //= bless { blob => '' }, 'PublicInbox::Smsg'; # test-only compat
$smsg->{mid} //= $mids->[0]; # v1 compatibility
$smsg->{num} //= do { # v1
};
# v1 and tests only:
- $smsg->populate($hdr, $sync);
+ $smsg->populate($mime, $sync);
$smsg->{bytes} //= length($mime->as_string);
eval {
sub v2_num_for {
my ($self, $mime) = @_;
- my $mids = mids($mime->header_obj);
+ my $mids = mids($mime);
if (@$mids) {
my $mid = $mids->[0];
my $num = $self->{mm}->mid_insert($mid);
}
sub v2_num_for_harder {
- my ($self, $mime) = @_;
+ my ($self, $eml) = @_;
- my $hdr = $mime->header_obj;
- my $dig = content_digest($mime);
- my $mid0 = PublicInbox::Import::digest2mid($dig, $hdr);
+ my $dig = content_digest($eml);
+ my $mid0 = PublicInbox::Import::digest2mid($dig, $eml);
my $num = $self->{mm}->mid_insert($mid0);
unless (defined $num) {
# it's hard to spoof the last Received: header
- my @recvd = $hdr->header_raw('Received');
+ my @recvd = $eml->header_raw('Received');
$dig->add("Received: $_") foreach (@recvd);
- $mid0 = PublicInbox::Import::digest2mid($dig, $hdr);
+ $mid0 = PublicInbox::Import::digest2mid($dig, $eml);
$num = $self->{mm}->mid_insert($mid0);
# fall back to a random Message-ID and give up determinism:
until (defined($num)) {
$dig->add(rand);
- $mid0 = PublicInbox::Import::digest2mid($dig, $hdr);
+ $mid0 = PublicInbox::Import::digest2mid($dig, $eml);
warn "using random Message-ID <$mid0> as fallback\n";
$num = $self->{mm}->mid_insert($mid0);
}
}
- PublicInbox::Import::append_mid($hdr, $mid0);
+ PublicInbox::Import::append_mid($eml, $mid0);
($num, $mid0);
}
my $over = $self->{over};
my $chashes = content_hashes($old_eml);
my $removed = [];
- my $mids = mids($old_eml->header_obj);
+ my $mids = mids($old_eml);
# We avoid introducing new blobs into git since the raw content
# can be slightly different, so we do not need the user-supplied
# Message-IDs are pretty complex and rethreading hasn't been fully
# implemented, yet.
sub check_mids_match ($$) {
- my ($old_mime, $new_mime) = @_;
- my $old = $old_mime->header_obj;
- my $new = $new_mime->header_obj;
+ my ($old, $new) = @_;
_check_mids_match(mids($old), mids($new), 'Message-ID(s)');
_check_mids_match(references($old), references($new),
'References/In-Reply-To');
my ($bref, $oid, $type, $size, $arg) = @_;
return if $size == 0; # purged
my ($num, $mid0);
- my $mime = PublicInbox::Eml->new($$bref);
- my $mids = mids($mime->header_obj);
- my $chash = content_hash($mime);
+ my $eml = PublicInbox::Eml->new($$bref);
+ my $mids = mids($eml);
+ my $chash = content_hash($eml);
my $self = $arg->{v2w};
if (scalar(@$mids) == 0) {
blob => $oid,
mid => $mid0,
}, 'PublicInbox::Smsg';
- $smsg->populate($mime, $arg);
- if (do_idx($self, $bref, $mime, $smsg)) {
+ $smsg->populate($eml, $arg);
+ if (do_idx($self, $bref, $eml, $smsg)) {
${$arg->{need_checkpoint}} = 1;
}
}
my $self = $sync->{v2w};
my $unindexed = $sync->{in_unindex} ? $sync->{unindexed} : undef;
my $mm = $self->{mm};
- my $mids = mids(PublicInbox::Eml->new($bref)->header_obj);
+ my $mids = mids(PublicInbox::Eml->new($bref));
undef $$bref;
my $over = $self->{over};
foreach my $mid (@$mids) {
$ctx->{smsg} = $ctx->{over}->next_by_mid(@{$ctx->{next_arg}});
$ctx->{mhref} = ($ctx->{nr} || $ctx->{smsg}) ?
"../${\mid_href($smsg->{mid})}/" : '';
- my $hdr = $eml->header_obj;
- my $obuf = $ctx->{obuf} = _msg_page_prepare_obuf($hdr, $ctx);
+ my $obuf = $ctx->{obuf} = _msg_page_prepare_obuf($eml, $ctx);
multipart_text_as_html($eml, $ctx);
delete $ctx->{obuf};
$$obuf .= '</pre><hr>';
my ($ctx) = @_;
my $bref = $ctx->{-inbox}->msg_by_mid($ctx->{mid}) or return; # 404
my $eml = PublicInbox::Eml->new($bref);
- my $hdr = $eml->header_obj;
$ctx->{mhref} = '';
PublicInbox::WwwStream::init($ctx);
- my $obuf = $ctx->{obuf} = _msg_page_prepare_obuf($hdr, $ctx);
+ my $obuf = $ctx->{obuf} = _msg_page_prepare_obuf($eml, $ctx);
multipart_text_as_html($eml, $ctx);
delete $ctx->{obuf};
$$obuf .= '</pre><hr>';
- eval { $$obuf .= html_footer($ctx, $hdr) };
+ eval { $$obuf .= html_footer($ctx, $eml) };
html_oneshot($ctx, 200, $obuf);
}
# Deleting these fields saves about 400K as we iterate across 1K msgs
delete @$smsg{qw(ts blob)};
- my $hdr = $eml->header_obj;
- my $from = _hdr_names_html($hdr, 'From');
+ my $from = _hdr_names_html($eml, 'From');
obfuscate_addrs($obfs_ibx, $from) if $obfs_ibx;
$rv .= "From: $from @ ".fmt_ts($ds)." UTC";
my $upfx = $ctx->{-upfx};
my $mhref = $upfx . mid_href($mid_raw) . '/';
$rv .= qq{ (<a\nhref="$mhref">permalink</a> / };
$rv .= qq{<a\nhref="${mhref}raw">raw</a>)\n};
- my $to = fold_addresses(_hdr_names_html($hdr, 'To'));
- my $cc = fold_addresses(_hdr_names_html($hdr, 'Cc'));
+ my $to = fold_addresses(_hdr_names_html($eml, 'To'));
+ my $cc = fold_addresses(_hdr_names_html($eml, 'Cc'));
my ($tlen, $clen) = (length($to), length($cc));
my $to_cc = '';
if (($tlen + $clen) > COLS) {
$rv .= $to_cc;
my $mapping = $ctx->{mapping};
- if (!$mapping && (defined($irt) || defined($irt = in_reply_to($hdr)))) {
+ if (!$mapping && (defined($irt) || defined($irt = in_reply_to($eml)))) {
my $href = $upfx . mid_href($irt) . '/';
my $html = ascii_html($irt);
$rv .= qq(In-Reply-To: <<a\nhref="$href">$html</a>>\n)
}
sub _msg_page_prepare_obuf {
- my ($hdr, $ctx) = @_;
+ my ($eml, $ctx) = @_;
my $over = $ctx->{-inbox}->over;
my $obfs_ibx = $ctx->{-obfs_ibx};
my $rv = '';
- my $mids = mids_for_index($hdr);
+ my $mids = mids_for_index($eml);
my $nr = $ctx->{nr}++;
if ($nr) { # unlikely
$rv .= '<pre>';
} else {
- $ctx->{first_hdr} = $hdr;
+ $ctx->{first_hdr} = $eml->header_obj;
if ($ctx->{smsg}) {
$rv .=
"<pre>WARNING: multiple messages have this Message-ID\n</pre>";
}
$ctx->{-upfx} = '../' if $over;
my @title; # (Subject[0], From[0])
- for my $v ($hdr->header('From')) {
+ for my $v ($eml->header('From')) {
my @n = PublicInbox::Address::names($v);
$v = ascii_html($v);
$title[1] //= ascii_html(join(', ', @n));
$rv .= "From: $v\n" if $v ne '';
}
foreach my $h (qw(To Cc)) {
- for my $v ($hdr->header($h)) {
+ for my $v ($eml->header($h)) {
fold_addresses($v);
$v = ascii_html($v);
obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
$rv .= "$h: $v\n" if $v ne '';
}
}
- my @subj = $hdr->header('Subject');
+ my @subj = $eml->header('Subject');
if (@subj) {
my $v = ascii_html(shift @subj);
obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
$rv .= qq(<a\nhref="#r"\nid=t></a>) if $over;
$title[0] = '(no subject)';
}
- for my $v ($hdr->header('Date')) {
+ for my $v ($eml->header('Date')) {
$v = ascii_html($v);
obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx; # possible :P
$rv .= "Date: $v\n";
my $lnk = PublicInbox::Linkify->new;
my $s = '';
for my $h (qw(Message-ID X-Alt-Message-ID)) {
- $s .= "$h: $_\n" for ($hdr->header_raw($h));
+ $s .= "$h: $_\n" for ($eml->header_raw($h));
}
$lnk->linkify_mids('..', \$s, 1);
$rv .= $s;
}
- $rv .= _parent_headers($hdr, $over);
+ $rv .= _parent_headers($eml, $over);
$rv .= "\n";
\$rv;
}
# any header match means it's eligible for the inbox:
if (my $watch_hdrs = $ibx->{-watchheaders}) {
my $ok;
- my $hdr = $eml->header_obj;
for my $wh (@$watch_hdrs) {
- my @v = $hdr->header_raw($wh->[0]);
+ my @v = $eml->header_raw($wh->[0]);
$ok = grep(/$wh->[1]/, @v) and last;
}
return unless $ok;
# returns undef or string
sub feed_entry {
my ($ctx, $smsg, $eml) = @_;
- my $hdr = $eml->header_obj;
my $mid = $smsg->{mid};
- my $irt = PublicInbox::View::in_reply_to($hdr);
+ my $irt = PublicInbox::View::in_reply_to($eml);
my $uuid = to_uuid($mid);
my $base = $ctx->{feed_base_url};
if (defined $irt) {
$irt = '';
}
my $href = $base . mid_href($mid) . '/';
- my $updated = feed_updated(msg_timestamp($hdr));
+ my $updated = feed_updated(msg_timestamp($eml));
- my $title = $hdr->header('Subject');
+ my $title = $eml->header('Subject');
$title = '(no subject)' unless defined $title && $title ne '';
$title = title_tag($title);
- my $from = $hdr->header('From') // $hdr->header('Sender') //
+ my $from = $eml->header('From') // $eml->header('Sender') //
$ctx->{-inbox}->{-primary_address};
my ($email) = PublicInbox::Address::emails($from);
my $name = ascii_html(join(', ', PublicInbox::Address::names($from)));
}
} else {
my $eml = eml_from_path($file) or die "open($file) failed: $!";
- my $mids = mids($eml->header_obj);
+ my $mids = mids($eml);
find_mid($found, $_, \@ibxs) for (@$mids); # populates $found
my $chash = content_hash($eml);
my $to_edit = $found->{$chash};
# allow changing Received: and maybe other headers which can
# contain sensitive info.
- my $nhdr = $new_mime->header_obj;
- my $ohdr = $old_mime->header_obj;
- if (($nhdr->as_string eq $ohdr->as_string) &&
+ my $nhdr = $new_mime->header_obj->as_string;
+ my $ohdr = $old_mime->header_obj->as_string;
+ if (($nhdr eq $ohdr) &&
(content_hash($new_mime) eq content_hash($old_mime))) {
warn "No change detected to:\n", show_cmd($ibx, $smsg);
# destination succeeds
$emm->abort;
} else { # v1-only
- my $mid = $mime->header_obj->header_raw('Message-ID');
+ my $mid = $mime->header_raw('Message-ID');
# this message is similar to what ssoma-mda shows:
print STDERR "CONFLICT: Message-ID: $mid exists\n";
}