Since we support duplicate MIDs in v2, we can safely truncate
long MID terms in the database and let the normal duplicate
resolution sort it out. It seems only spammers use excessively
long MIDs, and there'll always be abuse/misuse vectors for causing
mis-threaded messages, so it's not worth worrying about
excessively long MIDs.
mids references/;
use URI::Escape qw(uri_escape_utf8);
use Digest::SHA qw/sha1_hex/;
mids references/;
use URI::Escape qw(uri_escape_utf8);
use Digest::SHA qw/sha1_hex/;
-use constant MID_MAX => 40; # SHA-1 hex length
+use constant {
+ MID_MAX => 40, # SHA-1 hex length # TODO: get rid of this
+ MAX_MID_SIZE => 244, # max term size (Xapian limitation) - length('Q')
+};
sub mid_clean {
my ($mid) = @_;
sub mid_clean {
my ($mid) = @_;
+ foreach my $i (0..$#mids) {
+ next if length($mids[$i]) <= MAX_MID_SIZE;
+ warn "Message-ID: <$mids[$i]> too long, truncating\n";
+ $mids[$i] = substr($mids[$i], 0, MAX_MID_SIZE);
+ }
+
require PublicInbox::Git;
use constant {
require PublicInbox::Git;
use constant {
- MAX_MID_SIZE => 244, # max term size (Xapian limitation) - length('Q')
PERM_UMASK => 0,
OLD_PERM_GROUP => 1,
OLD_PERM_EVERYBODY => 2,
PERM_UMASK => 0,
OLD_PERM_GROUP => 1,
OLD_PERM_EVERYBODY => 2,
eval {
my $smsg = PublicInbox::SearchMsg->new($mime);
my $doc = $smsg->{doc};
eval {
my $smsg = PublicInbox::SearchMsg->new($mime);
my $doc = $smsg->{doc};
- foreach my $mid (@$mids) {
- # FIXME: may be abused to prevent archival
- length($mid) > MAX_MID_SIZE and
- die 'Message-ID too long';
- $doc->add_term('Q' . $mid);
- }
my $subj = $smsg->subject;
my $xpath;
if ($subj ne '') {
my $subj = $smsg->subject;
my $xpath;
if ($subj ne '') {
if ($skel) {
push @values, $mids, $xpath, $data;
$skel->index_skeleton(\@values);
if ($skel) {
push @values, $mids, $xpath, $data;
$skel->index_skeleton(\@values);
+ $doc->add_boolean_term('Q' . $_) foreach @$mids;
$doc_id = $self->{xdb}->add_document($doc);
} else {
$doc_id = link_and_save($self, $doc, $mids, $refs,
$doc_id = $self->{xdb}->add_document($doc);
} else {
$doc_id = link_and_save($self, $doc, $mids, $refs,
my %mids = map { $_ => 1 } @{mids($hdr)};
my @keep;
foreach my $ref (@$refs) {
my %mids = map { $_ => 1 } @{mids($hdr)};
my @keep;
foreach my $ref (@$refs) {
- # FIXME: this is an archive-prevention vector like X-No-Archive
- if (length($ref) > MAX_MID_SIZE) {
+ if (length($ref) > PublicInbox::MID::MAX_MID_SIZE) {
warn "References: <$ref> too long, ignoring\n";
warn "References: <$ref> too long, ignoring\n";
}
next if $mids{$ref};
push @keep, $ref;
}
next if $mids{$ref};
push @keep, $ref;
my $doc_id;
$doc->add_boolean_term('XNUM' . $num) if defined $num;
$doc->add_boolean_term('XPATH' . $xpath) if defined $xpath;
my $doc_id;
$doc->add_boolean_term('XNUM' . $num) if defined $num;
$doc->add_boolean_term('XPATH' . $xpath) if defined $xpath;
+ $doc->add_boolean_term('Q' . $_) foreach @$mids;
+
my $vivified = 0;
foreach my $mid (@$mids) {
$self->each_smsg_by_mid($mid, sub {
my $vivified = 0;
foreach my $mid (@$mids) {
$self->each_smsg_by_mid($mid, sub {
my $ts = $values->[PublicInbox::Search::TS];
my $smsg = PublicInbox::SearchMsg->new(undef);
my $doc = $smsg->{doc};
my $ts = $values->[PublicInbox::Search::TS];
my $smsg = PublicInbox::SearchMsg->new(undef);
my $doc = $smsg->{doc};
- foreach my $mid (@$mids) {
- $doc->add_term('Q' . $mid);
- }
PublicInbox::SearchIdx::add_values($doc, $values);
$doc->set_data($doc_data);
$smsg->{ts} = $ts;
PublicInbox::SearchIdx::add_values($doc, $values);
$doc->set_data($doc_data);
$smsg->{ts} = $ts;