We can pass blessed PublicInbox::Smsg objects to internal
indexing APIs instead of having long parameter lists in some
places. The end goal is to avoid redundantly re-parsing the same
information at each step of the way and, hopefully, to make things
more understandable.
- my ($self, $mime, $bytes, $num, $oid, $mid0, $times) = @_;
- my $lines = $mime->body_raw =~ tr!\n!\n!;
- my $smsg = bless {
- mime => $mime,
- mid => $mid0,
- bytes => $bytes,
- lines => $lines,
- blob => $oid,
- }, 'PublicInbox::Smsg';
+ my ($self, $mime, $smsg, $times) = @_;
+ $smsg->{lines} = $mime->body_raw =~ tr!\n!\n!;
+ $smsg->{mime} = $mime; # XXX temporary?
my $hdr = $mime->header_obj;
my $mids = mids_for_index($hdr);
my $refs = parse_references($smsg, $hdr, $mids);
my $hdr = $mime->header_obj;
my $mids = mids_for_index($hdr);
my $refs = parse_references($smsg, $hdr, $mids);
$dd = compress($dd);
my $ds = msg_timestamp($hdr, $times->{autime});
my $ts = msg_datestamp($hdr, $times->{cotime});
$dd = compress($dd);
my $ds = msg_timestamp($hdr, $times->{autime});
my $ts = msg_datestamp($hdr, $times->{cotime});
- my $values = [ $ts, $ds, $num, $mids, $refs, $xpath, $dd ];
+ my $values = [ $ts, $ds, $smsg->{num}, $mids, $refs, $xpath, $dd ];
add_over($self, $values);
}
add_over($self, $values);
}
index_body($self, $_, /\A>/ ? 0 : $doc) for @sections;
}
index_body($self, $_, /\A>/ ? 0 : $doc) for @sections;
}
-sub add_xapian ($$$$$$) {
- my ($self, $mime, $num, $oid, $mids, $mid0) = @_;
- my $smsg = PublicInbox::Smsg->new($mime);
+sub add_xapian ($$$$) {
+ my ($self, $mime, $smsg, $mids) = @_;
+ $smsg->{mime} = $mime; # XXX dangerous
my $hdr = $mime->header_obj;
$smsg->{ds} = msg_datestamp($hdr, $self->{autime});
$smsg->{ts} = msg_timestamp($hdr, $self->{cotime});
my $hdr = $mime->header_obj;
$smsg->{ds} = msg_datestamp($hdr, $self->{autime});
$smsg->{ts} = msg_timestamp($hdr, $self->{cotime});
index_text($self, join(' ', @long), 1, 'XM');
}
}
index_text($self, join(' ', @long), 1, 'XM');
}
}
- $smsg->{to} = $smsg->{cc} = '';
- $smsg->{blob} = $oid;
- $smsg->{mid} = $mid0;
+ $smsg->{to} = $smsg->{cc} = ''; # WWW doesn't need these, only NNTP
PublicInbox::OverIdx::parse_references($smsg, $hdr, $mids);
my $data = $smsg->to_doc_data;
$doc->set_data($data);
PublicInbox::OverIdx::parse_references($smsg, $hdr, $mids);
my $data = $smsg->to_doc_data;
$doc->set_data($data);
}
}
$doc->add_boolean_term('Q' . $_) foreach @$mids;
}
}
$doc->add_boolean_term('Q' . $_) foreach @$mids;
- $self->{xdb}->replace_document($num, $doc);
+ $self->{xdb}->replace_document($smsg->{num}, $doc);
sub add_message {
# mime = Email::MIME object
sub add_message {
# mime = Email::MIME object
- my ($self, $mime, $bytes, $num, $oid, $mid0) = @_;
+ my ($self, $mime, $smsg) = @_;
my $mids = mids_for_index($mime->header_obj);
my $mids = mids_for_index($mime->header_obj);
- $mid0 //= $mids->[0]; # v1 compatibility
- $num //= do { # v1
+ $smsg //= bless { blob => '' }, 'PublicInbox::Smsg'; # test-only compat
+ $smsg->{mid} //= $mids->[0]; # v1 compatibility
+ $smsg->{num} //= do { # v1
_msgmap_init($self);
index_mm($self, $mime);
};
eval {
_msgmap_init($self);
index_mm($self, $mime);
};
eval {
- if (need_xapian($self)) {
- add_xapian($self, $mime, $num, $oid, $mids, $mid0);
+ # order matters, overview stores every possible piece of
+ # data in doc_data (deflated). Xapian only stores a subset
+ # of the fields which exist in over.sqlite3. We may stop
+ # storing doc_data in Xapian sometime after we get multi-inbox
+ # search working.
+ if (my $over = $self->{over}) { # v1 only
+ $over->add_overview($mime, $smsg, $self);
- if (my $over = $self->{over}) {
- $over->add_overview($mime, $bytes, $num, $oid, $mid0,
- $self);
+ if (need_xapian($self)) {
+ add_xapian($self, $mime, $smsg, $mids);
warn "failed to index message <".join('> <',@$mids).">: $@\n";
return undef;
}
warn "failed to index message <".join('> <',@$mids).">: $@\n";
return undef;
}
}
# returns begin and end PostingIterator
}
# returns begin and end PostingIterator
- my ($self, $mime, $bytes, $blob) = @_;
+ my ($self, $mime, $smsg) = @_;
my $num = index_mm($self, $mime);
my $num = index_mm($self, $mime);
- add_message($self, $mime, $bytes, $num, $blob);
+ $smsg->{num} = $num;
+ add_message($self, $mime, $smsg);
next;
}
my $mime = do_cat_mail($git, $blob, \$bytes) or next;
next;
}
my $mime = do_cat_mail($git, $blob, \$bytes) or next;
+ my $smsg = bless {}, 'PublicInbox::Smsg';
batch_adjust(\$max, $bytes, $batch_cb, $latest, ++$nr);
batch_adjust(\$max, $bytes, $batch_cb, $latest, ++$nr);
- $add_cb->($self, $mime, $bytes, $blob);
+ $smsg->{blob} = $blob;
+ $smsg->{bytes} = $bytes;
+ $add_cb->($self, $mime, $smsg);
} elsif ($line =~ /$delmsg/o) {
my $blob = $1;
$D{$blob} = 1;
} elsif ($line =~ /$delmsg/o) {
my $blob = $1;
$D{$blob} = 1;
$artnum = int($artnum);
$self->{autime} = $autime;
$self->{cotime} = $cotime;
$artnum = int($artnum);
$self->{autime} = $autime;
$self->{cotime} = $cotime;
- $self->add_message($mime, $n, $artnum, $oid, $mid0);
+ my $smsg = bless {
+ bytes => $len,
+ num => $artnum,
+ blob => $oid,
+ mid => $mid0,
+ }, 'PublicInbox::Smsg';
+ $self->add_message($mime, $smsg);
$self->begin_txn_lazy;
$self->{autime} = $at;
$self->{cotime} = $ct;
$self->begin_txn_lazy;
$self->{autime} = $at;
$self->{cotime} = $ct;
- $self->add_message($mime, $bytes, $artnum, $oid, $mid0);
+ my $smsg = bless {
+ bytes => $bytes,
+ num => $artnum,
+ blob => $oid,
+ mid => $mid0,
+ }, 'PublicInbox::Smsg';
+ $self->add_message($mime, $smsg);
# indexes a message, returns true if checkpointing is needed
sub do_idx ($$$$$$$) {
my ($self, $msgref, $mime, $len, $num, $oid, $mid0) = @_;
# indexes a message, returns true if checkpointing is needed
sub do_idx ($$$$$$$) {
my ($self, $msgref, $mime, $len, $num, $oid, $mid0) = @_;
- $self->{over}->add_overview($mime, $len, $num, $oid, $mid0, $self);
+ my $smsg = bless {
+ bytes => $len,
+ num => $num,
+ blob => $oid,
+ mid => $mid0,
+ }, 'PublicInbox::Smsg';
+ $self->{over}->add_overview($mime, $smsg, $self);
my $idx = idx_shard($self, $num % $self->{shards});
$idx->index_raw($len, $msgref, $num, $oid, $mid0, $mime, $self);
my $n = $self->{transact_bytes} += $len;
my $idx = idx_shard($self, $num % $self->{shards});
$idx->index_raw($len, $msgref, $num, $oid, $mid0, $mime, $self);
my $n = $self->{transact_bytes} += $len;
use Email::MIME;
require_mods(qw(DBD::SQLite Search::Xapian));
require PublicInbox::SearchIdx;
use Email::MIME;
require_mods(qw(DBD::SQLite Search::Xapian));
require PublicInbox::SearchIdx;
+require PublicInbox::Smsg;
require PublicInbox::Inbox;
my ($tmpdir, $for_destroy) = tmpdir();
my $git_dir = "$tmpdir/a.git";
require PublicInbox::Inbox;
my ($tmpdir, $for_destroy) = tmpdir();
my $git_dir = "$tmpdir/a.git";
$mime->header_set('To' => 'git@vger.kernel.org');
my $bytes = bytes::length($mime->as_string);
my $mid = mids($mime->header_obj)->[0];
$mime->header_set('To' => 'git@vger.kernel.org');
my $bytes = bytes::length($mime->as_string);
my $mid = mids($mime->header_obj)->[0];
- my $doc_id = $rw->add_message($mime, $bytes, ++$num, 'ignored', $mid);
+ my $smsg = bless {
+ bytes => $bytes,
+ num => ++$num,
+ mid => $mid,
+ blob => '',
+ }, 'PublicInbox::Smsg';
+ my $doc_id = $rw->add_message($mime, $smsg);
push @mids, $mid;
ok($doc_id, 'message added: '. $mid);
}
push @mids, $mid;
ok($doc_id, 'message added: '. $mid);
}
my $bytes = bytes::length($mime->as_string);
my $mid = mids($mime->header_obj)->[0];
my $bytes = bytes::length($mime->as_string);
my $mid = mids($mime->header_obj)->[0];
- my $doc_id = $rw->add_message($mime, $bytes, $num, 'ignored', $mid);
+ my $smsg = bless {
+ bytes => $bytes,
+ num => $num,
+ mid => $mid,
+ blob => '',
+ }, 'PublicInbox::Smsg';
+ my $doc_id = $rw->add_message($mime, $smsg);
ok($doc_id, 'message reindexed'. $mid);
is($doc_id, $num, "article number unchanged: $num");
ok($doc_id, 'message reindexed'. $mid);
is($doc_id, $num, "article number unchanged: $num");