Since we only query the SQLite over DB for OVER/XOVER, we do not
need to waste space in Xapian storing the To/Cc/:bytes/:lines
fields or the XNUM term. We only use From/Subject/References/
Message-ID/:blob in various places of the PSGI code.
For reindexing, we now use the NNTP article number as the Xapian
docid and replace the document in place, and we take advantage of
docid stability in "xapian-compact --no-renumber" to ensure
duplicates do not show up in search results. Since the PSGI
interface is the only consumer of Xapian at the moment, there is
no need to search Xapian based on NNTP article number.
my $nr = scalar @$msgs or return;
my $tmp = '';
foreach my $s (@$msgs) {
my $nr = scalar @$msgs or return;
my $tmp = '';
foreach my $s (@$msgs) {
- $tmp .= $s->num . ' ' . $s->$field . "\r\n";
+ $tmp .= $s->{num} . ' ' . $s->$field . "\r\n";
}
utf8::encode($tmp);
do_more($self, $tmp);
}
utf8::encode($tmp);
do_more($self, $tmp);
-sub parse_references ($$$$) {
- my ($self, $smsg, $mid0, $mids) = @_;
+sub parse_references ($$$) {
+ my ($smsg, $mid0, $mids) = @_;
my $mime = $smsg->{mime};
my $hdr = $mime->header_obj;
my $refs = references($hdr);
my $mime = $smsg->{mime};
my $hdr = $mime->header_obj;
my $refs = references($hdr);
blob => $oid,
}, 'PublicInbox::SearchMsg';
my $mids = mids($mime->header_obj);
blob => $oid,
}, 'PublicInbox::SearchMsg';
my $mids = mids($mime->header_obj);
- my $refs = $self->parse_references($smsg, $mid0, $mids);
+ my $refs = parse_references($smsg, $mid0, $mids);
my $subj = $smsg->subject;
my $xpath;
if ($subj ne '') {
my $subj = $smsg->subject;
my $xpath;
if ($subj ne '') {
my $smsg = PublicInbox::SearchMsg->new($mime);
my $doc = $smsg->{doc};
my $subj = $smsg->subject;
my $smsg = PublicInbox::SearchMsg->new($mime);
my $doc = $smsg->{doc};
my $subj = $smsg->subject;
-
- $smsg->{lines} = $mime->body_raw =~ tr!\n!\n!;
- defined $bytes or $bytes = length($mime->as_string);
- $smsg->{bytes} = $bytes;
-
add_val($doc, PublicInbox::Search::TS(), $smsg->ts);
my @ds = gmtime($smsg->ds);
my $yyyymmdd = strftime('%Y%m%d', @ds);
add_val($doc, PublicInbox::Search::YYYYMMDD(), $yyyymmdd);
my $dt = strftime('%Y%m%d%H%M%S', @ds);
add_val($doc, PublicInbox::Search::DT(), $dt);
add_val($doc, PublicInbox::Search::TS(), $smsg->ts);
my @ds = gmtime($smsg->ds);
my $yyyymmdd = strftime('%Y%m%d', @ds);
add_val($doc, PublicInbox::Search::YYYYMMDD(), $yyyymmdd);
my $dt = strftime('%Y%m%d%H%M%S', @ds);
add_val($doc, PublicInbox::Search::DT(), $dt);
- my @vals = ($smsg->{ts}, $smsg->{ds});
my $tg = $self->term_generator;
my $tg = $self->term_generator;
index_body($tg, \@orig, $doc) if @orig;
});
index_body($tg, \@orig, $doc) if @orig;
});
- # populates smsg->references for smsg->to_doc_data
- my $data = $smsg->to_doc_data($oid, $mid0);
foreach my $mid (@$mids) {
$tg->index_text($mid, 1, 'XM');
}
foreach my $mid (@$mids) {
$tg->index_text($mid, 1, 'XM');
}
+ $smsg->{to} = $smsg->{cc} = '';
+ my $data = $smsg->to_doc_data($oid, $mid0);
$doc->set_data($data);
if (my $altid = $self->{-altid}) {
foreach my $alt (@$altid) {
$doc->set_data($data);
if (my $altid = $self->{-altid}) {
foreach my $alt (@$altid) {
- $self->delete_article($num) if defined $num; # for reindexing
-
if (my $over = $self->{over}) {
if (my $over = $self->{over}) {
- utf8::encode($data);
- $data = compress($data);
- my $refs = $over->parse_references($smsg, $mid0, $mids);
- my $xpath;
- if ($subj ne '') {
- $xpath = $self->subject_path($subj);
- $xpath = id_compress($xpath);
- }
-
- push @vals, $num, $mids, $refs, $xpath, $data;
- $over->add_over(\@vals);
+ $over->add_overview($mime, $bytes, $num, $oid, $mid0);
}
$doc->add_boolean_term('Q' . $_) foreach @$mids;
}
$doc->add_boolean_term('Q' . $_) foreach @$mids;
- $doc->add_boolean_term('XNUM' . $num) if defined $num;
- $doc_id = $self->{xdb}->add_document($doc);
+ $self->{xdb}->replace_document($doc_id = $num, $doc);
-sub delete_article {
- my ($self, $num) = @_;
- my $ndel = 0;
- batch_do($self, 'XNUM' . $num, sub {
- my ($ids) = @_;
- $ndel += scalar @$ids;
- $self->{xdb}->delete_document($_) for @$ids;
- });
-}
-
# MID is a hint in V2
sub remove_by_oid {
my ($self, $oid, $mid) = @_;
# MID is a hint in V2
sub remove_by_oid {
my ($self, $oid, $mid) = @_;
- $self->{bytes},
- $self->{lines}
+ $self->{bytes} || '',
+ $self->{lines} || ''
sub load_from_data ($$) {
my ($self) = $_[0]; # data = $_[1]
(
sub load_from_data ($$) {
my ($self) = $_[0]; # data = $_[1]
(
# :bytes and :lines metadata in RFC 3977
sub bytes ($) { $_[0]->{bytes} }
sub lines ($) { $_[0]->{lines} }
# :bytes and :lines metadata in RFC 3977
sub bytes ($) { $_[0]->{bytes} }
sub lines ($) { $_[0]->{lines} }
-sub num ($) { $_[0]->{num} ||= _get_term_val($_[0], 'XNUM', qr/\AXNUM/) }
sub __hdr ($$) {
my ($self, $field) = @_;
sub __hdr ($$) {
my ($self, $field) = @_;
my %gone;
my ($id, $prev);
while (my $smsg = $over->next_by_mid($mid, \$id, \$prev)) {
my %gone;
my ($id, $prev);
while (my $smsg = $over->next_by_mid($mid, \$id, \$prev)) {
- $gone{$smsg->num} = 1 if $oid eq $smsg->{blob};
+ $gone{$smsg->{num}} = 1 if $oid eq $smsg->{blob};
1; # continue
}
my $n = scalar keys %gone;
1; # continue
}
my $n = scalar keys %gone;
$im->lock_release;
remove_tree("$old/old") or die "failed to remove $old/old: $!\n";
}
$im->lock_release;
remove_tree("$old/old") or die "failed to remove $old/old: $!\n";
}
+my @compact = qw(xapian-compact --no-renumber);
if ($v == 2) {
require PublicInbox::V2Writable;
my $v2w = PublicInbox::V2Writable->new($ibx);
if ($v == 2) {
require PublicInbox::V2Writable;
my $v2w = PublicInbox::V2Writable->new($ibx);
}
close $dh;
die "No Xapian parts found in $old\n" unless @parts;
}
close $dh;
die "No Xapian parts found in $old\n" unless @parts;
- my $cmd = ['xapian-compact', @parts, "$new/0" ];
+ my $cmd = [@compact, @parts, "$new/0" ];
PublicInbox::Import::run_die($cmd);
commit_changes($v2w, $old, $new);
});
PublicInbox::Import::run_die($cmd);
commit_changes($v2w, $old, $new);
});
my $new = tempdir('compact-XXXXXXXX', CLEANUP => 1, DIR => $v1_root);
$ibx->with_umask(sub {
$im->lock_acquire;
my $new = tempdir('compact-XXXXXXXX', CLEANUP => 1, DIR => $v1_root);
$ibx->with_umask(sub {
$im->lock_acquire;
- PublicInbox::Import::run_die(['xapian-compact', $old, $new]);
+ PublicInbox::Import::run_die([@compact, $old, $new]);
commit_changes($im, $old, $new);
});
} else {
commit_changes($im, $old, $new);
});
} else {
- my $res = $ro->query('t:list@example.com');
- is(scalar @$res, 6, 'searched To: successfully');
- foreach my $smsg (@$res) {
+ my $mset = $ro->query('t:list@example.com', {mset => 1});
+ is($mset->size, 6, 'searched To: successfully');
+ foreach my $m ($mset->items) {
+ my $smsg = $ro->lookup_article($m->get_docid);
like($smsg->to, qr/\blist\@example\.com\b/, 'to appears');
}
like($smsg->to, qr/\blist\@example\.com\b/, 'to appears');
}
- $res = $ro->query('tc:list@example.com');
- is(scalar @$res, 6, 'searched To+Cc: successfully');
- foreach my $smsg (@$res) {
+ $mset = $ro->query('tc:list@example.com', {mset => 1});
+ is($mset->size, 6, 'searched To+Cc: successfully');
+ foreach my $m ($mset->items) {
+ my $smsg = $ro->lookup_article($m->get_docid);
my $tocc = join("\n", $smsg->to, $smsg->cc);
like($tocc, qr/\blist\@example\.com\b/, 'tocc appears');
}
foreach my $pfx ('tcf:', 'c:') {
my $tocc = join("\n", $smsg->to, $smsg->cc);
like($tocc, qr/\blist\@example\.com\b/, 'tocc appears');
}
foreach my $pfx ('tcf:', 'c:') {
- $res = $ro->query($pfx . 'foo@example.com');
- is(scalar @$res, 1,
- "searched $pfx successfully for Cc:");
- foreach my $smsg (@$res) {
+ my $mset = $ro->query($pfx . 'foo@example.com', { mset => 1 });
+ is($mset->items, 1, "searched $pfx successfully for Cc:");
+ foreach my $m ($mset->items) {
+ my $smsg = $ro->lookup_article($m->get_docid);
like($smsg->cc, qr/\bfoo\@example\.com\b/,
'cc appears');
}
}
foreach my $pfx ('', 'tcf:', 'f:') {
like($smsg->cc, qr/\bfoo\@example\.com\b/,
'cc appears');
}
}
foreach my $pfx ('', 'tcf:', 'f:') {
- $res = $ro->query($pfx . 'Laggy');
+ my $res = $ro->query($pfx . 'Laggy');
is(scalar(@$res), 1,
"searched $pfx successfully for From:");
foreach my $smsg (@$res) {
is(scalar(@$res), 1,
"searched $pfx successfully for From:");
foreach my $smsg (@$res) {
'commit message propagated to git');
is_deeply(\@after, \@before, 'only one commit written to git');
is($ibx->mm->num_for($smsg->mid), undef, 'no longer in Msgmap by mid');
'commit message propagated to git');
is_deeply(\@after, \@before, 'only one commit written to git');
is($ibx->mm->num_for($smsg->mid), undef, 'no longer in Msgmap by mid');
- like($smsg->num, qr/\A\d+\z/, 'numeric number in return message');
- is($ibx->mm->mid_for($smsg->num), undef, 'no longer in Msgmap by num');
+ my $num = $smsg->{num};
+ like($num, qr/\A\d+\z/, 'numeric number in return message');
+ is($ibx->mm->mid_for($num), undef, 'no longer in Msgmap by num');
my $srch = $ibx->search->reopen;
my $mset = $srch->query('m:'.$smsg->mid, { mset => 1});
is($mset->size, 0, 'no longer found in Xapian');
my @log1 = qw(log -1 --pretty=raw --raw -r --no-abbrev --no-renames);
my $srch = $ibx->search->reopen;
my $mset = $srch->query('m:'.$smsg->mid, { mset => 1});
is($mset->size, 0, 'no longer found in Xapian');
my @log1 = qw(log -1 --pretty=raw --raw -r --no-abbrev --no-renames);
- is($srch->{over_ro}->get_art($smsg->num), undef,
+ is($srch->{over_ro}->get_art($num), undef,
'removal propagated to Over DB');
my $after = $git0->qx(@log1);
'removal propagated to Over DB');
my $after = $git0->qx(@log1);