use base qw(PublicInbox::Search PublicInbox::Lock);
use PublicInbox::MIME;
use PublicInbox::InboxWritable;
-use PublicInbox::MID qw/mid_clean id_compress mid_mime mids references/;
+use PublicInbox::MID qw/mid_clean id_compress mid_mime mids/;
use PublicInbox::MsgIter;
use Carp qw(croak);
use POSIX qw(strftime);
my $smsg = PublicInbox::SearchMsg->new($mime);
my $doc = $smsg->{doc};
my $subj = $smsg->subject;
- my $xpath;
- if ($subj ne '') {
- $xpath = $self->subject_path($subj);
- $xpath = id_compress($xpath);
- }
-
- $smsg->{lines} = $mime->body_raw =~ tr!\n!\n!;
- defined $bytes or $bytes = length($mime->as_string);
- $smsg->{bytes} = $bytes;
add_val($doc, PublicInbox::Search::TS(), $smsg->ts);
- my $yyyymmdd = strftime('%Y%m%d', gmtime($smsg->ds));
- add_val($doc, PublicInbox::Search::YYYYMMDD, $yyyymmdd);
+ my @ds = gmtime($smsg->ds);
+ my $yyyymmdd = strftime('%Y%m%d', @ds);
+ add_val($doc, PublicInbox::Search::YYYYMMDD(), $yyyymmdd);
+ my $dt = strftime('%Y%m%d%H%M%S', @ds);
+ add_val($doc, PublicInbox::Search::DT(), $dt);
my $tg = $self->term_generator;
index_body($tg, \@orig, $doc) if @orig;
});
- # populates smsg->references for smsg->to_doc_data
- my $refs = parse_references($smsg);
- my $data = $smsg->to_doc_data($oid, $mid0);
foreach my $mid (@$mids) {
$tg->index_text($mid, 1, 'XM');
}
+ $smsg->{to} = $smsg->{cc} = '';
+ my $data = $smsg->to_doc_data($oid, $mid0);
$doc->set_data($data);
if (my $altid = $self->{-altid}) {
foreach my $alt (@$altid) {
}
}
- $self->delete_article($num) if defined $num; # for reindexing
-
- utf8::encode($data);
- $data = compress($data);
- my @vals = ($smsg->ts, $num, $mids, $refs, $xpath, $data);
- $self->{over}->add_over(\@vals);
+ if (my $over = $self->{over}) {
+ $over->add_overview($mime, $bytes, $num, $oid, $mid0);
+ }
$doc->add_boolean_term('Q' . $_) foreach @$mids;
- $doc->add_boolean_term('XNUM' . $num) if defined $num;
- $doc_id = $self->{xdb}->add_document($doc);
+ $self->{xdb}->replace_document($doc_id = $num, $doc);
};
if ($@) {
}
}
-sub delete_article {
- my ($self, $num) = @_;
- my $ndel = 0;
- batch_do($self, 'XNUM' . $num, sub {
- my ($ids) = @_;
- $ndel += scalar @$ids;
- $self->{xdb}->delete_document($_) for @$ids;
- });
-}
-
# MID is a hint in V2
#
# Deletes every document visited by the loop below whose loaded
# $smsg->{blob} equals $oid.
#
# Arguments:
#   $oid - blob object ID compared (string equality) with $smsg->{blob}
#   $mid - message ID; also handed to the overview's remove_oid
#
# When $self->{over} is set, the same ($oid, $mid) pair is passed to its
# remove_oid method.
#
# Returns the number of documents deleted from $self->{xdb}.
#
# NOTE(review): the statements which initialize $head, $tail and $smsg are
# not visible in this excerpt; confirm them against the complete file.
sub remove_by_oid {
my ($self, $oid, $mid) = @_;
my $db = $self->{xdb};
+ $self->{over}->remove_oid($oid, $mid) if $self->{over};
+
# XXX careful, we cannot use batch_do here since we conditionally
# delete documents based on other factors, so we cannot call
# find_doc_ids twice.
# there is only ONE element in @delete unless we
# have bugs in our v2writable deduplication check
my @delete;
- my @over_del;
for (; $head != $tail; $head->inc) {
my $docid = $head->get_docid;
my $doc = $db->get_document($docid);
$smsg->load_expand;
if ($smsg->{blob} eq $oid) {
push(@delete, $docid);
- push(@over_del, $smsg->num);
}
}
# matching docids are only collected inside the loop; the actual
# deletion happens here, once the iteration has finished
$db->delete_document($_) foreach @delete;
- $self->{over}->remove_oid($oid, $mid);
# the return value is the count of documents deleted above
scalar(@delete);
}
$self->{term_generator} = $tg;
}
-sub parse_references ($) {
- my ($smsg) = @_;
- my $mime = $smsg->{mime};
- my $hdr = $mime->header_obj;
- my $refs = references($hdr);
- return $refs if scalar(@$refs) == 0;
-
- # prevent circular references via References here:
- my %mids = map { $_ => 1 } @{mids($hdr)};
- my @keep;
- foreach my $ref (@$refs) {
- if (length($ref) > PublicInbox::MID::MAX_MID_SIZE) {
- warn "References: <$ref> too long, ignoring\n";
- next;
- }
- next if $mids{$ref};
- push @keep, $ref;
- }
- $smsg->{references} = '<'.join('> <', @keep).'>' if @keep;
- \@keep;
-}
-
sub index_git_blob_id {
my ($doc, $pfx, $objid) = @_;
--raw -r --no-abbrev/, $range);
}
-# indexes all unindexed messages
+# indexes all unindexed messages (v1 only)
sub _index_sync {
my ($self, $opts) = @_;
my $tip = $opts->{ref} || 'HEAD';
my ($self) = @_;
return if $self->{txn};
my $xdb = $self->{xdb} || $self->_xdb_acquire;
- $self->{over}->begin_lazy;
+ $self->{over}->begin_lazy if $self->{over};
$xdb->begin_transaction;
$self->{txn} = 1;
$xdb;
my ($self) = @_;
delete $self->{txn} or return;
$self->{xdb}->commit_transaction;
- $self->{over}->commit_lazy;
+ $self->{over}->commit_lazy if $self->{over};
}
sub worker_done {