use PublicInbox::Msgmap;
use PublicInbox::Spawn qw(spawn popen_rd);
use PublicInbox::SearchIdx;
+use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
+use PublicInbox::MultiMidQueue;
use IO::Handle; # ->autoflush
use File::Temp qw(tempfile);
last_commit => [], # git repo -> commit
};
$self->{shards} = count_shards($self) || nproc_shards($creat);
+ $self->{index_max_size} = $v2ibx->{index_max_size};
bless $self, $class;
}
}
# indexes a message, returns true if checkpointing is needed
-sub do_idx ($$$$$$$) {
- my ($self, $msgref, $mime, $len, $num, $oid, $mid0) = @_;
- $self->{over}->add_overview($mime, $len, $num, $oid, $mid0, $self);
- my $idx = idx_shard($self, $num % $self->{shards});
- $idx->index_raw($len, $msgref, $num, $oid, $mid0, $mime, $self);
- my $n = $self->{transact_bytes} += $len;
+sub do_idx ($$$$) {
+ my ($self, $msgref, $mime, $smsg) = @_;
+ $smsg->{ds} //= msg_datestamp($mime->header_obj, $self->{autime});
+ $smsg->{ts} //= msg_timestamp($mime->header_obj, $self->{cotime});
+ $self->{over}->add_overview($mime, $smsg);
+ my $idx = idx_shard($self, $smsg->{num} % $self->{shards});
+ $idx->index_raw($msgref, $mime, $smsg);
+ my $n = $self->{transact_bytes} += $smsg->{bytes};
$n >= (PublicInbox::SearchIdx::BATCH_BYTES * $self->{shards});
}
defined $num or return; # duplicate
defined $mid0 or die "BUG: $mid0 undefined\n";
my $im = $self->importer;
- my $cmt = $im->add($mime);
+ my $smsg = bless { mid => $mid0, num => $num }, 'PublicInbox::Smsg';
+ my $cmt = $im->add($mime, undef, $smsg); # sets $smsg->{ds|ts|blob}
$cmt = $im->get_mark($cmt);
$self->{last_commit}->[$self->{epoch_max}] = $cmt;
- my ($oid, $len, $msgref) = @{$im->{last_object}};
- if (do_idx($self, $msgref, $mime, $len, $num, $oid, $mid0)) {
+ my $msgref = delete $smsg->{-raw_email};
+ if (do_idx($self, $msgref, $mime, $smsg)) {
$self->checkpoint;
}
}
# make sure we really got the OID:
- my ($oid, $type, $len) = $self->{-inbox}->git->check($expect_oid);
- $oid eq $expect_oid or die "BUG: $expect_oid not found after replace";
+ my ($blob, $type, $bytes) = $self->{-inbox}->git->check($expect_oid);
+ $blob eq $expect_oid or die "BUG: $expect_oid not found after replace";
# don't leak FDs to Xapian:
$self->{-inbox}->git->cleanup;
# reindex modified messages:
for my $smsg (@$need_reindex) {
- my $num = $smsg->{num};
- my $mid0 = $smsg->{mid};
- do_idx($self, \$raw, $new_mime, $len, $num, $oid, $mid0);
+ my $new_smsg = bless {
+ blob => $blob,
+ bytes => $bytes,
+ num => $smsg->{num},
+ mid => $smsg->{mid},
+ }, 'PublicInbox::Smsg';
+ do_idx($self, \$raw, $new_mime, $new_smsg);
}
$rewritten->{rewrites};
}
sub git_init {
my ($self, $epoch) = @_;
my $git_dir = "$self->{-inbox}->{inboxdir}/git/$epoch.git";
- my @cmd = (qw(git init --bare -q), $git_dir);
- PublicInbox::Import::run_die(\@cmd);
- @cmd = (qw/git config/, "--file=$git_dir/config",
+ PublicInbox::Import::init_bare($git_dir);
+ my @cmd = (qw/git config/, "--file=$git_dir/config",
'include.path', '../../all.git/config');
PublicInbox::Import::run_die(\@cmd);
fill_alternates($self, $epoch);
my ($self, $git, $packed_bytes, $tmp) = @_;
my $im = PublicInbox::Import->new($git, undef, undef, $self->{-inbox});
$im->{bytes_added} = int($packed_bytes / $PACKING_FACTOR);
- $im->{want_object_info} = 1;
$im->{lock_path} = undef;
$im->{path_type} = 'v2';
$self->{im} = $im unless $tmp;
sub mark_deleted ($$$$) {
my ($self, $sync, $git, $oid) = @_;
+ return if PublicInbox::SearchIdx::too_big($self, $git, $oid);
my $msgref = $git->cat_file($oid);
my $mime = PublicInbox::MIME->new($$msgref);
my $mids = mids($mime->header_obj);
}
}
$sync->{nr}++;
- if (do_idx($self, $msgref, $mime, $len, $num, $oid, $mid0)) {
+ my $smsg = bless {
+ bytes => $len,
+ num => $num,
+ blob => $oid,
+ mid => $mid0,
+ }, 'PublicInbox::Smsg';
+ if (do_idx($self, $msgref, $mime, $smsg)) {
reindex_checkpoint($self, $sync, $git);
}
}
}
}
-# reuse Msgmap to store num => oid mapping (rather than num => mid)
-sub multi_mid_q_new () {
- my ($fh, $fn) = tempfile('multi_mid-XXXXXXX', EXLOCK => 0, TMPDIR => 1);
- my $multi_mid = PublicInbox::Msgmap->new_file($fn, 1);
- $multi_mid->{dbh}->do('PRAGMA synchronous = OFF');
- # for Msgmap->DESTROY:
- $multi_mid->{tmp_name} = $fn;
- $multi_mid->{pid} = $$;
- close $fh or die "failed to close $fn: $!";
- $multi_mid
-}
-
-sub multi_mid_q_push ($$) {
- my ($sync, $oid) = @_;
- my $multi_mid = $sync->{multi_mid} //= multi_mid_q_new();
+sub multi_mid_q_push ($$$) {
+ my ($self, $sync, $oid) = @_;
+ my $multi_mid = $sync->{multi_mid} //= PublicInbox::MultiMidQueue->new;
if ($sync->{reindex}) { # no regen on reindex
- $multi_mid->mid_insert($oid);
+ $multi_mid->push_oid($oid, $self);
} else {
my $num = $sync->{regen}--;
die "BUG: ran out of article numbers" if $num <= 0;
- $multi_mid->mid_set($num, $oid);
+ $multi_mid->set_oid($num, $oid, $self);
}
}
sub reindex_oid ($$$$) {
my ($self, $sync, $git, $oid) = @_;
+ return if PublicInbox::SearchIdx::too_big($self, $git, $oid);
my ($num, $mid0, $len);
my $msgref = $git->cat_file($oid, \$len);
return if $len == 0; # purged
# do not delete from {mm_tmp}, since another
# single-MID message may use it.
} else { # handle them at the end:
- multi_mid_q_push($sync, $oid);
+ multi_mid_q_push($self, $sync, $oid);
}
return;
}
$sync->{mm_tmp}->mid_delete($mid0) or
die "failed to delete <$mid0> for article #$num\n";
$sync->{nr}++;
- if (do_idx($self, $msgref, $mime, $len, $num, $oid, $mid0)) {
+ my $smsg = bless {
+ bytes => $len,
+ num => $num,
+ blob => $oid,
+ mid => $mid0,
+ }, 'PublicInbox::Smsg';
+ if (do_idx($self, $msgref, $mime, $smsg)) {
reindex_checkpoint($self, $sync, $git);
}
}
}
if (my $multi_mid = delete $sync->{multi_mid}) {
$git //= $self->{-inbox}->git;
- my ($min, $max) = $multi_mid->minmax;
+ my $min = $multi_mid->{min};
+ my $max = $multi_mid->{max};
if ($sync->{reindex}) {
# we may need to create new Message-IDs if mirrors
# were initially indexed with old versions
for (my $i = $max; $i >= $min; $i--) {
- my $oid = $multi_mid->mid_for($i);
+ my $oid;
+ $oid = $multi_mid->get_oid($i, $self) or next;
next unless defined $oid;
reindex_oid_m($self, $sync, $git, $oid);
}
} else { # regen on initial index
for my $num ($min..$max) {
- my $oid = $multi_mid->mid_for($num);
- next unless defined $oid;
+ my $oid;
+ $oid = $multi_mid->get_oid($num, $self) or next;
reindex_oid_m($self, $sync, $git, $oid, $num);
}
}