use base qw(PublicInbox::Lock);
use 5.010_001;
use PublicInbox::SearchIdxShard;
-use PublicInbox::MIME;
+use PublicInbox::Eml;
use PublicInbox::Git;
use PublicInbox::Import;
use PublicInbox::MID qw(mids references);
use PublicInbox::Spawn qw(spawn popen_rd);
use PublicInbox::SearchIdx;
use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
+use PublicInbox::MultiMidQueue;
use IO::Handle; # ->autoflush
use File::Temp qw(tempfile);
last_commit => [], # git repo -> commit
};
$self->{shards} = count_shards($self) || nproc_shards($creat);
+ $self->{index_max_size} = $v2ibx->{index_max_size};
bless $self, $class;
}
my ($mime) = @_;
my @cids = ( content_id($mime) );
+ # We still support Email::MIME, here, and
# Email::MIME->as_string doesn't always round-trip, so we may
# use a second content_id
- my $rt = content_id(PublicInbox::MIME->new(\($mime->as_string)));
+ my $rt = content_id(PublicInbox::Eml->new(\($mime->as_string)));
push @cids, $rt if $cids[0] ne $rt;
\@cids;
}
next; # continue
}
my $orig = $$msg;
- my $cur = PublicInbox::MIME->new($msg);
+ my $cur = PublicInbox::Eml->new($msg);
if (content_matches($cids, $cur)) {
$gone{$smsg->{num}} = [ $smsg, $cur, \$orig ];
}
sub git_init {
my ($self, $epoch) = @_;
my $git_dir = "$self->{-inbox}->{inboxdir}/git/$epoch.git";
- my @cmd = (qw(git init --bare -q), $git_dir);
- PublicInbox::Import::run_die(\@cmd);
- @cmd = (qw/git config/, "--file=$git_dir/config",
+ PublicInbox::Import::init_bare($git_dir);
+ my @cmd = (qw/git config/, "--file=$git_dir/config",
'include.path', '../../all.git/config');
PublicInbox::Import::run_die(\@cmd);
fill_alternates($self, $epoch);
warn "broken smsg for $mid\n";
next;
}
- my $cur = PublicInbox::MIME->new($msg);
+ my $cur = PublicInbox::Eml->new($msg);
return 1 if content_matches($cids, $cur);
# XXX DEBUG_DIFF is experimental and may be removed
sub mark_deleted ($$$$) {
my ($self, $sync, $git, $oid) = @_;
+ return if PublicInbox::SearchIdx::too_big($self, $git, $oid);
my $msgref = $git->cat_file($oid);
- my $mime = PublicInbox::MIME->new($$msgref);
+ my $mime = PublicInbox::Eml->new($$msgref);
my $mids = mids($mime->header_obj);
my $cid = content_id($mime);
foreach my $mid (@$mids) {
$self->{current_info} = "multi_mid $oid";
my ($num, $mid0, $len);
my $msgref = $git->cat_file($oid, \$len);
- my $mime = PublicInbox::MIME->new($$msgref);
+ my $mime = PublicInbox::Eml->new($$msgref);
my $mids = mids($mime->header_obj);
my $cid = content_id($mime);
die "BUG: reindex_oid_m called for <=1 mids" if scalar(@$mids) <= 1;
}
}
-# reuse Msgmap to store num => oid mapping (rather than num => mid)
-sub multi_mid_q_new () {
- my ($fh, $fn) = tempfile('multi_mid-XXXXXXX', EXLOCK => 0, TMPDIR => 1);
- my $multi_mid = PublicInbox::Msgmap->new_file($fn, 1);
- $multi_mid->{dbh}->do('PRAGMA synchronous = OFF');
- # for Msgmap->DESTROY:
- $multi_mid->{tmp_name} = $fn;
- $multi_mid->{pid} = $$;
- close $fh or die "failed to close $fn: $!";
- $multi_mid
-}
-
-sub multi_mid_q_push ($$) {
- my ($sync, $oid) = @_;
- my $multi_mid = $sync->{multi_mid} //= multi_mid_q_new();
+sub multi_mid_q_push ($$$) { # defer $oid by storing it in the $sync->{multi_mid} queue
+ my ($self, $sync, $oid) = @_;
+ my $multi_mid = $sync->{multi_mid} //= PublicInbox::MultiMidQueue->new; # queue is created on first push
if ($sync->{reindex}) { # no regen on reindex
- $multi_mid->mid_insert($oid);
+ $multi_mid->push_oid($oid, $self); # no article number is passed on this path
} else {
my $num = $sync->{regen}--;
die "BUG: ran out of article numbers" if $num <= 0;
- $multi_mid->mid_set($num, $oid);
+ $multi_mid->set_oid($num, $oid, $self); # stored under $num taken from the $sync->{regen} countdown
}
}
sub reindex_oid ($$$$) {
my ($self, $sync, $git, $oid) = @_;
+ return if PublicInbox::SearchIdx::too_big($self, $git, $oid);
my ($num, $mid0, $len);
my $msgref = $git->cat_file($oid, \$len);
return if $len == 0; # purged
- my $mime = PublicInbox::MIME->new($$msgref);
+ my $mime = PublicInbox::Eml->new($$msgref);
my $mids = mids($mime->header_obj);
my $cid = content_id($mime);
# do not delete from {mm_tmp}, since another
# single-MID message may use it.
} else { # handle them at the end:
- multi_mid_q_push($sync, $oid);
+ multi_mid_q_push($self, $sync, $oid);
}
return;
}
my ($self, $git, $oid, $unindexed) = @_;
my $mm = $self->{mm};
my $msgref = $git->cat_file($oid);
- my $mime = PublicInbox::MIME->new($msgref);
+ my $mime = PublicInbox::Eml->new($msgref);
my $mids = mids($mime->header_obj);
$mime = $msgref = undef;
my $over = $self->{over};
}
if (my $multi_mid = delete $sync->{multi_mid}) {
$git //= $self->{-inbox}->git;
- my ($min, $max) = $multi_mid->minmax;
+ my $min = $multi_mid->{min};
+ my $max = $multi_mid->{max};
if ($sync->{reindex}) {
# we may need to create new Message-IDs if mirrors
# were initially indexed with old versions
for (my $i = $max; $i >= $min; $i--) {
- my $oid = $multi_mid->mid_for($i);
+ my $oid;
+ $oid = $multi_mid->get_oid($i, $self) or next;
next unless defined $oid;
reindex_oid_m($self, $sync, $git, $oid);
}
} else { # regen on initial index
for my $num ($min..$max) {
- my $oid = $multi_mid->mid_for($num);
- next unless defined $oid;
+ my $oid;
+ $oid = $multi_mid->get_oid($num, $self) or next;
reindex_oid_m($self, $sync, $git, $oid, $num);
}
}