use base qw(PublicInbox::Lock);
use 5.010_001;
use PublicInbox::SearchIdxShard;
-use PublicInbox::MIME;
+use PublicInbox::Eml;
use PublicInbox::Git;
use PublicInbox::Import;
use PublicInbox::MID qw(mids references);
-use PublicInbox::ContentId qw(content_id content_digest);
+use PublicInbox::ContentHash qw(content_hash content_digest);
use PublicInbox::Inbox;
use PublicInbox::OverIdx;
use PublicInbox::Msgmap;
last_commit => [], # git repo -> commit
};
$self->{shards} = count_shards($self) || nproc_shards($creat);
+ $self->{index_max_size} = $v2ibx->{index_max_size};
bless $self, $class;
}
$rewrites;
}
# content_hashes($mime): returns an array ref whose first element is
# content_hash($mime).  A second element, the content_hash of the message
# re-parsed from $mime->as_string, is appended only when it differs from
# the first.
# In this hunk the sub is renamed from content_ids, content_id() becomes
# content_hash(), and PublicInbox::MIME becomes PublicInbox::Eml.
-sub content_ids ($) {
+sub content_hashes ($) {
my ($mime) = @_;
- my @cids = ( content_id($mime) );
+ my @chashes = ( content_hash($mime) );
+ # We still support Email::MIME, here, and
# Email::MIME->as_string doesn't always round-trip, so we may
- # use a second content_id
- my $rt = content_id(PublicInbox::MIME->new(\($mime->as_string)));
- push @cids, $rt if $cids[0] ne $rt;
- \@cids;
+ # use a second content_hash
+ my $rt = content_hash(PublicInbox::Eml->new(\($mime->as_string)));
+ push @chashes, $rt if $chashes[0] ne $rt;
+ \@chashes;
}
# content_matches($chashes, $existing): returns 1 when content_hash($existing)
# is string-equal (eq) to any element of the array ref passed as the first
# argument, and 0 otherwise.
# In this hunk the locals are renamed from $cids/$cid to $chashes/$chash
# and content_id() becomes content_hash().
sub content_matches ($$) {
- my ($cids, $existing) = @_;
- my $cid = content_id($existing);
- foreach (@$cids) {
- return 1 if $_ eq $cid
+ my ($chashes, $existing) = @_;
+ my $chash = content_hash($existing);
+ foreach (@$chashes) {
+ return 1 if $_ eq $chash
}
0
}
$im = $self->importer;
}
my $over = $self->{over};
- my $cids = content_ids($old_mime);
+ my $chashes = content_hashes($old_mime);
my @removed;
my $mids = mids($old_mime->header_obj);
# We avoid introducing new blobs into git since the raw content
# can be slightly different, so we do not need the user-supplied
- # message now that we have the mids and content_id
+ # message now that we have the mids and content_hash
$old_mime = undef;
my $mark;
next; # continue
}
my $orig = $$msg;
- my $cur = PublicInbox::MIME->new($msg);
- if (content_matches($cids, $cur)) {
+ my $cur = PublicInbox::Eml->new($msg);
+ if (content_matches($chashes, $cur)) {
$gone{$smsg->{num}} = [ $smsg, $cur, \$orig ];
}
}
sub git_init {
my ($self, $epoch) = @_;
my $git_dir = "$self->{-inbox}->{inboxdir}/git/$epoch.git";
- my @cmd = (qw(git init --bare -q), $git_dir);
- PublicInbox::Import::run_die(\@cmd);
- @cmd = (qw/git config/, "--file=$git_dir/config",
+ PublicInbox::Import::init_bare($git_dir);
+ my @cmd = (qw/git config/, "--file=$git_dir/config",
'include.path', '../../all.git/config');
PublicInbox::Import::run_die(\@cmd);
fill_alternates($self, $epoch);
sub content_exists ($$$) {
my ($self, $mime, $mid) = @_;
my $over = $self->{over};
- my $cids = content_ids($mime);
+ my $chashes = content_hashes($mime);
my ($id, $prev);
while (my $smsg = $over->next_by_mid($mid, \$id, \$prev)) {
my $msg = get_blob($self, $smsg);
warn "broken smsg for $mid\n";
next;
}
- my $cur = PublicInbox::MIME->new($msg);
- return 1 if content_matches($cids, $cur);
+ my $cur = PublicInbox::Eml->new($msg);
+ return 1 if content_matches($chashes, $cur);
# XXX DEBUG_DIFF is experimental and may be removed
diff($mid, $cur, $mime) if $ENV{DEBUG_DIFF};
# mark_deleted($self, $sync, $git, $oid): reads blob $oid via
# $git->cat_file, then stores $oid in $sync->{D} under a "$mid\0$chash"
# key for each entry returned by mids().
# The patch adds an early return, before the blob is read, when
# PublicInbox::SearchIdx::too_big($self, $git, $oid) is true; nothing is
# recorded in that case.
sub mark_deleted ($$$$) {
my ($self, $sync, $git, $oid) = @_;
+ return if PublicInbox::SearchIdx::too_big($self, $git, $oid);
my $msgref = $git->cat_file($oid);
- my $mime = PublicInbox::MIME->new($$msgref);
+ my $mime = PublicInbox::Eml->new($$msgref);
my $mids = mids($mime->header_obj);
- my $cid = content_id($mime);
+ my $chash = content_hash($mime);
foreach my $mid (@$mids) {
- $sync->{D}->{"$mid\0$cid"} = $oid;
+ $sync->{D}->{"$mid\0$chash"} = $oid;
}
}
$self->{current_info} = "multi_mid $oid";
my ($num, $mid0, $len);
my $msgref = $git->cat_file($oid, \$len);
- my $mime = PublicInbox::MIME->new($$msgref);
+ my $mime = PublicInbox::Eml->new($$msgref);
my $mids = mids($mime->header_obj);
- my $cid = content_id($mime);
+ my $chash = content_hash($mime);
die "BUG: reindex_oid_m called for <=1 mids" if scalar(@$mids) <= 1;
for my $mid (reverse @$mids) {
- delete($sync->{D}->{"$mid\0$cid"}) and
+ delete($sync->{D}->{"$mid\0$chash"}) and
die "BUG: reindex_oid should handle <$mid> delete";
}
my $over = $self->{over};
}
}
-# reuse Msgmap to store num => oid mapping (rather than num => mid)
-sub multi_mid_q_new () {
- my ($fh, $fn) = tempfile('multi_mid-XXXXXXX', EXLOCK => 0, TMPDIR => 1);
- my $multi_mid = PublicInbox::Msgmap->new_file($fn, 1);
- $multi_mid->{dbh}->do('PRAGMA synchronous = OFF');
- # for Msgmap->DESTROY:
- $multi_mid->{tmp_name} = $fn;
- $multi_mid->{pid} = $$;
- close $fh or die "failed to close $fn: $!";
- $multi_mid
-}
-
sub multi_mid_q_push ($$$) {
my ($self, $sync, $oid) = @_;
my $multi_mid = $sync->{multi_mid} //= PublicInbox::MultiMidQueue->new;
sub reindex_oid ($$$$) {
my ($self, $sync, $git, $oid) = @_;
+ return if PublicInbox::SearchIdx::too_big($self, $git, $oid);
my ($num, $mid0, $len);
my $msgref = $git->cat_file($oid, \$len);
return if $len == 0; # purged
- my $mime = PublicInbox::MIME->new($$msgref);
+ my $mime = PublicInbox::Eml->new($$msgref);
my $mids = mids($mime->header_obj);
- my $cid = content_id($mime);
+ my $chash = content_hash($mime);
if (scalar(@$mids) == 0) {
warn "E: $oid has no Message-ID, skipping\n";
my $mid = $mids->[0];
# was the file previously marked as deleted?, skip if so
- if (delete($sync->{D}->{"$mid\0$cid"})) {
+ if (delete($sync->{D}->{"$mid\0$chash"})) {
if (!$sync->{reindex}) {
$num = $sync->{regen}--;
$self->{mm}->num_highwater($num);
} else { # multiple MIDs are a weird case:
my $del = 0;
for (@$mids) {
- $del += delete($sync->{D}->{"$_\0$cid"}) // 0;
+ $del += delete($sync->{D}->{"$_\0$chash"}) // 0;
}
if ($del) {
unindex_oid_remote($self, $oid, $_) for @$mids;
my ($self, $git, $oid, $unindexed) = @_;
my $mm = $self->{mm};
my $msgref = $git->cat_file($oid);
- my $mime = PublicInbox::MIME->new($msgref);
+ my $mime = PublicInbox::Eml->new($msgref);
my $mids = mids($mime->header_obj);
$mime = $msgref = undef;
my $over = $self->{over};
return unless defined $latest;
$self->idx_init($opt); # acquire lock
my $sync = {
- D => {}, # "$mid\0$cid" => $oid
+ D => {}, # "$mid\0$chash" => $oid
unindex_range => {}, # EPOCH => oid_old..oid_new
reindex => $opt->{reindex},
-opt => $opt