$fetching = $min = $docid;
my $smsg = $ibx->over->get_art($xnum);
- my $oidhex = unpack('H*', $oidbin);
my $err;
if (!$smsg) {
$err = 'stale';
- } elsif ($smsg->{blob} ne $oidhex) {
+ } elsif (pack('H*', $smsg->{blob}) ne $oidbin) {
$err = "mismatch (!= $smsg->{blob})";
} else {
next; # likely, all good
}
# current_info already has eidx_key
+ my $oidhex = unpack('H*', $oidbin);
warn "$xnum:$oidhex (#$docid): $err\n";
my $del = $self->{oidx}->dbh->prepare_cached(<<'');
DELETE FROM xref3 WHERE ibx_id = ? AND xnum = ? AND oidbin = ?
}
}
-sub eidx_dedupe ($$) {
- my ($self, $sync) = @_;
+sub eidx_dedupe ($$$) {
+ my ($self, $sync, $msgids) = @_;
$sync->{dedupe_cull} = 0;
my $candidates = 0;
my $nr_mid = 0;
return unless eidxq_lock_acquire($self);
- my $iter;
+ my ($iter, $cur_mid);
my $min_id = 0;
- local $sync->{-regen_fmt} = "dedupe %u/".$self->{oidx}->max."\n";
+ my $idx = 0;
+ my ($max_id) = $self->{oidx}->dbh->selectrow_array(<<EOS);
+SELECT MAX(id) FROM msgid
+EOS
+ local $sync->{-regen_fmt} = "dedupe %u/$max_id\n";
# note: we could write this query more intelligently,
# but that causes lock contention with read-only processes
dedupe_restart:
- $iter = $self->{oidx}->dbh->prepare(<<EOS);
+ $cur_mid = $msgids->[$idx];
+ if ($cur_mid eq '') { # all Message-IDs
+ $iter = $self->{oidx}->dbh->prepare(<<EOS);
SELECT mid,id FROM msgid WHERE id > ? ORDER BY id ASC
EOS
- $iter->execute($min_id);
+ $iter->execute($min_id);
+ } else {
+ $iter = $self->{oidx}->dbh->prepare(<<EOS);
+SELECT mid,id FROM msgid WHERE mid = ? AND id > ? ORDER BY id ASC
+EOS
+ $iter->execute($cur_mid, $min_id);
+ }
while (my ($mid, $id) = $iter->fetchrow_array) {
last if $sync->{quit};
$self->{current_info} = "dedupe $mid";
};
$self->git->cat_async($smsg->{blob}, \&dd_smsg, $dd);
}
- # need to wait on every single one
+ # need to wait on every single one: @smsg contents can get
+ # invalidated inside dd_smsg for messages with multiple
+ # Message-IDs.
$self->git->async_wait_all;
if (checkpoint_due($sync)) {
goto dedupe_restart;
}
}
+ goto dedupe_restart if defined($msgids->[++$idx]);
+
my $n = delete $sync->{dedupe_cull};
if (my $pr = $sync->{-opt}->{-progress}) {
$pr->("culled $n/$candidates candidates ($nr_mid msgids)\n");
for my $ibx (@{ibx_sorted($self)}) {
$ibx->{-ibx_id} //= $self->{oidx}->ibx_id($ibx->eidx_key);
}
- if (delete($opt->{dedupe})) {
+ if (my $msgids = delete($opt->{dedupe})) {
local $sync->{checkpoint_unlocks} = 1;
- eidx_dedupe($self, $sync);
+ eidx_dedupe($self, $sync, $msgids);
}
if (delete($opt->{reindex})) {
local $sync->{checkpoint_unlocks} = 1;