Checking the type and OID of every blob handed to our cat_async
callbacks should help us detect bugs in our code or storage
synchronization problems more easily. This probably won't
detect corrupted git storage, but can detect corrupted SQLite
files.
"Bad blobs, bad blobs, whatcha gonna do when they come for you?"
use parent qw(PublicInbox::ExtSearch PublicInbox::Lock);
use Carp qw(croak carp);
use PublicInbox::Search;
use parent qw(PublicInbox::ExtSearch PublicInbox::Lock);
use Carp qw(croak carp);
use PublicInbox::Search;
-use PublicInbox::SearchIdx qw(crlf_adjust prepare_stack is_ancestor);
+use PublicInbox::SearchIdx qw(crlf_adjust prepare_stack is_ancestor
+ is_bad_blob);
use PublicInbox::OverIdx;
use PublicInbox::MiscIdx;
use PublicInbox::MID qw(mids);
use PublicInbox::OverIdx;
use PublicInbox::MiscIdx;
use PublicInbox::MID qw(mids);
$cfg->each_inbox(\&_ibx_attach, $self);
}
$cfg->each_inbox(\&_ibx_attach, $self);
}
-sub is_bad_blob ($$$$) {
- my ($oid, $type, $size, $expect_oid) = @_;
- if ($type ne 'blob') {
- carp "W: $expect_oid is not a blob (type=$type)";
- return 1;
- }
- croak "BUG: $oid != $expect_oid" if $oid ne $expect_oid;
- $size == 0 ? 1 : 0; # size == 0 means purged
-}
-
sub check_batch_limit ($) {
my ($req) = @_;
my $self = $req->{self};
sub check_batch_limit ($) {
my ($req) = @_;
my $self = $req->{self};
use PublicInbox::MID qw(mids_for_index mids);
use PublicInbox::MsgIter;
use PublicInbox::IdxStack;
use PublicInbox::MID qw(mids_for_index mids);
use PublicInbox::MsgIter;
use PublicInbox::IdxStack;
+use Carp qw(croak carp);
use POSIX qw(strftime);
use Time::Local qw(timegm);
use PublicInbox::OverIdx;
use POSIX qw(strftime);
use Time::Local qw(timegm);
use PublicInbox::OverIdx;
use PublicInbox::Git qw(git_unquote);
use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
our @EXPORT_OK = qw(crlf_adjust log2stack is_ancestor check_size prepare_stack
use PublicInbox::Git qw(git_unquote);
use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
our @EXPORT_OK = qw(crlf_adjust log2stack is_ancestor check_size prepare_stack
- index_text term_generator add_val);
+ index_text term_generator add_val is_bad_blob);
my $X = \%PublicInbox::Search::X;
our ($DB_CREATE_OR_OPEN, $DB_OPEN);
our $DB_NO_SYNC = 0;
my $X = \%PublicInbox::Search::X;
our ($DB_CREATE_OR_OPEN, $DB_OPEN);
our $DB_NO_SYNC = 0;
+sub is_bad_blob ($$$$) { # returns 1 if the caller should skip, else 0
+ my ($oid, $type, $size, $expect_oid) = @_;
+ if ($type ne 'blob') { # any non-blob type: warn and skip
+ carp "W: $expect_oid is not a blob (type=$type)";
+ return 1;
+ }
+ croak "BUG: $oid != $expect_oid" if $oid ne $expect_oid; # fatal
+ $size == 0 ? 1 : 0; # size == 0 means purged
+}
+
sub index_both { # git->cat_async callback
my ($bref, $oid, $type, $size, $sync) = @_;
sub index_both { # git->cat_async callback
my ($bref, $oid, $type, $size, $sync) = @_;
+ return if is_bad_blob($oid, $type, $size, $sync->{oid});
my ($nr, $max) = @$sync{qw(nr max)};
++$$nr;
$$max -= $size;
my ($nr, $max) = @$sync{qw(nr max)};
++$$nr;
$$max -= $size;
sub unindex_both { # git->cat_async callback
my ($bref, $oid, $type, $size, $sync) = @_;
sub unindex_both { # git->cat_async callback
my ($bref, $oid, $type, $size, $sync) = @_;
+ return if is_bad_blob($oid, $type, $size, $sync->{oid});
unindex_eml($sync->{sidx}, $oid, PublicInbox::Eml->new($bref));
# may be undef if leftover
if (defined(my $cur_cmt = $sync->{cur_cmt})) {
unindex_eml($sync->{sidx}, $oid, PublicInbox::Eml->new($bref));
# may be undef if leftover
if (defined(my $cur_cmt = $sync->{cur_cmt})) {
$sync->{index_oid} = \&index_both;
}
while (my ($f, $at, $ct, $oid, $cur_cmt) = $stk->pop_rec) {
$sync->{index_oid} = \&index_both;
}
while (my ($f, $at, $ct, $oid, $cur_cmt) = $stk->pop_rec) {
- my $arg = { %$sync, cur_cmt => $cur_cmt };
+ my $arg = { %$sync, cur_cmt => $cur_cmt, oid => $oid };
last if $sync->{quit};
if ($f eq 'm') {
$arg->{autime} = $at;
last if $sync->{quit};
if ($f eq 'm') {
$arg->{autime} = $at;
use PublicInbox::OverIdx;
use PublicInbox::Msgmap;
use PublicInbox::Spawn qw(spawn popen_rd);
use PublicInbox::OverIdx;
use PublicInbox::Msgmap;
use PublicInbox::Spawn qw(spawn popen_rd);
-use PublicInbox::SearchIdx qw(log2stack crlf_adjust is_ancestor check_size);
+use PublicInbox::SearchIdx qw(log2stack crlf_adjust is_ancestor check_size
+ is_bad_blob);
use IO::Handle; # ->autoflush
use File::Temp ();
use IO::Handle; # ->autoflush
use File::Temp ();
sub index_oid { # cat_async callback
my ($bref, $oid, $type, $size, $arg) = @_;
sub index_oid { # cat_async callback
my ($bref, $oid, $type, $size, $arg) = @_;
+ return if is_bad_blob($oid, $type, $size, $arg->{oid});
my $self = $arg->{self};
local $self->{current_info} = "$self->{current_info} $oid";
return if $size == 0; # purged
my $self = $arg->{self};
local $self->{current_info} = "$self->{current_info} $oid";
return if $size == 0; # purged
sub unindex_oid ($$;$) { # git->cat_async callback
my ($bref, $oid, $type, $size, $sync) = @_;
sub unindex_oid ($$;$) { # git->cat_async callback
my ($bref, $oid, $type, $size, $sync) = @_;
+ return if is_bad_blob($oid, $type, $size, $sync->{oid});
my $self = $sync->{self};
local $self->{current_info} = "$self->{current_info} $oid";
my $unindexed = $sync->{in_unindex} ? $sync->{unindexed} : undef;
my $self = $sync->{self};
local $self->{current_info} = "$self->{current_info} $oid";
my $unindexed = $sync->{in_unindex} ? $sync->{unindexed} : undef;