Xapian memory usage is tied to the size of the indexed
text, so take the raw message size into account when
deciding when to flush Xapian data.
More importantly, we now flush Xapian before it buffers
beyond our maximum, and we do so unconditionally so that
even high-priority processes are kept from OOM-ing.
use POSIX qw(strftime);
require PublicInbox::Git;
use POSIX qw(strftime);
require PublicInbox::Git;
-use constant MAX_MID_SIZE => 244; # max term size - 1 in Xapian
+ MAX_MID_SIZE => 244, # max term size - 1 in Xapian
PERM_UMASK => 0,
OLD_PERM_GROUP => 1,
OLD_PERM_EVERYBODY => 2,
PERM_GROUP => 0660,
PERM_EVERYBODY => 0664,
PERM_UMASK => 0,
OLD_PERM_GROUP => 1,
OLD_PERM_EVERYBODY => 2,
PERM_GROUP => 0660,
PERM_EVERYBODY => 0664,
+ BATCH_BYTES => 1_000_000,
require File::Path;
_lock_acquire($self);
File::Path::mkpath($dir);
require File::Path;
_lock_acquire($self);
File::Path::mkpath($dir);
- $self->{batch_size} = 100;
$flag = Search::Xapian::DB_CREATE_OR_OPEN;
}
$self->{xdb} = Search::Xapian::WritableDatabase->new($dir, $flag);
$flag = Search::Xapian::DB_CREATE_OR_OPEN;
}
$self->{xdb} = Search::Xapian::WritableDatabase->new($dir, $flag);
with_umask($self, sub { $self->_index_sync($opts) });
}
with_umask($self, sub { $self->_index_sync($opts) });
}
+# Charge a message's size against a running byte budget.
+#
+# Arguments (positional):
+#   1. scalar ref to the remaining byte budget (updated in place)
+#   2. number of bytes to subtract from that budget
+#   3. code ref to call once the budget is used up
+#   4. value handed through to that code ref as its first argument
+#
+# When the budget reaches zero or goes negative it is reset to
+# BATCH_BYTES and the code ref is called with (4th argument, 1).
+# NOTE(review): the callback presumably flushes buffered Xapian data,
+# and the 4th argument is presumably the latest commit seen by the
+# caller -- confirm against the callers.
+sub batch_adjust ($$$$) {
+	my ($budget_ref, $nbytes, $flush_cb, $newest) = @_;
+	if (($$budget_ref -= $nbytes) <= 0) {
+		$$budget_ref = BATCH_BYTES; # start over with a full budget
+		$flush_cb->($newest, 1);
+	}
+}
+
sub rlog {
my ($self, $log, $add_cb, $del_cb, $batch_cb) = @_;
my $hex = '[a-f0-9]';
sub rlog {
my ($self, $log, $add_cb, $del_cb, $batch_cb) = @_;
my $hex = '[a-f0-9]';
my $git = $self->{git};
my $latest;
my $bytes;
my $git = $self->{git};
my $latest;
my $bytes;
- my $max = $self->{batch_size}; # may be undef
local $/ = "\n";
my $line;
while (defined($line = <$log>)) {
if ($line =~ /$addmsg/o) {
my $blob = $1;
my $mime = do_cat_mail($git, $blob, \$bytes) or next;
local $/ = "\n";
my $line;
while (defined($line = <$log>)) {
if ($line =~ /$addmsg/o) {
my $blob = $1;
my $mime = do_cat_mail($git, $blob, \$bytes) or next;
+ batch_adjust(\$max, $bytes, $batch_cb, $latest);
$add_cb->($self, $mime, $bytes, $blob);
} elsif ($line =~ /$delmsg/o) {
my $blob = $1;
$add_cb->($self, $mime, $bytes, $blob);
} elsif ($line =~ /$delmsg/o) {
my $blob = $1;
- my $mime = do_cat_mail($git, $blob) or next;
+ my $mime = do_cat_mail($git, $blob, \$bytes) or next;
+ batch_adjust(\$max, $bytes, $batch_cb, $latest);
$del_cb->($self, $mime);
} elsif ($line =~ /^commit ($h40)/o) {
$del_cb->($self, $mime);
} elsif ($line =~ /^commit ($h40)/o) {
- if (defined $max && --$max <= 0) {
- $max = $self->{batch_size};
- $batch_cb->($latest, 1);
- }