X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FV2Writable.pm;h=fc2f33f9dd2cd09f48b89cc5d2629ae2f1e6f3b7;hb=95bdac7f09c69036efed537a4d03d5bdd2ae4eb6;hp=77b3bde4d7b35939ba633ae961c671489d5f3146;hpb=cd8dd7b08fddc7c2b5f218c3fcaa5dca5f9ad945;p=public-inbox.git diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 77b3bde4..fc2f33f9 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2018-2019 all contributors +# Copyright (C) 2018-2020 all contributors # License: AGPL-3.0+ # This interface wraps and mimics PublicInbox::Import @@ -7,6 +7,7 @@ package PublicInbox::V2Writable; use strict; use warnings; use base qw(PublicInbox::Lock); +use 5.010_001; use PublicInbox::SearchIdxShard; use PublicInbox::MIME; use PublicInbox::Git; @@ -16,9 +17,9 @@ use PublicInbox::ContentId qw(content_id content_digest); use PublicInbox::Inbox; use PublicInbox::OverIdx; use PublicInbox::Msgmap; -use PublicInbox::Spawn qw(spawn); +use PublicInbox::Spawn qw(spawn popen_rd); use PublicInbox::SearchIdx; -use IO::Handle; +use IO::Handle; # ->autoflush use File::Temp qw(tempfile); # an estimate of the post-packed size to the raw uncompressed size @@ -32,19 +33,29 @@ my $PACKING_FACTOR = 0.4; # to increase Xapian shards our $NPROC_MAX_DEFAULT = 4; -sub nproc_shards ($) { - my ($creat_opt) = @_; - if (ref($creat_opt) eq 'HASH') { - if (defined(my $n = $creat_opt->{nproc})) { - return $n - } +sub detect_nproc () { + for my $nproc (qw(nproc gnproc)) { # GNU coreutils nproc + `$nproc 2>/dev/null` =~ /^(\d+)$/ and return $1; + } + + # getconf(1) is POSIX, but *NPROCESSORS* vars are not + for (qw(_NPROCESSORS_ONLN NPROCESSORS_ONLN)) { + `getconf $_ 2>/dev/null` =~ /^(\d+)$/ and return $1; } - my $n = $ENV{NPROC}; + # should we bother with `sysctl hw.ncpu`? Those only give + # us total processor count, not online processor count. + undef +} + +sub nproc_shards ($) { + my ($creat_opt) = @_; + my $n = $creat_opt->{nproc} if ref($creat_opt) eq 'HASH'; + $n //= $ENV{NPROC}; if (!$n) { - chomp($n = `nproc 2>/dev/null`); - # assume 2 cores if GNU nproc(1) is not available - $n = 2 if !$n; + # assume 2 cores if not detectable or zero + state $NPROC_DETECTED = detect_nproc() || 2; + $n = $NPROC_DETECTED; $n = $NPROC_MAX_DEFAULT if $n > $NPROC_MAX_DEFAULT; } @@ -112,8 +123,11 @@ sub new { # public (for now?) sub init_inbox { - my ($self, $parallel, $skip_epoch) = @_; - $self->{parallel} = $parallel; + my ($self, $shards, $skip_epoch) = @_; + if (defined $shards) { + $self->{parallel} = 0 if $shards == 0; + $self->{shards} = $shards if $shards > 0; + } $self->idx_init; my $epoch_max = -1; git_dir_latest($self, \$epoch_max); @@ -471,17 +485,12 @@ sub git_hash_raw ($$) { print $tmp_fh $$raw or die "print \$tmp_fh: $!"; sysseek($tmp_fh, 0, 0) or die "seek failed: $!"; - my ($r, $w); - pipe($r, $w) or die "failed to create pipe: $!"; - my $rdr = { 0 => fileno($tmp_fh), 1 => fileno($w) }; my $git_dir = $self->{-inbox}->git->{git_dir}; my $cmd = ['git', "--git-dir=$git_dir", qw(hash-object --stdin)]; - my $pid = spawn($cmd, undef, $rdr); - close $w; + my $r = popen_rd($cmd, undef, { 0 => $tmp_fh }); local $/ = "\n"; chomp(my $oid = <$r>); - waitpid($pid, 0) == $pid or die "git hash-object did not finish"; - die "git hash-object failed: $?" if $?; + close $r or die "git hash-object failed: $?"; $oid =~ /\A[a-f0-9]{40}\z/ or die "OID not expected: $oid"; $oid; } @@ -668,23 +677,43 @@ sub fill_alternates ($$) { unless (-d $all) { PublicInbox::Import::init_bare($all); } - my $alt = "$all/objects/info/alternates"; - my %alts; - my @add; + my $info_dir = "$all/objects/info"; + my $alt = "$info_dir/alternates"; + my (%alt, $new); + my $mode = 0644; if (-e $alt) { open(my $fh, '<', $alt) or die "open < $alt: $!\n"; - %alts = map { chomp; $_ => 1 } (<$fh>); + $mode = (stat($fh))[2] & 07777; + + # we assign a sort score to every alternate and favor + # the newest (highest numbered) one when we + my $score; + my $other = 0; # in case admin adds non-epoch repos + %alt = map {; + if (m!\A\Q../../\E([0-9]+)\.git/objects\z!) { + $score = $1 + 0; + } else { + $score = --$other; + } + $_ => $score; + } split(/\n+/, do { local $/; <$fh> }); } + foreach my $i (0..$epoch) { my $dir = "../../git/$i.git/objects"; - push @add, $dir if !$alts{$dir} && -d "$pfx/$i.git"; - } - return unless @add; - open my $fh, '>>', $alt or die "open >> $alt: $!\n"; - foreach my $dir (@add) { - print $fh "$dir\n" or die "print >> $alt: $!\n"; + if (!exists($alt{$dir}) && -d "$pfx/$i.git") { + $alt{$dir} = $i; + $new = 1; + } } - close $fh or die "close $alt: $!\n"; + return unless $new; + + my ($fh, $tmp) = tempfile('alt-XXXXXXXX', DIR => $info_dir); + print $fh join("\n", sort { $alt{$b} <=> $alt{$a} } keys %alt), "\n" + or die "print $tmp: $!\n"; + chmod($mode, $fh) or die "fchmod $tmp: $!\n"; + close $fh or die "close $tmp $!\n"; + rename($tmp, $alt) or die "rename $tmp => $alt: $!\n"; } sub git_init { @@ -777,7 +806,6 @@ sub diff ($$$) { my $cmd = [ qw(diff -u), $an, $bn ]; print STDERR "# MID conflict <$mid>\n"; my $pid = spawn($cmd, undef, { 1 => 2 }); - defined $pid or die "diff failed to spawn $!"; waitpid($pid, 0) == $pid or die "diff did not finish"; unlink($an, $bn); }