X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FImport.pm;h=2c4bad92996dde45489c84a590cc4cc94f6ebf59;hb=2984ff86d913c3a9a9f53e67e141f7a39bf77160;hp=29c482f9b4339b09089421723f3991b24e57f553;hpb=cf35d38e7f845393659dfce0249a76d529a2c92c;p=public-inbox.git diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 29c482f9..2c4bad92 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -2,8 +2,9 @@ # License: AGPL-3.0+ # # git fast-import-based ssoma-mda MDA replacement -# This is only ever run by public-inbox-mda and public-inbox-learn, -# not the WWW or NNTP code which only requires read-only access. +# This is only ever run by public-inbox-mda, public-inbox-learn +# and public-inbox-watch. Not the WWW or NNTP code which only +# requires read-only access. package PublicInbox::Import; use strict; use warnings; @@ -17,19 +18,22 @@ use PublicInbox::MDA; use POSIX qw(strftime); sub new { + # we can't change arg order, this is documented in POD + # and external projects may rely on it: my ($class, $git, $name, $email, $ibx) = @_; my $ref = 'refs/heads/master'; if ($ibx) { $ref = $ibx->{ref_head} || 'refs/heads/master'; $name ||= $ibx->{name}; $email ||= $ibx->{-primary_address}; + $git ||= $ibx->git; } bless { git => $git, ident => "$name <$email>", mark => 1, ref => $ref, - inbox => $ibx, + -inbox => $ibx, path_type => '2/38', # or 'v2' lock_path => "$git->{git_dir}/ssoma.lock", # v2 changes this bytes_added => 0, @@ -102,7 +106,7 @@ sub _cat_blob ($$$) { local $/ = "\n"; my $info = <$r>; defined $info or die "EOF from fast-import / cat-blob: $!"; - $info =~ /\A[a-f0-9]{40} blob (\d+)\n\z/ or return; + $info =~ /\A[a-f0-9]{40} blob ([0-9]+)\n\z/ or return; my $left = $1; my $offset = 0; my $buf = ''; @@ -177,8 +181,8 @@ sub _update_git_info ($$) { run_die([@cmd, 'update-server-info'], undef); ($self->{path_type} eq '2/38') and eval { require PublicInbox::SearchIdx; - my $inbox = $self->{inbox} || $git_dir; - my $s = PublicInbox::SearchIdx->new($inbox); + my $ibx = $self->{-inbox} || $git_dir; + my $s = PublicInbox::SearchIdx->new($ibx); $s->index_sync({ ref => $self->{ref} }); }; eval { run_die([@cmd, qw(gc --auto)], undef) } if $do_gc; @@ -363,10 +367,14 @@ sub add { my @ct = msg_timestamp($hdr); my $author_time_raw = git_timestamp(@at); my $commit_time_raw = git_timestamp(@ct); + my $subject = $mime->header('Subject'); $subject = '(no subject)' unless defined $subject; - my $path_type = $self->{path_type}; + # Mime decoding can create nulls replace them with spaces to protect git + $subject =~ tr/\0/ /; + utf8::encode($subject); + my $path_type = $self->{path_type}; my $path; if ($path_type eq '2/38') { $path = mid2path(v1_mid0($mime)); @@ -407,9 +415,6 @@ sub add { print $w "reset $ref\n" or wfail; } - # Mime decoding can create nulls replace them with spaces to protect git - $subject =~ tr/\0/ /; - utf8::encode($subject); print $w "commit $ref\nmark :$commit\n", "author $name <$email> $author_time_raw\n", "committer $self->{ident} $commit_time_raw\n" or wfail; @@ -432,6 +437,16 @@ sub run_die ($;$$) { $? == 0 or die join(' ', @$cmd) . " failed: $?\n"; } +sub init_bare { + my ($dir) = @_; + my @cmd = (qw(git init --bare -q), $dir); + run_die(\@cmd); + # set a reasonable default: + @cmd = (qw/git config/, "--file=$dir/config", + 'repack.writeBitmaps', 'true'); + run_die(\@cmd); +} + sub done { my ($self) = @_; my $w = delete $self->{out} or return; @@ -478,9 +493,9 @@ sub clean_purge_buffer { foreach my $i (0..$#$buf) { my $l = $buf->[$i]; - if ($l =~ /^author .* (\d+ [\+-]?\d+)$/) { + if ($l =~ /^author .* ([0-9]+ [\+-]?[0-9]+)$/) { $buf->[$i] = "author <> $1\n"; - } elsif ($l =~ /^data (\d+)/) { + } elsif ($l =~ /^data ([0-9]+)/) { $buf->[$i++] = "data " . length($cmt_msg) . "\n"; $buf->[$i] = $cmt_msg; last; @@ -494,7 +509,7 @@ sub purge_oids { my $old = $self->{'ref'}; my $git = $self->{git}; my @export = (qw(fast-export --no-data --use-done-feature), $old); - my ($rd, $pid) = $git->popen(@export); + my $rd = $git->popen(@export); my ($r, $w) = $self->gfi_start; my @buf; my $npurge = 0; @@ -510,7 +525,7 @@ sub purge_oids { @buf = (); } push @buf, "commit $tmp\n"; - } elsif (/^data (\d+)/) { + } elsif (/^data ([0-9]+)/) { # only commit message, so $len is small: my $len = $1; # + 1 for trailing "\n" push @buf, $_; @@ -542,13 +557,14 @@ sub purge_oids { @buf = (); } elsif ($_ eq "done\n") { $done = 1; - } elsif (/^mark :(\d+)$/) { + } elsif (/^mark :([0-9]+)$/) { push @buf, $_; $mark = $1; } else { push @buf, $_; } } + close $rd or die "close fast-export failed: $?"; if (@buf) { $w->print(@buf) or wfail; } @@ -584,7 +600,7 @@ __END__ =head1 NAME -PublicInbox::Import - message importer for public-inbox +PublicInbox::Import - message importer for public-inbox v1 inboxes =head1 VERSION