]> Sergey Matveev's repositories - public-inbox.git/blobdiff - lib/PublicInbox/LeiInput.pm
lei import: speed up repeated Maildir imports
[public-inbox.git] / lib / PublicInbox / LeiInput.pm
index 86f300c3a572438ac01e475ecaca2ad58c419011..24211bf0cb0dff8a056cff8b84bb85438521ebfc 100644 (file)
@@ -1,7 +1,7 @@
 # Copyright (C) 2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
-# parent class for LeiImport, LeiConvert
+# parent class for LeiImport, LeiConvert, LeiIndex
 package PublicInbox::LeiInput;
 use strict;
 use v5.10.1;
@@ -69,6 +69,12 @@ error reading $name: $!
                # but no Content-Length or "From " escaping.
                # "git format-patch" also generates such files by default.
                $buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
+
+               # a user may feed just a body: git diff | lei rediff -U9
+               if ($self->{-force_eml}) {
+                       my $eml = PublicInbox::Eml->new($buf);
+                       substr($buf, 0, 0) = "\n\n" if !$eml->{bdy};
+               }
                $self->input_eml_cb(PublicInbox::Eml->new(\$buf), @args);
        } else {
                # prepare_inputs already validated $ifmt
@@ -93,11 +99,7 @@ sub handle_http_input ($$@) {
        my ($fh, $pid) = popen_rd($cmd, undef, $rdr);
        grep(/\A--compressed\z/, @$curl) or
                $fh = IO::Uncompress::Gunzip->new($fh, MultiStream => 1);
-       eval {
-               PublicInbox::MboxReader->mboxrd($fh,
-                                               $self->can('input_mbox_cb'),
-                                               $self, @args);
-       };
+       eval { $self->input_fh('mboxrd', $fh, $url, @args) };
        my $err = $@;
        waitpid($pid, 0);
        $? || $err and
@@ -147,11 +149,18 @@ sub input_path_url {
                $self->input_fh($ifmt, $mbl->{fh}, $input, @args);
        } elsif (-d _ && (-d "$input/cur" || -d "$input/new")) {
                return $lei->fail(<<EOM) if $ifmt && $ifmt ne 'maildir';
-$input appears to be a maildir, not $ifmt
+$input appears to be a maildir, not $ifmt
 EOM
-               PublicInbox::MdirReader->new->maildir_each_eml($input,
-                                       $self->can('input_maildir_cb'),
-                                       $self, @args);
+               my $mdr = PublicInbox::MdirReader->new;
+               if (my $pmd = $self->{pmd}) {
+                       $mdr->maildir_each_file($input,
+                                               $pmd->can('each_mdir_fn'),
+                                               $pmd, @args);
+               } else {
+                       $mdr->maildir_each_eml($input,
+                                               $self->can('input_maildir_cb'),
+                                               $self, @args);
+               }
        } else {
                $lei->fail("$input unsupported (TODO)");
        }
@@ -204,7 +213,7 @@ sub prepare_http_input ($$$) {
 sub prepare_inputs { # returns undef on error
        my ($self, $lei, $inputs) = @_;
        my $in_fmt = $lei->{opt}->{'in-format'};
-       my $sync = $lei->{opt}->{sync} ? {} : undef; # using LeiMailSync
+       my $sync = $lei->{opt}->{'mail-sync'} ? {} : undef; # using LeiMailSync
        if ($lei->{opt}->{stdin}) {
                @$inputs and return
                        $lei->fail("--stdin and @$inputs do not mix");
@@ -213,7 +222,7 @@ sub prepare_inputs { # returns undef on error
                push @{$sync->{no}}, '/dev/stdin' if $sync;
        }
        my $net = $lei->{net}; # NetWriter may be created by l2m
-       my (@f, @d);
+       my (@f, @md);
        # e.g. Maildir:/home/user/Mail/ or imaps://example.com/INBOX
        for my $input (@$inputs) {
                my $input_path = $input;
@@ -221,14 +230,10 @@ sub prepare_inputs { # returns undef on error
                        require PublicInbox::NetReader;
                        $net //= PublicInbox::NetReader->new;
                        $net->add_url($input);
-                       if ($sync) {
-                               if ($input =~ m!\Aimaps?://!) {
-                                       push @{$sync->{ok}}, $input;
-                               } else {
-                                       push @{$sync->{no}}, $input;
-                               }
-                       }
-               } elsif ($input_path =~ m!\Ahttps?://!i) {
+                       push @{$sync->{ok}}, $input if $sync;
+               } elsif ($input_path =~ m!\Ahttps?://!i) { # mboxrd.gz
+                       # TODO: how would we detect r/w JMAP?
+                       push @{$sync->{no}}, $input if $sync;
                        prepare_http_input($self, $lei, $input_path) or return;
                } elsif ($input_path =~ s/\A([a-z0-9]+)://is) {
                        my $ifmt = lc $1;
@@ -237,12 +242,10 @@ sub prepare_inputs { # returns undef on error
 --in-format=$in_fmt and `$ifmt:' conflict
 
                        }
-                       if ($sync) {
-                               if ($ifmt =~ /\A(?:maildir|mh)\z/i) {
-                                       push @{$sync->{ok}}, $input;
-                               } else {
-                                       push @{$sync->{no}}, $input;
-                               }
+                       if ($ifmt =~ /\A(?:maildir|mh)\z/i) {
+                               push @{$sync->{ok}}, $input if $sync;
+                       } else {
+                               push @{$sync->{no}}, $input if $sync;
                        }
                        my $devfd = $lei->path_to_fd($input_path) // return;
                        if ($devfd >= 0 || (-f $input_path || -p _)) {
@@ -251,14 +254,15 @@ sub prepare_inputs { # returns undef on error
                                PublicInbox::MboxReader->reads($ifmt) or return
                                        $lei->fail("$ifmt not supported");
                        } elsif (-d $input_path) {
-                               require PublicInbox::MdirReader;
                                $ifmt eq 'maildir' or return
                                        $lei->fail("$ifmt not supported");
-                               $input = $lei->abs_path($input) if $sync;
+                               $sync and $input = 'maildir:'.
+                                               $lei->abs_path($input_path);
+                               push @md, $input;
                        } else {
                                return $lei->fail("Unable to handle $input");
                        }
-               } elsif ($input =~ /\.(eml|patch)\z/i && -f $input) {
+               } elsif ($input =~ /\.(?:eml|patch)\z/i && -f $input) {
                        lc($in_fmt//'eml') eq 'eml' or return $lei->fail(<<"");
 $input is `eml', not --in-format=$in_fmt
 
@@ -269,28 +273,25 @@ $input is `eml', not --in-format=$in_fmt
                        if ($devfd >= 0 || -f $input || -p _) {
                                push @{$sync->{no}}, $input if $sync;
                                push @f, $input;
-                       } elsif (-d $input) {
+                       } elsif (-d "$input/new" && -d "$input/cur") {
                                if ($sync) {
                                        $input = $lei->abs_path($input);
                                        push @{$sync->{ok}}, $input;
                                }
-                               push @d, $input;
+                               push @md, $input;
                        } else {
                                return $lei->fail("Unable to handle $input")
                        }
                }
        }
        if (@f) { check_input_format($lei, \@f) or return }
-       if (@d) { # TODO: check for MH vs Maildir, here
-               require PublicInbox::MdirReader;
-       }
        if ($sync && $sync->{no}) {
                return $lei->fail(<<"") if !$sync->{ok};
---sync specified but no inputs support it
+--mail-sync specified but no inputs support it
 
                # non-fatal if some inputs support support sync
-               $lei->err("# --sync will only be used for @{$sync->{ok}}");
-               $lei->err("# --sync is not supported for: @{$sync->{no}}");
+               $lei->err("# --mail-sync will only be used for @{$sync->{ok}}");
+               $lei->err("# --mail-sync is not supported for: @{$sync->{no}}");
        }
        if ($net) {
                $net->{-can_die} = 1;
@@ -302,6 +303,13 @@ $input is `eml', not --in-format=$in_fmt
                $lei->{auth} //= PublicInbox::LeiAuth->new;
                $lei->{net} //= $net;
        }
+       if (scalar(@md)) {
+               require PublicInbox::MdirReader;
+               if ($self->can('pmdir_cb')) {
+                       require PublicInbox::LeiPmdir;
+                       $self->{pmd} = PublicInbox::LeiPmdir->new($lei, $self);
+               }
+       }
        $self->{inputs} = $inputs;
 }
 
@@ -329,6 +337,13 @@ sub input_only_atfork_child {
        undef;
 }
 
+# alias this as "net_merge_all_done" to use as an LeiAuth callback
+sub input_only_net_merge_all_done {
+       my ($self) = @_;
+       $self->wq_io_do('process_inputs');
+       $self->wq_close(1);
+}
+
 # like Getopt::Long, but for +kw:FOO and -kw:FOO to prepare
 # for update_xvmd -> update_vmd
 sub vmd_mod_extract {