X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FInboxWritable.pm;h=17dfbe18500a6dfa00e87f50209baec4126f61c3;hb=23af251dd607c4e75ab1e68063f2c885c48cc035;hp=174e4245ab08157714b05f6e19377018023a1fc9;hpb=fece7fca6aeac74410a813cffcb0da338017d0ed;p=public-inbox.git diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm index 174e4245..17dfbe18 100644 --- a/lib/PublicInbox/InboxWritable.pm +++ b/lib/PublicInbox/InboxWritable.pm @@ -1,14 +1,16 @@ -# Copyright (C) 2018-2019 all contributors +# Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ # Extends read-only Inbox for writing package PublicInbox::InboxWritable; use strict; -use warnings; -use base qw(PublicInbox::Inbox); +use v5.10.1; +use parent qw(PublicInbox::Inbox Exporter); use PublicInbox::Import; -use PublicInbox::Filter::Base; -*REJECT = *PublicInbox::Filter::Base::REJECT; +use PublicInbox::Filter::Base qw(REJECT); +use Errno qw(ENOENT); +our @EXPORT_OK = qw(eml_from_path); +use Fcntl qw(O_RDONLY O_NONBLOCK); use constant { PERM_UMASK => 0, @@ -20,23 +22,51 @@ use constant { sub new { my ($class, $ibx, $creat_opt) = @_; + return $ibx if ref($ibx) eq $class; my $self = bless $ibx, $class; # TODO: maybe stop supporting this if ($creat_opt) { # for { nproc => $N } $self->{-creat_opt} = $creat_opt; - init_inbox($self) if ($self->{version} || 1) == 1; + init_inbox($self) if $self->version == 1; } $self; } +sub assert_usable_dir { + my ($self) = @_; + my $dir = $self->{inboxdir}; + return $dir if defined($dir) && $dir ne ''; + die "no inboxdir defined for $self->{name}\n"; +} + +sub _init_v1 { + my ($self, $skip_artnum) = @_; + if (defined($self->{indexlevel}) || defined($skip_artnum)) { + require PublicInbox::SearchIdx; + require PublicInbox::Msgmap; + my $sidx = PublicInbox::SearchIdx->new($self, 1); # just create + $sidx->begin_txn_lazy; + my $mm = PublicInbox::Msgmap->new_file($self, 1); + if (defined $skip_artnum) { + $mm->{dbh}->begin_work; + $mm->skip_artnum($skip_artnum); + $mm->{dbh}->commit; + } + undef $mm; # ->created_at set + $sidx->commit_txn_lazy; + } else { + open my $fh, '>>', "$self->{inboxdir}/ssoma.lock" or + die "$self->{inboxdir}/ssoma.lock: $!\n"; + } +} + sub init_inbox { my ($self, $shards, $skip_epoch, $skip_artnum) = @_; - # TODO: honor skip_artnum - my $v = $self->{version} || 1; - if ($v == 1) { - my $dir = $self->{inboxdir} or die "no inboxdir in inbox\n"; + if ($self->version == 1) { + my $dir = assert_usable_dir($self); PublicInbox::Import::init_bare($dir); + $self->with_umask(\&_init_v1, $self, $skip_artnum); } else { my $v2w = importer($self); $v2w->init_inbox($shards, $skip_epoch, $skip_artnum); @@ -45,22 +75,20 @@ sub init_inbox { sub importer { my ($self, $parallel) = @_; - $self->{-importer} ||= do { - my $v = $self->{version} || 1; - if ($v == 2) { - eval { require PublicInbox::V2Writable }; - die "v2 not supported: $@\n" if $@; - my $opt = $self->{-creat_opt}; - my $v2w = PublicInbox::V2Writable->new($self, $opt); - $v2w->{parallel} = $parallel; - $v2w; - } elsif ($v == 1) { - my @arg = (undef, undef, undef, $self); - PublicInbox::Import->new(@arg); - } else { - $! = 78; # EX_CONFIG 5.3.5 local configuration error - die "unsupported inbox version: $v\n"; - } + my $v = $self->version; + if ($v == 2) { + eval { require PublicInbox::V2Writable }; + die "v2 not supported: $@\n" if $@; + my $opt = $self->{-creat_opt}; + my $v2w = PublicInbox::V2Writable->new($self, $opt); + $v2w->{parallel} = $parallel if defined $parallel; + $v2w; + } elsif ($v == 1) { + my @arg = (undef, undef, undef, $self); + PublicInbox::Import->new(@arg); + } else { + $! = 78; # EX_CONFIG 5.3.5 local configuration error + die "unsupported inbox version: $v\n"; } } @@ -71,11 +99,11 @@ sub filter { # v2 keeps msgmap open, which causes conflicts for filters # such as PublicInbox::Filter::RubyLang which overload msgmap # for a predictable serial number. - if ($im && ($self->{version} || 1) >= 2 && $self->{altid}) { + if ($im && $self->version >= 2 && $self->{altid}) { $im->done; } - my @args = (-inbox => $self); + my @args = (ibx => $self); # basic line splitting, only # Perhaps we can have proper quote splitting one day... ($f, @args) = split(/\s+/, $f) if $f =~ /\s+/; @@ -91,105 +119,60 @@ sub filter { undef; } -sub is_maildir_basename ($) { - my ($bn) = @_; - return 0 if $bn !~ /\A[a-zA-Z0-9][\-\w:,=\.]+\z/; - if ($bn =~ /:2,([A-Z]+)\z/i) { - my $flags = $1; - return 0 if $flags =~ /[DT]/; # no [D]rafts or [T]rashed mail - } - 1; -} - -sub is_maildir_path ($) { +sub eml_from_path ($) { my ($path) = @_; - my @p = split(m!/+!, $path); - (is_maildir_basename($p[-1]) && -f $path) ? 1 : 0; + if (sysopen(my $fh, $path, O_RDONLY|O_NONBLOCK)) { + return unless -f $fh; # no FIFOs or directories + my $str = do { local $/; <$fh> } or return; + PublicInbox::Eml->new(\$str); + } else { # ENOENT is common with Maildir + warn "failed to open $path: $!\n" if $! != ENOENT; + undef; + } } -sub maildir_path_load ($) { - my ($path) = @_; - if (open my $fh, '<', $path) { - local $/; - my $str = <$fh>; - $str or return; - return PublicInbox::MIME->new(\$str); - } elsif ($!{ENOENT}) { - # common with Maildir - return; - } else { - warn "failed to open $path: $!\n"; - return; +sub _each_maildir_eml { + my ($fn, $kw, $eml, $im, $self) = @_; + return if grep(/\Adraft\z/, @$kw); + if ($self && (my $filter = $self->filter($im))) { + my $ret = $filter->scrub($eml) or return; + return if $ret == REJECT(); + $eml = $ret; } + $im->add($eml); } +# XXX does anybody use this? sub import_maildir { my ($self, $dir) = @_; - my $im = $self->importer(1); - foreach my $sub (qw(cur new tmp)) { -d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n"; } - foreach my $sub (qw(cur new)) { - opendir my $dh, "$dir/$sub" or die "opendir $dir/$sub: $!\n"; - while (defined(my $fn = readdir($dh))) { - next unless is_maildir_basename($fn); - my $mime = maildir_file_load("$dir/$fn") or next; - - if (my $filter = $self->filter($im)) { - my $ret = $filter->scrub($mime) or return; - return if $ret == REJECT(); - $mime = $ret; - } - $im->add($mime); - } - } + my $im = $self->importer(1); + my @self = $self->filter($im) ? ($self) : (); + require PublicInbox::MdirReader; + PublicInbox::MdirReader->new->maildir_each_eml($dir, + \&_each_maildir_eml, $im, @self); $im->done; } -# asctime: From example@example.com Fri Jun 23 02:56:55 2000 -my $from_strict = qr/^From \S+ +\S+ \S+ +\S+ [^:]+:[^:]+:[^:]+ [^:]+/; - -sub mb_add ($$$$) { - my ($im, $variant, $filter, $msg) = @_; - $$msg =~ s/(\r?\n)+\z/$1/s; - my $mime = PublicInbox::MIME->new($msg); - if ($variant eq 'mboxrd') { - $$msg =~ s/^>(>*From )/$1/sm; - } elsif ($variant eq 'mboxo') { - $$msg =~ s/^>From /From /sm; - } +sub _mbox_eml_cb { # MboxReader->mbox* callback + my ($eml, $im, $filter) = @_; if ($filter) { - my $ret = $filter->scrub($mime) or return; + my $ret = $filter->scrub($eml) or return; return if $ret == REJECT(); - $mime = $ret; + $eml = $ret; } - $im->add($mime) + $im->add($eml); } sub import_mbox { my ($self, $fh, $variant) = @_; - if ($variant !~ /\A(?:mboxrd|mboxo)\z/) { - die "variant must be 'mboxrd' or 'mboxo'\n"; - } + require PublicInbox::MboxReader; + my $cb = PublicInbox::MboxReader->reads($variant) or + die "$variant not supported\n"; my $im = $self->importer(1); - my $prev = undef; - my $msg = ''; - my $filter = $self->filter; - while (defined(my $l = <$fh>)) { - if ($l =~ /$from_strict/o) { - if (!defined($prev) || $prev =~ /^\r?$/) { - mb_add($im, $variant, $filter, \$msg) if $msg; - $msg = ''; - $prev = $l; - next; - } - warn "W[$.] $l\n"; - } - $prev = $l; - $msg .= $l; - } - mb_add($im, $variant, $filter, \$msg) if $msg; + $cb->(undef, $fh, \&_mbox_eml_cb, $im, $self->filter); $im->done; } @@ -236,9 +219,9 @@ sub _umask_for { } sub with_umask { - my ($self, $cb) = @_; - my $old = umask $self->{umask}; - my $rv = eval { $cb->() }; + my ($self, $cb, @arg) = @_; + my $old = umask($self->{umask} //= umask_prepare($self)); + my $rv = eval { $cb->(@arg) }; my $err = $@; umask $old; die $err if $err; @@ -248,8 +231,18 @@ sub with_umask { sub umask_prepare { my ($self) = @_; my $perm = _git_config_perm($self); - my $umask = _umask_for($perm); - $self->{umask} = $umask; + _umask_for($perm); +} + +sub cleanup ($) { + delete @{$_[0]}{qw(over mm git search)}; +} + +# v2+ only, XXX: maybe we can just rely on ->max_git_epoch and remove +sub git_dir_latest { + my ($self, $max) = @_; + defined($$max = $self->max_git_epoch) ? + "$self->{inboxdir}/git/$$max.git" : undef; } 1;