X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FInboxWritable.pm;h=4980904541af7f245c09787d6bea59f8082d64c6;hb=0d38f65c490466837ae091afa7a7b6f59d04ce7c;hp=0a976ea2dc3dfe6278747d54a88a2ef8ba19dcad;hpb=70207d974c5a965ef849b58c27b63fd644b3293e;p=public-inbox.git diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm index 0a976ea2..49809045 100644 --- a/lib/PublicInbox/InboxWritable.pm +++ b/lib/PublicInbox/InboxWritable.pm @@ -1,44 +1,108 @@ -# Copyright (C) 2018 all contributors +# Copyright (C) 2018-2020 all contributors # License: AGPL-3.0+ # Extends read-only Inbox for writing package PublicInbox::InboxWritable; use strict; -use warnings; -use base qw(PublicInbox::Inbox); +use v5.10.1; +use parent qw(PublicInbox::Inbox Exporter); use PublicInbox::Import; +use PublicInbox::Filter::Base qw(REJECT); +use Errno qw(ENOENT); +our @EXPORT_OK = qw(eml_from_path warn_ignore_cb); + +use constant { + PERM_UMASK => 0, + OLD_PERM_GROUP => 1, + OLD_PERM_EVERYBODY => 2, + PERM_GROUP => 0660, + PERM_EVERYBODY => 0664, +}; sub new { - my ($class, $ibx) = @_; - bless $ibx, $class; + my ($class, $ibx, $creat_opt) = @_; + return $ibx if ref($ibx) eq $class; + my $self = bless $ibx, $class; + + # TODO: maybe stop supporting this + if ($creat_opt) { # for { nproc => $N } + $self->{-creat_opt} = $creat_opt; + init_inbox($self) if $self->version == 1; + } + $self; +} + +sub assert_usable_dir { + my ($self) = @_; + my $dir = $self->{inboxdir}; + return $dir if defined($dir) && $dir ne ''; + die "no inboxdir defined for $self->{name}\n"; +} + +sub _init_v1 { + my ($self, $skip_artnum) = @_; + if (defined($self->{indexlevel}) || defined($skip_artnum)) { + require PublicInbox::SearchIdx; + require PublicInbox::Msgmap; + my $sidx = PublicInbox::SearchIdx->new($self, 1); # just create + $sidx->begin_txn_lazy; + if (defined $skip_artnum) { + my $mm = PublicInbox::Msgmap->new($self->{inboxdir}, 1); + $mm->{dbh}->begin_work; + $mm->skip_artnum($skip_artnum); + $mm->{dbh}->commit; + } + $sidx->commit_txn_lazy; + } else { + open my $fh, '>>', "$self->{inboxdir}/ssoma.lock" or + die "$self->{inboxdir}/ssoma.lock: $!\n"; + } +} + +sub init_inbox { + my ($self, $shards, $skip_epoch, $skip_artnum) = @_; + if ($self->version == 1) { + my $dir = assert_usable_dir($self); + PublicInbox::Import::init_bare($dir); + $self->umask_prepare; + $self->with_umask(\&_init_v1, $self, $skip_artnum); + } else { + my $v2w = importer($self); + $v2w->init_inbox($shards, $skip_epoch, $skip_artnum); + } } sub importer { my ($self, $parallel) = @_; - $self->{-importer} ||= eval { - my $v = $self->{version} || 1; - if ($v == 2) { - eval { require PublicInbox::V2Writable }; - die "v2 not supported: $@\n" if $@; - my $v2w = PublicInbox::V2Writable->new($self); - $v2w->{parallel} = $parallel; - $v2w; - } elsif ($v == 1) { - my $git = $self->git; - my $name = $self->{name}; - my $addr = $self->{-primary_address}; - PublicInbox::Import->new($git, $name, $addr, $self); - } else { - die "unsupported inbox version: $v\n"; - } + my $v = $self->version; + if ($v == 2) { + eval { require PublicInbox::V2Writable }; + die "v2 not supported: $@\n" if $@; + my $opt = $self->{-creat_opt}; + my $v2w = PublicInbox::V2Writable->new($self, $opt); + $v2w->{parallel} = $parallel if defined $parallel; + $v2w; + } elsif ($v == 1) { + my @arg = (undef, undef, undef, $self); + PublicInbox::Import->new(@arg); + } else { + $! = 78; # EX_CONFIG 5.3.5 local configuration error + die "unsupported inbox version: $v\n"; } } sub filter { - my ($self) = @_; + my ($self, $im) = @_; my $f = $self->{filter}; if ($f && $f =~ /::/) { - my @args = (-inbox => $self); + # v2 keeps msgmap open, which causes conflicts for filters + # such as PublicInbox::Filter::RubyLang which overload msgmap + # for a predictable serial number. + if ($im && $self->version >= 2 && $self->{altid}) { + $im->done; + } + + my @args = (ibx => $self); # basic line splitting, only # Perhaps we can have proper quote splitting one day... ($f, @args) = split(/\s+/, $f) if $f =~ /\s+/; @@ -54,4 +118,205 @@ sub filter { undef; } +sub is_maildir_basename ($) { + my ($bn) = @_; + return 0 if $bn !~ /\A[a-zA-Z0-9][\-\w:,=\.]+\z/; + if ($bn =~ /:2,([A-Z]+)\z/i) { + my $flags = $1; + return 0 if $flags =~ /[DT]/; # no [D]rafts or [T]rashed mail + } + 1; +} + +sub is_maildir_path ($) { + my ($path) = @_; + my @p = split(m!/+!, $path); + (is_maildir_basename($p[-1]) && -f $path) ? 1 : 0; +} + +sub eml_from_path ($) { + my ($path) = @_; + if (open my $fh, '<', $path) { + my $str = do { local $/; <$fh> } or return; + PublicInbox::Eml->new(\$str); + } else { # ENOENT is common with Maildir + warn "failed to open $path: $!\n" if $! != ENOENT; + undef; + } +} + +sub import_maildir { + my ($self, $dir) = @_; + my $im = $self->importer(1); + + foreach my $sub (qw(cur new tmp)) { + -d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n"; + } + foreach my $sub (qw(cur new)) { + opendir my $dh, "$dir/$sub" or die "opendir $dir/$sub: $!\n"; + while (defined(my $fn = readdir($dh))) { + next unless is_maildir_basename($fn); + my $mime = eml_from_path("$dir/$fn") or next; + + if (my $filter = $self->filter($im)) { + my $ret = $filter->scrub($mime) or return; + return if $ret == REJECT(); + $mime = $ret; + } + $im->add($mime); + } + } + $im->done; +} + +# asctime: From example@example.com Fri Jun 23 02:56:55 2000 +my $from_strict = qr/^From \S+ +\S+ \S+ +\S+ [^:]+:[^:]+:[^:]+ [^:]+/; + +sub mb_add ($$$$) { + my ($im, $variant, $filter, $msg) = @_; + $$msg =~ s/(\r?\n)+\z/$1/s; + if ($variant eq 'mboxrd') { + $$msg =~ s/^>(>*From )/$1/gms; + } elsif ($variant eq 'mboxo') { + $$msg =~ s/^>From /From /gms; + } + my $mime = PublicInbox::Eml->new($msg); + if ($filter) { + my $ret = $filter->scrub($mime) or return; + return if $ret == REJECT(); + $mime = $ret; + } + $im->add($mime) +} + +sub import_mbox { + my ($self, $fh, $variant) = @_; + if ($variant !~ /\A(?:mboxrd|mboxo)\z/) { + die "variant must be 'mboxrd' or 'mboxo'\n"; + } + my $im = $self->importer(1); + my $prev = undef; + my $msg = ''; + my $filter = $self->filter; + while (defined(my $l = <$fh>)) { + if ($l =~ /$from_strict/o) { + if (!defined($prev) || $prev =~ /^\r?$/) { + mb_add($im, $variant, $filter, \$msg) if $msg; + $msg = ''; + $prev = $l; + next; + } + warn "W[$.] $l\n"; + } + $prev = $l; + $msg .= $l; + } + mb_add($im, $variant, $filter, \$msg) if $msg; + $im->done; +} + +sub _read_git_config_perm { + my ($self) = @_; + chomp(my $perm = $self->git->qx('config', 'core.sharedRepository')); + $perm; +} + +sub _git_config_perm { + my $self = shift; + my $perm = scalar @_ ? $_[0] : _read_git_config_perm($self); + return PERM_UMASK if (!defined($perm) || $perm eq ''); + return PERM_UMASK if ($perm eq 'umask'); + return PERM_GROUP if ($perm eq 'group'); + if ($perm =~ /\A(?:all|world|everybody)\z/) { + return PERM_EVERYBODY; + } + return PERM_GROUP if ($perm =~ /\A(?:true|yes|on|1)\z/); + return PERM_UMASK if ($perm =~ /\A(?:false|no|off|0)\z/); + + my $i = oct($perm); + return PERM_UMASK if ($i == PERM_UMASK); + return PERM_GROUP if ($i == OLD_PERM_GROUP); + return PERM_EVERYBODY if ($i == OLD_PERM_EVERYBODY); + + if (($i & 0600) != 0600) { + die "core.sharedRepository mode invalid: ". + sprintf('%.3o', $i) . "\nOwner must have permissions\n"; + } + ($i & 0666); +} + +sub _umask_for { + my ($perm) = @_; # _git_config_perm return value + my $rv = $perm; + return umask if $rv == 0; + + # set +x bit if +r or +w were set + $rv |= 0100 if ($rv & 0600); + $rv |= 0010 if ($rv & 0060); + $rv |= 0001 if ($rv & 0006); + (~$rv & 0777); +} + +sub with_umask { + my ($self, $cb, @arg) = @_; + my $old = umask $self->{umask}; + my $rv = eval { $cb->(@arg) }; + my $err = $@; + umask $old; + die $err if $err; + $rv; +} + +sub umask_prepare { + my ($self) = @_; + my $perm = _git_config_perm($self); + my $umask = _umask_for($perm); + $self->{umask} = $umask; +} + +sub cleanup ($) { + delete @{$_[0]}{qw(over mm git search)}; +} + +# warnings to ignore when handling spam mailboxes and maybe other places +sub warn_ignore { + my $s = "@_"; + # Email::Address::XS warnings + $s =~ /^Argument contains empty address at / + || $s =~ /^Element at index [0-9]+ contains / + # PublicInbox::MsgTime + || $s =~ /^bogus TZ offset: .+?, ignoring and assuming \+0000/ + || $s =~ /^bad Date: .+? in / +} + +# this expects to be RHS in this assignment: "local $SIG{__WARN__} = ..." +sub warn_ignore_cb { + my $cb = $SIG{__WARN__} // sub { print STDERR @_ }; + sub { + return if warn_ignore(@_); + $cb->(@_); + } +} + +# v2+ only +sub git_dir_n { "$_[0]->{inboxdir}/git/$_[1].git" } + +# v2+ only +sub git_dir_latest { + my ($self, $max) = @_; + $$max = -1; + my $pfx = "$self->{inboxdir}/git"; + return unless -d $pfx; + my $latest; + opendir my $dh, $pfx or die "opendir $pfx: $!\n"; + while (defined(my $git_dir = readdir($dh))) { + $git_dir =~ m!\A([0-9]+)\.git\z! or next; + if ($1 > $$max) { + $$max = $1; + $latest = "$pfx/$git_dir"; + } + } + $latest; +} + 1;