X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FIMAP.pm;h=2d0d005e6f8b71239a342669a03645c0600f82ef;hb=3d83cc1dae085b0bc2044cb82aa86e35a8b5172a;hp=4631ea7eabc5e624e501c18ad049fe40c5d345d2;hpb=95efed60fe2d20ee4382c485e7faf58b3fee25af;p=public-inbox.git diff --git a/lib/PublicInbox/IMAP.pm b/lib/PublicInbox/IMAP.pm index 4631ea7e..2d0d005e 100644 --- a/lib/PublicInbox/IMAP.pm +++ b/lib/PublicInbox/IMAP.pm @@ -21,12 +21,18 @@ # as a 50K uint16_t array (via pack("S*", ...)). "UID offset" # is the offset from {uid_base} which determines the start of # the mailbox slice. - +# +# fields: +# imapd: PublicInbox::IMAPD ref +# ibx: PublicInbox::Inbox ref +# long_cb: long_response private data +# uid_base: base UID for mailbox slice (0-based) +# -login_tag: IMAP TAG for LOGIN +# -idle_tag: IMAP response tag for IDLE +# uo2m: UID-to-MSN mapping package PublicInbox::IMAP; use strict; -use base qw(PublicInbox::DS); -use fields qw(imapd ibx long_cb -login_tag - uid_base -idle_tag uo2m); +use parent qw(PublicInbox::DS); use PublicInbox::Eml; use PublicInbox::EmlContentFoo qw(parse_content_disposition); use PublicInbox::DS qw(now); @@ -34,11 +40,7 @@ use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT); use PublicInbox::GitAsyncCat; use Text::ParseWords qw(parse_line); use Errno qw(EAGAIN); -use Time::Local qw(timegm); -use POSIX qw(strftime); -use Hash::Util qw(unlock_hash); # dependency of fields for perl 5.10+, anyways -use PublicInbox::Search; -*mdocid = \&PublicInbox::Search::mdocid; +use PublicInbox::IMAPsearchqp; my $Address; for my $mod (qw(Email::Address::XS Mail::Address)) { @@ -97,10 +99,6 @@ undef %FETCH_NEED; my $valid_range = '[0-9]+|[0-9]+:[0-9]+|[0-9]+:\*'; $valid_range = qr/\A(?:$valid_range)(?:,(?:$valid_range))*\z/; -my @MoY = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); -my %MoY; -@MoY{@MoY} = (0..11); - # RFC 3501 5.4. Autologout Timer needs to be >= 30min $PublicInbox::DS::EXPTIME = 60 * 30; @@ -112,8 +110,7 @@ sub greet ($) { sub new ($$$) { my ($class, $sock, $imapd) = @_; - my $self = fields::new('PublicInbox::IMAP_preauth'); - unlock_hash(%$self); + my $self = bless { imapd => $imapd }, 'PublicInbox::IMAP_preauth'; my $ev = EPOLLIN; my $wbuf; if ($sock->can('accept_SSL') && !$sock->accept_SSL) { @@ -122,7 +119,6 @@ sub new ($$$) { $wbuf = [ \&PublicInbox::DS::accept_tls_step, \&greet ]; } $self->SUPER::new($sock, $ev | EPOLLONESHOT); - $self->{imapd} = $imapd; if ($wbuf) { $self->{wbuf} = $wbuf; } else { @@ -171,7 +167,7 @@ sub cmd_login ($$$$) { sub cmd_close ($$) { my ($self, $tag) = @_; - delete $self->{uid_base}; + delete @$self{qw(uid_base uo2m)}; delete $self->{ibx} ? "$tag OK Close done\r\n" : "$tag BAD No mailbox\r\n"; } @@ -195,12 +191,10 @@ sub cmd_capability ($$) { '* '.capa($self)."\r\n$tag OK Capability done\r\n"; } -sub cmd_noop ($$) { "$_[1] OK Noop done\r\n" } - # uo2m: UID Offset to MSN, this is an arrayref by default, # but uo2m_hibernate can compact and deduplicate it -sub uo2m_ary_new ($) { - my ($self) = @_; +sub uo2m_ary_new ($;$) { + my ($self, $exists) = @_; my $base = $self->{uid_base}; my $uids = $self->{ibx}->over->uid_range($base + 1, $base + UID_SLICE); @@ -209,6 +203,7 @@ sub uo2m_ary_new ($) { my $msn = 0; ++$base; $tmp[$_ - $base] = ++$msn for @$uids; + $$exists = $msn if $exists; \@tmp; } @@ -225,7 +220,7 @@ sub uo2m_hibernate ($) { sub uo2m_last_uid ($) { my ($self) = @_; - my $uo2m = $self->{uo2m} or die 'BUG: uo2m_last_uid w/o {uo2m}'; + defined(my $uo2m = $self->{uo2m}) or die 'BUG: uo2m_last_uid w/o {uo2m}'; (ref($uo2m) ? @$uo2m : (length($uo2m) >> 1)) + $self->{uid_base}; } @@ -238,7 +233,7 @@ sub uo2m_pack ($) { # extend {uo2m} to account for new messages which arrived since # {uo2m} was created. -sub uo2m_extend ($$) { +sub uo2m_extend ($$;$) { my ($self, $new_uid_max) = @_; defined(my $uo2m = $self->{uo2m}) or return($self->{uo2m} = uo2m_ary_new($self)); @@ -249,21 +244,32 @@ sub uo2m_extend ($$) { my $base = $self->{uid_base}; ++$beg; my $uids = $self->{ibx}->over->uid_range($beg, $base + UID_SLICE); + return $uo2m if !scalar(@$uids); my @tmp; # [$UID_OFFSET] => $MSN + my $write_method = $_[2] // 'msg_more'; if (ref($uo2m)) { my $msn = $uo2m->[-1]; $tmp[$_ - $beg] = ++$msn for @$uids; + $self->$write_method("* $msn EXISTS\r\n"); push @$uo2m, @tmp; $uo2m; } else { my $msn = unpack('S', substr($uo2m, -2, 2)); $tmp[$_ - $beg] = ++$msn for @$uids; + $self->$write_method("* $msn EXISTS\r\n"); $uo2m .= uo2m_pack(\@tmp); my %dedupe = ($uo2m => undef); $self->{uo2m} = (keys %dedupe)[0]; } } +sub cmd_noop ($$) { + my ($self, $tag) = @_; + defined($self->{uid_base}) and + uo2m_extend($self, $self->{uid_base} + UID_SLICE); + \"$tag OK Noop done\r\n"; +} + # the flexible version which works on scalars and array refs. # Must call uo2m_extend before this sub uid2msn ($$) { @@ -300,14 +306,10 @@ sub msn_to_uid_range ($$) { # called by PublicInbox::InboxIdle sub on_inbox_unlock { my ($self, $ibx) = @_; - my $old = uo2m_last_uid($self); my $uid_end = $self->{uid_base} + UID_SLICE; - uo2m_extend($self, $uid_end); + uo2m_extend($self, $uid_end, 'write'); my $new = uo2m_last_uid($self); - if ($new > $old) { - my $msn = uid2msn($self, $new); - $self->write(\"* $msn EXISTS\r\n"); - } elsif ($new == $uid_end) { # max exceeded $uid_end + if ($new == $uid_end) { # max exceeded $uid_end # continue idling w/o inotify my $sock = $self->{sock} or return; $ibx->unsubscribe_unlock(fileno($sock)); @@ -334,14 +336,13 @@ sub cmd_idle ($$) { my ($self, $tag) = @_; # IDLE seems allowed by dovecot w/o a mailbox selected *shrug* my $ibx = $self->{ibx} or return "$tag BAD no mailbox selected\r\n"; - $self->{-idle_tag} = $tag; - my $max = $ibx->over->max; my $uid_end = $self->{uid_base} + UID_SLICE; + uo2m_extend($self, $uid_end); my $sock = $self->{sock} or return; my $fd = fileno($sock); + $self->{-idle_tag} = $tag; # only do inotify on most recent slice - if ($max < $uid_end) { - uo2m_extend($self, $uid_end); + if ($ibx->over->max < $uid_end) { $ibx->subscribe_unlock($fd, $self); $self->{imapd}->idler_start; } @@ -387,43 +388,48 @@ sub ensure_slices_exist ($$$) { push @$l, map { qq[* LIST (\\HasNoChildren) "." $_\r\n] } @created; } -sub inbox_lookup ($$) { - my ($self, $mailbox) = @_; - my ($ibx, $exists, $uidnext, $uid_base); - if ($mailbox =~ /\A(.+)\.([0-9]+)\z/) { - # old mail: inbox.comp.foo.$SLICE_IDX - my $mb_top = $1; - $uid_base = $2 * UID_SLICE; - $ibx = $self->{imapd}->{mailboxes}->{lc $mailbox} or return; - my $max; - ($exists, $uidnext, $max) = $ibx->over->imap_status($uid_base, - $uid_base + UID_SLICE); - ensure_slices_exist($self->{imapd}, $ibx, $max); - } else { # check for dummy inboxes - $mailbox = lc $mailbox; - $ibx = $self->{imapd}->{mailboxes}->{$mailbox} or return; - +sub inbox_lookup ($$;$) { + my ($self, $mailbox, $examine) = @_; + my ($ibx, $exists, $uidmax, $uid_base) = (undef, 0, 0, 0); + $mailbox = lc $mailbox; + $ibx = $self->{imapd}->{mailboxes}->{$mailbox} or return; + my $over = $ibx->over; + if ($over != $ibx) { # not a dummy + $mailbox =~ /\.([0-9]+)\z/ or + die "BUG: unexpected dummy mailbox: $mailbox\n"; + $uid_base = $1 * UID_SLICE; + + # ->num_highwater caches for writers, so use ->meta_accessor + $uidmax = $ibx->mm->meta_accessor('num_highwater') // 0; + if ($examine) { + $self->{uid_base} = $uid_base; + $self->{ibx} = $ibx; + $self->{uo2m} = uo2m_ary_new($self, \$exists); + } else { + $exists = $over->imap_exists; + } + ensure_slices_exist($self->{imapd}, $ibx, $over->max); + } else { + if ($examine) { + $self->{uid_base} = $uid_base; + $self->{ibx} = $ibx; + delete $self->{uo2m}; + } # if "INBOX.foo.bar" is selected and "INBOX.foo.bar.0", # check for new UID ranges (e.g. "INBOX.foo.bar.1") if (my $z = $self->{imapd}->{mailboxes}->{"$mailbox.0"}) { ensure_slices_exist($self->{imapd}, $z, $z->over->max); } - - $uid_base = $exists = 0; - $uidnext = 1; } - ($ibx, $exists, $uidnext, $uid_base); + ($ibx, $exists, $uidmax + 1, $uid_base); } sub cmd_examine ($$$) { my ($self, $tag, $mailbox) = @_; - my ($ibx, $exists, $uidnext, $base) = inbox_lookup($self, $mailbox); - return "$tag NO Mailbox doesn't exist: $mailbox\r\n" if !$ibx; - $self->{uid_base} = $base; - # XXX: do we need this? RFC 5162/7162 my $ret = $self->{ibx} ? "* OK [CLOSED] previous closed\r\n" : ''; - $self->{ibx} = $ibx; + my ($ibx, $exists, $uidnext, $base) = inbox_lookup($self, $mailbox, 1); + return "$tag NO Mailbox doesn't exist: $mailbox\r\n" if !$ibx; $ret .= <cat_async via git_async_cat if (!defined($oid)) { # it's possible to have TOCTOU if an admin runs # public-inbox-(edit|purge), just move onto the next message + warn "E: $smsg->{blob} missing in $self->{ibx}->{inboxdir}\n"; return requeue_once($self); } else { $smsg->{blob} eq $oid or die "BUG: $smsg->{blob} != $oid"; } + my $pre; + if (!$self->{wbuf} && (my $nxt = $msgs->[0])) { + $pre = $self->{ibx}->git->async_prefetch($nxt->{blob}, + \&fetch_blob_cb, $fetch_arg); + } fetch_run_ops($self, $smsg, $bref, $ops, $partial); - requeue_once($self); + $pre ? $self->zflush : requeue_once($self); } sub emit_rfc822 { @@ -1076,16 +1088,6 @@ sub cmd_fetch ($$$$;@) { long_response($self, $cb, $tag, [], $range_info, $ops, $partial); } -sub parse_date ($) { # 02-Oct-1993 - my ($date_text) = @_; - my ($dd, $mon, $yyyy) = split(/-/, $_[0], 3); - defined($yyyy) or return; - my $mm = $MoY{$mon} // return; - $dd =~ /\A[0123]?[0-9]\z/ or return; - $yyyy =~ /\A[0-9]{4,}\z/ or return; # Y10K-compatible! - timegm(0, 0, 0, $dd, $mm, $yyyy); -} - sub msn_convert ($$) { my ($self, $uids) = @_; my $adj = $self->{uid_base} + 1; @@ -1107,142 +1109,15 @@ sub search_uid_range { # long_response 1; # more } -sub date_search { - my ($q, $k, $d) = @_; - my $sql = $q->{sql}; - - # Date: header - if ($k eq 'SENTON') { - my $end = $d + 86399; # no leap day... - my $da = strftime('%Y%m%d%H%M%S', gmtime($d)); - my $db = strftime('%Y%m%d%H%M%S', gmtime($end)); - $q->{xap} .= " dt:$da..$db"; - $$sql .= " AND ds >= $d AND ds <= $end" if defined($sql); - } elsif ($k eq 'SENTBEFORE') { - $q->{xap} .= ' d:..'.strftime('%Y%m%d', gmtime($d)); - $$sql .= " AND ds <= $d" if defined($sql); - } elsif ($k eq 'SENTSINCE') { - $q->{xap} .= ' d:'.strftime('%Y%m%d', gmtime($d)).'..'; - $$sql .= " AND ds >= $d" if defined($sql); - - # INTERNALDATE (Received) - } elsif ($k eq 'ON') { - my $end = $d + 86399; # no leap day... - $q->{xap} .= " ts:$d..$end"; - $$sql .= " AND ts >= $d AND ts <= $end" if defined($sql); - } elsif ($k eq 'BEFORE') { - $q->{xap} .= " ts:..$d"; - $$sql .= " AND ts <= $d" if defined($sql); - } elsif ($k eq 'SINCE') { - $q->{xap} .= " ts:$d.."; - $$sql .= " AND ts >= $d" if defined($sql); - } else { - die "BUG: $k not recognized"; - } -} - -# IMAP to Xapian search key mapping -my %I2X = ( - SUBJECT => 's:', - BODY => 'b:', - FROM => 'f:', - TEXT => '', # n.b. does not include all headers - TO => 't:', - CC => 'c:', - # BCC => 'bcc:', # TODO - # KEYWORD # TODO ? dfpre,dfpost,... -); - -# IMAP allows searching arbitrary headers via "HEADER $HDR_NAME $HDR_VAL" -# which gets silly expensive. We only allow the headers we already index. -my %H2X = (%I2X, 'MESSAGE-ID' => 'm:', 'LIST-ID' => 'l:'); - -sub xap_append ($$$$) { - my ($q, $rest, $k, $xk) = @_; - delete $q->{sql}; # can't use over.sqlite3 - defined(my $arg = shift @$rest) or return "BAD $k no arg"; - - # AFAIK Xapian can't handle [*"] in probabilistic terms - $arg =~ tr/*"//d; - ${$q->{xap}} .= qq[ $xk"$arg"]; - undef; -} - -sub parse_query { - my ($self, $rest) = @_; - if (uc($rest->[0]) eq 'CHARSET') { - shift @$rest; - defined(my $c = shift @$rest) or return 'BAD missing charset'; - $c =~ /\A(?:UTF-8|US-ASCII)\z/ or return 'NO [BADCHARSET]'; +sub parse_query ($$) { + my ($self, $query) = @_; + my $q = PublicInbox::IMAPsearchqp::parse($self, $query); + if (ref($q)) { + my $max = $self->{ibx}->over->max; + my $beg = 1; + uid_clamp($self, \$beg, \$max); + $q->{range_info} = [ $beg, $max ]; } - - my $sql = ''; # date conditions, {sql} deleted if Xapian is needed - my $xap = ''; - my $q = { sql => \$sql, xap => \$xap }; - my $msn2uid; - while (@$rest) { - my $k = uc(shift @$rest); - # default criteria - next if $k =~ /\A(?:ALL|RECENT|UNSEEN|NEW)\z/; - next if $k eq 'AND'; # the default, until we support OR - if ($k =~ $valid_range) { # convert sequence numbers to UIDs - msn_to_uid_range($msn2uid //= msn2uid($self), $k); - push @{$q->{uid}}, $k; - } elsif ($k eq 'UID') { - $k = shift(@$rest) // ''; - $k =~ $valid_range or return 'BAD UID range'; - push @{$q->{uid}}, $k; - } elsif ($k =~ /\A(?:SENT)?(?:SINCE|ON|BEFORE)\z/) { - my $d = parse_date(shift(@$rest) // ''); - defined $d or return "BAD $k date format"; - date_search($q, $k, $d); - } elsif ($k =~ /\A(?:SMALLER|LARGER)\z/) { - delete $q->{sql}; # can't use over.sqlite3 - my $bytes = shift(@$rest) // ''; - $bytes =~ /\A[0-9]+\z/ or return "BAD $k not a number"; - $xap .= ' bytes:' . ($k eq 'SMALLER' ? - '..'.(--$bytes) : - (++$bytes).'..'); - } elsif ($k eq 'HEADER') { - $k = uc(shift(@$rest) // ''); - my $xk = $H2X{$k} or - return "BAD HEADER $k not supported"; - my $err = xap_append($q, $rest, $k, $xk); - return $err if $err; - } elsif (defined(my $xk = $I2X{$k})) { - my $err = xap_append($q, $rest, $k, $xk); - return $err if $err; - } else { - # TODO: parentheses, OR, NOT ... - return "BAD $k not supported (yet?)"; - } - } - - # favor using over.sqlite3 if possible, since Xapian is optional - if (exists $q->{sql}) { - delete($q->{xap}); - delete($q->{sql}) if $sql eq ''; - } elsif (!$self->{ibx}->search) { - return 'BAD Xapian not configured for mailbox'; - } - my $max = $self->{ibx}->over->max; - if (my $uid = delete $q->{uid}) { - my $range_csv = join(',', @$uid); - do { - my $nxt = range_step($self, \$range_csv); - my ($beg, $end) = @$nxt; - if ($xap) { - $xap .= " uid:$beg..$end"; - } elsif ($beg == $end) { - $sql .= " AND num = $beg"; - } else { - $sql .= " AND num >= $beg AND num <= $end"; - } - } while ($range_csv); - } - my $beg = 1; - uid_clamp($self, \$beg, \$max); - $q->{range_info} = [ $beg, $max ]; $q; } @@ -1251,17 +1126,13 @@ sub refill_xap ($$$$) { my ($beg, $end) = @$range_info; my $srch = $self->{ibx}->search; my $opt = { mset => 2, limit => 1000 }; - my $nshard = $srch->{nshard} // 1; - while (1) { - my $mset = $srch->query("$$q uid:$beg..$end", $opt); - @$uids = map { mdocid($nshard, $_) } $mset->items; - if (@$uids) { - $range_info->[0] = $uids->[-1] + 1; # update $beg - return; - } else { # all done - return 0; - } + my $mset = $srch->mset("$q uid:$beg..$end", $opt); + @$uids = @{$srch->mset_to_artnums($mset)}; + if (@$uids) { + $range_info->[0] = $uids->[-1] + 1; # update $beg + return; # possibly more } + 0; # all done } sub search_xap_range { # long_response @@ -1278,9 +1149,9 @@ sub search_xap_range { # long_response } sub search_common { - my ($self, $tag, $rest, $want_msn) = @_; + my ($self, $tag, $query, $want_msn) = @_; my $ibx = $self->{ibx} or return "$tag BAD No mailbox selected\r\n"; - my $q = parse_query($self, $rest); + my $q = parse_query($self, $query); return "$tag $q\r\n" if !ref($q); my ($sql, $range_info) = delete @$q{qw(sql range_info)}; if (!scalar(keys %$q)) { # overview.sqlite3 @@ -1288,6 +1159,8 @@ sub search_common { long_response($self, \&search_uid_range, $tag, $sql, $range_info, $want_msn); } elsif ($q = $q->{xap}) { + $self->{ibx}->search or + return "$tag BAD search not available for mailbox\r\n"; $self->msg_more('* SEARCH'); long_response($self, \&search_xap_range, $tag, $q, $range_info, $want_msn); @@ -1296,14 +1169,14 @@ sub search_common { } } -sub cmd_uid_search ($$$;) { - my ($self, $tag) = splice(@_, 0, 2); - search_common($self, $tag, \@_); +sub cmd_uid_search ($$$) { + my ($self, $tag, $query) = @_; + search_common($self, $tag, $query); } sub cmd_search ($$$;) { - my ($self, $tag) = splice(@_, 0, 2); - search_common($self, $tag, \@_, 1); + my ($self, $tag, $query) = @_; + search_common($self, $tag, $query, 1); } sub args_ok ($$) { # duplicated from PublicInbox::NNTP @@ -1321,6 +1194,7 @@ sub process_line ($$) { # TODO: IMAP allows literals for big requests to upload messages # (which we don't support) but maybe some big search queries use it. + # RFC 3501 9 (2) doesn't permit TAB or multiple SP my ($tag, $req, @args) = parse_line('[ \t]+', 0, $l); pop(@args) if (@args && !defined($args[-1])); if (@args && uc($req) eq 'UID') { @@ -1332,6 +1206,10 @@ sub process_line ($$) { idle_done($self, $tag) : "$idle_tag BAD expected DONE\r\n"; } elsif (my $cmd = $self->can('cmd_'.lc($req // ''))) { + if ($cmd == \&cmd_uid_search || $cmd == \&cmd_search) { + # preserve user-supplied quotes for search + (undef, @args) = split(/ search /i, $l, 2); + } $cmd->($self, $tag, @args); } else { # this is weird auth_challenge_ok($self) // @@ -1370,7 +1248,7 @@ sub long_step { } elsif ($more) { # $self->{wbuf}: $self->update_idle_time; - # control passed to $more may be a GitAsyncCat object + # control passed to git_async_cat if $more == \undef requeue_once($self) if !ref($more); } else { # all done! delete $self->{long_cb};