X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=blobdiff_plain;f=lib%2FPublicInbox%2FIMAP.pm;h=ce0dce0f317e8fb40a2a0b9da160594cafeb4fcf;hp=fe3c7d1a85bba20174642fb9399dce902bcdf0f8;hb=d07ba9c30800225052d17ccca458afbfa05a8ff0;hpb=4aa27d61cbf10fdad098b20ae4bcef8f4893e531 diff --git a/lib/PublicInbox/IMAP.pm b/lib/PublicInbox/IMAP.pm index fe3c7d1a..ce0dce0f 100644 --- a/lib/PublicInbox/IMAP.pm +++ b/lib/PublicInbox/IMAP.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2020 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ # # Each instance of this represents an IMAP client connected to @@ -7,23 +7,39 @@ # slow storage. # # data notes: -# * NNTP article numbers are UIDs - +# +# * NNTP article numbers are UIDs, mm->created_at is UIDVALIDITY +# +# * public-inboxes are sliced into mailboxes of 50K messages +# to not overload MUAs: $NEWSGROUP_NAME.$SLICE_INDEX +# Slices are similar in concept to v2 "epochs". Epochs +# are for the limitations of git clients, while slices are +# for the limitations of IMAP clients. +# +# * We also take advantage of slices being only 50K to store +# "UID offset" to message sequence number (MSN) mapping +# as a 50K uint16_t array (via pack("S*", ...)). "UID offset" +# is the offset from {uid_base} which determines the start of +# the mailbox slice. +# +# fields: +# imapd: PublicInbox::IMAPD ref +# ibx: PublicInbox::Inbox ref +# long_cb: long_response private data +# uid_base: base UID for mailbox slice (0-based) +# -login_tag: IMAP TAG for LOGIN +# -idle_tag: IMAP response tag for IDLE +# uo2m: UID-to-MSN mapping package PublicInbox::IMAP; use strict; -use base qw(PublicInbox::DS); -use fields qw(imapd ibx long_cb -login_tag - uid_base -idle_tag -idle_max); +use parent qw(PublicInbox::DS); use PublicInbox::Eml; use PublicInbox::EmlContentFoo qw(parse_content_disposition); use PublicInbox::DS qw(now); -use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT); use PublicInbox::GitAsyncCat; use Text::ParseWords qw(parse_line); use Errno qw(EAGAIN); -use Time::Local qw(timegm); -use POSIX qw(strftime); -use Hash::Util qw(unlock_hash); # dependency of fields for perl 5.10+, anyways +use PublicInbox::IMAPsearchqp; my $Address; for my $mod (qw(Email::Address::XS Mail::Address)) { @@ -34,8 +50,10 @@ die "neither Email::Address::XS nor Mail::Address loaded: $@" if !$Address; sub LINE_MAX () { 8000 } # RFC 2683 3.2.1.5 -# changing this will cause grief for clients which cache -sub UID_BLOCK () { 50_000 } +# Changing UID_SLICE will cause grief for clients which cache. +# This also needs to be <64K: we pack it into a uint16_t +# for long_response UID (offset) => MSN mappings +sub UID_SLICE () { 50_000 } # these values area also used for sorting sub NEED_SMSG () { 1 } @@ -80,39 +98,15 @@ undef %FETCH_NEED; my $valid_range = '[0-9]+|[0-9]+:[0-9]+|[0-9]+:\*'; $valid_range = qr/\A(?:$valid_range)(?:,(?:$valid_range))*\z/; -my @MoY = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); -my %MoY; -@MoY{@MoY} = (0..11); - -# RFC 3501 5.4. Autologout Timer needs to be >= 30min -$PublicInbox::DS::EXPTIME = 60 * 30; - -sub greet ($) { +sub do_greet { my ($self) = @_; my $capa = capa($self); $self->write(\"* OK [$capa] public-inbox-imapd ready\r\n"); } -sub new ($$$) { - my ($class, $sock, $imapd) = @_; - my $self = fields::new('PublicInbox::IMAP_preauth'); - unlock_hash(%$self); - my $ev = EPOLLIN; - my $wbuf; - if ($sock->can('accept_SSL') && !$sock->accept_SSL) { - return CORE::close($sock) if $! != EAGAIN; - $ev = PublicInbox::TLS::epollbit(); - $wbuf = [ \&PublicInbox::DS::accept_tls_step, \&greet ]; - } - $self->SUPER::new($sock, $ev | EPOLLONESHOT); - $self->{imapd} = $imapd; - if ($wbuf) { - $self->{wbuf} = $wbuf; - } else { - greet($self); - } - $self->update_idle_time; - $self; +sub new { + my (undef, $sock, $imapd) = @_; + (bless { imapd => $imapd }, 'PublicInbox::IMAP_preauth')->greet($sock) } sub logged_in { 1 } @@ -154,7 +148,7 @@ sub cmd_login ($$$$) { sub cmd_close ($$) { my ($self, $tag) = @_; - delete $self->{uid_base}; + delete @$self{qw(uid_base uo2m)}; delete $self->{ibx} ? "$tag OK Close done\r\n" : "$tag BAD No mailbox\r\n"; } @@ -178,75 +172,173 @@ sub cmd_capability ($$) { '* '.capa($self)."\r\n$tag OK Capability done\r\n"; } -sub cmd_noop ($$) { "$_[1] OK Noop done\r\n" } +# uo2m: UID Offset to MSN, this is an arrayref by default, +# but uo2m_hibernate can compact and deduplicate it +sub uo2m_ary_new ($;$) { + my ($self, $exists) = @_; + my $ub = $self->{uid_base}; + my $uids = $self->{ibx}->over(1)->uid_range($ub + 1, $ub + UID_SLICE); + + # convert UIDs to offsets from {base} + my @tmp; # [$UID_OFFSET] => $MSN + my $msn = 0; + ++$ub; + $tmp[$_ - $ub] = ++$msn for @$uids; + $$exists = $msn if $exists; + \@tmp; +} + +# changes UID-offset-to-MSN mapping into a deduplicated scalar: +# uint16_t uo2m[UID_SLICE]. +# May be swapped out for idle clients if THP is disabled. +sub uo2m_hibernate ($) { + my ($self) = @_; + ref(my $uo2m = $self->{uo2m}) or return; + my %dedupe = ( uo2m_pack($uo2m) => undef ); + $self->{uo2m} = (keys(%dedupe))[0]; + undef; +} + +sub uo2m_last_uid ($) { + my ($self) = @_; + defined(my $uo2m = $self->{uo2m}) or die 'BUG: uo2m_last_uid w/o {uo2m}'; + (ref($uo2m) ? @$uo2m : (length($uo2m) >> 1)) + $self->{uid_base}; +} + +sub uo2m_pack ($) { + # $_[0] is an arrayref of MSNs, it may have undef gaps if there + # are gaps in the corresponding UIDs: [ msn1, msn2, undef, msn3 ] + no warnings 'uninitialized'; + pack('S*', @{$_[0]}); +} + +# extend {uo2m} to account for new messages which arrived since +# {uo2m} was created. +sub uo2m_extend ($$;$) { + my ($self, $new_uid_max) = @_; + defined(my $uo2m = $self->{uo2m}) or + return($self->{uo2m} = uo2m_ary_new($self)); + my $beg = uo2m_last_uid($self); # last UID we've learned + return $uo2m if $beg >= $new_uid_max; # fast path + + # need to extend the current range: + my $base = $self->{uid_base}; + ++$beg; + my $uids = $self->{ibx}->over(1)->uid_range($beg, $base + UID_SLICE); + return $uo2m if !scalar(@$uids); + my @tmp; # [$UID_OFFSET] => $MSN + my $write_method = $_[2] // 'msg_more'; + if (ref($uo2m)) { + my $msn = $uo2m->[-1]; + $tmp[$_ - $beg] = ++$msn for @$uids; + $self->$write_method("* $msn EXISTS\r\n"); + push @$uo2m, @tmp; + $uo2m; + } else { + my $msn = unpack('S', substr($uo2m, -2, 2)); + $tmp[$_ - $beg] = ++$msn for @$uids; + $self->$write_method("* $msn EXISTS\r\n"); + $uo2m .= uo2m_pack(\@tmp); + my %dedupe = ($uo2m => undef); + $self->{uo2m} = (keys %dedupe)[0]; + } +} + +sub cmd_noop ($$) { + my ($self, $tag) = @_; + defined($self->{uid_base}) and + uo2m_extend($self, $self->{uid_base} + UID_SLICE); + \"$tag OK Noop done\r\n"; +} + +# the flexible version which works on scalars and array refs. +# Must call uo2m_extend before this +sub uid2msn ($$) { + my ($self, $uid) = @_; + my $uo2m = $self->{uo2m}; + my $off = $uid - $self->{uid_base} - 1; + ref($uo2m) ? $uo2m->[$off] : unpack('S', substr($uo2m, $off << 1, 2)); +} + +# returns an arrayref of UIDs, so MSNs can be translated to UIDs via: +# $msn2uid->[$MSN-1] => $UID. The result of this is always ephemeral +# and does not live beyond the event loop. +sub msn2uid ($) { + my ($self) = @_; + my $base = $self->{uid_base}; + my $uo2m = uo2m_extend($self, $base + UID_SLICE); + $uo2m = [ unpack('S*', $uo2m) ] if !ref($uo2m); + + my $uo = 0; + my @msn2uid; + for my $msn (@$uo2m) { + ++$uo; + $msn2uid[$msn - 1] = $uo + $base if $msn; + } + \@msn2uid; +} + +# converts a set of message sequence numbers in requests to UIDs: +sub msn_to_uid_range ($$) { + my $msn2uid = $_[0]; + $_[1] =~ s!([0-9]+)!$msn2uid->[$1 - 1] // ($msn2uid->[-1] // 0 + 1)!sge; +} # called by PublicInbox::InboxIdle sub on_inbox_unlock { my ($self, $ibx) = @_; - my $new = $ibx->mm->max; - my $uid_base = $self->{uid_base} // 0; - my $uid_end = $uid_base + UID_BLOCK; - defined(my $old = $self->{-idle_max}) or die 'BUG: -idle_max unset'; - $new = $uid_end if $new > $uid_end; - if ($new > $old) { - $self->{-idle_max} = $new; - $new -= $uid_base; - $old -= $uid_base; - $self->msg_more("* $_ EXISTS\r\n") for (($old + 1)..($new - 1)); - $self->write(\"* $new EXISTS\r\n"); - } elsif ($new == $uid_end) { # max exceeded $uid_end + my $uid_end = $self->{uid_base} + UID_SLICE; + uo2m_extend($self, $uid_end, 'write'); + my $new = uo2m_last_uid($self); + if ($new == $uid_end) { # max exceeded $uid_end # continue idling w/o inotify - delete $self->{-idle_max}; my $sock = $self->{sock} or return; $ibx->unsubscribe_unlock(fileno($sock)); } } -# called every X minute(s) or so by PublicInbox::DS::later -my $IDLERS = {}; -my $idle_timer; +# called every minute or so by PublicInbox::DS::later +my $IDLERS; # fileno($obj->{sock}) => PublicInbox::IMAP sub idle_tick_all { my $old = $IDLERS; - $IDLERS = {}; + $IDLERS = undef; for my $i (values %$old) { next if ($i->{wbuf} || !exists($i->{-idle_tag})); - $i->update_idle_time or next; $IDLERS->{fileno($i->{sock})} = $i; $i->write(\"* OK Still here\r\n"); } - $idle_timer = scalar keys %$IDLERS ? - PublicInbox::DS::later(\&idle_tick_all) : undef; + $IDLERS and + PublicInbox::DS::add_uniq_timer('idle', 60, \&idle_tick_all); } sub cmd_idle ($$) { my ($self, $tag) = @_; # IDLE seems allowed by dovecot w/o a mailbox selected *shrug* my $ibx = $self->{ibx} or return "$tag BAD no mailbox selected\r\n"; - $self->{-idle_tag} = $tag; - my $max = $ibx->mm->max // 0; - my $uid_end = $self->{uid_base} + UID_BLOCK; + my $uid_end = $self->{uid_base} + UID_SLICE; + uo2m_extend($self, $uid_end); my $sock = $self->{sock} or return; my $fd = fileno($sock); + $self->{-idle_tag} = $tag; # only do inotify on most recent slice - if ($max < $uid_end) { + if ($ibx->over(1)->max < $uid_end) { $ibx->subscribe_unlock($fd, $self); $self->{imapd}->idler_start; - $self->{-idle_max} = $max; } - $idle_timer //= PublicInbox::DS::later(\&idle_tick_all); + PublicInbox::DS::add_uniq_timer('idle', 60, \&idle_tick_all); $IDLERS->{$fd} = $self; \"+ idling\r\n" } sub stop_idle ($$) { - my ($self, $ibx); + my ($self, $ibx) = @_; my $sock = $self->{sock} or return; my $fd = fileno($sock); delete $IDLERS->{$fd}; $ibx->unsubscribe_unlock($fd); } -sub cmd_done ($$) { +sub idle_done ($$) { my ($self, $tag) = @_; # $tag is "DONE" (case-insensitive) defined(my $idle_tag = delete $self->{-idle_tag}) or return "$tag BAD not idle\r\n"; @@ -258,12 +350,12 @@ sub cmd_done ($$) { "$idle_tag OK Idle done\r\n"; } -sub ensure_ranges_exist ($$$) { +sub ensure_slices_exist ($$$) { my ($imapd, $ibx, $max) = @_; defined(my $mb_top = $ibx->{newsgroup}) or return; my $mailboxes = $imapd->{mailboxes}; my @created; - for (my $i = int($max/UID_BLOCK); $i >= 0; --$i) { + for (my $i = int($max/UID_SLICE); $i >= 0; --$i) { my $sub_mailbox = "$mb_top.$i"; last if exists $mailboxes->{$sub_mailbox}; $mailboxes->{$sub_mailbox} = $ibx; @@ -271,41 +363,53 @@ sub ensure_ranges_exist ($$$) { push @created, $sub_mailbox; } return unless @created; - my $l = $imapd->{inboxlist} or return; + my $l = $imapd->{mailboxlist} or return; push @$l, map { qq[* LIST (\\HasNoChildren) "." $_\r\n] } @created; } -sub inbox_lookup ($$) { - my ($self, $mailbox) = @_; - my ($ibx, $exists, $uidnext, $uid_base); - if ($mailbox =~ /\A(.+)\.([0-9]+)\z/) { - # old mail: inbox.comp.foo.$uid_block_idx - my $mb_top = $1; - $uid_base = $2 * UID_BLOCK; - $ibx = $self->{imapd}->{mailboxes}->{lc $mailbox} or return; - $exists = $ibx->mm->max // 0; - ensure_ranges_exist($self->{imapd}, $ibx, $exists); - my $uid_end = $uid_base + UID_BLOCK; - $exists = $uid_end if $exists > $uid_end; - $uidnext = $exists + 1; - $exists -= $uid_base; - } else { # check for dummy inboxes - $ibx = $self->{imapd}->{mailboxes}->{lc $mailbox} or return; - $uid_base = $exists = 0; - $uidnext = 1; +sub inbox_lookup ($$;$) { + my ($self, $mailbox, $examine) = @_; + my ($ibx, $exists, $uidmax, $uid_base) = (undef, 0, 0, 0); + $mailbox = lc $mailbox; + $ibx = $self->{imapd}->{mailboxes}->{$mailbox} or return; + my $over = $ibx->over(1); + if ($over != $ibx) { # not a dummy + $mailbox =~ /\.([0-9]+)\z/ or + die "BUG: unexpected dummy mailbox: $mailbox\n"; + $uid_base = $1 * UID_SLICE; + + $uidmax = $ibx->mm->num_highwater // 0; + if ($examine) { + $self->{uid_base} = $uid_base; + $self->{ibx} = $ibx; + $self->{uo2m} = uo2m_ary_new($self, \$exists); + } else { + my $uid_end = $uid_base + UID_SLICE; + $exists = $over->imap_exists($uid_base, $uid_end); + } + ensure_slices_exist($self->{imapd}, $ibx, $over->max); + } else { + if ($examine) { + $self->{uid_base} = $uid_base; + $self->{ibx} = $ibx; + delete $self->{uo2m}; + } + # if "INBOX.foo.bar" is selected and "INBOX.foo.bar.0", + # check for new UID ranges (e.g. "INBOX.foo.bar.1") + if (my $z = $self->{imapd}->{mailboxes}->{"$mailbox.0"}) { + ensure_slices_exist($self->{imapd}, $z, + $z->over(1)->max); + } } - ($ibx, $exists, $uidnext, $uid_base); + ($ibx, $exists, $uidmax + 1, $uid_base); } sub cmd_examine ($$$) { my ($self, $tag, $mailbox) = @_; - my ($ibx, $exists, $uidnext, $base) = inbox_lookup($self, $mailbox); - return "$tag NO Mailbox doesn't exist: $mailbox\r\n" if !$ibx; - $self->{uid_base} = $base; - # XXX: do we need this? RFC 5162/7162 my $ret = $self->{ibx} ? "* OK [CLOSED] previous closed\r\n" : ''; - $self->{ibx} = $ibx; + my ($ibx, $exists, $uidnext, $base) = inbox_lookup($self, $mailbox, 1); + return "$tag NO Mailbox doesn't exist: $mailbox\r\n" if !$ibx; $ret .= <header_raw('Content-Disposition') or return 'NIL'; $cd = parse_content_disposition($cd); my $buf = '('._esc($cd->{type}); - $buf .= ' ' . _esc_hash(delete $cd->{attributes}); + $buf .= ' ' . _esc_hash($cd->{attributes}); $buf .= ')'; } @@ -387,7 +491,7 @@ sub body_leaf ($$;$) { my $ct = $eml->ct; $buf .= '('._esc($ct->{type}).' '; $buf .= _esc($ct->{subtype}); - $buf .= ' ' . _esc_hash(delete $ct->{attributes}); + $buf .= ' ' . _esc_hash($ct->{attributes}); $buf .= ' ' . _esc($eml->header_raw('Content-ID')); $buf .= ' ' . _esc($eml->header_raw('Content-Description')); my $cte = $eml->header_raw('Content-Transfer-Encoding') // '7bit'; @@ -416,7 +520,7 @@ sub body_parent ($$$) { $buf .= @$hold ? join('', @$hold) : 'NIL'; $buf .= ' '._esc($ct->{subtype}); if ($structure) { - $buf .= ' '._esc_hash(delete $ct->{attributes}); + $buf .= ' '._esc_hash($ct->{attributes}); $buf .= ' '.body_disposition($eml); $buf .= ' '._esc($eml->header_raw('Content-Language')); $buf .= ' '._esc($eml->header_raw('Content-Location')); @@ -459,28 +563,10 @@ sub fetch_body ($;$) { join('', @hold); } -sub requeue_once ($) { - my ($self) = @_; - # COMPRESS users all share the same DEFLATE context. - # Flush it here to ensure clients don't see - # each other's data - $self->zflush; - - # no recursion, schedule another call ASAP, - # but only after all pending writes are done. - # autovivify wbuf: - my $new_size = push(@{$self->{wbuf}}, \&long_step); - - # wbuf may be populated by $cb, no need to rearm if so: - $self->requeue if $new_size == 1; -} - -# my ($uid_base, $UID) = @_; -sub fetch_msn_uid ($$) { '* '.($_[1] - $_[0]).' FETCH (UID '.$_[1] } - sub fetch_run_ops { - my ($self, $uid_base, $smsg, $bref, $ops, $partial) = @_; - $self->msg_more(fetch_msn_uid($uid_base, $smsg->{num})); + my ($self, $smsg, $bref, $ops, $partial) = @_; + my $uid = $smsg->{num}; + $self->msg_more('* '.uid2msn($self, $uid)." FETCH (UID $uid"); my ($eml, $k); for (my $i = 0; $i < @$ops;) { $k = $ops->[$i++]; @@ -490,19 +576,26 @@ sub fetch_run_ops { $self->msg_more(")\r\n"); } -sub fetch_blob_cb { # called by git->cat_async via git_async_cat +sub fetch_blob_cb { # called by git->cat_async via ibx_async_cat my ($bref, $oid, $type, $size, $fetch_arg) = @_; - my ($self, undef, $msgs, undef, $ops, $partial) = @$fetch_arg; + my ($self, undef, $msgs, $range_info, $ops, $partial) = @$fetch_arg; + my $ibx = $self->{ibx} or return $self->close; # client disconnected my $smsg = shift @$msgs or die 'BUG: no smsg'; if (!defined($oid)) { # it's possible to have TOCTOU if an admin runs # public-inbox-(edit|purge), just move onto the next message - return requeue_once($self); + warn "E: $smsg->{blob} missing in $ibx->{inboxdir}\n"; + return $self->requeue_once; } else { $smsg->{blob} eq $oid or die "BUG: $smsg->{blob} != $oid"; } - fetch_run_ops($self, $self->{uid_base}, $smsg, $bref, $ops, $partial); - requeue_once($self); + my $pre; + if (!$self->{wbuf} && (my $nxt = $msgs->[0])) { + $pre = ibx_async_prefetch($ibx, $nxt->{blob}, + \&fetch_blob_cb, $fetch_arg); + } + fetch_run_ops($self, $smsg, $bref, $ops, $partial); + $pre ? $self->zflush : $self->requeue_once; } sub emit_rfc822 { @@ -572,7 +665,7 @@ sub op_crlf_bdy { ${$_[4]->{bdy}} =~ s/(?{bdy} } sub uid_clamp ($$$) { my ($self, $beg, $end) = @_; my $uid_min = $self->{uid_base} + 1; - my $uid_end = $uid_min + UID_BLOCK - 1; + my $uid_end = $uid_min + UID_SLICE - 1; $$beg = $uid_min if $$beg < $uid_min; $$end = $uid_end if $$end > $uid_end; } @@ -586,28 +679,32 @@ sub range_step ($$) { $range = $$range_csv; $$range_csv = undef; } + my $uid_base = $self->{uid_base}; + my $uid_end = $uid_base + UID_SLICE; if ($range =~ /\A([0-9]+):([0-9]+)\z/) { ($beg, $end) = ($1 + 0, $2 + 0); + uid_clamp($self, \$beg, \$end); } elsif ($range =~ /\A([0-9]+):\*\z/) { $beg = $1 + 0; - $end = $self->{ibx}->mm->max // 0; - my $uid_end = $self->{uid_base} + UID_BLOCK; + $end = $self->{ibx}->over(1)->max; $end = $uid_end if $end > $uid_end; $beg = $end if $beg > $end; + uid_clamp($self, \$beg, \$end); } elsif ($range =~ /\A[0-9]+\z/) { $beg = $end = $range + 0; - undef $range; + # just let the caller do an out-of-range query if a single + # UID is out-of-range + ++$beg if ($beg <= $uid_base || $end > $uid_end); } else { return 'BAD fetch range'; } - uid_clamp($self, \$beg, \$end) if defined($range); [ $beg, $end, $$range_csv ]; } sub refill_range ($$$) { my ($self, $msgs, $range_info) = @_; my ($beg, $end, $range_csv) = @$range_info; - if (scalar(@$msgs = @{$self->{ibx}->over->query_xover($beg, $end)})) { + if (scalar(@$msgs = @{$self->{ibx}->over(1)->query_xover($beg, $end)})){ $range_info->[0] = $msgs->[-1]->{num} + 1; return; } @@ -626,7 +723,8 @@ sub fetch_blob { # long_response return; } } - git_async_cat($self->{ibx}->git, $msgs->[0]->{blob}, + uo2m_extend($self, $msgs->[-1]->{num}); + ibx_async_cat($self->{ibx}, $msgs->[0]->{blob}, \&fetch_blob_cb, \@_); } @@ -638,37 +736,44 @@ sub fetch_smsg { # long_response return; } } - my $uid_base = $self->{uid_base}; - fetch_run_ops($self, $uid_base, $_, undef, $ops) for @$msgs; + uo2m_extend($self, $msgs->[-1]->{num}); + fetch_run_ops($self, $_, undef, $ops) for @$msgs; @$msgs = (); 1; # more } -sub fetch_uid { # long_response - my ($self, $tag, $uids, $range_info, $ops) = @_; - - while (!@$uids) { # rare - my ($beg, $end, $range_csv) = @$range_info; - if (scalar(@$uids = @{$self->{ibx}->over-> - uid_range($beg, $end)})) { - $range_info->[0] = $uids->[-1] + 1; - } elsif (!$range_csv) { - $self->write(\"$tag OK Fetch done\r\n"); +sub refill_uids ($$$;$) { + my ($self, $uids, $range_info, $sql) = @_; + my ($beg, $end, $range_csv) = @$range_info; + my $over = $self->{ibx}->over(1); + while (1) { + if (scalar(@$uids = @{$over->uid_range($beg, $end, $sql)})) { + $range_info->[0] = $uids->[-1] + 1; # update $beg return; + } elsif (!$range_csv) { + return 0; } else { my $next_range = range_step($self, \$range_csv); - if (!ref($next_range)) { # error - $self->write(\"$tag $next_range\r\n"); - return; - } - @$range_info = @$next_range; + return $next_range if !ref($next_range); # error + ($beg, $end, $range_csv) = @$range_info = @$next_range; + # continue looping } - # continue looping } - my $uid_base = $self->{uid_base}; +} + +sub fetch_uid { # long_response + my ($self, $tag, $uids, $range_info, $ops) = @_; + if (defined(my $err = refill_uids($self, $uids, $range_info))) { + $err ||= 'OK Fetch done'; + $self->write("$tag $err\r\n"); + return; + } + my $adj = $self->{uid_base} + 1; + my $uo2m = uo2m_extend($self, $uids->[-1]); + $uo2m = [ unpack('S*', $uo2m) ] if !ref($uo2m); my ($i, $k); for (@$uids) { - $self->msg_more(fetch_msn_uid($uid_base, $_)); + $self->msg_more("* $uo2m->[$_ - $adj] FETCH (UID $_"); for ($i = 0; $i < @$ops;) { $k = $ops->[$i++]; $ops->[$i++]->($self, $k); @@ -708,7 +813,7 @@ sub cmd_status ($$$;@) { my %patmap = ('*' => '.*', '%' => '[^\.]*'); sub cmd_list ($$$$) { my ($self, $tag, $refname, $wildcard) = @_; - my $l = $self->{imapd}->{inboxlist}; + my $l = $self->{imapd}->{mailboxlist}; if ($refname eq '' && $wildcard eq '') { # request for hierarchy delimiter $l = [ qq[* LIST (\\Noselect) "." ""\r\n] ]; @@ -736,12 +841,12 @@ sub eml_index_offs_i { # PublicInbox::Eml::each_part callback # prepares an index for BODY[$SECTION_IDX] fetches sub eml_body_idx ($$) { my ($eml, $section_idx) = @_; - my $idx = $eml->{imap_all_parts} //= do { + my $idx = $eml->{imap_all_parts} // do { my $all = {}; $eml->each_part(\&eml_index_offs_i, $all, 0, 1); # top-level of multipart, BODY[0] not allowed (nz-number) delete $all->{0}; - $all; + $eml->{imap_all_parts} = $all; }; $idx->{$section_idx}; } @@ -925,15 +1030,12 @@ sub cmd_uid_fetch ($$$$;@) { my ($cb, $ops, $partial) = fetch_compile(\@want); return "$tag $cb\r\n" unless $ops; + # cb is one of fetch_blob, fetch_smsg, fetch_uid $range_csv = 'bad' if $range_csv !~ $valid_range; my $range_info = range_step($self, \$range_csv); return "$tag $range_info\r\n" if !ref($range_info); - long_response($self, $cb, $tag, [], $range_info, $ops, $partial); -} - -sub msn_to_uid_range ($$) { - my $uid_base = $_[0]->{uid_base}; - $_[1] =~ s/([0-9]+)/$uid_base + $1/sge; + uo2m_hibernate($self) if $cb == \&fetch_blob; # slow, save RAM + $self->long_response($cb, $tag, [], $range_info, $ops, $partial); } sub cmd_fetch ($$$$;@) { @@ -944,202 +1046,106 @@ sub cmd_fetch ($$$$;@) { # cb is one of fetch_blob, fetch_smsg, fetch_uid $range_csv = 'bad' if $range_csv !~ $valid_range; - msn_to_uid_range($self, $range_csv); + msn_to_uid_range(msn2uid($self), $range_csv); my $range_info = range_step($self, \$range_csv); return "$tag $range_info\r\n" if !ref($range_info); - long_response($self, $cb, $tag, [], $range_info, $ops, $partial); + uo2m_hibernate($self) if $cb == \&fetch_blob; # slow, save RAM + $self->long_response($cb, $tag, [], $range_info, $ops, $partial); } -sub parse_date ($) { # 02-Oct-1993 - my ($date_text) = @_; - my ($dd, $mon, $yyyy) = split(/-/, $_[0], 3); - defined($yyyy) or return; - my $mm = $MoY{$mon} // return; - $dd =~ /\A[0123]?[0-9]\z/ or return; - $yyyy =~ /\A[0-9]{4,}\z/ or return; # Y10K-compatible! - timegm(0, 0, 0, $dd, $mm, $yyyy); +sub msn_convert ($$) { + my ($self, $uids) = @_; + my $adj = $self->{uid_base} + 1; + my $uo2m = uo2m_extend($self, $uids->[-1]); + $uo2m = [ unpack('S*', $uo2m) ] if !ref($uo2m); + $_ = $uo2m->[$_ - $adj] for @$uids; } -sub uid_search_uid_range { # long_response - my ($self, $tag, $beg, $end, $sql) = @_; - my $uids = $self->{ibx}->over->uid_range($$beg, $end, $sql); - if (@$uids) { - $$beg = $uids->[-1] + 1; - $self->msg_more(join(' ', '', @$uids)); - } else { - $self->write(\"\r\n$tag OK Search done\r\n"); - undef; - } -} - -sub date_search { - my ($q, $k, $d) = @_; - my $sql = $q->{sql}; - - # Date: header - if ($k eq 'SENTON') { - my $end = $d + 86399; # no leap day... - my $da = strftime('%Y%m%d%H%M%S', gmtime($d)); - my $db = strftime('%Y%m%d%H%M%S', gmtime($end)); - $q->{xap} .= " dt:$da..$db"; - $$sql .= " AND ds >= $d AND ds <= $end" if defined($sql); - } elsif ($k eq 'SENTBEFORE') { - $q->{xap} .= ' d:..'.strftime('%Y%m%d', gmtime($d)); - $$sql .= " AND ds <= $d" if defined($sql); - } elsif ($k eq 'SENTSINCE') { - $q->{xap} .= ' d:'.strftime('%Y%m%d', gmtime($d)).'..'; - $$sql .= " AND ds >= $d" if defined($sql); - - # INTERNALDATE (Received) - } elsif ($k eq 'ON') { - my $end = $d + 86399; # no leap day... - $q->{xap} .= " ts:$d..$end"; - $$sql .= " AND ts >= $d AND ts <= $end" if defined($sql); - } elsif ($k eq 'BEFORE') { - $q->{xap} .= " ts:..$d"; - $$sql .= " AND ts <= $d" if defined($sql); - } elsif ($k eq 'SINCE') { - $q->{xap} .= " ts:$d.."; - $$sql .= " AND ts >= $d" if defined($sql); - } else { - die "BUG: $k not recognized"; +sub search_uid_range { # long_response + my ($self, $tag, $sql, $range_info, $want_msn) = @_; + my $uids = []; + if (defined(my $err = refill_uids($self, $uids, $range_info, $sql))) { + $err ||= 'OK Search done'; + $self->write("\r\n$tag $err\r\n"); + return; } + msn_convert($self, $uids) if $want_msn; + $self->msg_more(join(' ', '', @$uids)); + 1; # more } -# IMAP to Xapian search key mapping -my %I2X = ( - SUBJECT => 's:', - BODY => 'b:', - FROM => 'f:', - TEXT => '', # n.b. does not include all headers - TO => 't:', - CC => 'c:', - # BCC => 'bcc:', # TODO - # KEYWORD # TODO ? dfpre,dfpost,... -); - -sub parse_query { - my ($self, $rest) = @_; - if (uc($rest->[0]) eq 'CHARSET') { - shift @$rest; - defined(my $c = shift @$rest) or return 'BAD missing charset'; - $c =~ /\A(?:UTF-8|US-ASCII)\z/ or return 'NO [BADCHARSET]'; - } - - my $sql = ''; # date conditions, {sql} deleted if Xapian is needed - my $q = { xap => '', sql => \$sql }; - while (@$rest) { - my $k = uc(shift @$rest); - # default criteria - next if $k =~ /\A(?:ALL|RECENT|UNSEEN|NEW)\z/; - next if $k eq 'AND'; # the default, until we support OR - if ($k =~ $valid_range) { # convert sequence numbers to UIDs - msn_to_uid_range($self, $k); - push @{$q->{uid}}, $k; - } elsif ($k eq 'UID') { - $k = shift(@$rest) // ''; - $k =~ $valid_range or return 'BAD UID range'; - push @{$q->{uid}}, $k; - } elsif ($k =~ /\A(?:SENT)?(?:SINCE|ON|BEFORE)\z/) { - my $d = parse_date(shift(@$rest) // ''); - defined $d or return "BAD $k date format"; - date_search($q, $k, $d); - } elsif ($k =~ /\A(?:SMALLER|LARGER)\z/) { - delete $q->{sql}; # can't use over.sqlite3 - my $bytes = shift(@$rest) // ''; - $bytes =~ /\A[0-9]+\z/ or return "BAD $k not a number"; - $q->{xap} .= ' bytes:' . ($k eq 'SMALLER' ? - '..'.(--$bytes) : - (++$bytes).'..'); - } elsif (defined(my $xk = $I2X{$k})) { - delete $q->{sql}; # can't use over.sqlite3 - my $arg = shift @$rest; - defined($arg) or return "BAD $k no arg"; - - # Xapian can't handle [*"] in probabilistic terms - $arg =~ tr/*"//d; - $q->{xap} .= qq[ $xk:"$arg"]; - } else { - # TODO: parentheses, OR, NOT ... - return "BAD $k not supported (yet?)"; - } - } - - # favor using over.sqlite3 if possible, since Xapian is optional - if (exists $q->{sql}) { - delete($q->{xap}); - delete($q->{sql}) if $sql eq ''; - } elsif (!$self->{ibx}->search) { - return 'BAD Xapian not configured for mailbox'; - } - - if (my $uid = $q->{uid}) { - ((@$uid > 1) || $uid->[0] =~ /,/) and - return 'BAD multiple ranges not supported, yet'; - ($q->{sql} // $q->{xap}) and - return 'BAD ranges and queries do not mix, yet'; - $q->{uid} = join(',', @$uid); # TODO: multiple ranges +sub parse_imap_query ($$) { + my ($self, $query) = @_; + my $q = PublicInbox::IMAPsearchqp::parse($self, $query); + if (ref($q)) { + my $max = $self->{ibx}->over(1)->max; + my $beg = 1; + uid_clamp($self, \$beg, \$max); + $q->{range_info} = [ $beg, $max ]; } $q; } -sub cmd_uid_search ($$$;) { - my ($self, $tag) = splice(@_, 0, 2); +sub search_common { + my ($self, $tag, $query, $want_msn) = @_; my $ibx = $self->{ibx} or return "$tag BAD No mailbox selected\r\n"; - my $q = parse_query($self, \@_); + my $q = parse_imap_query($self, $query); return "$tag $q\r\n" if !ref($q); - my $sql = delete $q->{sql}; - - if (!scalar(keys %$q)) { + my ($sql, $range_info) = delete @$q{qw(sql range_info)}; + if (!scalar(keys %$q)) { # overview.sqlite3 $self->msg_more('* SEARCH'); - my $beg = 1; - my $end = $ibx->mm->max // 0; - uid_clamp($self, \$beg, \$end); - long_response($self, \&uid_search_uid_range, - $tag, \$beg, $end, $sql); - } elsif (my $uid = $q->{uid}) { - if ($uid =~ /\A([0-9]+):([0-9]+|\*)\z/s) { - my ($beg, $end) = ($1, $2); - $end = $ibx->mm->max if $end eq '*'; - uid_clamp($self, \$beg, \$end); - $self->msg_more('* SEARCH'); - long_response($self, \&uid_search_uid_range, - $tag, \$beg, $end, $sql); - } elsif ($uid =~ /\A[0-9]+\z/s) { - $uid = $ibx->over->get_art($uid) ? " $uid" : ''; - "* SEARCH$uid\r\n$tag OK Search done\r\n"; - } else { - "$tag BAD Error\r\n"; - } + $self->long_response(\&search_uid_range, + $tag, $sql, $range_info, $want_msn); + } elsif ($q = $q->{xap}) { + my $srch = $self->{ibx}->isrch or + return "$tag BAD search not available for mailbox\r\n"; + my $opt = { + relevance => -1, + limit => UID_SLICE, + uid_range => $range_info + }; + my $mset = $srch->mset($q, $opt); + my $uids = $srch->mset_to_artnums($mset, $opt); + msn_convert($self, $uids) if scalar(@$uids) && $want_msn; + "* SEARCH @$uids\r\n$tag OK Search done\r\n"; } else { "$tag BAD Error\r\n"; } } -sub args_ok ($$) { # duplicated from PublicInbox::NNTP - my ($cb, $argc) = @_; - my $tot = prototype $cb; - my ($nreq, undef) = split(';', $tot); - $nreq = ($nreq =~ tr/$//) - 1; - $tot = ($tot =~ tr/$//) - 1; - ($argc <= $tot && $argc >= $nreq); +sub cmd_uid_search ($$$) { + my ($self, $tag, $query) = @_; + search_common($self, $tag, $query); +} + +sub cmd_search ($$$;) { + my ($self, $tag, $query) = @_; + search_common($self, $tag, $query, 1); } # returns 1 if we can continue, 0 if not due to buffered writes or disconnect sub process_line ($$) { my ($self, $l) = @_; + + # TODO: IMAP allows literals for big requests to upload messages + # (which we don't support) but maybe some big search queries use it. + # RFC 3501 9 (2) doesn't permit TAB or multiple SP my ($tag, $req, @args) = parse_line('[ \t]+', 0, $l); pop(@args) if (@args && !defined($args[-1])); if (@args && uc($req) eq 'UID') { $req .= "_".(shift @args); } my $res = eval { - if (my $cmd = $self->can('cmd_'.lc($req // ''))) { - defined($self->{-idle_tag}) ? - "$self->{-idle_tag} BAD expected DONE\r\n" : - $cmd->($self, $tag, @args); - } elsif (uc($tag // '') eq 'DONE' && !defined($req)) { - cmd_done($self, $tag); + if (defined(my $idle_tag = $self->{-idle_tag})) { + (uc($tag // '') eq 'DONE' && !defined($req)) ? + idle_done($self, $tag) : + "$idle_tag BAD expected DONE\r\n"; + } elsif (my $cmd = $self->can('cmd_'.lc($req // ''))) { + if ($cmd == \&cmd_uid_search || $cmd == \&cmd_search) { + # preserve user-supplied quotes for search + (undef, @args) = split(/ search /i, $l, 2); + } + $cmd->($self, $tag, @args); } else { # this is weird auth_challenge_ok($self) // ($tag // '*') . @@ -1159,37 +1165,6 @@ sub process_line ($$) { $self->write($res); } -sub long_step { - my ($self) = @_; - # wbuf is unset or empty, here; {long} may add to it - my ($fd, $cb, $t0, @args) = @{$self->{long_cb}}; - my $more = eval { $cb->($self, @args) }; - if ($@ || !$self->{sock}) { # something bad happened... - delete $self->{long_cb}; - my $elapsed = now() - $t0; - if ($@) { - err($self, - "%s during long response[$fd] - %0.6f", - $@, $elapsed); - } - out($self, " deferred[$fd] aborted - %0.6f", $elapsed); - $self->close; - } elsif ($more) { # $self->{wbuf}: - $self->update_idle_time; - - # control passed to $more may be a GitAsyncCat object - requeue_once($self) if !ref($more); - } else { # all done! - delete $self->{long_cb}; - my $elapsed = now() - $t0; - my $fd = fileno($self->{sock}); - out($self, " deferred[$fd] done - %0.6f", $elapsed); - my $wbuf = $self->{wbuf}; # do NOT autovivify - - $self->requeue unless $wbuf && @$wbuf; - } -} - sub err ($$;@) { my ($self, $fmt, @args) = @_; printf { $self->{imapd}->{err} } $fmt."\n", @args; @@ -1200,25 +1175,12 @@ sub out ($$;@) { printf { $self->{imapd}->{out} } $fmt."\n", @args; } -sub long_response ($$;@) { - my ($self, $cb, @args) = @_; # cb returns true if more, false if done - - my $sock = $self->{sock} or return; - # make sure we disable reading during a long response, - # clients should not be sending us stuff and making us do more - # work while we are stream a response to them - $self->{long_cb} = [ fileno($sock), $cb, now(), @args ]; - long_step($self); # kick off! - undef; -} - # callback used by PublicInbox::DS for any (e)poll (in/out/hup/err) sub event_step { my ($self) = @_; return unless $self->flush_write && $self->{sock} && !$self->{long_cb}; - $self->update_idle_time; # only read more requests if we've drained the write buffer, # otherwise we can be buffering infinitely w/o backpressure @@ -1229,7 +1191,8 @@ sub event_step { $self->write(\"\* BAD request too long\r\n"); return $self->close; } - $self->do_read($rbuf, LINE_MAX, length($$rbuf)) or return; + $self->do_read($rbuf, LINE_MAX, length($$rbuf)) or + return uo2m_hibernate($self); $line = index($$rbuf, "\n"); } $line = substr($$rbuf, 0, $line + 1, ''); @@ -1243,7 +1206,6 @@ sub event_step { return $self->close if $r < 0; $self->rbuf_idle($rbuf); - $self->update_idle_time; # maybe there's more pipelined data, or we'll have # to register it for socket-readiness notifications @@ -1252,8 +1214,6 @@ sub event_step { sub compressed { undef } -sub zflush {} # overridden by IMAPdeflate - # RFC 4978 sub cmd_compress ($$$) { my ($self, $tag, $alg) = @_; @@ -1282,10 +1242,14 @@ sub cmd_starttls ($$) { undef; } -# for graceful shutdown in PublicInbox::Daemon: -sub busy { - my ($self, $now) = @_; - ($self->{rbuf} || $self->{wbuf} || $self->not_idle_long($now)); +sub busy { # for graceful shutdown in PublicInbox::Daemon: + my ($self) = @_; + if (defined($self->{-idle_tag})) { + $self->write(\"* BYE server shutting down\r\n"); + return; # not busy anymore + } + defined($self->{rbuf}) || defined($self->{wbuf}) || + !$self->write(\"* BYE server shutting down\r\n"); } sub close {