# slow storage.
#
# data notes:
-# * NNTP article numbers are UIDs
+#
+# * NNTP article numbers are UIDs, mm->created_at is UIDVALIDITY
+#
+# * public-inboxes are sliced into mailboxes of 50K messages
+# to not overload MUAs: $NEWSGROUP_NAME.$SLICE_INDEX
+# Slices are similar in concept to v2 "epochs". Epochs
+# are for the limitations of git clients, while slices are
+# for the limitations of IMAP clients.
+#
+# * We also take advantage of slices being only 50K to store
+# "UID offset" to message sequence number (MSN) mapping
+# as a 50K uint16_t array (via pack("S*", ...)). "UID offset"
+# is the offset from {uid_base} which determines the start of
+# the mailbox slice.
package PublicInbox::IMAP;
use strict;
use base qw(PublicInbox::DS);
use fields qw(imapd ibx long_cb -login_tag
- uid_base -idle_tag -idle_max);
+ uid_base -idle_tag uo2m);
use PublicInbox::Eml;
use PublicInbox::EmlContentFoo qw(parse_content_disposition);
use PublicInbox::DS qw(now);
sub LINE_MAX () { 8000 } # RFC 2683 3.2.1.5
-# changing this will cause grief for clients which cache
-sub UID_BLOCK () { 50_000 }
+# Changing UID_SLICE will cause grief for clients which cache.
+# This also needs to be <64K: we pack it into a uint16_t
+# for long_response UID (offset) => MSN mappings
+sub UID_SLICE () { 50_000 }
# these values area also used for sorting
sub NEED_SMSG () { 1 }
sub cmd_noop ($$) { "$_[1] OK Noop done\r\n" }
+# uo2m: UID Offset to MSN, this is an arrayref by default,
+# but uo2m_hibernate can compact and deduplicate it
+sub uo2m_ary_new ($) {
+ my ($self) = @_;
+ my $base = $self->{uid_base};
+ my $uids = $self->{ibx}->over->uid_range($base + 1, $base + UID_SLICE);
+
+ # convert UIDs to offsets from {base}
+ my @tmp; # [$UID_OFFSET] => $MSN
+ my $msn = 0;
+ ++$base;
+ $tmp[$_ - $base] = ++$msn for @$uids;
+ \@tmp;
+}
+
+# changes UID-offset-to-MSN mapping into a deduplicated scalar:
+# uint16_t uo2m[UID_SLICE].
+# May be swapped out for idle clients if THP is disabled.
+sub uo2m_hibernate ($) {
+ my ($self) = @_;
+ ref(my $uo2m = $self->{uo2m}) or return;
+ my %dedupe = ( uo2m_pack($uo2m) => undef );
+ $self->{uo2m} = (keys(%dedupe))[0];
+ undef;
+}
+
+sub uo2m_last_uid ($) {
+ my ($self) = @_;
+ my $uo2m = $self->{uo2m} or die 'BUG: uo2m_last_uid w/o {uo2m}';
+ (ref($uo2m) ? @$uo2m : (length($uo2m) >> 1)) + $self->{uid_base};
+}
+
+sub uo2m_pack ($) {
+ # $_[0] is an arrayref of MSNs, it may have undef gaps if there
+ # are gaps in the corresponding UIDs: [ msn1, msn2, undef, msn3 ]
+ no warnings 'uninitialized';
+ pack('S*', @{$_[0]});
+}
+
+# extend {uo2m} to account for new messages which arrived since
+# {uo2m} was created.
+sub uo2m_extend ($$) {
+ my ($self, $new_uid_max) = @_;
+ defined(my $uo2m = $self->{uo2m}) or
+ return($self->{uo2m} = uo2m_ary_new($self));
+ my $beg = uo2m_last_uid($self); # last UID we've learned
+ return $uo2m if $beg >= $new_uid_max; # fast path
+
+ # need to extend the current range:
+ my $base = $self->{uid_base};
+ ++$beg;
+ my $uids = $self->{ibx}->over->uid_range($beg, $base + UID_SLICE);
+ my @tmp; # [$UID_OFFSET] => $MSN
+ if (ref($uo2m)) {
+ my $msn = $uo2m->[-1];
+ $tmp[$_ - $beg] = ++$msn for @$uids;
+ push @$uo2m, @tmp;
+ $uo2m;
+ } else {
+ my $msn = unpack('S', substr($uo2m, -2, 2));
+ $tmp[$_ - $beg] = ++$msn for @$uids;
+ $uo2m .= uo2m_pack(\@tmp);
+ my %dedupe = ($uo2m => undef);
+ $self->{uo2m} = (keys %dedupe)[0];
+ }
+}
+
+# the flexible version which works on scalars and array refs.
+# Must call uo2m_extend before this
+sub uid2msn ($$) {
+ my ($self, $uid) = @_;
+ my $uo2m = $self->{uo2m};
+ my $off = $uid - $self->{uid_base} - 1;
+ ref($uo2m) ? $uo2m->[$off] : unpack('S', substr($uo2m, $off << 1, 2));
+}
+
+# returns an arrayref of UIDs, so MSNs can be translated to UIDs via:
+# $msn2uid->[$MSN-1] => $UID. The result of this is always ephemeral
+# and does not live beyond the event loop.
+sub msn2uid ($) {
+ my ($self) = @_;
+ my $base = $self->{uid_base};
+ my $uo2m = uo2m_extend($self, $base + UID_SLICE);
+ $uo2m = [ unpack('S*', $uo2m) ] if !ref($uo2m);
+
+ my $uo = 0;
+ my @msn2uid;
+ for my $msn (@$uo2m) {
+ ++$uo;
+ $msn2uid[$msn - 1] = $uo + $base if $msn;
+ }
+ \@msn2uid;
+}
+
+# converts a set of message sequence numbers in requests to UIDs:
+sub msn_to_uid_range ($$) {
+ my $msn2uid = $_[0];
+ $_[1] =~ s!([0-9]+)!$msn2uid->[$1 - 1] // ($msn2uid->[-1] + 1)!sge;
+}
+
# called by PublicInbox::InboxIdle
sub on_inbox_unlock {
my ($self, $ibx) = @_;
- my $new = $ibx->over->max;
- my $uid_base = $self->{uid_base} // 0;
- my $uid_end = $uid_base + UID_BLOCK;
- defined(my $old = $self->{-idle_max}) or die 'BUG: -idle_max unset';
- $new = $uid_end if $new > $uid_end;
+ my $old = uo2m_last_uid($self);
+ my $uid_end = $self->{uid_base} + UID_SLICE;
+ uo2m_extend($self, $uid_end);
+ my $new = uo2m_last_uid($self);
if ($new > $old) {
- $self->{-idle_max} = $new;
- $new -= $uid_base;
- $old -= $uid_base;
- $self->msg_more("* $_ EXISTS\r\n") for (($old + 1)..($new - 1));
- $self->write(\"* $new EXISTS\r\n");
+ my $msn = uid2msn($self, $new);
+ $self->write(\"* $msn EXISTS\r\n");
} elsif ($new == $uid_end) { # max exceeded $uid_end
# continue idling w/o inotify
- delete $self->{-idle_max};
my $sock = $self->{sock} or return;
$ibx->unsubscribe_unlock(fileno($sock));
}
my $ibx = $self->{ibx} or return "$tag BAD no mailbox selected\r\n";
$self->{-idle_tag} = $tag;
my $max = $ibx->over->max;
- my $uid_end = $self->{uid_base} + UID_BLOCK;
+ my $uid_end = $self->{uid_base} + UID_SLICE;
my $sock = $self->{sock} or return;
my $fd = fileno($sock);
# only do inotify on most recent slice
if ($max < $uid_end) {
+ uo2m_extend($self, $uid_end);
$ibx->subscribe_unlock($fd, $self);
$self->{imapd}->idler_start;
- $self->{-idle_max} = $max;
}
$idle_timer //= PublicInbox::DS::later(\&idle_tick_all);
$IDLERS->{$fd} = $self;
"$idle_tag OK Idle done\r\n";
}
-sub ensure_ranges_exist ($$$) {
+sub ensure_slices_exist ($$$) {
my ($imapd, $ibx, $max) = @_;
defined(my $mb_top = $ibx->{newsgroup}) or return;
my $mailboxes = $imapd->{mailboxes};
my @created;
- for (my $i = int($max/UID_BLOCK); $i >= 0; --$i) {
+ for (my $i = int($max/UID_SLICE); $i >= 0; --$i) {
my $sub_mailbox = "$mb_top.$i";
last if exists $mailboxes->{$sub_mailbox};
$mailboxes->{$sub_mailbox} = $ibx;
my ($self, $mailbox) = @_;
my ($ibx, $exists, $uidnext, $uid_base);
if ($mailbox =~ /\A(.+)\.([0-9]+)\z/) {
- # old mail: inbox.comp.foo.$uid_block_idx
+ # old mail: inbox.comp.foo.$SLICE_IDX
my $mb_top = $1;
- $uid_base = $2 * UID_BLOCK;
+ $uid_base = $2 * UID_SLICE;
$ibx = $self->{imapd}->{mailboxes}->{lc $mailbox} or return;
my $max;
($exists, $uidnext, $max) = $ibx->over->imap_status($uid_base,
- $uid_base + UID_BLOCK);
- ensure_ranges_exist($self->{imapd}, $ibx, $max);
+ $uid_base + UID_SLICE);
+ ensure_slices_exist($self->{imapd}, $ibx, $max);
} else { # check for dummy inboxes
$mailbox = lc $mailbox;
$ibx = $self->{imapd}->{mailboxes}->{$mailbox} or return;
# if "INBOX.foo.bar" is selected and "INBOX.foo.bar.0",
# check for new UID ranges (e.g. "INBOX.foo.bar.1")
if (my $z = $self->{imapd}->{mailboxes}->{"$mailbox.0"}) {
- ensure_ranges_exist($self->{imapd}, $z, $z->over->max);
+ ensure_slices_exist($self->{imapd}, $z, $z->over->max);
}
$uid_base = $exists = 0;
$self->requeue if $new_size == 1;
}
-# my ($msn, $UID) = @_;
-sub fetch_msn_uid ($$) { '* '.(${$_[0]}++).' FETCH (UID '.$_[1] }
-
sub fetch_run_ops {
- my ($self, $msn, $smsg, $bref, $ops, $partial) = @_;
- $self->msg_more(fetch_msn_uid($msn, $smsg->{num}));
+ my ($self, $smsg, $bref, $ops, $partial) = @_;
+ my $uid = $smsg->{num};
+ $self->msg_more('* '.uid2msn($self, $uid)." FETCH (UID $uid");
my ($eml, $k);
for (my $i = 0; $i < @$ops;) {
$k = $ops->[$i++];
} else {
$smsg->{blob} eq $oid or die "BUG: $smsg->{blob} != $oid";
}
- fetch_run_ops($self, $range_info->[3], $smsg, $bref, $ops, $partial);
+ fetch_run_ops($self, $smsg, $bref, $ops, $partial);
requeue_once($self);
}
sub uid_clamp ($$$) {
my ($self, $beg, $end) = @_;
my $uid_min = $self->{uid_base} + 1;
- my $uid_end = $uid_min + UID_BLOCK - 1;
+ my $uid_end = $uid_min + UID_SLICE - 1;
$$beg = $uid_min if $$beg < $uid_min;
$$end = $uid_end if $$end > $uid_end;
}
$$range_csv = undef;
}
my $uid_base = $self->{uid_base};
- my $uid_end = $uid_base + UID_BLOCK;
+ my $uid_end = $uid_base + UID_SLICE;
if ($range =~ /\A([0-9]+):([0-9]+)\z/) {
($beg, $end) = ($1 + 0, $2 + 0);
uid_clamp($self, \$beg, \$end);
} else {
return 'BAD fetch range';
}
- my $msn = $beg - $uid_base;
- [ $beg, $end, $$range_csv, \$msn ];
+ [ $beg, $end, $$range_csv ];
}
sub refill_range ($$$) {
return;
}
}
+ uo2m_extend($self, $msgs->[-1]->{num});
git_async_cat($self->{ibx}->git, $msgs->[0]->{blob},
\&fetch_blob_cb, \@_);
}
return;
}
}
- fetch_run_ops($self, $range_info->[3], $_, undef, $ops) for @$msgs;
+ uo2m_extend($self, $msgs->[-1]->{num});
+ fetch_run_ops($self, $_, undef, $ops) for @$msgs;
@$msgs = ();
1; # more
}
$self->write("$tag $err\r\n");
return;
}
+ my $adj = $self->{uid_base} + 1;
+ my $uo2m = uo2m_extend($self, $uids->[-1]);
+ $uo2m = [ unpack('S*', $uo2m) ] if !ref($uo2m);
my ($i, $k);
- my $msn = $range_info->[3];
for (@$uids) {
- $self->msg_more(fetch_msn_uid($msn, $_));
+ $self->msg_more("* $uo2m->[$_ - $adj] FETCH (UID $_");
for ($i = 0; $i < @$ops;) {
$k = $ops->[$i++];
$ops->[$i++]->($self, $k);
my ($cb, $ops, $partial) = fetch_compile(\@want);
return "$tag $cb\r\n" unless $ops;
+ # cb is one of fetch_blob, fetch_smsg, fetch_uid
$range_csv = 'bad' if $range_csv !~ $valid_range;
my $range_info = range_step($self, \$range_csv);
return "$tag $range_info\r\n" if !ref($range_info);
+ uo2m_hibernate($self) if $cb == \&fetch_blob; # slow, save RAM
long_response($self, $cb, $tag, [], $range_info, $ops, $partial);
}
-sub msn_to_uid_range ($$) {
- my $uid_base = $_[0]->{uid_base};
- $_[1] =~ s/([0-9]+)/$uid_base + $1/sge;
-}
-
sub cmd_fetch ($$$$;@) {
my ($self, $tag, $range_csv, @want) = @_;
my $ibx = $self->{ibx} or return "$tag BAD No mailbox selected\r\n";
# cb is one of fetch_blob, fetch_smsg, fetch_uid
$range_csv = 'bad' if $range_csv !~ $valid_range;
- msn_to_uid_range($self, $range_csv);
+ msn_to_uid_range(msn2uid($self), $range_csv);
my $range_info = range_step($self, \$range_csv);
return "$tag $range_info\r\n" if !ref($range_info);
+ uo2m_hibernate($self) if $cb == \&fetch_blob; # slow, save RAM
long_response($self, $cb, $tag, [], $range_info, $ops, $partial);
}
timegm(0, 0, 0, $dd, $mm, $yyyy);
}
-sub msn_convert ($$) {
- my ($self, $uids) = @_;
- my $adj = $self->{uid_base};
- $_ -= $adj for @$uids;
-}
-
sub search_uid_range { # long_response
- my ($self, $tag, $sql, $range_info, $want_msn) = @_;
+ my ($self, $tag, $sql, $range_info) = @_;
my $uids = [];
if (defined(my $err = refill_uids($self, $uids, $range_info, $sql))) {
$err ||= 'OK Search done';
$self->write("\r\n$tag $err\r\n");
return;
}
- msn_convert($self, $uids) if $want_msn;
$self->msg_more(join(' ', '', @$uids));
1; # more
}
my $sql = ''; # date conditions, {sql} deleted if Xapian is needed
my $xap = '';
my $q = { sql => \$sql, xap => \$xap };
+ my $msn2uid;
while (@$rest) {
my $k = uc(shift @$rest);
# default criteria
next if $k =~ /\A(?:ALL|RECENT|UNSEEN|NEW)\z/;
next if $k eq 'AND'; # the default, until we support OR
if ($k =~ $valid_range) { # convert sequence numbers to UIDs
- msn_to_uid_range($self, $k);
+ msn_to_uid_range($msn2uid //= msn2uid($self), $k);
push @{$q->{uid}}, $k;
} elsif ($k eq 'UID') {
$k = shift(@$rest) // '';
}
sub search_xap_range { # long_response
- my ($self, $tag, $q, $range_info, $want_msn) = @_;
+ my ($self, $tag, $q, $range_info) = @_;
my $uids = [];
if (defined(my $err = refill_xap($self, $uids, $range_info, $q))) {
$err ||= 'OK Search done';
$self->write("\r\n$tag $err\r\n");
return;
}
- msn_convert($self, $uids) if $want_msn;
$self->msg_more(join(' ', '', @$uids));
1; # more
}
-sub search_common {
- my ($self, $tag, $rest, $want_msn) = @_;
+sub cmd_uid_search ($$$;) {
+ my ($self, $tag) = splice(@_, 0, 2);
my $ibx = $self->{ibx} or return "$tag BAD No mailbox selected\r\n";
- my $q = parse_query($self, $rest);
+ my $q = parse_query($self, \@_);
return "$tag $q\r\n" if !ref($q);
my ($sql, $range_info) = delete @$q{qw(sql range_info)};
if (!scalar(keys %$q)) { # overview.sqlite3
$self->msg_more('* SEARCH');
long_response($self, \&search_uid_range,
- $tag, $sql, $range_info, $want_msn);
+ $tag, $sql, $range_info);
} elsif ($q = $q->{xap}) {
$self->msg_more('* SEARCH');
long_response($self, \&search_xap_range,
- $tag, $q, $range_info, $want_msn);
+ $tag, $q, $range_info);
} else {
"$tag BAD Error\r\n";
}
}
-sub cmd_uid_search ($$$;) {
- my ($self, $tag) = splice(@_, 0, 2);
- search_common($self, $tag, \@_);
-}
-
-sub cmd_search ($$$;) {
- my ($self, $tag) = splice(@_, 0, 2);
- search_common($self, $tag, \@_, 1);
-}
+# note: MSN SEARCH is NOT supported. Do any widely-used MUAs
+# rely on MSNs from SEARCH results? Let us know at meta@public-inbox.org
sub args_ok ($$) { # duplicated from PublicInbox::NNTP
my ($cb, $argc) = @_;
# returns 1 if we can continue, 0 if not due to buffered writes or disconnect
sub process_line ($$) {
my ($self, $l) = @_;
+
+ # TODO: IMAP allows literals for big requests to upload messages
+ # (which we don't support) but maybe some big search queries use it.
my ($tag, $req, @args) = parse_line('[ \t]+', 0, $l);
pop(@args) if (@args && !defined($args[-1]));
if (@args && uc($req) eq 'UID') {
$self->write(\"\* BAD request too long\r\n");
return $self->close;
}
- $self->do_read($rbuf, LINE_MAX, length($$rbuf)) or return;
+ $self->do_read($rbuf, LINE_MAX, length($$rbuf)) or
+ return uo2m_hibernate($self);
$line = index($$rbuf, "\n");
}
$line = substr($$rbuf, 0, $line + 1, '');