1 # Copyright (C) all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
4 # for maintaining synchronization between lei/store <=> Maildir|MH|IMAP|JMAP
5 package PublicInbox::LeiMailSync;
8 use parent qw(PublicInbox::Lock);
9 use DBI qw(:sql_types); # SQL_BLOB
10 use PublicInbox::ContentHash qw(git_sha);
15 my $f = $self->{filename};
18 require PublicInbox::Syscall;
19 open my $fh, '+>>', $f or Carp::croak "open($f): $!";
20 PublicInbox::Syscall::nodatacow_fh($fh);
22 my $dbh = DBI->connect("dbi:SQLite:dbname=$f",'','', {
26 sqlite_use_immediate_transaction => 1,
28 # no sqlite_unicode, here, all strings are binary
29 create_tables($self, $dbh);
30 $dbh->do('PRAGMA journal_mode = WAL') if $creat;
31 $dbh->do('PRAGMA case_sensitive_like = ON');
40 lock_path => "$f.flock",
44 sub lms_write_prepare { ($_[0]->{dbh} //= dbh_new($_[0])); $_[0] }
49 my $dbh = delete $self->{dbh};
50 eval { $dbh->do('PRAGMA optimize') } if $dbh;
54 my ($self, $dbh) = @_;
55 my $lk = $self->lock_for_scope;
58 CREATE TABLE IF NOT EXISTS folders (
59 fid INTEGER PRIMARY KEY,
60 loc VARBINARY NOT NULL, /* URL;UIDVALIDITY=$N or $TYPE:/pathname */
65 CREATE TABLE IF NOT EXISTS blob2num (
66 oidbin VARBINARY NOT NULL,
67 fid INTEGER NOT NULL, /* folder ID */
68 uid INTEGER NOT NULL, /* NNTP article number, IMAP UID, MH number */
69 /* not UNIQUE(fid, uid), since we may have broken servers */
70 UNIQUE (oidbin, fid, uid)
73 # speeds up LeiImport->ck_update_kw (for "lei import") by 5-6x:
75 CREATE INDEX IF NOT EXISTS idx_fid_uid ON blob2num(fid,uid)
78 CREATE TABLE IF NOT EXISTS blob2name (
79 oidbin VARBINARY NOT NULL,
80 fid INTEGER NOT NULL, /* folder ID */
81 name VARBINARY NOT NULL, /* Maildir basename, JMAP blobId */
82 /* not UNIQUE(fid, name), since we may have broken software */
83 UNIQUE (oidbin, fid, name)
86 # speeds up LeiImport->pmdir_cb (for "lei import") by ~6x:
88 CREATE INDEX IF NOT EXISTS idx_fid_name ON blob2name(fid,name)
92 # used to fixup pre-1.7.0 folders
93 sub update_fid ($$$) {
94 my ($dbh, $fid, $loc) = @_;
95 my $sth = $dbh->prepare(<<'');
96 UPDATE folders SET loc = ? WHERE fid = ?
98 $sth->bind_param(1, $loc, SQL_BLOB);
99 $sth->bind_param(2, $fid);
104 my ($sth, $folder, $dbh) = @_;
105 $sth->bind_param(1, $folder, SQL_BLOB);
107 my ($fid) = $sth->fetchrow_array;
108 if (defined $fid) { # for downgrade+upgrade (1.8 -> 1.7 -> 1.8)
109 my $del = $dbh->prepare_cached(<<'');
110 DELETE FROM folders WHERE loc = ? AND fid != ?
112 $del->execute($folder, $fid);
114 $sth->bind_param(1, $folder, SQL_VARCHAR);
115 $sth->execute; # fixup old stuff
116 ($fid) = $sth->fetchrow_array;
117 update_fid($dbh, $fid, $folder) if defined($fid);
123 my ($self, $folder, $creat) = @_;
124 my $dbh = $self->{dbh} //= dbh_new($self);
125 my $sth = $dbh->prepare_cached(<<'', undef, 1);
126 SELECT fid FROM folders WHERE loc = ? LIMIT 1
128 my $fid = get_fid($sth, $folder, $dbh);
129 return $fid if defined($fid);
131 # caller had trailing slash (LeiToMail)
132 if ($folder =~ s!\A((?:maildir|mh):.*?)/+\z!$1!i) {
133 $fid = get_fid($sth, $folder, $dbh);
135 update_fid($dbh, $fid, $folder);
138 # sometimes we stored trailing slash..
139 } elsif ($folder =~ m!\A(?:maildir|mh):!i) {
140 $fid = get_fid($sth, $folder, $dbh);
142 update_fid($dbh, $fid, $folder);
145 } elsif ($creat && $folder =~ m!\Aimaps?://!i) {
146 require PublicInbox::URIimap;
147 my $uri = PublicInbox::URIimap->new($folder);
149 Carp::croak("BUG: $folder has no UIDVALIDITY");
150 defined($uri->uid) and Carp::confess("BUG: $folder has UID");
153 return unless $creat;
154 ($fid) = $dbh->selectrow_array('SELECT MAX(fid) FROM folders');
157 # in case we're reusing, clobber existing stale refs:
158 $dbh->do('DELETE FROM blob2name WHERE fid = ?', undef, $fid);
159 $dbh->do('DELETE FROM blob2num WHERE fid = ?', undef, $fid);
161 $sth = $dbh->prepare('INSERT INTO folders (fid, loc) VALUES (?, ?)');
162 $sth->bind_param(1, $fid);
163 $sth->bind_param(2, $folder, SQL_BLOB);
170 my ($self, @folders) = @_;
171 my $lk = $self->lock_for_scope;
172 for my $f (@folders) { $self->{fmap}->{$f} //= fid_for($self, $f, 1) }
176 my ($self, $oidbin, $folder, $id) = @_;
177 my $lk = $self->lock_for_scope;
178 my $fid = $self->{fmap}->{$folder} //= fid_for($self, $folder, 1);
179 my $dbh = $self->{dbh};
180 my ($sth, @param3, $del_old);
181 if (ref($id)) { # scalar name
182 @param3 = ($$id, SQL_BLOB);
183 $sth = $dbh->prepare_cached(<<'');
184 INSERT OR IGNORE INTO blob2name (oidbin, fid, name) VALUES (?, ?, ?)
186 $del_old = $dbh->prepare_cached(<<'');
187 DELETE FROM blob2name WHERE oidbin = ? AND fid = ? AND name = ?
189 } else { # numeric ID (IMAP UID, MH number)
191 $sth = $dbh->prepare_cached(<<'');
192 INSERT OR IGNORE INTO blob2num (oidbin, fid, uid) VALUES (?, ?, ?)
194 $del_old = $dbh->prepare_cached(<<'');
195 DELETE FROM blob2num WHERE oidbin = ? AND fid = ? AND uid = ?
198 $sth->bind_param(1, $oidbin, SQL_BLOB);
199 $sth->bind_param(2, $fid);
200 $sth->bind_param(3, @param3);
201 my $ret = $sth->execute;
202 $del_old->execute($oidbin, $fid, $param3[0]);
207 my ($self, $folder, $id) = @_;
208 my $lk = $self->lock_for_scope;
209 my $fid = $self->{fmap}->{$folder} //= fid_for($self, $folder, 1);
211 if (ref($id)) { # scalar name
212 @param3 = ($$id, SQL_BLOB);
213 $sth = $self->{dbh}->prepare_cached(<<'');
214 DELETE FROM blob2name WHERE fid = ? AND name = ?
218 $sth = $self->{dbh}->prepare_cached(<<'');
219 DELETE FROM blob2num WHERE fid = ? AND uid = ?
222 $sth->bind_param(1, $fid);
223 $sth->bind_param(2, @param3);
224 my $ret = $sth->execute;
226 # older versions may not have used SQL_BLOB:
227 if (defined($ret) && $ret == 0 && scalar(@param3) == 2) {
228 $sth->bind_param(1, $fid);
229 $sth->bind_param(2, $param3[0]);
230 $ret = $sth->execute;
237 my ($self, $folder, $oidbin, $id, $newbn) = @_;
238 my $lk = $self->lock_for_scope;
239 my $fid = $self->{fmap}->{$folder} //= fid_for($self, $folder, 1);
240 $self->{dbh}->begin_work;
241 my $sth = $self->{dbh}->prepare_cached(<<'');
242 UPDATE blob2name SET name = ? WHERE fid = ? AND oidbin = ? AND name = ?
244 # eval since unique constraint may fail due to race
245 $sth->bind_param(1, $newbn, SQL_BLOB);
246 $sth->bind_param(2, $fid);
247 $sth->bind_param(3, $oidbin, SQL_BLOB);
248 $sth->bind_param(4, $$id, SQL_BLOB);
249 my $nr = eval { $sth->execute };
250 if (!defined($nr) || $nr == 0) { # $nr may be `0E0'
251 # delete from old, pre-SQL_BLOB rows:
252 my $del_old = $self->{dbh}->prepare_cached(<<'');
253 DELETE FROM blob2name WHERE fid = ? AND oidbin = ? AND name = ?
255 $del_old->execute($fid, $oidbin, $$id); # missing-OK
256 $del_old->execute($fid, $oidbin, $newbn); # ditto
258 # may race with a clear_src, ensure new value exists
259 $sth = $self->{dbh}->prepare_cached(<<'');
260 INSERT OR IGNORE INTO blob2name (oidbin, fid, name) VALUES (?, ?, ?)
262 $sth->bind_param(1, $oidbin, SQL_BLOB);
263 $sth->bind_param(2, $fid);
264 $sth->bind_param(3, $newbn, SQL_BLOB);
267 $self->{dbh}->commit;
270 # read-only, iterates every oidbin + UID or name for a given folder
272 my ($self, $folder, $cb, @args) = @_;
273 my $dbh = $self->{dbh} //= dbh_new($self);
276 if (ref($folder) eq 'HASH') {
277 $fid = $folder->{fid} // die "BUG: no `fid'";
278 @rng = grep(defined, @$folder{qw(min max)});
279 $and_ge_le = 'AND uid >= ? AND uid <= ?' if @rng;
281 $fid = $self->{fmap}->{$folder} //=
282 fid_for($self, $folder) // return;
285 # minimize implicit txn time to avoid blocking writers by
286 # batching SELECTs. This looks wonky but is necessary since
287 # $cb-> may access the DB on its own.
288 my $ary = $dbh->selectall_arrayref(<<"", undef, $fid, @rng);
289 SELECT _rowid_,oidbin,uid FROM blob2num WHERE fid = ? $and_ge_le
290 ORDER BY _rowid_ ASC LIMIT 1000
292 my $min = @$ary ? $ary->[-1]->[0] : undef;
293 while (defined $min) {
294 for my $row (@$ary) { $cb->($row->[1], $row->[2], @args) }
296 $ary = $dbh->selectall_arrayref(<<"", undef, $fid, @rng, $min);
297 SELECT _rowid_,oidbin,uid FROM blob2num
298 WHERE fid = ? $and_ge_le AND _rowid_ > ?
299 ORDER BY _rowid_ ASC LIMIT 1000
301 $min = @$ary ? $ary->[-1]->[0] : undef;
304 $ary = $dbh->selectall_arrayref(<<'', undef, $fid);
305 SELECT _rowid_,oidbin,name FROM blob2name WHERE fid = ?
306 ORDER BY _rowid_ ASC LIMIT 1000
308 $min = @$ary ? $ary->[-1]->[0] : undef;
309 while (defined $min) {
310 for my $row (@$ary) { $cb->($row->[1], \($row->[2]), @args) }
312 $ary = $dbh->selectall_arrayref(<<'', undef, $fid, $min);
313 SELECT _rowid_,oidbin,name FROM blob2name WHERE fid = ? AND _rowid_ > ?
314 ORDER BY _rowid_ ASC LIMIT 1000
316 $min = @$ary ? $ary->[-1]->[0] : undef;
321 my ($self, $folder) = @_;
322 my $dbh = $self->{dbh} //= dbh_new($self);
325 $fid = $self->{fmap}->{$folder} //= fid_for($self, $folder) // return;
326 my ($row) = $dbh->selectrow_array(<<"", undef, $fid);
327 SELECT COUNT(name) FROM blob2name WHERE fid = ?
329 $ret->{'name.count'} = $row if $row;
330 my $ntype = ($folder =~ m!\A(?:nntps?|s?news)://!i) ? 'article' :
331 (($folder =~ m!\Aimaps?://!i) ? 'uid' : "TODO<$folder>");
332 for my $op (qw(count min max)) {
333 ($row) = $dbh->selectrow_array(<<"", undef, $fid);
334 SELECT $op(uid) FROM blob2num WHERE fid = ?
337 $ret->{"$ntype.$op"} = $row;
342 # returns a { location => [ list-of-ids-or-names ] } mapping
344 my ($self, $oidbin) = @_;
345 my ($fid, $sth, $id, %fid2id, %seen);
346 my $dbh = $self->{dbh} //= dbh_new($self);
347 $sth = $dbh->prepare('SELECT fid,uid FROM blob2num WHERE oidbin = ?');
348 $sth->bind_param(1, $oidbin, SQL_BLOB);
350 while (my ($fid, $uid) = $sth->fetchrow_array) {
351 push @{$fid2id{$fid}}, $uid;
352 $seen{"$uid.$fid"} = 1;
355 # deal with 1.7.0 DBs :<
356 $sth->bind_param(1, $oidbin, SQL_VARCHAR);
358 while (my ($fid, $uid) = $sth->fetchrow_array) {
359 next if $seen{"$uid.$fid"};
360 push @{$fid2id{$fid}}, $uid;
364 $sth = $dbh->prepare('SELECT fid,name FROM blob2name WHERE oidbin = ?');
365 $sth->bind_param(1, $oidbin, SQL_BLOB);
367 while (my ($fid, $name) = $sth->fetchrow_array) {
368 push @{$fid2id{$fid}}, $name;
369 $seen{"$fid.$name"} = 1;
372 # deal with 1.7.0 DBs :<
373 $sth->bind_param(1, $oidbin, SQL_VARCHAR);
375 while (my ($fid, $name) = $sth->fetchrow_array) {
376 next if $seen{"$fid.$name"};
377 push @{$fid2id{$fid}}, $name;
380 $sth = $dbh->prepare('SELECT loc FROM folders WHERE fid = ? LIMIT 1');
382 while (my ($fid, $ids) = each %fid2id) {
384 my ($loc) = $sth->fetchrow_array;
385 unless (defined $loc) {
386 my $oidhex = unpack('H*', $oidbin);
387 warn "E: fid=$fid for $oidhex unknown:\n", map {
388 'E: '.(ref() ? $$_ : "#$_")."\n";
394 scalar(keys %$ret) ? $ret : undef;
397 # returns a list of folders used for completion
399 my ($self, @pfx) = @_;
400 my $sql = 'SELECT loc FROM folders';
402 if (defined($pfx[0])) {
403 $sql .= ' WHERE loc REGEXP ?'; # DBD::SQLite uses perlre
404 $re = !!$pfx[1] ? '.*' : '';
405 $re .= quotemeta($pfx[0]);
408 my $sth = ($self->{dbh} //= dbh_new($self))->prepare($sql);
409 $sth->bind_param(1, $re) if defined($re);
411 map { $_->[0] } @{$sth->fetchall_arrayref};
415 my ($self, $oidhex, $vrfy) = @_;
416 my $dbh = $self->{dbh} //= dbh_new($self);
417 my $b2n = $dbh->prepare(<<'');
418 SELECT f.loc,b.name FROM blob2name b
419 LEFT JOIN folders f ON b.fid = f.fid
422 $b2n->bind_param(1, pack('H*', $oidhex), SQL_BLOB);
424 while (my ($d, $n) = $b2n->fetchrow_array) {
425 substr($d, 0, length('maildir:')) = '';
426 # n.b. both mbsync and offlineimap use ":2," as a suffix
427 # in "new/", despite (from what I understand of reading
428 # <https://cr.yp.to/proto/maildir.html>), the ":2," only
429 # applies to files in "cur/".
430 my @try = $n =~ /:2,[a-zA-Z]+\z/ ? qw(cur new) : qw(new cur);
433 open my $fh, '<', $f or next;
434 # some (buggy) Maildir writers are non-atomic:
439 my $got = git_sha(1, \$raw)->hexdigest;
440 if ($got ne $oidhex) {
441 warn "$f changed $oidhex => $got\n";
452 my ($self, $url, $all) = @_; # $all = [ $lms->folders ];
453 $all //= [ $self->folders ];
454 require PublicInbox::URIimap;
455 my $want = PublicInbox::URIimap->new($url)->canonical;
456 my ($s, $h, $mb) = ($want->scheme, $want->host, $want->mailbox);
457 my @uri = map { PublicInbox::URIimap->new($_)->canonical }
458 grep(m!\A\Q$s\E://.*?\Q$h\E\b.*?/\Q$mb\E\b!, @$all);
461 next if $x->mailbox ne $want->mailbox;
462 next if $x->host ne $want->host;
463 next if $x->port != $want->port;
464 my $x_uidval = $x->uidvalidity;
465 next if ($want->uidvalidity // $x_uidval) != $x_uidval;
467 # allow nothing in want to possibly match ";AUTH=ANONYMOUS"
468 if (defined($x->auth) && !defined($want->auth) &&
469 !defined($want->user)) {
471 # or maybe user was forgotten on CLI:
472 } elsif (defined($x->user) && !defined($want->user)) {
474 } elsif (($x->user//"\0") eq ($want->user//"\0")) {
478 return @match if wantarray;
479 scalar(@match) <= 1 ? $match[0] :
480 "E: `$url' is ambiguous:\n\t".join("\n\t", @match)."\n";
483 sub match_nntp_url ($$$) {
484 my ($self, $url, $all) = @_; # $all = [ $lms->folders ];
485 $all //= [ $self->folders ];
486 require PublicInbox::URInntps;
487 my $want = PublicInbox::URInntps->new($url)->canonical;
488 my ($s, $h, $p) = ($want->scheme, $want->host, $want->port);
489 my $ng = $want->group; # force scalar (no article ranges)
490 my @uri = map { PublicInbox::URInntps->new($_)->canonical }
491 grep(m!\A\Q$s\E://.*?\Q$h\E\b.*?/\Q$ng\E\b!, @$all);
494 next if $x->group ne $ng || $x->host ne $h || $x->port != $p;
495 # maybe user was forgotten on CLI:
496 if (defined($x->userinfo) && !defined($want->userinfo)) {
498 } elsif (($x->userinfo//"\0") eq ($want->userinfo//"\0")) {
502 return @match if wantarray;
503 scalar(@match) <= 1 ? $match[0] :
504 "E: `$url' is ambiguous:\n\t".join("\n\t", @match)."\n";
507 # returns undef on failure, number on success
509 my ($self, $lei, $all, $folders) = @_;
510 return $lei->fail(<<EOM) if @$folders;
511 --all= not compatible with @$folders on command-line
513 my %x = map { $_ => $_ } split(/,/, $all);
514 my @ok = grep(defined, delete(@x{qw(local remote), ''}));
515 push(@ok, '') if $all eq '';
518 @no = (join(',', @no));
519 return $lei->fail(<<EOM);
520 --all=@no not accepted (must be `local' and/or `remote')
524 my @all = $self->folders;
526 if ($ok eq 'local') {
527 @inc = grep(!m!\A[a-z0-9\+]+://!i, @all);
528 } elsif ($ok eq 'remote') {
529 @inc = grep(m!\A[a-z0-9\+]+://!i, @all);
530 } elsif ($ok ne '') {
531 return $lei->fail("--all=$all not understood");
535 push(@$folders, (grep { !$seen{$_}++ } @inc));
537 scalar(@$folders) || $lei->fail(<<EOM);
538 no --mail-sync folders known to lei
542 # map CLI args to folder table entries, returns undef on failure
544 my ($self, $lei, $folders) = @_;
545 my @all = $self->folders;
546 my %all = map { $_ => 1 } @all;
549 next if $all{$_}; # ok
550 if (m!\A(maildir|mh):(.+)!i) {
552 my $d = "$type:".$lei->abs_path($2);
553 push(@no, $_) unless $all{$d};
555 } elsif (-d "$_/new" && -d "$_/cur") {
556 my $d = 'maildir:'.$lei->abs_path($_);
557 push(@no, $_) unless $all{$d};
559 } elsif (m!\Aimaps?://!i) {
561 my $res = match_imap_url($self, $orig, \@all);
565 # using `$res' instead of `$orig'
568 warn($res, "\n") if defined $res;
571 } elsif (m!\A(?:nntps?|s?news)://!i) {
573 my $res = match_nntp_url($self, $orig, \@all);
577 # using `$res' instead of `$orig'
580 warn($res, "\n") if defined $res;
588 my $no = join("\n\t", @no);
590 No sync information for: $no
591 Run `lei ls-mail-sync' to display valid choices
597 my ($self, @folders) = @_;
598 my $lk = $self->lock_for_scope;
599 for my $folder (@folders) {
600 my $fid = delete($self->{fmap}->{$folder}) //
601 fid_for($self, $folder) // next;
602 for my $t (qw(blob2name blob2num folders)) {
603 $self->{dbh}->do("DELETE FROM $t WHERE fid = ?",
609 # only used for changing canonicalization errors
611 my ($self, $old, $new) = @_;
612 my $lk = $self->lock_for_scope;
613 my $ofid = delete($self->{fmap}->{$old}) //
614 fid_for($self, $old) // return;
616 $self->{dbh}->do(<<EOM, undef, $new, $ofid);
617 UPDATE folders SET loc = ? WHERE fid = ?
620 if ($@ =~ /\bunique\b/i) {
621 my $nfid = $self->{fmap}->{$new} // fid_for($self, $new);
622 for my $t (qw(blob2name blob2num)) {
623 $self->{dbh}->do(<<EOM, undef, $nfid, $ofid);
624 UPDATE OR REPLACE $t SET fid = ? WHERE fid = ?
627 $self->{dbh}->do(<<EOM, undef, $ofid);
628 DELETE FROM folders WHERE fid = ?
633 sub num_oidbin ($$$) {
634 my ($self, $url, $uid) = @_; # $url MUST have UIDVALIDITY if IMAP
635 my $fid = $self->{fmap}->{$url} //= fid_for($self, $url) // return ();
636 my $sth = $self->{dbh}->prepare_cached(<<EOM, undef, 1);
637 SELECT oidbin FROM blob2num WHERE fid = ? AND uid = ? ORDER BY _rowid_
639 $sth->execute($fid, $uid);
640 my %uniq; # for public-inbox <= 1.7.0
641 grep { !$uniq{$_}++ } map { $_->[0] } @{$sth->fetchall_arrayref};
644 sub name_oidbin ($$$) {
645 my ($self, $mdir, $nm) = @_;
646 my $fid = $self->{fmap}->{$mdir} //= fid_for($self, $mdir) // return;
647 my $sth = $self->{dbh}->prepare_cached(<<EOM, undef, 1);
648 SELECT oidbin FROM blob2name WHERE fid = ? AND name = ?
650 $sth->bind_param(1, $fid);
651 $sth->bind_param(2, $nm, SQL_BLOB);
653 my @bin = map { $_->[0] } @{$sth->fetchall_arrayref};
654 $sth->bind_param(1, $fid);
655 $sth->bind_param(2, $nm, SQL_VARCHAR);
657 my @old = map { $_->[0] } @{$sth->fetchall_arrayref};
658 my %uniq; # for public-inbox <= 1.7.0
659 grep { !$uniq{$_}++ } (@bin, @old);
663 my ($self, $lei, $uid_uri) = @_;
664 my $mailbox_uri = $uid_uri->clone;
665 $mailbox_uri->uid(undef);
666 my $folders = [ $$mailbox_uri ];
667 eval { $self->arg2folder($lei, $folders) };
668 $lei->qerr("# no sync information for $mailbox_uri") if $@;
669 map { unpack('H*',$_) } num_oidbin($self, $folders->[0], $uid_uri->uid)