1 # Copyright (C) 2021 all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
4 # common reader code for IMAP and NNTP (and maybe JMAP)
5 package PublicInbox::NetReader;
8 use parent qw(Exporter PublicInbox::IPC);
# Maps IMAP system flags to lowercase keywords.  "\\\u$_" expands to a
# literal backslash followed by the ucfirst name, e.g. "\Seen" => "seen".
11 our %IMAPflags2kw = map {; "\\\u$_" => $_ } qw(seen answered flagged draft);
# Function names listed for Exporter (inherited via `use parent' above).
13 our @EXPORT = qw(uri_section imap_uri nntp_uri);
15 # returns the git config section name, e.g [imap "imaps://user@example.com"]
16 # without the mailbox, so we can share connections between different inboxes
19 $uri->scheme . '://' . $uri->authority;
# for Mail::IMAPClient::Authcallback
# Always yields the empty string, whatever arguments it is called with.
sub auth_anon_cb {
	return '';
}
24 # mic_for may prompt the user and store auth info, prepares mic_get
#
# Creates a PublicInbox::IMAPClient for $url, upgrades the connection
# with STARTTLS when appropriate, and logs in.
#   $self     - on successful login the ->new arguments are saved in
#               $self->{mic_arg}, keyed by uri_section($uri)
#   $url      - IMAP URL string, parsed with PublicInbox::URIimap
#   $mic_args - hashref keyed by uri_section() holding extra ->new
#               options for that server
#   $lei      - optional; handed to $cred->fill, and its ->fail is used
#               to report a login error (warn() is used without it)
# Dies when the client object cannot be created or STARTTLS fails.
25 sub mic_for { # mic = Mail::IMAPClient
26 my ($self, $url, $mic_args, $lei) = @_;
27 require PublicInbox::URIimap;
28 my $uri = PublicInbox::URIimap->new($url);
29 require PublicInbox::GitCredential;
# credential fields are seeded from whatever the URL itself carries
32 protocol => $uri->scheme,
34 username => $uri->user,
35 password => $uri->password,
36 }, 'PublicInbox::GitCredential';
37 my $common = $mic_args->{uri_section($uri)} // {};
38 # IMAPClient and Net::Netrc both mishandle `0', so we pass `127.0.0.1'
39 my $host = $cred->{host};
40 $host = '127.0.0.1' if $host eq '0';
# %$common is listed last, so per-server settings take precedence over
# the Ssl/Keepalive defaults (later keys win in a list of pairs)
44 Ssl => $uri->scheme eq 'imaps',
45 Keepalive => 1, # SO_KEEPALIVE
46 %$common, # may set Starttls, Compress, Debug ....
48 require PublicInbox::IMAPClient;
49 my $mic = PublicInbox::IMAPClient->new(%$mic_arg) or
50 die "E: <$url> new: $@\n";
52 # default to using STARTTLS if it's available, but allow
53 # it to be disabled since I usually connect to localhost
# (only attempted when Ssl is off and Starttls was left unset)
54 if (!$mic_arg->{Ssl} && !defined($mic_arg->{Starttls}) &&
55 $mic->has_capability('STARTTLS') &&
56 $mic->can('starttls')) {
57 $mic->starttls or die "E: <$url> STARTTLS: $@\n";
60 # do we even need credentials?
# anonymous login is only considered when the URL has no username and
# the server advertises AUTH=ANONYMOUS
61 if (!defined($cred->{username}) &&
62 $mic->has_capability('AUTH=ANONYMOUS')) {
66 $cred->check_netrc unless defined $cred->{password};
67 $cred->fill($lei); # may prompt user here
# record the credentials in $mic_arg as well as on the live object
68 $mic->User($mic_arg->{User} = $cred->{username});
69 $mic->Password($mic_arg->{Password} = $cred->{password});
70 } else { # AUTH=ANONYMOUS
71 $mic->Authmechanism($mic_arg->{Authmechanism} = 'ANONYMOUS');
# $mic_arg keeps the callback by name (a string) while the live object
# gets the code ref; the reconnect code further down in this file
# resolves a non-CODE Authcallback with ->can
72 $mic_arg->{Authcallback} = 'auth_anon_cb';
73 $mic->Authcallback(\&auth_anon_cb);
76 if ($mic->login && $mic->IsAuthenticated) {
77 # success! keep IMAPClient->new arg in case we get disconnected
78 $self->{mic_arg}->{uri_section($uri)} = $mic_arg;
80 $err = "E: <$url> LOGIN: $@\n";
# never echo the password back if the server error quotes it
81 if ($cred && defined($cred->{password})) {
82 $err =~ s/\Q$cred->{password}\E/*******/g;
# tell the credential object whether the login worked; presumably this
# maps to git-credential approve/reject -- confirm in GitCredential
86 $cred->run($mic ? 'approve' : 'reject') if $cred;
88 $lei ? $lei->fail($err) : warn($err);
93 # Net::NNTP doesn't support CAPABILITIES, yet
# Decides from the host name alone whether an unrequested STARTTLS
# attempt is worthwhile; the NNTP connection code in this file treats a
# true result as "try it".  Returns early (false) for .onion hosts,
# 127.0.0.0/8 literals and the IPv6 loopback `::1'.
94 sub try_starttls ($) {
96 return if $host =~ /\.onion\z/s;
97 return if $host =~ /\A127\.[0-9]+\.[0-9]+\.[0-9]+\z/s;
98 return if $host eq '::1';
103 my ($nn_arg, $nntp_opt, $uri) = @_;
104 my $nn = Net::NNTP->new(%$nn_arg) or die "E: <$uri> new: $!\n";
106 # default to using STARTTLS if it's available, but allow
107 # it to be disabled for localhost/VPN users
108 if (!$nn_arg->{SSL} && $nn->can('starttls')) {
109 if (!defined($nntp_opt->{starttls}) &&
110 try_starttls($nn_arg->{Host})) {
111 # soft fail by default
112 $nn->starttls or warn <<"";
113 W: <$uri> STARTTLS tried and failed (not requested)
115 } elsif ($nntp_opt->{starttls}) {
116 # hard fail if explicitly configured
117 $nn->starttls or die <<"";
118 E: <$uri> STARTTLS requested and failed
121 } elsif ($nntp_opt->{starttls}) {
122 $nn->can('starttls') or
123 die "E: <$uri> Net::NNTP too old for STARTTLS\n";
124 $nn->starttls or die <<"";
125 E: <$uri> STARTTLS requested and failed
# Connects to the NNTP server for $uri via nn_new(), authenticates when
# the URL carries userinfo, and optionally enables COMPRESS.
#   $self    - ->new arguments are saved in $self->{nn_arg}{$sec};
#              $self->{nntp_opt}{$sec} is created if missing
#   $uri     - NNTP URI object
#   $nn_args - hashref keyed by uri_section() with extra Net::NNTP->new
#              options
#   $lei     - optional; handed to $cred->fill
# Commands that succeeded here (authinfo, compress) are appended to
# $nntp_opt->{-postconn}; the reconnect code later in this file replays
# that list on a new connection.
131 sub nn_for ($$$;$) { # nn = Net::NNTP
132 my ($self, $uri, $nn_args, $lei) = @_;
133 my $sec = uri_section($uri);
134 my $nntp_opt = $self->{nntp_opt}->{$sec} //= {};
135 my $host = $uri->host;
136 # Net::NNTP and Net::Netrc both mishandle `0', so we pass `127.0.0.1'
137 $host = '127.0.0.1' if $host eq '0';
# credentials are only set up when the URL contains a userinfo part
140 if (defined(my $ui = $uri->userinfo)) {
141 require PublicInbox::GitCredential;
144 protocol => $uri->scheme,
146 }, 'PublicInbox::GitCredential';
# limit of 2 keeps any further `:' inside the password
147 ($u, $p) = split(/:/, $ui, 2);
148 ($cred->{username}, $cred->{password}) = ($u, $p);
149 $cred->check_netrc unless defined $p;
151 my $common = $nn_args->{$sec} // {};
# %$common comes last, so per-server settings win over the defaults
155 SSL => $uri->secure, # snews == nntps
156 %$common, # may Debug ....
158 my $nn = nn_new($nn_arg, $nntp_opt, $uri);
160 $cred->fill($lei); # may prompt user here
# NOTE(review): $u/$p were split from the URL before check_netrc/fill
# ran; in the lines visible here nothing copies $cred->{username} or
# $cred->{password} back into them, so a password obtained from netrc
# or a prompt would not reach authinfo -- confirm
161 if ($nn->authinfo($u, $p)) {
162 push @{$nntp_opt->{-postconn}}, [ 'authinfo', $u, $p ];
# the password is deliberately not printed
164 warn "E: <$uri> AUTHINFO $u XXXX failed\n";
169 if ($nntp_opt->{compress}) {
170 # https://rt.cpan.org/Ticket/Display.html?id=129967
171 if ($nn->can('compress')) {
173 push @{$nntp_opt->{-postconn}}, [ 'compress' ];
175 warn "W: <$uri> COMPRESS failed\n";
178 delete $nntp_opt->{compress};
180 W: <$uri> COMPRESS not supported by Net::NNTP
181 W: see https://rt.cpan.org/Ticket/Display.html?id=129967 for updates
186 $self->{nn_arg}->{$sec} = $nn_arg;
187 $cred->run($nn ? 'approve' : 'reject') if $cred;
193 require PublicInbox::URIimap;
194 my $uri = PublicInbox::URIimap->new($url);
195 $uri ? $uri->canonical : undef;
# URI schemes accepted as NNTP; looked up by $uri->scheme below
198 my %IS_NNTP = (news => 1, snews => 1, nntp => 1, nntps => 1);
201 require PublicInbox::URInntps;
202 my $uri = PublicInbox::URInntps->new($url);
203 $uri && $IS_NNTP{$uri->scheme} && $uri->group ? $uri->canonical : undef;
# Looks up $key for $url via $cfg->urlmatch and interprets the value as
# an interval in seconds.
#   returns nothing when the key is unset (urlmatch gave undef)
#   returns the value as a number when it is a plain non-negative
#   decimal such as "30" or "0.5"
#   otherwise warns: once for a multi-valued key (array ref), or that
#   the value is not numeric
206 sub cfg_intvl ($$$) {
207 my ($cfg, $key, $url) = @_;
208 my $v = $cfg->urlmatch($key, $url) // return;
# `+ 0' forces numeric context so callers get a number, not a string
209 $v =~ /\A[0-9]+(?:\.[0-9]+)?\z/s and return $v + 0;
210 if (ref($v) eq 'ARRAY') {
211 $v = join(', ', @$v);
212 warn "W: $key has multiple values: $v\nW: $key ignored\n";
214 warn "W: $key=$v is not a numeric value in seconds\n";
219 my ($cfg, $key, $url) = @_;
220 my $orig = $cfg->urlmatch($key, $url) // return;
221 my $bool = $cfg->git_bool($orig);
222 warn "W: $key=$orig for $url is not boolean\n" unless defined($bool);
226 # flesh out common IMAP-specific data structures
#
# Does nothing unless $self->{imap_order} is set.  Otherwise it:
#   1) loads the IMAP support modules, dying with a hint if missing
#   2) reads per-URL imap.* settings from the config into $mic_args
#      (options for IMAPClient->new) and $self->{imap_opt}
#   3) connects and logs in once per scheme://authority via mic_for
#   4) when $self is a PublicInbox::NetWriter, creates any destination
#      mailbox that does not exist yet
#   $lei - optional; supplies the quiet flag, the config when
#          $self->{pi_cfg} is unset, and is passed on to mic_for
227 sub imap_common_init ($;$) {
228 my ($self, $lei) = @_;
229 return unless $self->{imap_order};
230 $self->{quiet} = 1 if $lei && $lei->{opt}->{quiet};
231 eval { require PublicInbox::IMAPClient } or
232 die "Mail::IMAPClient is required for IMAP:\n$@\n";
233 eval { require PublicInbox::IMAPTracker } or
# NOTE(review): "\n:" below looks transposed compared with the ":\n"
# used in the Mail::IMAPClient message above -- confirm
234 die "DBD::SQLite is required for IMAP\n:$@\n";
235 require PublicInbox::URIimap;
# NOTE(review): with neither {pi_cfg} nor $lei this calls a method on
# undef -- confirm callers always provide one of them
236 my $cfg = $self->{pi_cfg} // $lei->_lei_cfg;
237 my $mic_args = {}; # scheme://authority => Mail::IMAPClient arg
238 for my $uri (@{$self->{imap_order}}) {
239 my $sec = uri_section($uri);
# $$uri: presumably the URI object is a blessed scalar ref, so this is
# the URL string -- confirm against PublicInbox::URIimap
240 for my $k (qw(Starttls Debug Compress)) {
241 my $bool = cfg_bool($cfg, "imap.$k", $$uri) // next;
242 $mic_args->{$sec}->{$k} = $bool;
244 my $to = cfg_intvl($cfg, 'imap.timeout', $$uri);
# a timeout of 0 is treated the same as unset
245 $mic_args->{$sec}->{Timeout} = $to if $to;
246 for my $k (qw(pollInterval idleInterval)) {
247 $to = cfg_intvl($cfg, "imap.$k", $$uri) // next;
248 $self->{imap_opt}->{$sec}->{$k} = $to;
250 my $k = 'imap.fetchBatchSize';
251 my $bs = $cfg->urlmatch($k, $$uri) // next;
252 if ($bs =~ /\A([0-9]+)\z/) {
253 $self->{imap_opt}->{$sec}->{batch_size} = $bs;
255 warn "$k=$bs is not an integer\n";
258 # make sure we can connect and cache the credentials in memory
259 $self->{mic_arg} = {}; # scheme://authority => IMAPClient->new args
260 my $mics = {}; # scheme://authority => IMAPClient obj
261 for my $uri (@{$self->{imap_order}}) {
262 my $sec = uri_section($uri);
# one connection per server, shared by every mailbox on it
263 $mics->{$sec} //= mic_for($self, "$sec/", $mic_args, $lei);
264 next unless $self->isa('PublicInbox::NetWriter');
265 my $dst = $uri->mailbox // next;
266 my $mic = $mics->{$sec};
# NOTE(review): mic_for itself tests `$mic ? ...', which suggests it can
# hand back a false value after a failed login; ->exists would then be
# called on a non-object -- confirm
267 next if $mic->exists($dst); # already exists
268 $mic->create($dst) or die "CREATE $dst failed <$uri>: $@";
273 # flesh out common NNTP-specific data structures
#
# Does nothing unless $self->{nntp_order} is set.  Otherwise it loads
# Net::NNTP and PublicInbox::IMAPTracker, reads per-URL nntp.* settings
# from the config, connects once per scheme://authority via nn_for, and
# returns a hashref of those connections.
#   $lei - optional; supplies the quiet flag, the config when
#          $self->{pi_cfg} is unset, and is passed on to nn_for
274 sub nntp_common_init ($;$) {
275 my ($self, $lei) = @_;
276 return unless $self->{nntp_order};
277 $self->{quiet} = 1 if $lei && $lei->{opt}->{quiet};
278 eval { require Net::NNTP } or
279 die "Net::NNTP is required for NNTP:\n$@\n";
280 eval { require PublicInbox::IMAPTracker } or
# NOTE(review): "\n:" below looks transposed compared with the ":\n"
# used in the Net::NNTP message above -- confirm
281 die "DBD::SQLite is required for NNTP\n:$@\n";
282 my $cfg = $self->{pi_cfg} // $lei->_lei_cfg;
283 my $nn_args = {}; # scheme://authority => Net::NNTP->new arg
284 for my $uri (@{$self->{nntp_order}}) {
285 my $sec = uri_section($uri);
287 # Debug and Timeout are passed to Net::NNTP->new
288 my $v = cfg_bool($cfg, 'nntp.Debug', $$uri);
289 $nn_args->{$sec}->{Debug} = $v if defined $v;
290 my $to = cfg_intvl($cfg, 'nntp.Timeout', $$uri);
# a timeout of 0 is treated the same as unset
291 $nn_args->{$sec}->{Timeout} = $to if $to;
293 # Net::NNTP post-connect commands
294 for my $k (qw(starttls compress)) {
295 $v = cfg_bool($cfg, "nntp.$k", $$uri) // next;
296 $self->{nntp_opt}->{$sec}->{$k} = $v;
300 for my $k (qw(pollInterval)) {
301 $to = cfg_intvl($cfg, "nntp.$k", $$uri) // next;
302 $self->{nntp_opt}->{$sec}->{$k} = $to;
305 # make sure we can connect and cache the credentials in memory
306 $self->{nn_arg} = {}; # scheme://authority => Net::NNTP->new args
307 my %nn; # scheme://authority => Net::NNTP object
308 for my $uri (@{$self->{nntp_order}}) {
309 my $sec = uri_section($uri);
# one connection per server, shared by every group on it
310 $nn{$sec} //= nn_for($self, $uri, $nn_args, $lei);
312 \%nn; # for optional {nn_cached}
316 my ($self, $arg) = @_;
318 if ($uri = imap_uri($arg)) {
319 push @{$self->{imap_order}}, $uri;
320 } elsif ($uri = nntp_uri($arg)) {
321 push @{$self->{nntp_order}}, $uri;
323 push @{$self->{unsupported_url}}, $arg;
329 if (my $u = $self->{unsupported_url}) {
330 return "Unsupported URL(s): @$u";
332 if ($self->{imap_order}) {
333 eval { require PublicInbox::IMAPClient } or
334 die "Mail::IMAPClient is required for IMAP:\n$@\n";
336 if ($self->{nntp_order}) {
337 eval { require Net::NNTP } or
338 die "Net::NNTP is required for NNTP:\n$@\n";
# Hands one fetched IMAP message to the callback in $self->{eml_each}.
#   $uri   - mailbox URI, passed through to the callback
#   $uid   - IMAP UID of the message
#   $raw   - scalar ref to the raw message; modified in place (CRLF
#            becomes LF)
#   $flags - IMAP flags as one space-separated string
# The callback is invoked as
#   $eml_cb->($uri, $uid, $kw, PublicInbox::Eml, @args)
# where $kw is the sorted keyword list built from $flags.
343 sub _imap_do_msg ($$$$$) {
344 my ($self, $uri, $uid, $raw, $flags) = @_;
345 # our target audience expects LF-only, save storage
346 $$raw =~ s/\r\n/\n/sg;
348 for my $f (split(/ /, $flags)) {
# flags listed in %IMAPflags2kw translate to keywords; \Recent and
# \Deleted are recognized separately; anything else is only reported
# when {verbose} is set
349 if (my $k = $IMAPflags2kw{$f}) {
351 } elsif ($f eq "\\Recent") { # not in JMAP
352 } elsif ($f eq "\\Deleted") { # not in JMAP
354 } elsif ($self->{verbose}) {
355 warn "# unknown IMAP flag $f <$uri;uid=$uid>\n";
358 @$kw = sort @$kw; # for all UI/UX purposes
359 my ($eml_cb, @args) = @{$self->{eml_each}};
360 $eml_cb->($uri, $uid, $kw, PublicInbox::Eml->new($raw), @args);
# Unpacks the optional commit callback stored in $self->{on_commit},
# an array ref of [ $code_ref, @extra_args ].  Returns immediately when
# no callback is configured.
363 sub run_commit_cb ($) {
365 my $cmt_cb_args = $self->{on_commit} or return;
366 my ($cb, @args) = @$cmt_cb_args;
# Fetches every message newer than the last recorded UID from the
# mailbox named by $uri and feeds each one to _imap_do_msg.
#   $self - reads {incremental}, {shard_info}, {quiet}, {quit},
#           {max_batch} and {imap_opt}{$sec}{batch_size}
#   $mic  - connected, authenticated IMAP client
#   $uri  - mailbox URI
# Early exits return an "E: ..." string on failure, or nothing when
# there is no work or {quit} was set during an interrupted call.
370 sub _imap_fetch_all ($$$) {
371 my ($self, $mic, $uri) = @_;
372 my $sec = uri_section($uri);
373 my $mbx = $uri->mailbox;
374 $mic->Clear(1); # trim results history
# NOTE(review): this and the SEARCH/FETCH errors below report $!, while
# the login code in this file reports IMAPClient failures via $@ --
# confirm which variable carries the useful text here
375 $mic->examine($mbx) or return "E: EXAMINE $mbx ($sec) failed: $!";
376 my ($r_uidval, $r_uidnext);
# prefer the values already present in the EXAMINE response; the
# explicit uidvalidity/uidnext calls below are only a fallback
377 for ($mic->Results) {
378 /^\* OK \[UIDVALIDITY ([0-9]+)\].*/ and $r_uidval = $1;
379 /^\* OK \[UIDNEXT ([0-9]+)\].*/ and $r_uidnext = $1;
380 last if $r_uidval && $r_uidnext;
382 $r_uidval //= $mic->uidvalidity($mbx) //
383 return "E: $uri cannot get UIDVALIDITY";
384 $r_uidnext //= $mic->uidnext($mbx) //
385 return "E: $uri cannot get UIDNEXT";
# the tracker (persistent last-seen state) is only used in incremental
# mode; 0 serves as the "no tracker" placeholder
386 my $itrk = $self->{incremental} ?
387 PublicInbox::IMAPTracker->new($$uri) : 0;
388 my ($l_uidval, $l_uid) = $itrk ? $itrk->get_last : ();
389 $l_uidval //= $r_uidval; # first time
# a changed UIDVALIDITY means previously stored UIDs no longer identify
# the same messages, so refuse to continue
391 if ($l_uidval != $r_uidval) {
392 return "E: $uri UIDVALIDITY mismatch\n".
393 "E: local=$l_uidval != remote=$r_uidval";
# UIDNEXT is the next UID to be assigned, so the newest existing UID is
# at most one less
395 my $r_uid = $r_uidnext - 1;
396 if ($l_uid > $r_uid) {
# NOTE(review): "UIDVALIDLITY" in the message below looks like a typo
# for UIDVALIDITY; left untouched because it is runtime output
397 return "E: $uri local UID exceeds remote ($l_uid > $r_uid)\n".
398 "E: $uri strangely, UIDVALIDLITY matches ($l_uidval)\n";
400 return if $l_uid >= $r_uid; # nothing to do
# optional sharding: with [ $mod, $shard ] set, only UIDs where
# UID % $mod == $shard are handled by this process
402 my ($mod, $shard) = @{$self->{shard_info} // []};
403 unless ($self->{quiet}) {
404 my $m = $mod ? " [(UID % $mod) == $shard]" : '';
405 warn "# $uri fetching UID $l_uid:$r_uid$m\n";
407 $mic->Uid(1); # the default, we hope
# number of UIDs requested per FETCH; defaults to one at a time
408 my $bs = $self->{imap_opt}->{$sec}->{batch_size} // 1;
# .PEEK variants; presumably chosen so fetching does not mark messages
# as seen -- confirm against the IMAP specification
409 my $req = $mic->imap4rev1 ? 'BODY.PEEK[]' : 'RFC822.PEEK';
415 # I wish "UID FETCH $START:*" could work, but:
416 # 1) servers do not need to return results in any order
417 # 2) Mail::IMAPClient doesn't offer a streaming API
418 unless ($uids = $mic->search("UID $l_uid:*")) {
# an interrupted call during shutdown is not reported as an error
419 return if $!{EINTR} && $self->{quit};
420 return "E: $uri UID SEARCH $l_uid:* error: $!";
422 return if scalar(@$uids) == 0;
424 # RFC 3501 doesn't seem to indicate order of UID SEARCH
425 # responses, so sort it ourselves. Order matters so
426 # IMAPTracker can store the newest UID.
427 @$uids = sort { $a <=> $b } @$uids;
429 # Did we actually get new messages?
430 return if $uids->[0] < $l_uid;
432 $l_uid = $uids->[-1] + 1; # for next search
434 my $n = $self->{max_batch};
# drop UIDs that belong to other shards before batching
436 @$uids = grep { ($_ % $mod) == $shard } @$uids if $mod;
437 while (scalar @$uids) {
438 my @batch = splice(@$uids, 0, $bs);
439 $batch = join(',', @batch);
# expose the current batch in the process title for the duration of
# this iteration
440 local $0 = "UID:$batch $mbx $sec";
441 my $r = $mic->fetch_hash($batch, $req, 'FLAGS');
442 unless ($r) { # network error?
443 last if $!{EINTR} && $self->{quit};
444 $err = "E: $uri UID FETCH $batch error: $!";
447 for my $uid (@batch) {
448 # messages get deleted, so holes appear
449 my $per_uid = delete $r->{$uid} // next;
450 my $raw = delete($per_uid->{$key}) // next;
451 _imap_do_msg($self, $uri, $uid, \$raw,
454 last if $self->{quit};
456 last if $self->{quit};
# commit, then persist progress so an incremental run can resume
458 run_commit_cb($self);
459 $itrk->update_last($r_uidval, $last_uid) if $itrk;
460 } until ($err || $self->{quit});
464 # uses cached auth info prepared by mic_for
466 my ($self, $uri) = @_;
467 my $sec = uri_section($uri);
468 # see if caller saved result of imap_common_init
469 my $cached = $self->{mics_cached};
471 my $mic = $cached->{$sec};
472 return $mic if $mic && $mic->IsConnected;
473 delete $cached->{$sec};
475 my $mic_arg = $self->{mic_arg}->{$sec} or
476 die "BUG: no Mail::IMAPClient->new arg for $sec";
477 if (defined(my $cb_name = $mic_arg->{Authcallback})) {
478 if (ref($cb_name) ne 'CODE') {
479 $mic_arg->{Authcallback} = $self->can($cb_name);
482 my $mic = PublicInbox::IMAPClient->new(%$mic_arg);
483 $cached //= {}; # invalid placeholder if no cache enabled
484 $mic && $mic->IsConnected ? ($cached->{$sec} = $mic) : undef;
488 my ($self, $url, $eml_cb, @args) = @_;
489 my $uri = ref($url) ? $url : PublicInbox::URIimap->new($url);
490 my $sec = uri_section($uri);
491 local $0 = $uri->mailbox." $sec";
492 my $mic = mic_get($self, $uri);
495 local $self->{eml_each} = [ $eml_cb, @args ];
496 $err = _imap_fetch_all($self, $mic, $uri);
498 $err = "E: <$uri> not connected: $!";
504 # may use cached auth info prepared by nn_for once
506 my ($self, $uri) = @_;
507 my $sec = uri_section($uri);
508 # see if caller saved result of nntp_common_init
509 my $cached = $self->{nn_cached} // {};
511 $nn = delete($cached->{$sec}) and return $nn;
512 my $nn_arg = $self->{nn_arg}->{$sec} or
513 die "BUG: no Net::NNTP->new arg for $sec";
514 my $nntp_opt = $self->{nntp_opt}->{$sec};
515 $nn = nn_new($nn_arg, $nntp_opt, $uri) or return;
516 if (my $postconn = $nntp_opt->{-postconn}) {
517 for my $m_arg (@$postconn) {
518 my ($method, @args) = @$m_arg;
519 $nn->$method(@args) and next;
520 die "E: <$uri> $method failed\n";
# Fetches articles from the newsgroup named by $uri and feeds each one
# to the callback in $self->{eml_each}.
#   $self - reads {incremental}, {quiet}, {quit}, {max_batch}
#   $nn   - connected NNTP client
#   $uri  - group URI; ->group yields the group name plus an optional
#           article range ($num_a, $num_b)
# Returns an "E: ..." string when GROUP fails, and nothing when the
# tracker shows the group is already up to date.
527 sub _nntp_fetch_all ($$$) {
528 my ($self, $nn, $uri) = @_;
529 my ($group, $num_a, $num_b) = $uri->group;
530 my $sec = uri_section($uri);
# the group call yields the article count plus first and last numbers
531 my ($nr, $beg, $end) = $nn->group($group);
532 unless (defined($nr)) {
533 chomp(my $msg = $nn->message);
534 return "E: GROUP $group <$sec> $msg";
537 # IMAPTracker is also used for tracking NNTP, UID == article number
538 # LIST.ACTIVE can get the equivalent of UIDVALIDITY, but that's
539 # expensive. So we assume newsgroups don't change:
540 my $itrk = $self->{incremental} ?
541 PublicInbox::IMAPTracker->new($$uri) : 0;
542 my (undef, $l_art) = $itrk ? $itrk->get_last : ();
544 # allow users to specify articles to refetch
545 # cf. https://tools.ietf.org/id/draft-gilman-news-url-01.txt
546 # nntp://example.com/inbox.foo/$num_a-$num_b
# NOTE(review): `$num_a < $beg' can only move $beg below the first
# article the server reported; a URL range starting above that never
# raises $beg.  `>' may have been intended -- confirm
547 $beg = $num_a if defined($num_a) && $num_a < $beg;
548 $end = $num_b if defined($num_b) && $num_b < $end;
549 if (defined $l_art) {
550 return if $l_art >= $end; # nothing to do
554 unless ($self->{quiet}) {
555 warn "# $uri fetching ARTICLE $beg..$end\n";
558 my $n = $self->{max_batch};
560 last if $self->{quit};
# commit and persist progress; 0 stands in for UIDVALIDITY, which NNTP
# lacks (see the tracker comment above)
563 run_commit_cb($self);
564 $itrk->update_last(0, $last_art) if $itrk;
565 $n = $self->{max_batch};
567 my $raw = $nn->article($art);
568 unless (defined($raw)) {
569 my $msg = $nn->message;
570 if ($nn->code == 421) { # pseudo response from Net::Cmd
573 } else { # probably just a deleted message (spam)
# ->article gave an array ref of lines; join them and normalize CRLF
# to LF before parsing
578 $raw = join('', @$raw);
579 $raw =~ s/\r\n/\n/sg;
580 my ($eml_cb, @args) = @{$self->{eml_each}};
# NNTP has no per-message flags, so the keyword list is always empty
581 $eml_cb->($uri, $art, [], PublicInbox::Eml->new(\$raw), @args);
584 run_commit_cb($self);
585 $itrk->update_last(0, $last_art) if $itrk;
590 my ($self, $url, $eml_cb, @args) = @_;
591 my $uri = ref($url) ? $url : PublicInbox::URInntps->new($url);
592 my $sec = uri_section($uri);
593 local $0 = $uri->group ." $sec";
594 my $nn = nn_get($self, $uri);
595 return if $self->{quit};
598 local $self->{eml_each} = [ $eml_cb, @args ];
599 $err = _nntp_fetch_all($self, $nn, $uri);
601 $err = "E: <$uri> not connected: $!";
# Constructor: returns a fresh, empty hash ref blessed into the class
# (or package name) given as the first argument.  Further arguments are
# ignored.
sub new {
	my ($class) = @_;
	my %state;
	return bless(\%state, $class);
}