X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FNNTP.pm;h=63d5870b045756463b8d78f7b16c4689481a15be;hb=305d728977def1df9ab57778f9cad9dd834ce73d;hp=6c661a1bc3b8927e5449e1e647012cf66e8dc6cc;hpb=889663221bdd796d67e9c536108b3094305f8e80;p=public-inbox.git diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index 6c661a1b..46398cd4 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -1,43 +1,68 @@ -# Copyright (C) 2015 all contributors -# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt) +# Copyright (C) 2015-2020 all contributors +# License: AGPL-3.0+ +# +# Each instance of this represents a NNTP client socket +# fields: +# nntpd: PublicInbox::NNTPD ref +# article: per-session current article number +# ng: PublicInbox::Inbox ref +# long_cb: long_response private data package PublicInbox::NNTP; use strict; -use warnings; -use base qw(Danga::Socket); -use fields qw(nntpd article rbuf ng long_res); -use PublicInbox::Msgmap; -use PublicInbox::GitCatFile; -use PublicInbox::MID qw(mid2path); -use Email::MIME; -use Data::Dumper qw(Dumper); +use parent qw(PublicInbox::DS); +use PublicInbox::MID qw(mid_escape $MID_EXTRACT); +use PublicInbox::Eml; use POSIX qw(strftime); -use Time::HiRes qw(clock_gettime ualarm CLOCK_MONOTONIC); +use PublicInbox::DS qw(now); +use Digest::SHA qw(sha1_hex); +use Time::Local qw(timegm timelocal); +use PublicInbox::GitAsyncCat; use constant { + LINE_MAX => 512, # RFC 977 section 2.3 r501 => '501 command syntax error', + r502 => '502 Command unavailable', r221 => '221 Header follows', r224 => '224 Overview information follows (multi-line)', + r225 => '225 Headers follow (multi-line)', r430 => '430 No article with that message-id', - long_response_limit => 0xffffffff, }; - -sub now () { clock_gettime(CLOCK_MONOTONIC) }; - -my @OVERVIEW = qw(Subject From Date Message-ID References Bytes Lines); -my %OVERVIEW = map { $_ => 1 } @OVERVIEW; - -# disable commands with easy DoS potential: -# LISTGROUP could get pretty bad, too... -my %DISABLED; # = map { $_ => 1 } qw(xover list_overview_fmt newnews xhdr); +use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT); +use Errno qw(EAGAIN); +my $ONE_MSGID = qr/\A$MID_EXTRACT\z/; +my @OVERVIEW = qw(Subject From Date Message-ID References); +my $OVERVIEW_FMT = join(":\r\n", @OVERVIEW, qw(Bytes Lines), '') . + "Xref:full\r\n"; +my $LIST_HEADERS = join("\r\n", @OVERVIEW, + qw(:bytes :lines Xref To Cc)) . "\r\n"; +my $CAPABILITIES = <<""; +101 Capability list:\r +VERSION 2\r +READER\r +NEWNEWS\r +LIST ACTIVE ACTIVE.TIMES NEWSGROUPS OVERVIEW.FMT\r +HDR\r +OVER\r +COMPRESS DEFLATE\r + +sub greet ($) { $_[0]->write($_[0]->{nntpd}->{greet}) }; sub new ($$$) { my ($class, $sock, $nntpd) = @_; - my $self = fields::new($class); - binmode $sock, ':utf8'; # RFC 3977 - $self->SUPER::new($sock); - $self->{nntpd} = $nntpd; - res($self, '201 server ready - post via email'); - $self->{rbuf} = ''; - $self->watch_read(1); + my $self = bless { nntpd => $nntpd }, $class; + my $ev = EPOLLIN; + my $wbuf; + if ($sock->can('accept_SSL') && !$sock->accept_SSL) { + return CORE::close($sock) if $! != EAGAIN; + $ev = PublicInbox::TLS::epollbit(); + $wbuf = [ \&PublicInbox::DS::accept_tls_step, \&greet ]; + } + $self->SUPER::new($sock, $ev | EPOLLONESHOT); + if ($wbuf) { + $self->{wbuf} = $wbuf; + } else { + greet($self); + } + $self->update_idle_time; $self; } @@ -53,31 +78,40 @@ sub args_ok ($$) { # returns 1 if we can continue, 0 if not due to buffered writes or disconnect sub process_line ($$) { my ($self, $l) = @_; - my ($req, @args) = split(/\s+/, $l); - $req = lc($req); - $req = eval { - no strict 'refs'; - $req = $DISABLED{$req} ? undef : *{'cmd_'.$req}{CODE}; - }; + my ($req, @args) = split(/[ \t]+/, $l); + return 1 unless defined($req); # skip blank line + $req = $self->can('cmd_'.lc($req)); return res($self, '500 command not recognized') unless $req; return res($self, r501) unless args_ok($req, scalar @args); my $res = eval { $req->($self, @args) }; my $err = $@; - if ($err && !$self->{closed}) { - chomp($l = Dumper(\$l)); - err($self, "error from: $l $err"); + if ($err && $self->{sock}) { + local $/ = "\n"; + chomp($l); + err($self, 'error from: %s (%s)', $l, $err); $res = '503 program fault - command not performed'; } return 0 unless defined $res; res($self, $res); } +# The keyword argument is not used (rfc3977 5.2.2) +sub cmd_capabilities ($;$) { + my ($self, undef) = @_; + my $res = $CAPABILITIES; + if (!$self->{sock}->can('accept_SSL') && + $self->{nntpd}->{accept_tls}) { + $res .= "STARTTLS\r\n"; + } + $res .= '.'; +} + sub cmd_mode ($$) { my ($self, $arg) = @_; $arg = uc $arg; return r501 unless $arg eq 'READER'; - '200 reader status acknowledged'; + '201 Posting prohibited'; } sub cmd_slave ($) { '202 slave status noted' } @@ -89,16 +123,21 @@ sub cmd_xgtitle ($;$) { '.' } -sub list_overview_fmt ($$) { +sub list_overview_fmt ($) { my ($self) = @_; - more($self, $_ . ':') foreach @OVERVIEW; + $self->msg_more($OVERVIEW_FMT); +} + +sub list_headers ($;$) { + my ($self) = @_; + $self->msg_more($LIST_HEADERS); } sub list_active ($;$) { my ($self, $wildmat) = @_; wildmat2re($wildmat); foreach my $ng (@{$self->{nntpd}->{grouplist}}) { - $ng->{name} =~ $wildmat or next; + $ng->{newsgroup} =~ $wildmat or next; group_line($self, $ng); } } @@ -107,9 +146,9 @@ sub list_active_times ($;$) { my ($self, $wildmat) = @_; wildmat2re($wildmat); foreach my $ng (@{$self->{nntpd}->{grouplist}}) { - $ng->{name} =~ $wildmat or next; + $ng->{newsgroup} =~ $wildmat or next; my $c = eval { $ng->mm->created_at } || time; - more($self, "$ng->{name} $c $ng->{address}"); + more($self, "$ng->{newsgroup} $c $ng->{-primary_address}"); } } @@ -117,25 +156,20 @@ sub list_newsgroups ($;$) { my ($self, $wildmat) = @_; wildmat2re($wildmat); foreach my $ng (@{$self->{nntpd}->{grouplist}}) { - $ng->{name} =~ $wildmat or next; + $ng->{newsgroup} =~ $wildmat or next; my $d = $ng->description; - more($self, "$ng->{name} $d"); + more($self, "$ng->{newsgroup} $d"); } } -# LIST SUBSCRIPTIONS not supported +# LIST SUBSCRIPTIONS, DISTRIB.PATS are not supported sub cmd_list ($;$$) { my ($self, @args) = @_; if (scalar @args) { my $arg = shift @args; $arg =~ tr/A-Z./a-z_/; $arg = "list_$arg"; - return '503 function not performed' if $DISABLED{$arg}; - - $arg = eval { - no strict 'refs'; - *{$arg}{CODE}; - }; + $arg = $self->can($arg); return r501 unless $arg && args_ok($arg, scalar @args); more($self, '215 information follows'); $arg->($self, @args); @@ -148,57 +182,69 @@ sub cmd_list ($;$$) { '.' } -sub cmd_listgroup ($;$) { - my ($self, $group) = @_; +sub listgroup_range_i { + my ($self, $beg, $end) = @_; + my $r = $self->{ng}->mm->msg_range($beg, $end, 'num'); + scalar(@$r) or return; + more($self, join("\r\n", map { $_->[0] } @$r)); + 1; +} + +sub listgroup_all_i { + my ($self, $num) = @_; + my $ary = $self->{ng}->mm->ids_after($num); + scalar(@$ary) or return; + more($self, join("\r\n", @$ary)); + 1; +} + +sub cmd_listgroup ($;$$) { + my ($self, $group, $range) = @_; if (defined $group) { my $res = cmd_group($self, $group); return $res if ($res !~ /\A211 /); more($self, $res); } - $self->{ng} or return '412 no newsgroup selected'; - $self->long_response(0, long_response_limit, sub { - my ($i) = @_; - my $nr = $self->{ng}->mm->id_batch($$i, sub { - my ($ary) = @_; - more($self, join("\r\n", @$ary)); - }); - - # -1 to adjust for implicit increment in long_response - $$i = $nr ? $$i + $nr - 1 : long_response_limit; - }); + if (defined $range) { + my $r = get_range($self, $range); + return $r unless ref $r; + long_response($self, \&listgroup_range_i, @$r); + } else { # grab every article number + long_response($self, \&listgroup_all_i, \(my $num = 0)); + } } sub parse_time ($$;$) { my ($date, $time, $gmt) = @_; - use Time::Local qw(); my ($hh, $mm, $ss) = unpack('A2A2A2', $time); if (defined $gmt) { - $gmt =~ /\A(?:UTC|GMT)\z/i or die "GM invalid: $gmt\n"; + $gmt =~ /\A(?:UTC|GMT)\z/i or die "GM invalid: $gmt"; $gmt = 1; } - my @now = $gmt ? gmtime : localtime; my ($YYYY, $MM, $DD); - if (length($date) == 8) { # RFC 3977 allows YYYYMMDD + if (bytes::length($date) == 8) { # RFC 3977 allows YYYYMMDD ($YYYY, $MM, $DD) = unpack('A4A2A2', $date); } else { # legacy clients send YYMMDD - ($YYYY, $MM, $DD) = unpack('A2A2A2', $date); - if ($YYYY > strftime('%y', @now)) { - my $cur_year = $now[5] + 1900; - $YYYY += int($cur_year / 1000) * 1000 - 100; - } + my $YY; + ($YY, $MM, $DD) = unpack('A2A2A2', $date); + my @now = $gmt ? gmtime : localtime; + my $cur_year = $now[5] + 1900; + my $cur_cent = int($cur_year / 100) * 100; + $YYYY = (($YY + $cur_cent) > $cur_year) ? + ($YY + 1900) : ($YY + $cur_cent); } if ($gmt) { - Time::Local::timegm($ss, $mm, $hh, $DD, $MM - 1, $YYYY); + timegm($ss, $mm, $hh, $DD, $MM - 1, $YYYY); } else { - Time::Local::timelocal($ss, $mm, $hh, $DD, $MM - 1, $YYYY); + timelocal($ss, $mm, $hh, $DD, $MM - 1, $YYYY); } } sub group_line ($$) { my ($self, $ng) = @_; my ($min, $max) = $ng->mm->minmax; - more($self, "$ng->{name} $max $min n") if defined $min && defined $max; + more($self, "$ng->{newsgroup} $max $min n") if defined $min && defined $max; } sub cmd_newgroups ($$$;$$) { @@ -220,7 +266,6 @@ sub wildmat2re (;$) { return $_[0] = qr/.*/ if (!defined $_[0] || $_[0] eq '*'); my %keep; my $salt = rand; - use Digest::SHA qw(sha1_hex); my $tmp = $_[0]; $tmp =~ s#(?[0]; + my $msgs = $over->query_ts($ts, $$prev); + if (scalar @$msgs) { + more($self, '<' . + join(">\r\n<", map { $_->{mid} } @$msgs ). + '>'); + $$prev = $msgs->[-1]->{num}; + } else { + shift @$overs; + if (@$overs) { # continue onto next newsgroup + $$prev = 0; + return 1; + } else { # break out of the long response. + return; + } + } +} + sub cmd_newnews ($$$$;$$) { my ($self, $newsgroups, $date, $time, $gmt, $dists) = @_; my $ts = eval { parse_time($date, $time, $gmt) }; @@ -256,42 +321,25 @@ sub cmd_newnews ($$$$;$$) { my ($keep, $skip) = split('!', $newsgroups, 2); ngpat2re($keep); ngpat2re($skip); - my @srch; + my @overs; foreach my $ng (@{$self->{nntpd}->{grouplist}}) { - $ng->{name} =~ $keep or next; - $ng->{name} =~ $skip and next; - my $srch = $ng->search or next; - push @srch, $srch; + $ng->{newsgroup} =~ $keep or next; + $ng->{newsgroup} =~ $skip and next; + my $over = $ng->over or next; + push @overs, $over; }; - return '.' unless @srch; - - $ts .= '..'; - my $opts = { asc => 1, limit => 1000, offset => 0 }; - $self->long_response(0, long_response_limit, sub { - my ($i) = @_; - my $srch = $srch[0]; - my $res = $srch->query($ts, $opts); - my $msgs = $res->{msgs}; - if (my $nr = scalar @$msgs) { - more($self, '<' . - join(">\r\n<", map { $_->mid } @$msgs ). - '>'); - $opts->{offset} += $nr; - } else { - shift @srch; - if (@srch) { # continue onto next newsgroup - $opts->{offset} = 0; - } else { # break out of the long response. - $$i = long_response_limit; - } - } - }); + return '.' unless @overs; + + my $prev = 0; + long_response($self, \&newnews_i, \@overs, $ts, \$prev); } sub cmd_group ($$) { my ($self, $group) = @_; my $no_such = '411 no such news group'; - my $ng = $self->{nntpd}->{groups}->{$group} or return $no_such; + my $nntpd = $self->{nntpd}; + my $ng = $nntpd->{groups}->{$group} or return $no_such; + $nntpd->idler_start; $self->{ng} = $ng; my ($min, $max) = $ng->mm->minmax; @@ -327,27 +375,88 @@ sub cmd_last ($) { article_adj($_[0], -1) } sub cmd_post ($) { my ($self) = @_; my $ng = $self->{ng}; - $ng ? "440 mailto:$ng->{address} to post" : '440 posting not allowed' + $ng ? "440 mailto:$ng->{-primary_address} to post" + : '440 posting not allowed' } sub cmd_quit ($) { my ($self) = @_; res($self, '205 closing connection - goodbye!'); - $self->close; + $self->shutdn; undef; } -sub art_lookup ($$$) { - my ($self, $art, $set_headers) = @_; +sub header_append ($$$) { + my ($hdr, $k, $v) = @_; + my @v = $hdr->header_raw($k); + foreach (@v) { + return if $v eq $_; + } + $hdr->header_set($k, @v, $v); +} + +sub xref ($$$$) { + my ($self, $ng, $n, $mid) = @_; + my $ret = $self->{nntpd}->{servername} . " $ng->{newsgroup}:$n"; + + # num_for is pretty cheap and sometimes we'll lookup the existence + # of an article without getting even the OVER info. In other words, + # I'm not sure if its worth optimizing by scanning To:/Cc: and + # PublicInbox::ExtMsg on the PSGI end is just as expensive + foreach my $other (@{$self->{nntpd}->{grouplist}}) { + next if $ng eq $other; + my $num = eval { $other->mm->num_for($mid) } or next; + $ret .= " $other->{newsgroup}:$num"; + } + $ret; +} + +sub set_nntp_headers ($$) { + my ($hdr, $smsg) = @_; + my ($mid) = $smsg->{mid}; + + # why? leafnode requires a Path: header for some inexplicable + # reason. We'll fake the shortest one possible. + $hdr->header_set('Path', 'y'); + + # leafnode (and maybe other NNTP clients) have trouble dealing + # with v2 messages which have multiple Message-IDs (either due + # to our own content-based dedupe or buggy git-send-email versions). + my @mids = $hdr->header_raw('Message-ID'); + if (scalar(@mids) > 1) { + my $mid0 = "<$mid>"; + $hdr->header_set('Message-ID', $mid0); + my @alt = $hdr->header_raw('X-Alt-Message-ID'); + my %seen = map { $_ => 1 } (@alt, $mid0); + push(@alt, grep { !$seen{$_}++ } @mids); + $hdr->header_set('X-Alt-Message-ID', @alt); + } + + # clobber some + my $ibx = $smsg->{-ibx}; + my $xref = xref($smsg->{nntp}, $ibx, $smsg->{num}, $mid); + $hdr->header_set('Xref', $xref); + $xref =~ s/:[0-9]+//g; + $hdr->header_set('Newsgroups', (split(/ /, $xref, 2))[1]); + header_append($hdr, 'List-Post', "{-primary_address}>"); + if (my $url = $ibx->base_url) { + $mid = mid_escape($mid); + header_append($hdr, 'Archived-At', "<$url$mid/>"); + header_append($hdr, 'List-Archive', "<$url>"); + } +} + +sub art_lookup ($$) { + my ($self, $art) = @_; my $ng = $self->{ng}; my ($n, $mid); my $err; if (defined $art) { - if ($art =~ /\A\d+\z/o) { + if ($art =~ /\A[0-9]+\z/) { $err = '423 no such article number in this group'; $n = int($art); goto find_mid; - } elsif ($art =~ /\A<([^>]+)>\z/) { + } elsif ($art =~ $ONE_MSGID) { $mid = $1; $err = r430; $n = $ng->mm->num_for($mid) if $ng; @@ -373,78 +482,111 @@ find_mid: defined $mid or return $err; } found: - my $o = 'HEAD:' . mid2path($mid); - my $bytes; - my $s = eval { Email::MIME->new($ng->gcf->cat_file($o, \$bytes)) }; - return $err unless $s; - my $lines; - if ($set_headers) { - $s->header_set('Newsgroups', $ng->{name}); - $s->header_set('Xref', xref($ng, $n)); - $lines = $s->body =~ tr!\n!\n!; - - # must be last - $s->body_set('') if ($set_headers == 2); - } - [ $n, $mid, $s, $bytes, $lines, $ng ]; -} - -sub simple_body_write ($$) { - my ($self, $s) = @_; - my $body = $s->body; - $s->body_set(''); - $body =~ s/^\./../smg; - $body =~ s/(?over->get_art($n) or return $err; + $smsg->{-ibx} = $ng; + $smsg; +} + +sub msg_body_write ($$) { + my ($self, $msg) = @_; + + # these can momentarily double the memory consumption :< + $$msg =~ s/^\./../smg; + $$msg =~ s/(?msg_more($$msg); } sub set_art { my ($self, $art) = @_; - $self->{article} = $art if defined $art && $art =~ /\A\d+\z/; + $self->{article} = $art if defined $art && $art =~ /\A[0-9]+\z/; +} + +sub msg_hdr_write ($$) { + my ($eml, $smsg) = @_; + set_nntp_headers($eml, $smsg); + + my $hdr = $eml->{hdr} // \(my $x = ''); + # fixup old bug from import (pre-a0c07cba0e5d8b6a) + $$hdr =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; + $$hdr =~ s/(?{nntp}->msg_more($$hdr); +} + +sub blob_cb { # called by git->cat_async via git_async_cat + my ($bref, $oid, $type, $size, $smsg) = @_; + my $self = $smsg->{nntp}; + my $code = $smsg->{nntp_code} // 220; + if (!defined($oid)) { + # it's possible to have TOCTOU if an admin runs + # public-inbox-(edit|purge), just move onto the next message + warn "E: $smsg->{blob} missing in $smsg->{-ibx}->{inboxdir}\n"; + return $self->requeue; + } elsif ($smsg->{blob} ne $oid) { + $self->close; + die "BUG: $smsg->{blob} != $oid"; + } + my $r = "$code $smsg->{num} <$smsg->{mid}> article retrieved - "; + my $eml = PublicInbox::Eml->new($bref); + if ($code == 220) { + more($self, $r .= 'head and body follow'); + msg_hdr_write($eml, $smsg); + $self->msg_more("\r\n"); + msg_body_write($self, $bref); + } elsif ($code == 221) { + more($self, $r .= 'head follows'); + msg_hdr_write($eml, $smsg); + } elsif ($code == 222) { + more($self, $r .= 'body follows'); + msg_body_write($self, $bref); + } else { + $self->close; + die "BUG: bad code: $r"; + } + $self->write(\".\r\n"); # flushes (includes ->zflush) + $self->requeue; } sub cmd_article ($;$) { my ($self, $art) = @_; - my $r = $self->art_lookup($art, 1); - return $r unless ref $r; - my ($n, $mid, $s) = @$r; + my $smsg = art_lookup($self, $art); + return $smsg unless ref $smsg; set_art($self, $art); - more($self, "220 $n <$mid> article retrieved - head and body follow"); - do_more($self, $s->header_obj->as_string); - do_more($self, "\r\n"); - simple_body_write($self, $s); + $smsg->{nntp} = $self; + ${git_async_cat($smsg->{-ibx}->git, $smsg->{blob}, \&blob_cb, $smsg)}; } sub cmd_head ($;$) { my ($self, $art) = @_; - my $r = $self->art_lookup($art, 2); - return $r unless ref $r; - my ($n, $mid, $s) = @$r; + my $smsg = art_lookup($self, $art); + return $smsg unless ref $smsg; set_art($self, $art); - more($self, "221 $n <$mid> article retrieved - head follows"); - do_more($self, $s->header_obj->as_string); - '.' + $smsg->{nntp} = $self; + $smsg->{nntp_code} = 221; + ${git_async_cat($smsg->{-ibx}->git, $smsg->{blob}, \&blob_cb, $smsg)}; } sub cmd_body ($;$) { my ($self, $art) = @_; - my $r = $self->art_lookup($art, 0); - return $r unless ref $r; - my ($n, $mid, $s) = @$r; + my $smsg = art_lookup($self, $art); + return $smsg unless ref $smsg; set_art($self, $art); - more($self, "222 $n <$mid> article retrieved - body follows"); - simple_body_write($self, $s); + $smsg->{nntp} = $self; + $smsg->{nntp_code} = 222; + ${git_async_cat($smsg->{-ibx}->git, $smsg->{blob}, \&blob_cb, $smsg)}; } sub cmd_stat ($;$) { my ($self, $art) = @_; - my $r = $self->art_lookup($art, 0); - return $r unless ref $r; - my ($n, $mid, undef) = @$r; + my $smsg = art_lookup($self, $art); # art may be msgid + return $smsg unless ref $smsg; + $art = $smsg->{num}; set_art($self, $art); - "223 $n <$mid> article retrieved - request text separately"; + "223 $art <$smsg->{mid}> article retrieved - request text separately"; } sub cmd_ihave ($) { '435 article not wanted - do not send it' } @@ -463,11 +605,11 @@ sub get_range ($$) { defined $range or return '420 No article(s) selected'; my ($beg, $end); my ($min, $max) = $ng->mm->minmax; - if ($range =~ /\A(\d+)\z/) { + if ($range =~ /\A([0-9]+)\z/) { $beg = $end = $1; - } elsif ($range =~ /\A(\d+)-\z/) { + } elsif ($range =~ /\A([0-9]+)-\z/) { ($beg, $end) = ($1, $max); - } elsif ($range =~ /\A(\d+)-(\d+)\z/) { + } elsif ($range =~ /\A([0-9]+)-([0-9]+)\z/) { ($beg, $end) = ($1, $2); } else { return r501; @@ -475,79 +617,73 @@ sub get_range ($$) { $beg = $min if ($beg < $min); $end = $max if ($end > $max); return '420 No article(s) selected' if ($beg > $end); - [ $beg, $end ]; + [ \$beg, $end ]; } -sub hdr_val ($$) { - my ($r, $header) = @_; - $header = lc $header; - return $r->[3] if ($header eq 'bytes'); - return $r->[4] if ($header eq 'lines'); - $r = $r->[2]->header_obj->header($header); - defined $r or return; - $r =~ s/[\r\n\t]+/ /sg; - $r; +sub long_step { + my ($self) = @_; + # wbuf is unset or empty, here; {long} may add to it + my ($fd, $cb, $t0, @args) = @{$self->{long_cb}}; + my $more = eval { $cb->($self, @args) }; + if ($@ || !$self->{sock}) { # something bad happened... + delete $self->{long_cb}; + my $elapsed = now() - $t0; + if ($@) { + err($self, + "%s during long response[$fd] - %0.6f", + $@, $elapsed); + } + out($self, " deferred[$fd] aborted - %0.6f", $elapsed); + $self->close; + } elsif ($more) { # $self->{wbuf}: + $self->update_idle_time; + + # COMPRESS users all share the same DEFLATE context. + # Flush it here to ensure clients don't see + # each other's data + $self->zflush; + + # no recursion, schedule another call ASAP, but only after + # all pending writes are done. autovivify wbuf: + my $new_size = push(@{$self->{wbuf}}, \&long_step); + + # wbuf may be populated by $cb, no need to rearm if so: + $self->requeue if $new_size == 1; + } else { # all done! + delete $self->{long_cb}; + res($self, '.'); + my $elapsed = now() - $t0; + my $fd = fileno($self->{sock}); + out($self, " deferred[$fd] done - %0.6f", $elapsed); + my $wbuf = $self->{wbuf}; # do NOT autovivify + $self->requeue unless $wbuf && @$wbuf; + } } -sub long_response ($$$$) { - my ($self, $beg, $end, $cb) = @_; - die "BUG: nested long response" if $self->{long_res}; +sub long_response ($$;@) { + my ($self, $cb, @args) = @_; # cb returns true if more, false if done + my $sock = $self->{sock} or return; # make sure we disable reading during a long response, # clients should not be sending us stuff and making us do more # work while we are stream a response to them - $self->watch_read(0); - my $fd = fileno $self->{sock}; - my $t0 = now(); - $self->{long_res} = sub { - # limit our own running time for fairness with other - # clients and to avoid buffering too much: - my $yield; - local $SIG{ALRM} = sub { $yield = 1 }; - ualarm(100000); - - my $err; - do { - eval { $cb->(\$beg) }; - } until (($err = $@) || $self->{closed} || $yield || - $self->{write_buf_size} || ++$beg > $end); - ualarm(0); - - if ($err || $self->{closed}) { - $self->{long_res} = undef; - - if ($err) { - err($self, - "$err during long response[$fd] - %0.6f", - now() - $t0); - } - if ($self->{closed}) { - out($self, " deferred[$fd] aborted - %0.6f", - now() - $t0); - } else { - $self->watch_read(1); - } - } elsif ($yield || $self->{write_buf_size}) { - # no recursion, schedule another call ASAP - # but only after all pending writes are done - Danga::Socket->AddTimer(0, sub { - $self->write($self->{long_res}); - }); - } else { # all done! - $self->{long_res} = undef; - $self->watch_read(1); - res($self, '.'); - out($self, " deferred[$fd] done - %0.6f", now() - $t0); - } - }; - $self->{long_res}->(); # kick off! + $self->{long_cb} = [ fileno($sock), $cb, now(), @args ]; + long_step($self); # kick off! undef; } +sub hdr_msgid_range_i { + my ($self, $beg, $end) = @_; + my $r = $self->{ng}->mm->msg_range($beg, $end); + @$r or return; + more($self, join("\r\n", map { "$_->[0] <$_->[1]>" } @$r)); + 1; +} + sub hdr_message_id ($$$) { # optimize XHDR Message-ID [range] for slrnpull. my ($self, $xhdr, $range) = @_; - if (defined $range && $range =~ /\A<(.+)>\z/) { # Message-ID + if (defined $range && $range =~ $ONE_MSGID) { my ($ng, $n) = mid_lookup($self, $1); return r430 unless $n; hdr_mid_response($self, $xhdr, $ng, $n, $range, $range); @@ -555,22 +691,11 @@ sub hdr_message_id ($$$) { # optimize XHDR Message-ID [range] for slrnpull. $range = $self->{article} unless defined $range; my $r = get_range($self, $range); return $r unless ref $r; - my $mm = $self->{ng}->mm; - my ($beg, $end) = @$r; - more($self, '221 Header follows'); - $self->long_response($beg, $end, sub { - my ($i) = @_; - my $mid = $mm->mid_for($$i); - more($self, "$$i <$mid>") if defined $mid; - }); + more($self, $xhdr ? r221 : r225); + long_response($self, \&hdr_msgid_range_i, @$r); } } -sub xref ($$) { - my ($ng, $n) = @_; - "$ng->{domain} $ng->{name}:$n" -} - sub mid_lookup ($$) { my ($self, $mid) = @_; my $self_ng = $self->{ng}; @@ -586,77 +711,78 @@ sub mid_lookup ($$) { (undef, undef); } +sub xref_range_i { + my ($self, $beg, $end) = @_; + my $ng = $self->{ng}; + my $r = $ng->mm->msg_range($beg, $end); + @$r or return; + more($self, join("\r\n", map { + my $num = $_->[0]; + "$num ".xref($self, $ng, $num, $_->[1]); + } @$r)); + 1; +} + sub hdr_xref ($$$) { # optimize XHDR Xref [range] for rtin my ($self, $xhdr, $range) = @_; - if (defined $range && $range =~ /\A<(.+)>\z/) { # Message-ID - my ($ng, $n) = mid_lookup($self, $1); + if (defined $range && $range =~ $ONE_MSGID) { + my $mid = $1; + my ($ng, $n) = mid_lookup($self, $mid); return r430 unless $n; - hdr_mid_response($self, $xhdr, $ng, $n, $range, xref($ng, $n)); + hdr_mid_response($self, $xhdr, $ng, $n, $range, + xref($self, $ng, $n, $mid)); } else { # numeric range $range = $self->{article} unless defined $range; my $r = get_range($self, $range); return $r unless ref $r; - my $ng = $self->{ng}; - my $mm = $ng->mm; - my ($beg, $end) = @$r; - more($self, '221 Header follows'); - $self->long_response($beg, $end, sub { - my ($i) = @_; - my $mid = $mm->mid_for($$i); - more($self, "$$i ".xref($ng, $$i)) if defined $mid; - }); - } -} - -sub header_obj_for { - my ($srch, $mid) = @_; - eval { - my $smsg = $srch->lookup_message($mid); - $smsg = PublicInbox::SearchMsg->load_doc($smsg->{doc}); - $smsg->mini_mime->header_obj; - }; -}; + more($self, $xhdr ? r221 : r225); + long_response($self, \&xref_range_i, @$r); + } +} + +sub over_header_for { + my ($over, $num, $field) = @_; + my $smsg = $over->get_art($num) or return; + return PublicInbox::Smsg::date($smsg) if $field eq 'date'; + $smsg->{$field}; +} -sub hdr_searchmsg ($$$$) { - my ($self, $xhdr, $hdr, $range) = @_; - my $filter; - if ($hdr eq 'date') { - $hdr = 'X-PI-TS'; - $filter = sub ($) { - strftime('%a, %d %b %Y %T %z', gmtime($_[0])); - }; +sub smsg_range_i { + my ($self, $beg, $end, $field) = @_; + my $over = $self->{ng}->over; + my $msgs = $over->query_xover($$beg, $end); + scalar(@$msgs) or return; + my $tmp = ''; + + # ->{$field} is faster than ->$field invocations, so favor that. + if ($field eq 'date') { + for my $s (@$msgs) { + $tmp .= "$s->{num} ".PublicInbox::Smsg::date($s)."\r\n" + } + } else { + for my $s (@$msgs) { + $tmp .= "$s->{num} $s->{$field}\r\n"; + } } + utf8::encode($tmp); + $self->msg_more($tmp); + $$beg = $msgs->[-1]->{num} + 1; +} - if (defined $range && $range =~ /\A<(.+)>\z/) { # Message-ID +sub hdr_smsg ($$$$) { + my ($self, $xhdr, $field, $range) = @_; + if (defined $range && $range =~ $ONE_MSGID) { my ($ng, $n) = mid_lookup($self, $1); - return r430 unless $n; - if (my $srch = $ng->search) { - my $m = header_obj_for($srch, $range); - my $v = $m->header($hdr); - $v = $filter->($v) if defined $v && $filter; - hdr_mid_response($self, $xhdr, $ng, $n, $range, $v); - } else { - hdr_slow($self, $xhdr, $hdr, $range); - } + return r430 unless defined $n; + my $v = over_header_for($ng->over, $n, $field); + hdr_mid_response($self, $xhdr, $ng, $n, $range, $v); } else { # numeric range $range = $self->{article} unless defined $range; - my $srch = $self->{ng}->search or - return hdr_slow($self, $xhdr, $hdr, $range); - my $mm = $self->{ng}->mm; my $r = get_range($self, $range); return $r unless ref $r; - my ($beg, $end) = @$r; - more($self, '221 Header follows'); - $self->long_response($beg, $end, sub { - my ($i) = @_; - my $mid = $mm->mid_for($$i) or return; - my $m = header_obj_for($srch, $mid) or return; - my $v = $m->header($hdr); - defined $v or return; - $v = $filter->($v) if $filter; - more($self, "$$i $v"); - }); + more($self, $xhdr ? r221 : r225); + long_response($self, \&smsg_range_i, @$r, $field); } } @@ -667,10 +793,13 @@ sub do_hdr ($$$;$) { hdr_message_id($self, $xhdr, $range); } elsif ($sub eq 'xref') { hdr_xref($self, $xhdr, $range); - } elsif ($sub =~ /\A(subject|references|date)\z/) { - hdr_searchmsg($self, $xhdr, $sub, $range); + } elsif ($sub =~ /\A(?:subject|references|date|from|to|cc| + bytes|lines)\z/x) { + hdr_smsg($self, $xhdr, $sub, $range); + } elsif ($sub =~ /\A:(bytes|lines)\z/) { + hdr_smsg($self, $xhdr, $1, $range); } else { - hdr_slow($self, $xhdr, $header, $range); + $xhdr ? (r221 . "\r\n.") : "503 HDR not permitted on $header"; } } @@ -699,45 +828,25 @@ sub hdr_mid_prefix ($$$$$) { } sub hdr_mid_response ($$$$$$) { - my ($self, $xhdr, $ng, $n, $mid, $v) = @_; # r: art_lookup result + my ($self, $xhdr, $ng, $n, $mid, $v) = @_; my $res = ''; if ($xhdr) { $res .= r221 . "\r\n"; - $res .= "$mid $v\r\n" if defined $v; + $res .= "$mid $v\r\n"; } else { - $res .= r224 . "\r\n"; - if (defined $v) { - my $pfx = hdr_mid_prefix($self, $xhdr, $ng, $n, $mid); - $res .= "$pfx $v\r\n"; - } + $res .= r225 . "\r\n"; + my $pfx = hdr_mid_prefix($self, $xhdr, $ng, $n, $mid); + $res .= "$pfx $v\r\n"; } res($self, $res .= '.'); undef; } -sub hdr_slow ($$$$) { - my ($self, $xhdr, $header, $range) = @_; - - if (defined $range && $range =~ /\A<.+>\z/) { # Message-ID - my $r = $self->art_lookup($range, 2); - return $r unless ref $r; - my ($n, $ng) = ($r->[0], $r->[5]); - my $v = hdr_val($r, $header); - hdr_mid_response($self, $xhdr, $ng, $n, $range, $v); - } else { # numeric range - $range = $self->{article} unless defined $range; - my $r = get_range($self, $range); - return $r unless ref $r; - my ($beg, $end) = @$r; - more($self, $xhdr ? r221 : r224); - $self->long_response($beg, $end, sub { - my ($i) = @_; - $r = $self->art_lookup($$i, 2); - return unless ref $r; - defined($r = hdr_val($r, $header)) or return; - more($self, "$$i $r"); - }); - } +sub xrover_i { + my ($self, $beg, $end) = @_; + my $h = over_header_for($self->{ng}->over, $$beg, 'references'); + more($self, "$$beg $h") if defined($h); + $$beg++ < $end; } sub cmd_xrover ($;$) { @@ -749,91 +858,116 @@ sub cmd_xrover ($;$) { $range = $self->{article} unless defined $range; my $r = get_range($self, $range); return $r unless ref $r; - my ($beg, $end) = @$r; - my $mm = $ng->mm; - my $srch = $ng->search; more($self, '224 Overview information follows'); - $self->long_response($beg, $end, sub { - my ($i) = @_; - my $mid = $mm->mid_for($$i) or return; - my $m = header_obj_for($srch, $mid) or return; - my $h = $m->header('references'); - more($self, "$$i $h") if defined $h; - }); + long_response($self, \&xrover_i, @$r); } -sub over_line ($$) { - my ($self, $r) = @_; - - more($self, join("\t", $r->[0], map { - my $h = hdr_val($r, $_); - defined $h ? $h : ''; - } @OVERVIEW )); +sub over_line ($$$$) { + my ($self, $ng, $num, $smsg) = @_; + # n.b. field access and procedural calls can be + # 10%-15% faster than OO method calls: + my $s = join("\t", $num, + $smsg->{subject}, + $smsg->{from}, + PublicInbox::Smsg::date($smsg), + "<$smsg->{mid}>", + $smsg->{references}, + $smsg->{bytes}, + $smsg->{lines}, + "Xref: " . xref($self, $ng, $num, $smsg->{mid})); + utf8::encode($s); + $s } sub cmd_over ($;$) { my ($self, $range) = @_; - if ($range && $range =~ /\A<.+>\z/) { - my $r = $self->art_lookup($range, 2); - return '430 No article with that message-id' unless ref $r; + if ($range && $range =~ $ONE_MSGID) { + my ($ng, $n) = mid_lookup($self, $1); + defined $n or return r430; + my $smsg = $ng->over->get_art($n) or return r430; more($self, '224 Overview information follows (multi-line)'); # Only set article number column if it's the current group - my $ng = $self->{ng}; - $r->[0] = 0 if (!$ng || $ng ne $r->[5]); - over_line($self, $r); + my $self_ng = $self->{ng}; + $n = 0 if (!$self_ng || $self_ng ne $ng); + more($self, over_line($self, $ng, $n, $smsg)); '.'; } else { cmd_xover($self, $range); } } +sub xover_i { + my ($self, $beg, $end) = @_; + my $ng = $self->{ng}; + my $msgs = $ng->over->query_xover($$beg, $end); + my $nr = scalar @$msgs or return; + + # OVERVIEW.FMT + more($self, join("\r\n", map { + over_line($self, $ng, $_->{num}, $_); + } @$msgs)); + $$beg = $msgs->[-1]->{num} + 1; +} + sub cmd_xover ($;$) { my ($self, $range) = @_; $range = $self->{article} unless defined $range; my $r = get_range($self, $range); return $r unless ref $r; my ($beg, $end) = @$r; - more($self, "224 Overview information follows for $beg to $end"); - $self->long_response($beg, $end, sub { - my ($i) = @_; - my $r = $self->art_lookup($$i, 2); - return unless ref $r; - over_line($self, $r); - }); + more($self, "224 Overview information follows for $$beg to $end"); + long_response($self, \&xover_i, @$r); } +sub compressed { undef } + +sub cmd_starttls ($) { + my ($self) = @_; + my $sock = $self->{sock} or return; + # RFC 4642 2.2.1 + return r502 if ($sock->can('accept_SSL') || $self->compressed); + my $opt = $self->{nntpd}->{accept_tls} or + return '580 can not initiate TLS negotiation'; + res($self, '382 Continue with TLS negotiation'); + $self->{sock} = IO::Socket::SSL->start_SSL($sock, %$opt); + $self->requeue if PublicInbox::DS::accept_tls_step($self); + undef; +} + +# RFC 8054 +sub cmd_compress ($$) { + my ($self, $alg) = @_; + return '503 Only DEFLATE is supported' if uc($alg) ne 'DEFLATE'; + return r502 if $self->compressed; + PublicInbox::NNTPdeflate->enable($self); + $self->requeue; + undef +} + +sub zflush {} # overridden by NNTPdeflate + sub cmd_xpath ($$) { my ($self, $mid) = @_; - return r501 unless $mid =~ /\A<(.+)>\z/; + return r501 unless $mid =~ $ONE_MSGID; $mid = $1; my @paths; foreach my $ng (values %{$self->{nntpd}->{groups}}) { my $n = $ng->mm->num_for($mid); - push @paths, "$ng->{name}/$n" if defined $n; + push @paths, "$ng->{newsgroup}/$n" if defined $n; } return '430 no such article on server' unless @paths; '223 '.join(' ', @paths); } -sub res ($$) { - my ($self, $line) = @_; - do_write($self, $line . "\r\n"); -} +sub res ($$) { do_write($_[0], $_[1] . "\r\n") } -sub more ($$) { - my ($self, $line) = @_; - do_more($self, $line . "\r\n"); -} +sub more ($$) { $_[0]->msg_more($_[1] . "\r\n") } sub do_write ($$) { - my ($self, $data) = @_; - my $done = $self->write($data); - die if $self->{closed}; - - # Do not watch for readability if we have data in the queue, - # instead re-enable watching for readability when we can - $self->watch_read(0) if (!$done || $self->{long_res}); + my $self = $_[0]; + my $done = $self->write(\($_[1])); + return 0 unless $self->{sock}; $done; } @@ -848,76 +982,45 @@ sub out ($$;@) { printf { $self->{nntpd}->{out} } $fmt."\n", @args; } -use constant MSG_MORE => ($^O eq 'linux') ? 0x8000 : 0; - -sub do_more ($$) { - my ($self, $data) = @_; - if (MSG_MORE && !$self->{write_buf_size}) { - my $n = send($self->{sock}, $data, MSG_MORE); - if (defined $n) { - my $dlen = length($data); - return 1 if $n == $dlen; # all done! - $data = substr($data, $n, $dlen - $n); - } - } - $self->do_write($data); -} - -# callbacks for by Danga::Socket - -sub event_hup { $_[0]->close } -sub event_err { $_[0]->close } - -sub event_write { +# callback used by PublicInbox::DS for any (e)poll (in/out/hup/err) +sub event_step { my ($self) = @_; - # only continue watching for readability when we are done writing: - if ($self->write(undef) == 1 && !$self->{long_res}) { - $self->watch_read(1); - } -} -sub event_read { - my ($self) = @_; - use constant LINE_MAX => 512; # RFC 977 section 2.3 - my $r = 1; + return unless $self->flush_write && $self->{sock} && !$self->{long_cb}; - my $buf = $self->read(LINE_MAX) or return $self->close; - $self->{rbuf} .= $$buf; - while ($r > 0 && $self->{rbuf} =~ s/\A\s*([^\r\n]+)\r?\n//) { - my $line = $1; - my $t0 = now(); - $r = eval { $self->process_line($line) }; - my $d = $self->{long_res} ? - ' deferred['.fileno($self->{sock}).']' : ''; - out($self, "$line - %0.6f$d", now() - $t0); + $self->update_idle_time; + # only read more requests if we've drained the write buffer, + # otherwise we can be buffering infinitely w/o backpressure + + my $rbuf = $self->{rbuf} // \(my $x = ''); + my $line = index($$rbuf, "\n"); + while ($line < 0) { + return $self->close if length($$rbuf) >= LINE_MAX; + $self->do_read($rbuf, LINE_MAX, length($$rbuf)) or return; + $line = index($$rbuf, "\n"); } + $line = substr($$rbuf, 0, $line + 1, ''); + $line =~ s/\r?\n\z//s; + return $self->close if $line =~ /[[:cntrl:]]/s; + my $t0 = now(); + my $fd = fileno($self->{sock}); + my $r = eval { process_line($self, $line) }; + my $pending = $self->{wbuf} ? ' pending' : ''; + out($self, "[$fd] %s - %0.6f$pending", $line, now() - $t0); return $self->close if $r < 0; - my $len = length($self->{rbuf}); - return $self->close if ($len >= LINE_MAX); -} - -sub watch_read { - my ($self, $bool) = @_; - my $rv = $self->SUPER::watch_read($bool); - if ($bool && $self->{rbuf} ne '') { - # Force another read if there is a pipelined request. - # We don't know if the socket has anything for us to read, - # and we must double-check again by the time the timer fires - # in case we really did dispatch a read event and started - # another long response. - Danga::Socket->AddTimer(0, sub { - if (&Danga::Socket::POLLIN & $self->{event_watch}) { - $self->event_read; - } - }); - } - $rv; + $self->rbuf_idle($rbuf); + $self->update_idle_time; + + # maybe there's more pipelined data, or we'll have + # to register it for socket-readiness notifications + $self->requeue unless $pending; } -sub busy () { - my ($self) = @_; - ($self->{rbuf} ne '' || $self->{long_res} || $self->{write_buf_size}); +# for graceful shutdown in PublicInbox::Daemon: +sub busy { + my ($self, $now) = @_; + ($self->{rbuf} || $self->{wbuf} || $self->not_idle_long($now)); } 1;