X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=blobdiff_plain;f=lib%2FPublicInbox%2FNNTP.pm;h=e51793505cf515851f7031dc65c982918076f89c;hp=796f091ff67b11f221ae4dabda1ed75730d32721;hb=b8c41362f2a5c8fcc6b1846a79c72bfa77565297;hpb=63a19b146bd37ecc361620fc520a407113f0c4c1 diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index 796f091f..e5179350 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -1,5 +1,7 @@ -# Copyright (C) 2015 all contributors -# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt) +# Copyright (C) 2015-2018 all contributors +# License: AGPL-3.0+ +# +# Each instance of this represents a NNTP client socket package PublicInbox::NNTP; use strict; use warnings; @@ -7,12 +9,14 @@ use base qw(Danga::Socket); use fields qw(nntpd article rbuf ng long_res); use PublicInbox::Search; use PublicInbox::Msgmap; -use PublicInbox::GitCatFile; -use PublicInbox::MID qw(mid2path); -use Email::MIME; -use Data::Dumper qw(Dumper); +use PublicInbox::MID qw(mid_escape); +use PublicInbox::Git; +require PublicInbox::EvCleanup; +use Email::Simple; use POSIX qw(strftime); use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC); +use Digest::SHA qw(sha1_hex); +use Time::Local qw(timegm timelocal); use constant { r501 => '501 command syntax error', r221 => '221 Header follows', @@ -30,25 +34,39 @@ my $LIST_HEADERS = join("\r\n", @OVERVIEW, qw(:bytes :lines Xref To Cc)) . "\r\n"; # disable commands with easy DoS potential: -# LISTGROUP could get pretty bad, too... my %DISABLED; # = map { $_ => 1 } qw(xover list_overview_fmt newnews xhdr); my $EXPMAP; # fd -> [ idle_time, $self ] -my $EXPTIMER; +my $expt; our $EXPTIME = 180; # 3 minutes +my $nextt; + +my $nextq = []; +sub next_tick () { + $nextt = undef; + my $q = $nextq; + $nextq = []; + foreach my $nntp (@$q) { + # for request && response protocols, always finish writing + # before finishing reading: + if (my $long_cb = $nntp->{long_res}) { + $nntp->write($long_cb); + } elsif (&Danga::Socket::POLLIN & $nntp->{event_watch}) { + event_read($nntp); + } + } +} sub update_idle_time ($) { my ($self) = @_; - my $tmp = $self->{sock} or return; - $tmp = fileno($tmp); - defined $tmp and $EXPMAP->{$tmp} = [ now(), $self ]; + my $fd = $self->{fd}; + defined $fd and $EXPMAP->{$fd} = [ now(), $self ]; } sub expire_old () { my $now = now(); my $exp = $EXPTIME; my $old = $now - $exp; - my $next = $now + $exp; my $nr = 0; my %new; while (my ($fd, $v) = each %$EXPMAP) { @@ -56,36 +74,31 @@ sub expire_old () { if ($idle_time < $old) { $nntp->close; # idempotent } else { - my $nexp = $idle_time + $exp; - $next = $nexp if ($nexp < $next); ++$nr; $new{$fd} = $v; } } $EXPMAP = \%new; if ($nr) { - $next -= $now; - $next = 0 if $next < 0; - $EXPTIMER = Danga::Socket->AddTimer($next, *expire_old); + $expt = PublicInbox::EvCleanup::later(*expire_old); } else { - $EXPTIMER = undef; - # noop to kick outselves out of the loop so descriptors + $expt = undef; + # noop to kick outselves out of the loop ASAP so descriptors # really get closed - Danga::Socket->AddTimer(0, *expire_cleanup); + PublicInbox::EvCleanup::asap(sub {}); } } sub new ($$$) { my ($class, $sock, $nntpd) = @_; my $self = fields::new($class); - binmode $sock, ':utf8'; # RFC 3977 $self->SUPER::new($sock); $self->{nntpd} = $nntpd; res($self, '201 server ready - post via email'); $self->{rbuf} = ''; $self->watch_read(1); update_idle_time($self); - $EXPTIMER ||= Danga::Socket->AddTimer($EXPTIME, *expire_old); + $expt ||= PublicInbox::EvCleanup::later(*expire_old); $self; } @@ -113,8 +126,9 @@ sub process_line ($$) { my $res = eval { $req->($self, @args) }; my $err = $@; if ($err && !$self->{closed}) { - chomp($l = Dumper(\$l)); - err($self, "error from: $l $err"); + local $/ = "\n"; + chomp($l); + err($self, 'error from: %s (%s)', $l, $err); $res = '503 program fault - command not performed'; } return 0 unless defined $res; @@ -151,7 +165,7 @@ sub list_active ($;$) { my ($self, $wildmat) = @_; wildmat2re($wildmat); foreach my $ng (@{$self->{nntpd}->{grouplist}}) { - $ng->{name} =~ $wildmat or next; + $ng->{newsgroup} =~ $wildmat or next; group_line($self, $ng); } } @@ -160,9 +174,9 @@ sub list_active_times ($;$) { my ($self, $wildmat) = @_; wildmat2re($wildmat); foreach my $ng (@{$self->{nntpd}->{grouplist}}) { - $ng->{name} =~ $wildmat or next; + $ng->{newsgroup} =~ $wildmat or next; my $c = eval { $ng->mm->created_at } || time; - more($self, "$ng->{name} $c $ng->{address}"); + more($self, "$ng->{newsgroup} $c $ng->{-primary_address}"); } } @@ -170,9 +184,9 @@ sub list_newsgroups ($;$) { my ($self, $wildmat) = @_; wildmat2re($wildmat); foreach my $ng (@{$self->{nntpd}->{grouplist}}) { - $ng->{name} =~ $wildmat or next; + $ng->{newsgroup} =~ $wildmat or next; my $d = $ng->description; - more($self, "$ng->{name} $d"); + more($self, "$ng->{newsgroup} $d"); } } @@ -210,24 +224,20 @@ sub cmd_listgroup ($;$) { } $self->{ng} or return '412 no newsgroup selected'; - $self->long_response(0, long_response_limit, sub { - my ($i) = @_; - my $nr = $self->{ng}->mm->id_batch($$i, sub { - my ($ary) = @_; - more($self, join("\r\n", @$ary)); - }); - - # -1 to adjust for implicit increment in long_response - $$i = $nr ? $$i + $nr - 1 : long_response_limit; + my $n = 0; + long_response($self, sub { + my $ary = $self->{ng}->mm->ids_after(\$n); + scalar @$ary or return; + more($self, join("\r\n", @$ary)); + 1; }); } sub parse_time ($$;$) { my ($date, $time, $gmt) = @_; - use Time::Local qw(); my ($hh, $mm, $ss) = unpack('A2A2A2', $time); if (defined $gmt) { - $gmt =~ /\A(?:UTC|GMT)\z/i or die "GM invalid: $gmt\n"; + $gmt =~ /\A(?:UTC|GMT)\z/i or die "GM invalid: $gmt"; $gmt = 1; } my @now = $gmt ? gmtime : localtime; @@ -236,22 +246,22 @@ sub parse_time ($$;$) { ($YYYY, $MM, $DD) = unpack('A4A2A2', $date); } else { # legacy clients send YYMMDD ($YYYY, $MM, $DD) = unpack('A2A2A2', $date); - if ($YYYY > strftime('%y', @now)) { - my $cur_year = $now[5] + 1900; + my $cur_year = $now[5] + 1900; + if ($YYYY > $cur_year) { $YYYY += int($cur_year / 1000) * 1000 - 100; } } if ($gmt) { - Time::Local::timegm($ss, $mm, $hh, $DD, $MM - 1, $YYYY); + timegm($ss, $mm, $hh, $DD, $MM - 1, $YYYY); } else { - Time::Local::timelocal($ss, $mm, $hh, $DD, $MM - 1, $YYYY); + timelocal($ss, $mm, $hh, $DD, $MM - 1, $YYYY); } } sub group_line ($$) { my ($self, $ng) = @_; my ($min, $max) = $ng->mm->minmax; - more($self, "$ng->{name} $max $min n") if defined $min && defined $max; + more($self, "$ng->{newsgroup} $max $min n") if defined $min && defined $max; } sub cmd_newgroups ($$$;$$) { @@ -273,7 +283,6 @@ sub wildmat2re (;$) { return $_[0] = qr/.*/ if (!defined $_[0] || $_[0] eq '*'); my %keep; my $salt = rand; - use Digest::SHA qw(sha1_hex); my $tmp = $_[0]; $tmp =~ s#(?{nntpd}->{grouplist}}) { - $ng->{name} =~ $keep or next; - $ng->{name} =~ $skip and next; + $ng->{newsgroup} =~ $keep or next; + $ng->{newsgroup} =~ $skip and next; my $srch = $ng->search or next; push @srch, $srch; }; return '.' unless @srch; - $ts .= '..'; - my $opts = { asc => 1, limit => 1000, offset => 0 }; - $self->long_response(0, long_response_limit, sub { - my ($i) = @_; + my $prev = 0; + long_response($self, sub { my $srch = $srch[0]; - my $res = $srch->query($ts, $opts); - my $msgs = $res->{msgs}; - if (my $nr = scalar @$msgs) { + my $msgs = $srch->query_ts($ts, $prev); + if (scalar @$msgs) { more($self, '<' . join(">\r\n<", map { $_->mid } @$msgs ). '>'); - $opts->{offset} += $nr; + $prev = $msgs->[-1]->{num}; } else { shift @srch; if (@srch) { # continue onto next newsgroup - $opts->{offset} = 0; + $prev = 0; + return 1; } else { # break out of the long response. - $$i = long_response_limit; + return; } } }); @@ -380,7 +387,8 @@ sub cmd_last ($) { article_adj($_[0], -1) } sub cmd_post ($) { my ($self) = @_; my $ng = $self->{ng}; - $ng ? "440 mailto:$ng->{address} to post" : '440 posting not allowed' + $ng ? "440 mailto:$ng->{-primary_address} to post" + : '440 posting not allowed' } sub cmd_quit ($) { @@ -390,6 +398,29 @@ sub cmd_quit ($) { undef; } +sub header_append ($$$) { + my ($hdr, $k, $v) = @_; + my @v = $hdr->header($k); + foreach (@v) { + return if $v eq $_; + } + $hdr->header_set($k, @v, $v); +} + +sub set_nntp_headers { + my ($hdr, $ng, $n, $mid) = @_; + + # clobber some + $hdr->header_set('Newsgroups', $ng->{newsgroup}); + $hdr->header_set('Xref', xref($ng, $n)); + header_append($hdr, 'List-Post', "{-primary_address}>"); + if (my $url = $ng->base_url) { + $mid = mid_escape($mid); + header_append($hdr, 'Archived-At', "<$url$mid/>"); + header_append($hdr, 'List-Archive', "<$url>"); + } +} + sub art_lookup ($$$) { my ($self, $art, $set_headers) = @_; my $ng = $self->{ng}; @@ -426,20 +457,16 @@ find_mid: defined $mid or return $err; } found: - my $o = 'HEAD:' . mid2path($mid); - my $bytes; - my $s = eval { Email::MIME->new($ng->gcf->cat_file($o, \$bytes)) }; - return $err unless $s; - my $lines; + my $smsg = $ng->search->{over_ro}->get_art($n) or return $err; + my $msg = $ng->msg_by_smsg($smsg) or return $err; + my $s = Email::Simple->new($msg); if ($set_headers) { - $s->header_set('Newsgroups', $ng->{name}); - $s->header_set('Xref', xref($ng, $n)); - $lines = $s->body =~ tr!\n!\n!; + set_nntp_headers($s->header_obj, $ng, $n, $mid); # must be last $s->body_set('') if ($set_headers == 2); } - [ $n, $mid, $s, $bytes, $lines, $ng ]; + [ $n, $mid, $s, $smsg->bytes, $smsg->lines, $ng ]; } sub simple_body_write ($$) { @@ -458,32 +485,38 @@ sub set_art { $self->{article} = $art if defined $art && $art =~ /\A\d+\z/; } +sub _header ($) { + my $hdr = $_[0]->header_obj->as_string; + utf8::encode($hdr); + $hdr +} + sub cmd_article ($;$) { my ($self, $art) = @_; - my $r = $self->art_lookup($art, 1); + my $r = art_lookup($self, $art, 1); return $r unless ref $r; my ($n, $mid, $s) = @$r; set_art($self, $art); more($self, "220 $n <$mid> article retrieved - head and body follow"); - do_more($self, $s->header_obj->as_string); + do_more($self, _header($s)); do_more($self, "\r\n"); simple_body_write($self, $s); } sub cmd_head ($;$) { my ($self, $art) = @_; - my $r = $self->art_lookup($art, 2); + my $r = art_lookup($self, $art, 2); return $r unless ref $r; my ($n, $mid, $s) = @$r; set_art($self, $art); more($self, "221 $n <$mid> article retrieved - head follows"); - do_more($self, $s->header_obj->as_string); + do_more($self, _header($s)); '.' } sub cmd_body ($;$) { my ($self, $art) = @_; - my $r = $self->art_lookup($art, 0); + my $r = art_lookup($self, $art, 0); return $r unless ref $r; my ($n, $mid, $s) = @$r; set_art($self, $art); @@ -493,7 +526,7 @@ sub cmd_body ($;$) { sub cmd_stat ($;$) { my ($self, $art) = @_; - my $r = $self->art_lookup($art, 0); + my $r = art_lookup($self, $art, 0); return $r unless ref $r; my ($n, $mid, undef) = @$r; set_art($self, $art); @@ -531,18 +564,8 @@ sub get_range ($$) { [ $beg, $end ]; } -sub hdr_val ($$) { - my ($r, $header) = @_; - return $r->[3] if $header =~ /\A:?bytes\z/i; - return $r->[4] if $header =~ /\A:?lines\z/i; - $r = $r->[2]->header_obj->header($header); - defined $r or return; - $r =~ s/[\r\n\t]+/ /sg; - $r; -} - -sub long_response ($$$$) { - my ($self, $beg, $end, $cb) = @_; +sub long_response ($$) { + my ($self, $cb) = @_; die "BUG: nested long response" if $self->{long_res}; my $fd = $self->{fd}; @@ -553,23 +576,14 @@ sub long_response ($$$$) { $self->watch_read(0); my $t0 = now(); $self->{long_res} = sub { - # limit our own running time for fairness with other - # clients and to avoid buffering too much: - my $lim = 100; - - my $err; - do { - eval { $cb->(\$beg) }; - } until (($err = $@) || $self->{closed} || - ++$beg > $end || !--$lim || $self->{write_buf_size}); - - if ($err || $self->{closed}) { + my $more = eval { $cb->() }; + if ($@ || $self->{closed}) { $self->{long_res} = undef; - if ($err) { + if ($@) { err($self, - "$err during long response[$fd] - %0.6f", - now() - $t0); + "%s during long response[$fd] - %0.6f", + $@, now() - $t0); } if ($self->{closed}) { out($self, " deferred[$fd] aborted - %0.6f", @@ -578,13 +592,13 @@ sub long_response ($$$$) { update_idle_time($self); $self->watch_read(1); } - } elsif (!$lim || $self->{write_buf_size}) { + } elsif ($more) { # $self->{write_buf_size}: # no recursion, schedule another call ASAP # but only after all pending writes are done update_idle_time($self); - Danga::Socket->AddTimer(0, sub { - $self->write($self->{long_res}); - }); + + push @$nextq, $self; + $nextt ||= PublicInbox::EvCleanup::asap(*next_tick); } else { # all done! $self->{long_res} = undef; $self->watch_read(1); @@ -610,17 +624,20 @@ sub hdr_message_id ($$$) { # optimize XHDR Message-ID [range] for slrnpull. my $mm = $self->{ng}->mm; my ($beg, $end) = @$r; more($self, $xhdr ? r221 : r225); - $self->long_response($beg, $end, sub { - my ($i) = @_; - my $mid = $mm->mid_for($$i); - more($self, "$$i <$mid>") if defined $mid; + long_response($self, sub { + my $r = $mm->msg_range(\$beg, $end); + @$r or return; + more($self, join("\r\n", map { + "$_->[0] <$_->[1]>" + } @$r)); + 1; }); } } sub xref ($$) { my ($ng, $n) = @_; - "$ng->{domain} $ng->{name}:$n" + "$ng->{domain} $ng->{newsgroup}:$n" } sub mid_lookup ($$) { @@ -653,27 +670,33 @@ sub hdr_xref ($$$) { # optimize XHDR Xref [range] for rtin my $mm = $ng->mm; my ($beg, $end) = @$r; more($self, $xhdr ? r221 : r225); - $self->long_response($beg, $end, sub { - my ($i) = @_; - my $mid = $mm->mid_for($$i); - more($self, "$$i ".xref($ng, $$i)) if defined $mid; + long_response($self, sub { + my $r = $mm->msg_range(\$beg, $end); + @$r or return; + more($self, join("\r\n", map { + # TODO: use $_->[1] (mid) to fill + # Xref: from other inboxes + my $num = $_->[0]; + "$num ".xref($ng, $num); + } @$r)); + 1; }); } } sub search_header_for { - my ($srch, $mid, $field) = @_; - my $smsg = $srch->lookup_message($mid) or return; - $smsg = PublicInbox::SearchMsg->load_doc($smsg->{doc}); - $smsg->$field; + my ($srch, $num, $field) = @_; + my $smsg = $srch->{over_ro}->get_art($num) or return; + return PublicInbox::SearchMsg::date($smsg) if $field eq 'date'; + $smsg->{$field}; } sub hdr_searchmsg ($$$$) { my ($self, $xhdr, $field, $range) = @_; if (defined $range && $range =~ /\A<(.+)>\z/) { # Message-ID my ($ng, $n) = mid_lookup($self, $1); - return r430 unless $n; - my $v = search_header_for($ng->search, $range, $field); + return r430 unless defined $n; + my $v = search_header_for($ng->search, $n, $field); hdr_mid_response($self, $xhdr, $ng, $n, $range, $v); } else { # numeric range $range = $self->{article} unless defined $range; @@ -683,20 +706,17 @@ sub hdr_searchmsg ($$$$) { return $r unless ref $r; my ($beg, $end) = @$r; more($self, $xhdr ? r221 : r225); - my $off = 0; - $self->long_response($beg, $end, sub { - my ($i) = @_; - my $res = $srch->query_xover($beg, $end, $off); - my $msgs = $res->{msgs}; + my $cur = $beg; + long_response($self, sub { + my $msgs = $srch->query_xover($cur, $end); my $nr = scalar @$msgs or return; - $off += $nr; my $tmp = ''; foreach my $s (@$msgs) { $tmp .= $s->num . ' ' . $s->$field . "\r\n"; } + utf8::encode($tmp); do_more($self, $tmp); - # -1 to adjust for implicit increment in long_response - $$i = $nr ? $$i + $nr - 1 : long_response_limit; + $cur = $msgs->[-1]->{num} + 1; }); } } @@ -770,11 +790,11 @@ sub cmd_xrover ($;$) { my $mm = $ng->mm; my $srch = $ng->search; more($self, '224 Overview information follows'); - $self->long_response($beg, $end, sub { - my ($i) = @_; - my $mid = $mm->mid_for($$i) or return; - my $h = search_header_for($srch, $mid, 'references'); - more($self, "$$i $h"); + + long_response($self, sub { + my $h = search_header_for($srch, $beg, 'references'); + more($self, "$beg $h") if defined($h); + $beg++ < $end; }); } @@ -782,24 +802,25 @@ sub over_line ($$) { my ($num, $smsg) = @_; # n.b. field access and procedural calls can be # 10%-15% faster than OO method calls: - join("\t", $num, + my $s = join("\t", $num, $smsg->{subject}, $smsg->{from}, PublicInbox::SearchMsg::date($smsg), - '<'.PublicInbox::SearchMsg::mid($smsg).'>', + "<$smsg->{mid}>", $smsg->{references}, - PublicInbox::SearchMsg::bytes($smsg), - PublicInbox::SearchMsg::lines($smsg)); + $smsg->{bytes}, + $smsg->{lines}); + utf8::encode($s); + $s } sub cmd_over ($;$) { my ($self, $range) = @_; if ($range && $range =~ /\A<(.+)>\z/) { my ($ng, $n) = mid_lookup($self, $1); - my $smsg = $ng->search->lookup_message($range) or - return '430 No article with that message-id'; + defined $n or return r430; + my $smsg = $ng->search->{over_ro}->get_art($n) or return r430; more($self, '224 Overview information follows (multi-line)'); - $smsg = PublicInbox::SearchMsg->load_doc($smsg->{doc}); # Only set article number column if it's the current group my $self_ng = $self->{ng}; @@ -819,21 +840,16 @@ sub cmd_xover ($;$) { my ($beg, $end) = @$r; more($self, "224 Overview information follows for $beg to $end"); my $srch = $self->{ng}->search; - my $off = 0; - $self->long_response($beg, $end, sub { - my ($i) = @_; - my $res = $srch->query_xover($beg, $end, $off); - my $msgs = $res->{msgs}; + my $cur = $beg; + long_response($self, sub { + my $msgs = $srch->query_xover($cur, $end); my $nr = scalar @$msgs or return; - $off += $nr; # OVERVIEW.FMT more($self, join("\r\n", map { - over_line(PublicInbox::SearchMsg::num($_), $_); + over_line($_->{num}, $_); } @$msgs)); - - # -1 to adjust for implicit increment in long_response - $$i = $nr ? $$i + $nr - 1 : long_response_limit; + $cur = $msgs->[-1]->{num} + 1; }); } @@ -844,7 +860,7 @@ sub cmd_xpath ($$) { my @paths; foreach my $ng (values %{$self->{nntpd}->{groups}}) { my $n = $ng->mm->num_for($mid); - push @paths, "$ng->{name}/$n" if defined $n; + push @paths, "$ng->{newsgroup}/$n" if defined $n; } return '430 no such article on server' unless @paths; '223 '.join(' ', @paths); @@ -863,7 +879,7 @@ sub more ($$) { sub do_write ($$) { my ($self, $data) = @_; my $done = $self->write($data); - die if $self->{closed}; + return if $self->{closed}; # Do not watch for readability if we have data in the queue, # instead re-enable watching for readability when we can @@ -894,10 +910,10 @@ sub do_more ($$) { $data = substr($data, $n, $dlen - $n); } } - $self->do_write($data); + do_write($self, $data); } -# callbacks for by Danga::Socket +# callbacks for Danga::Socket sub event_hup { $_[0]->close } sub event_err { $_[0]->close } @@ -920,12 +936,13 @@ sub event_read { $self->{rbuf} .= $$buf; while ($r > 0 && $self->{rbuf} =~ s/\A\s*([^\r\n]+)\r?\n//) { my $line = $1; + return $self->close if $line =~ /[[:cntrl:]]/s; my $t0 = now(); my $fd = $self->{fd}; - $r = eval { $self->process_line($line) }; + $r = eval { process_line($self, $line) }; my $d = $self->{long_res} ? " deferred[$fd]" : ''; - out($self, "[$fd] $line - %0.6f$d", now() - $t0); + out($self, "[$fd] %s - %0.6f$d", $line, now() - $t0); } return $self->close if $r < 0; @@ -943,18 +960,25 @@ sub watch_read { # and we must double-check again by the time the timer fires # in case we really did dispatch a read event and started # another long response. - Danga::Socket->AddTimer(0, sub { - if (&Danga::Socket::POLLIN & $self->{event_watch}) { - $self->event_read; - } - }); + push @$nextq, $self; + $nextt ||= PublicInbox::EvCleanup::asap(*next_tick); } $rv; } -sub busy () { - my ($self) = @_; - ($self->{rbuf} ne '' || $self->{long_res} || $self->{write_buf_size}); +sub not_idle_long ($$) { + my ($self, $now) = @_; + defined(my $fd = $self->{fd}) or return; + my $ary = $EXPMAP->{$fd} or return; + my $exp_at = $ary->[0] + $EXPTIME; + $exp_at > $now; +} + +# for graceful shutdown in PublicInbox::Daemon: +sub busy { + my ($self, $now) = @_; + ($self->{rbuf} ne '' || $self->{long_res} || $self->{write_buf_size} || + not_idle_long($self, $now)); } 1;