X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FNNTP.pm;h=c574c9e62290dc2dbea4085d866675983126c414;hb=119463b3b8517e5ec149198bb83588999118ee1d;hp=58b86a8297a5ba5e4a60941c5533277530e9235d;hpb=528230a0411897a14cbb79d0dae02ea89827bf1e;p=public-inbox.git diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index 58b86a82..c574c9e6 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -1,5 +1,5 @@ -# Copyright (C) 2015 all contributors -# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt) +# Copyright (C) 2015-2018 all contributors +# License: AGPL-3.0+ # # Each instance of this represents a NNTP client socket package PublicInbox::NNTP; @@ -9,15 +9,14 @@ use base qw(Danga::Socket); use fields qw(nntpd article rbuf ng long_res); use PublicInbox::Search; use PublicInbox::Msgmap; +use PublicInbox::MID qw(mid_escape); use PublicInbox::Git; -use PublicInbox::MID qw(mid2path); require PublicInbox::EvCleanup; use Email::Simple; use POSIX qw(strftime); use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC); -use URI::Escape qw(uri_escape_utf8); -use Encode qw(find_encoding); -my $enc_utf8 = find_encoding('UTF-8'); +use Digest::SHA qw(sha1_hex); +use Time::Local qw(timegm timelocal); use constant { r501 => '501 command syntax error', r221 => '221 Header follows', @@ -41,8 +40,6 @@ my %DISABLED; # = map { $_ => 1 } qw(xover list_overview_fmt newnews xhdr); my $EXPMAP; # fd -> [ idle_time, $self ] my $expt; our $EXPTIME = 180; # 3 minutes -my $WEAKEN = {}; # string(nntpd) -> nntpd -my $weakt; my $nextt; my $nextq = []; @@ -63,19 +60,8 @@ sub next_tick () { sub update_idle_time ($) { my ($self) = @_; - my $tmp = $self->{sock} or return; - $tmp = fileno($tmp); - defined $tmp and $EXPMAP->{$tmp} = [ now(), $self ]; -} - -# reduce FD pressure by closing some "git cat-file --batch" processes -# and unused FDs for msgmap and Xapian indices -sub weaken_groups () { - $weakt = undef; - foreach my $nntpd (values %$WEAKEN) { - $_->weaken_all foreach (@{$nntpd->{grouplist}}); - } - $WEAKEN = {}; + my $fd = $self->{fd}; + defined $fd and $EXPMAP->{$fd} = [ now(), $self ]; } sub expire_old () { @@ -96,15 +82,11 @@ sub expire_old () { $EXPMAP = \%new; if ($nr) { $expt = PublicInbox::EvCleanup::later(*expire_old); - weaken_groups(); } else { $expt = undef; # noop to kick outselves out of the loop ASAP so descriptors # really get closed PublicInbox::EvCleanup::asap(sub {}); - - # grace period for reaping resources - $weakt ||= PublicInbox::EvCleanup::later(*weaken_groups); } } @@ -117,7 +99,6 @@ sub new ($$$) { $self->{rbuf} = ''; $self->watch_read(1); update_idle_time($self); - $WEAKEN->{"$nntpd"} = $nntpd; $expt ||= PublicInbox::EvCleanup::later(*expire_old); $self; } @@ -135,6 +116,7 @@ sub args_ok ($$) { sub process_line ($$) { my ($self, $l) = @_; my ($req, @args) = split(/\s+/, $l); + return unless defined($req); $req = lc($req); $req = eval { no strict 'refs'; @@ -146,6 +128,7 @@ sub process_line ($$) { my $res = eval { $req->($self, @args) }; my $err = $@; if ($err && !$self->{closed}) { + local $/ = "\n"; chomp($l); err($self, 'error from: %s (%s)', $l, $err); $res = '503 program fault - command not performed'; @@ -257,7 +240,6 @@ sub cmd_listgroup ($;$) { sub parse_time ($$;$) { my ($date, $time, $gmt) = @_; - use Time::Local qw(); my ($hh, $mm, $ss) = unpack('A2A2A2', $time); if (defined $gmt) { $gmt =~ /\A(?:UTC|GMT)\z/i or die "GM invalid: $gmt"; @@ -269,15 +251,15 @@ sub parse_time ($$;$) { ($YYYY, $MM, $DD) = unpack('A4A2A2', $date); } else { # legacy clients send YYMMDD ($YYYY, $MM, $DD) = unpack('A2A2A2', $date); - if ($YYYY > strftime('%y', @now)) { - my $cur_year = $now[5] + 1900; + my $cur_year = $now[5] + 1900; + if ($YYYY > $cur_year) { $YYYY += int($cur_year / 1000) * 1000 - 100; } } if ($gmt) { - Time::Local::timegm($ss, $mm, $hh, $DD, $MM - 1, $YYYY); + timegm($ss, $mm, $hh, $DD, $MM - 1, $YYYY); } else { - Time::Local::timelocal($ss, $mm, $hh, $DD, $MM - 1, $YYYY); + timelocal($ss, $mm, $hh, $DD, $MM - 1, $YYYY); } } @@ -306,7 +288,6 @@ sub wildmat2re (;$) { return $_[0] = qr/.*/ if (!defined $_[0] || $_[0] eq '*'); my %keep; my $salt = rand; - use Digest::SHA qw(sha1_hex); my $tmp = $_[0]; $tmp =~ s#(?query($ts, $opts); + my $res = $srch->query_ts($ts, $opts); my $msgs = $res->{msgs}; if (my $nr = scalar @$msgs) { more($self, '<' . @@ -441,7 +422,7 @@ sub set_nntp_headers { $hdr->header_set('Xref', xref($ng, $n)); header_append($hdr, 'List-Post', "{-primary_address}>"); if (my $url = $ng->base_url) { - $mid = uri_escape_utf8($mid); + $mid = mid_escape($mid); header_append($hdr, 'Archived-At', "<$url$mid/>"); header_append($hdr, 'List-Archive', "<$url>"); } @@ -483,10 +464,9 @@ find_mid: defined $mid or return $err; } found: - my $o = 'HEAD:' . mid2path($mid); my $bytes; - my $s = eval { Email::Simple->new($ng->gcf->cat_file($o, \$bytes)) }; - return $err unless $s; + my $s = eval { $ng->msg_by_mid($mid, \$bytes) } or return $err; + $s = Email::Simple->new($s); my $lines; if ($set_headers) { set_nntp_headers($s->header_obj, $ng, $n, $mid); @@ -514,6 +494,12 @@ sub set_art { $self->{article} = $art if defined $art && $art =~ /\A\d+\z/; } +sub _header ($) { + my $hdr = $_[0]->header_obj->as_string; + utf8::encode($hdr); + $hdr +} + sub cmd_article ($;$) { my ($self, $art) = @_; my $r = art_lookup($self, $art, 1); @@ -521,7 +507,7 @@ sub cmd_article ($;$) { my ($n, $mid, $s) = @$r; set_art($self, $art); more($self, "220 $n <$mid> article retrieved - head and body follow"); - do_more($self, $s->header_obj->as_string); + do_more($self, _header($s)); do_more($self, "\r\n"); simple_body_write($self, $s); } @@ -533,7 +519,7 @@ sub cmd_head ($;$) { my ($n, $mid, $s) = @$r; set_art($self, $art); more($self, "221 $n <$mid> article retrieved - head follows"); - do_more($self, $s->header_obj->as_string); + do_more($self, _header($s)); '.' } @@ -605,9 +591,10 @@ sub long_response ($$$$) { my $err; do { - eval { $cb->(\$beg) }; + eval { $cb->(\$beg, \$lim) }; } until (($err = $@) || $self->{closed} || - ++$beg > $end || !--$lim || $self->{write_buf_size}); + ++$beg > $end || --$lim < 0 || + $self->{write_buf_size}); if ($err || $self->{closed}) { $self->{long_res} = undef; @@ -624,7 +611,7 @@ sub long_response ($$$$) { update_idle_time($self); $self->watch_read(1); } - } elsif (!$lim || $self->{write_buf_size}) { + } elsif ($lim < 0 || $self->{write_buf_size}) { # no recursion, schedule another call ASAP # but only after all pending writes are done update_idle_time($self); @@ -709,8 +696,7 @@ sub hdr_xref ($$$) { # optimize XHDR Xref [range] for rtin sub search_header_for { my ($srch, $mid, $field) = @_; - my $smsg = $srch->lookup_message($mid) or return; - $smsg = PublicInbox::SearchMsg->load_doc($smsg->{doc}); + my $smsg = $srch->lookup_mail($mid) or return; $smsg->$field; } @@ -731,15 +717,17 @@ sub hdr_searchmsg ($$$$) { more($self, $xhdr ? r221 : r225); my $off = 0; long_response($self, $beg, $end, sub { - my ($i) = @_; + my ($i, $lim) = @_; my $res = $srch->query_xover($beg, $end, $off); my $msgs = $res->{msgs}; my $nr = scalar @$msgs or return; $off += $nr; + $$lim -= $nr; my $tmp = ''; foreach my $s (@$msgs) { $tmp .= $s->num . ' ' . $s->$field . "\r\n"; } + utf8::encode($tmp); do_more($self, $tmp); # -1 to adjust for implicit increment in long_response $$i = $nr ? $$i + $nr - 1 : long_response_limit; @@ -828,7 +816,7 @@ sub over_line ($$) { my ($num, $smsg) = @_; # n.b. field access and procedural calls can be # 10%-15% faster than OO method calls: - join("\t", $num, + my $s = join("\t", $num, $smsg->{subject}, $smsg->{from}, PublicInbox::SearchMsg::date($smsg), @@ -836,16 +824,17 @@ sub over_line ($$) { $smsg->{references}, PublicInbox::SearchMsg::bytes($smsg), PublicInbox::SearchMsg::lines($smsg)); + utf8::encode($s); + $s } sub cmd_over ($;$) { my ($self, $range) = @_; if ($range && $range =~ /\A<(.+)>\z/) { my ($ng, $n) = mid_lookup($self, $1); - my $smsg = $ng->search->lookup_message($range) or + my $smsg = $ng->search->lookup_mail($range) or return '430 No article with that message-id'; more($self, '224 Overview information follows (multi-line)'); - $smsg = PublicInbox::SearchMsg->load_doc($smsg->{doc}); # Only set article number column if it's the current group my $self_ng = $self->{ng}; @@ -867,11 +856,12 @@ sub cmd_xover ($;$) { my $srch = $self->{ng}->search; my $off = 0; long_response($self, $beg, $end, sub { - my ($i) = @_; + my ($i, $lim) = @_; my $res = $srch->query_xover($beg, $end, $off); my $msgs = $res->{msgs}; my $nr = scalar @$msgs or return; $off += $nr; + $$lim -= $nr; # OVERVIEW.FMT more($self, join("\r\n", map { @@ -898,7 +888,6 @@ sub cmd_xpath ($$) { sub res ($$) { my ($self, $line) = @_; - $line = $enc_utf8->encode($line); do_write($self, $line . "\r\n"); } @@ -910,7 +899,7 @@ sub more ($$) { sub do_write ($$) { my ($self, $data) = @_; my $done = $self->write($data); - die if $self->{closed}; + return if $self->{closed}; # Do not watch for readability if we have data in the queue, # instead re-enable watching for readability when we can @@ -933,7 +922,6 @@ use constant MSG_MORE => ($^O eq 'linux') ? 0x8000 : 0; sub do_more ($$) { my ($self, $data) = @_; - $data = $enc_utf8->encode($data); if (MSG_MORE && !$self->{write_buf_size}) { my $n = send($self->{sock}, $data, MSG_MORE); if (defined $n) { @@ -962,11 +950,13 @@ sub event_write { sub event_read { my ($self) = @_; use constant LINE_MAX => 512; # RFC 977 section 2.3 - my $r = 1; - my $buf = $self->read(LINE_MAX) or return $self->close; - $self->{rbuf} .= $$buf; - while ($r > 0 && $self->{rbuf} =~ s/\A\s*([^\r\n]+)\r?\n//) { + if (index($self->{rbuf}, "\n") < 0) { + my $buf = $self->read(LINE_MAX) or return $self->close; + $self->{rbuf} .= $$buf; + } + my $r = 1; + while ($r > 0 && $self->{rbuf} =~ s/\A\s*([^\r\n]*)\r?\n//) { my $line = $1; return $self->close if $line =~ /[[:cntrl:]]/s; my $t0 = now(); @@ -986,7 +976,7 @@ sub event_read { sub watch_read { my ($self, $bool) = @_; my $rv = $self->SUPER::watch_read($bool); - if ($bool && $self->{rbuf} ne '') { + if ($bool && index($self->{rbuf}, "\n") >= 0) { # Force another read if there is a pipelined request. # We don't know if the socket has anything for us to read, # and we must double-check again by the time the timer fires @@ -998,10 +988,19 @@ sub watch_read { $rv; } +sub not_idle_long ($$) { + my ($self, $now) = @_; + defined(my $fd = $self->{fd}) or return; + my $ary = $EXPMAP->{$fd} or return; + my $exp_at = $ary->[0] + $EXPTIME; + $exp_at > $now; +} + # for graceful shutdown in PublicInbox::Daemon: -sub busy () { - my ($self) = @_; - ($self->{rbuf} ne '' || $self->{long_res} || $self->{write_buf_size}); +sub busy { + my ($self, $now) = @_; + ($self->{rbuf} ne '' || $self->{long_res} || $self->{write_buf_size} || + not_idle_long($self, $now)); } 1;