X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FNNTP.pm;h=46398cd4896af40c4e550fb1e3c070c5f39d51b2;hb=305d728977def1df9ab57778f9cad9dd834ce73d;hp=12f74c3dd3fb24955aeec4bb16f70418615e4ebd;hpb=ea45e9f71588572a2f4b9299a86cedc3c8e9c72a;p=public-inbox.git diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index 12f74c3d..46398cd4 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -1,18 +1,22 @@ -# Copyright (C) 2015-2019 all contributors +# Copyright (C) 2015-2020 all contributors # License: AGPL-3.0+ # # Each instance of this represents a NNTP client socket +# fields: +# nntpd: PublicInbox::NNTPD ref +# article: per-session current article number +# ng: PublicInbox::Inbox ref +# long_cb: long_response private data package PublicInbox::NNTP; use strict; -use warnings; -use base qw(PublicInbox::DS); -use fields qw(nntpd article ng long_cb); -use PublicInbox::MID qw(mid_escape); -use Email::Simple; +use parent qw(PublicInbox::DS); +use PublicInbox::MID qw(mid_escape $MID_EXTRACT); +use PublicInbox::Eml; use POSIX qw(strftime); use PublicInbox::DS qw(now); use Digest::SHA qw(sha1_hex); use Time::Local qw(timegm timelocal); +use PublicInbox::GitAsyncCat; use constant { LINE_MAX => 512, # RFC 977 section 2.3 r501 => '501 command syntax error', @@ -24,7 +28,7 @@ use constant { }; use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT); use Errno qw(EAGAIN); - +my $ONE_MSGID = qr/\A$MID_EXTRACT\z/; my @OVERVIEW = qw(Subject From Date Message-ID References); my $OVERVIEW_FMT = join(":\r\n", @OVERVIEW, qw(Bytes Lines), '') . "Xref:full\r\n"; @@ -38,23 +42,21 @@ NEWNEWS\r LIST ACTIVE ACTIVE.TIMES NEWSGROUPS OVERVIEW.FMT\r HDR\r OVER\r - -my $have_deflate; +COMPRESS DEFLATE\r sub greet ($) { $_[0]->write($_[0]->{nntpd}->{greet}) }; sub new ($$$) { my ($class, $sock, $nntpd) = @_; - my $self = fields::new($class); + my $self = bless { nntpd => $nntpd }, $class; my $ev = EPOLLIN; my $wbuf; - if (ref($sock) eq 'IO::Socket::SSL' && !$sock->accept_SSL) { + if ($sock->can('accept_SSL') && !$sock->accept_SSL) { return CORE::close($sock) if $! != EAGAIN; $ev = PublicInbox::TLS::epollbit(); $wbuf = [ \&PublicInbox::DS::accept_tls_step, \&greet ]; } $self->SUPER::new($sock, $ev | EPOLLONESHOT); - $self->{nntpd} = $nntpd; if ($wbuf) { $self->{wbuf} = $wbuf; } else { @@ -76,7 +78,7 @@ sub args_ok ($$) { # returns 1 if we can continue, 0 if not due to buffered writes or disconnect sub process_line ($$) { my ($self, $l) = @_; - my ($req, @args) = split(/[ \t]/, $l); + my ($req, @args) = split(/[ \t]+/, $l); return 1 unless defined($req); # skip blank line $req = $self->can('cmd_'.lc($req)); return res($self, '500 command not recognized') unless $req; @@ -98,7 +100,7 @@ sub process_line ($$) { sub cmd_capabilities ($;$) { my ($self, undef) = @_; my $res = $CAPABILITIES; - if (ref($self->{sock}) ne 'IO::Socket::SSL' && + if (!$self->{sock}->can('accept_SSL') && $self->{nntpd}->{accept_tls}) { $res .= "STARTTLS\r\n"; } @@ -297,7 +299,7 @@ sub newnews_i { my $msgs = $over->query_ts($ts, $$prev); if (scalar @$msgs) { more($self, '<' . - join(">\r\n<", map { $_->mid } @$msgs ). + join(">\r\n<", map { $_->{mid} } @$msgs ). '>'); $$prev = $msgs->[-1]->{num}; } else { @@ -335,7 +337,9 @@ sub cmd_newnews ($$$$;$$) { sub cmd_group ($$) { my ($self, $group) = @_; my $no_such = '411 no such news group'; - my $ng = $self->{nntpd}->{groups}->{$group} or return $no_such; + my $nntpd = $self->{nntpd}; + my $ng = $nntpd->{groups}->{$group} or return $no_such; + $nntpd->idler_start; $self->{ng} = $ng; my ($min, $max) = $ng->mm->minmax; @@ -384,7 +388,7 @@ sub cmd_quit ($) { sub header_append ($$$) { my ($hdr, $k, $v) = @_; - my @v = $hdr->header($k); + my @v = $hdr->header_raw($k); foreach (@v) { return if $v eq $_; } @@ -407,8 +411,9 @@ sub xref ($$$$) { $ret; } -sub set_nntp_headers ($$$$$) { - my ($self, $hdr, $ng, $n, $mid) = @_; +sub set_nntp_headers ($$) { + my ($hdr, $smsg) = @_; + my ($mid) = $smsg->{mid}; # why? leafnode requires a Path: header for some inexplicable # reason. We'll fake the shortest one possible. @@ -417,31 +422,32 @@ sub set_nntp_headers ($$$$$) { # leafnode (and maybe other NNTP clients) have trouble dealing # with v2 messages which have multiple Message-IDs (either due # to our own content-based dedupe or buggy git-send-email versions). - my @mids = $hdr->header('Message-ID'); + my @mids = $hdr->header_raw('Message-ID'); if (scalar(@mids) > 1) { my $mid0 = "<$mid>"; $hdr->header_set('Message-ID', $mid0); - my @alt = $hdr->header('X-Alt-Message-ID'); + my @alt = $hdr->header_raw('X-Alt-Message-ID'); my %seen = map { $_ => 1 } (@alt, $mid0); push(@alt, grep { !$seen{$_}++ } @mids); $hdr->header_set('X-Alt-Message-ID', @alt); } # clobber some - my $xref = xref($self, $ng, $n, $mid); + my $ibx = $smsg->{-ibx}; + my $xref = xref($smsg->{nntp}, $ibx, $smsg->{num}, $mid); $hdr->header_set('Xref', $xref); $xref =~ s/:[0-9]+//g; $hdr->header_set('Newsgroups', (split(/ /, $xref, 2))[1]); - header_append($hdr, 'List-Post', "{-primary_address}>"); - if (my $url = $ng->base_url) { + header_append($hdr, 'List-Post', "{-primary_address}>"); + if (my $url = $ibx->base_url) { $mid = mid_escape($mid); header_append($hdr, 'Archived-At', "<$url$mid/>"); header_append($hdr, 'List-Archive', "<$url>"); } } -sub art_lookup ($$$) { - my ($self, $art, $set_headers) = @_; +sub art_lookup ($$) { + my ($self, $art) = @_; my $ng = $self->{ng}; my ($n, $mid); my $err; @@ -450,7 +456,7 @@ sub art_lookup ($$$) { $err = '423 no such article number in this group'; $n = int($art); goto find_mid; - } elsif ($art =~ /\A<([^>]+)>\z/) { + } elsif ($art =~ $ONE_MSGID) { $mid = $1; $err = r430; $n = $ng->mm->num_for($mid) if $ng; @@ -477,14 +483,8 @@ find_mid: } found: my $smsg = $ng->over->get_art($n) or return $err; - my $msg = $ng->msg_by_smsg($smsg) or return $err; - - # Email::Simple->new will modify $msg in-place as documented - # in its manpage, so what's left is the body and we won't need - # to call Email::Simple::body(), later - my $hdr = Email::Simple->new($msg)->header_obj; - set_nntp_headers($self, $hdr, $ng, $n, $mid) if $set_headers; - [ $n, $mid, $msg, $hdr ]; + $smsg->{-ibx} = $ng; + $smsg; } sub msg_body_write ($$) { @@ -495,7 +495,6 @@ sub msg_body_write ($$) { $$msg =~ s/(?msg_more($$msg); - '.' } sub set_art { @@ -503,60 +502,91 @@ sub set_art { $self->{article} = $art if defined $art && $art =~ /\A[0-9]+\z/; } -sub msg_hdr_write ($$$) { - my ($self, $hdr, $body_follows) = @_; - $hdr = $hdr->as_string; - utf8::encode($hdr); - $hdr =~ s/(?{hdr} // \(my $x = ''); + # fixup old bug from import (pre-a0c07cba0e5d8b6a) + $$hdr =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; + $$hdr =~ s/(?msg_more($hdr); + # are only a single line. + $$hdr =~ s/^(Message-ID:)[ \t]*\r\n[ \t]+([^\r]+)\r\n/$1 $2\r\n/igsm; + $smsg->{nntp}->msg_more($$hdr); +} + +sub blob_cb { # called by git->cat_async via git_async_cat + my ($bref, $oid, $type, $size, $smsg) = @_; + my $self = $smsg->{nntp}; + my $code = $smsg->{nntp_code} // 220; + if (!defined($oid)) { + # it's possible to have TOCTOU if an admin runs + # public-inbox-(edit|purge), just move onto the next message + warn "E: $smsg->{blob} missing in $smsg->{-ibx}->{inboxdir}\n"; + return $self->requeue; + } elsif ($smsg->{blob} ne $oid) { + $self->close; + die "BUG: $smsg->{blob} != $oid"; + } + my $r = "$code $smsg->{num} <$smsg->{mid}> article retrieved - "; + my $eml = PublicInbox::Eml->new($bref); + if ($code == 220) { + more($self, $r .= 'head and body follow'); + msg_hdr_write($eml, $smsg); + $self->msg_more("\r\n"); + msg_body_write($self, $bref); + } elsif ($code == 221) { + more($self, $r .= 'head follows'); + msg_hdr_write($eml, $smsg); + } elsif ($code == 222) { + more($self, $r .= 'body follows'); + msg_body_write($self, $bref); + } else { + $self->close; + die "BUG: bad code: $r"; + } + $self->write(\".\r\n"); # flushes (includes ->zflush) + $self->requeue; } sub cmd_article ($;$) { my ($self, $art) = @_; - my $r = art_lookup($self, $art, 1); - return $r unless ref $r; - my ($n, $mid, $msg, $hdr) = @$r; + my $smsg = art_lookup($self, $art); + return $smsg unless ref $smsg; set_art($self, $art); - more($self, "220 $n <$mid> article retrieved - head and body follow"); - msg_hdr_write($self, $hdr, 1); - msg_body_write($self, $msg); + $smsg->{nntp} = $self; + ${git_async_cat($smsg->{-ibx}->git, $smsg->{blob}, \&blob_cb, $smsg)}; } sub cmd_head ($;$) { my ($self, $art) = @_; - my $r = art_lookup($self, $art, 2); - return $r unless ref $r; - my ($n, $mid, undef, $hdr) = @$r; + my $smsg = art_lookup($self, $art); + return $smsg unless ref $smsg; set_art($self, $art); - more($self, "221 $n <$mid> article retrieved - head follows"); - msg_hdr_write($self, $hdr, 0); - '.' + $smsg->{nntp} = $self; + $smsg->{nntp_code} = 221; + ${git_async_cat($smsg->{-ibx}->git, $smsg->{blob}, \&blob_cb, $smsg)}; } sub cmd_body ($;$) { my ($self, $art) = @_; - my $r = art_lookup($self, $art, 0); - return $r unless ref $r; - my ($n, $mid, $msg) = @$r; + my $smsg = art_lookup($self, $art); + return $smsg unless ref $smsg; set_art($self, $art); - more($self, "222 $n <$mid> article retrieved - body follows"); - msg_body_write($self, $msg); + $smsg->{nntp} = $self; + $smsg->{nntp_code} = 222; + ${git_async_cat($smsg->{-ibx}->git, $smsg->{blob}, \&blob_cb, $smsg)}; } sub cmd_stat ($;$) { my ($self, $art) = @_; - my $r = art_lookup($self, $art, 0); - return $r unless ref $r; - my ($n, $mid) = @$r; + my $smsg = art_lookup($self, $art); # art may be msgid + return $smsg unless ref $smsg; + $art = $smsg->{num}; set_art($self, $art); - "223 $n <$mid> article retrieved - request text separately"; + "223 $art <$smsg->{mid}> article retrieved - request text separately"; } sub cmd_ihave ($) { '435 article not wanted - do not send it' } @@ -653,7 +683,7 @@ sub hdr_msgid_range_i { sub hdr_message_id ($$$) { # optimize XHDR Message-ID [range] for slrnpull. my ($self, $xhdr, $range) = @_; - if (defined $range && $range =~ /\A<(.+)>\z/) { # Message-ID + if (defined $range && $range =~ $ONE_MSGID) { my ($ng, $n) = mid_lookup($self, $1); return r430 unless $n; hdr_mid_response($self, $xhdr, $ng, $n, $range, $range); @@ -696,7 +726,7 @@ sub xref_range_i { sub hdr_xref ($$$) { # optimize XHDR Xref [range] for rtin my ($self, $xhdr, $range) = @_; - if (defined $range && $range =~ /\A<(.+)>\z/) { # Message-ID + if (defined $range && $range =~ $ONE_MSGID) { my $mid = $1; my ($ng, $n) = mid_lookup($self, $mid); return r430 unless $n; @@ -714,27 +744,35 @@ sub hdr_xref ($$$) { # optimize XHDR Xref [range] for rtin sub over_header_for { my ($over, $num, $field) = @_; my $smsg = $over->get_art($num) or return; - return PublicInbox::SearchMsg::date($smsg) if $field eq 'date'; + return PublicInbox::Smsg::date($smsg) if $field eq 'date'; $smsg->{$field}; } -sub searchmsg_range_i { +sub smsg_range_i { my ($self, $beg, $end, $field) = @_; my $over = $self->{ng}->over; my $msgs = $over->query_xover($$beg, $end); scalar(@$msgs) or return; my $tmp = ''; - foreach my $s (@$msgs) { - $tmp .= $s->{num} . ' ' . $s->$field . "\r\n"; + + # ->{$field} is faster than ->$field invocations, so favor that. + if ($field eq 'date') { + for my $s (@$msgs) { + $tmp .= "$s->{num} ".PublicInbox::Smsg::date($s)."\r\n" + } + } else { + for my $s (@$msgs) { + $tmp .= "$s->{num} $s->{$field}\r\n"; + } } utf8::encode($tmp); $self->msg_more($tmp); $$beg = $msgs->[-1]->{num} + 1; } -sub hdr_searchmsg ($$$$) { +sub hdr_smsg ($$$$) { my ($self, $xhdr, $field, $range) = @_; - if (defined $range && $range =~ /\A<(.+)>\z/) { # Message-ID + if (defined $range && $range =~ $ONE_MSGID) { my ($ng, $n) = mid_lookup($self, $1); return r430 unless defined $n; my $v = over_header_for($ng->over, $n, $field); @@ -744,7 +782,7 @@ sub hdr_searchmsg ($$$$) { my $r = get_range($self, $range); return $r unless ref $r; more($self, $xhdr ? r221 : r225); - long_response($self, \&searchmsg_range_i, @$r, $field); + long_response($self, \&smsg_range_i, @$r, $field); } } @@ -757,9 +795,9 @@ sub do_hdr ($$$;$) { hdr_xref($self, $xhdr, $range); } elsif ($sub =~ /\A(?:subject|references|date|from|to|cc| bytes|lines)\z/x) { - hdr_searchmsg($self, $xhdr, $sub, $range); + hdr_smsg($self, $xhdr, $sub, $range); } elsif ($sub =~ /\A:(bytes|lines)\z/) { - hdr_searchmsg($self, $xhdr, $1, $range); + hdr_smsg($self, $xhdr, $1, $range); } else { $xhdr ? (r221 . "\r\n.") : "503 HDR not permitted on $header"; } @@ -831,7 +869,7 @@ sub over_line ($$$$) { my $s = join("\t", $num, $smsg->{subject}, $smsg->{from}, - PublicInbox::SearchMsg::date($smsg), + PublicInbox::Smsg::date($smsg), "<$smsg->{mid}>", $smsg->{references}, $smsg->{bytes}, @@ -843,7 +881,7 @@ sub over_line ($$$$) { sub cmd_over ($;$) { my ($self, $range) = @_; - if ($range && $range =~ /\A<(.+)>\z/) { + if ($range && $range =~ $ONE_MSGID) { my ($ng, $n) = mid_lookup($self, $1); defined $n or return r430; my $smsg = $ng->over->get_art($n) or return r430; @@ -888,7 +926,7 @@ sub cmd_starttls ($) { my ($self) = @_; my $sock = $self->{sock} or return; # RFC 4642 2.2.1 - return r502 if (ref($sock) eq 'IO::Socket::SSL' || $self->compressed); + return r502 if ($sock->can('accept_SSL') || $self->compressed); my $opt = $self->{nntpd}->{accept_tls} or return '580 can not initiate TLS negotiation'; res($self, '382 Continue with TLS negotiation'); @@ -901,7 +939,7 @@ sub cmd_starttls ($) { sub cmd_compress ($$) { my ($self, $alg) = @_; return '503 Only DEFLATE is supported' if uc($alg) ne 'DEFLATE'; - return r502 if $self->compressed || !$have_deflate; + return r502 if $self->compressed; PublicInbox::NNTPdeflate->enable($self); $self->requeue; undef @@ -911,7 +949,7 @@ sub zflush {} # overridden by NNTPdeflate sub cmd_xpath ($$) { my ($self, $mid) = @_; - return r501 unless $mid =~ /\A<(.+)>\z/; + return r501 unless $mid =~ $ONE_MSGID; $mid = $1; my @paths; foreach my $ng (values %{$self->{nntpd}->{groups}}) { @@ -948,38 +986,35 @@ sub out ($$;@) { sub event_step { my ($self) = @_; - return unless $self->flush_write && $self->{sock}; + return unless $self->flush_write && $self->{sock} && !$self->{long_cb}; $self->update_idle_time; # only read more requests if we've drained the write buffer, # otherwise we can be buffering infinitely w/o backpressure - my $rbuf = $self->{rbuf} // (\(my $x = '')); - my $r = 1; - - if (index($$rbuf, "\n") < 0) { - my $off = bytes::length($$rbuf); - $r = $self->do_read($rbuf, LINE_MAX, $off) or return; - } - while ($r > 0 && $$rbuf =~ s/\A[ \t]*([^\n]*?)\r?\n//) { - my $line = $1; - return $self->close if $line =~ /[[:cntrl:]]/s; - my $t0 = now(); - my $fd = fileno($self->{sock}); - $r = eval { process_line($self, $line) }; - my $pending = $self->{wbuf} ? ' pending' : ''; - out($self, "[$fd] %s - %0.6f$pending", $line, now() - $t0); + my $rbuf = $self->{rbuf} // \(my $x = ''); + my $line = index($$rbuf, "\n"); + while ($line < 0) { + return $self->close if length($$rbuf) >= LINE_MAX; + $self->do_read($rbuf, LINE_MAX, length($$rbuf)) or return; + $line = index($$rbuf, "\n"); } - + $line = substr($$rbuf, 0, $line + 1, ''); + $line =~ s/\r?\n\z//s; + return $self->close if $line =~ /[[:cntrl:]]/s; + + my $t0 = now(); + my $fd = fileno($self->{sock}); + my $r = eval { process_line($self, $line) }; + my $pending = $self->{wbuf} ? ' pending' : ''; + out($self, "[$fd] %s - %0.6f$pending", $line, now() - $t0); return $self->close if $r < 0; - my $len = bytes::length($$rbuf); - return $self->close if ($len >= LINE_MAX); $self->rbuf_idle($rbuf); $self->update_idle_time; # maybe there's more pipelined data, or we'll have # to register it for socket-readiness notifications - $self->requeue unless $self->{wbuf}; + $self->requeue unless $pending; } # for graceful shutdown in PublicInbox::Daemon: @@ -988,10 +1023,4 @@ sub busy { ($self->{rbuf} || $self->{wbuf} || $self->not_idle_long($now)); } -# this is an import to prevent "perl -c" from complaining about fields -sub import { - $have_deflate = eval { require PublicInbox::NNTPdeflate } and - $CAPABILITIES .= "COMPRESS DEFLATE\r\n"; -} - 1;