X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FNNTP.pm;h=6df19f322b6e03aaf91ba76f61a524d047be7604;hb=0e6ceff37fc38f28a1520d7475f31d47f74ec7e6;hp=12f74c3dd3fb24955aeec4bb16f70418615e4ebd;hpb=ea45e9f71588572a2f4b9299a86cedc3c8e9c72a;p=public-inbox.git
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index 12f74c3d..6df19f32 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2015-2019 all contributors
+# Copyright (C) 2015-2020 all contributors
# License: AGPL-3.0+
#
# Each instance of this represents a NNTP client socket
@@ -7,12 +7,13 @@ use strict;
use warnings;
use base qw(PublicInbox::DS);
use fields qw(nntpd article ng long_cb);
-use PublicInbox::MID qw(mid_escape);
-use Email::Simple;
+use PublicInbox::MID qw(mid_escape $MID_EXTRACT);
+use PublicInbox::Eml;
use POSIX qw(strftime);
use PublicInbox::DS qw(now);
use Digest::SHA qw(sha1_hex);
use Time::Local qw(timegm timelocal);
+use PublicInbox::GitAsyncCat;
use constant {
LINE_MAX => 512, # RFC 977 section 2.3
r501 => '501 command syntax error',
@@ -24,7 +25,7 @@ use constant {
};
use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT);
use Errno qw(EAGAIN);
-
+my $ONE_MSGID = qr/\A$MID_EXTRACT\z/;
my @OVERVIEW = qw(Subject From Date Message-ID References);
my $OVERVIEW_FMT = join(":\r\n", @OVERVIEW, qw(Bytes Lines), '') .
"Xref:full\r\n";
@@ -38,8 +39,7 @@ NEWNEWS\r
LIST ACTIVE ACTIVE.TIMES NEWSGROUPS OVERVIEW.FMT\r
HDR\r
OVER\r
-
-my $have_deflate;
+COMPRESS DEFLATE\r
sub greet ($) { $_[0]->write($_[0]->{nntpd}->{greet}) };
@@ -48,7 +48,7 @@ sub new ($$$) {
my $self = fields::new($class);
my $ev = EPOLLIN;
my $wbuf;
- if (ref($sock) eq 'IO::Socket::SSL' && !$sock->accept_SSL) {
+ if ($sock->can('accept_SSL') && !$sock->accept_SSL) {
return CORE::close($sock) if $! != EAGAIN;
$ev = PublicInbox::TLS::epollbit();
$wbuf = [ \&PublicInbox::DS::accept_tls_step, \&greet ];
@@ -76,7 +76,7 @@ sub args_ok ($$) {
# returns 1 if we can continue, 0 if not due to buffered writes or disconnect
sub process_line ($$) {
my ($self, $l) = @_;
- my ($req, @args) = split(/[ \t]/, $l);
+ my ($req, @args) = split(/[ \t]+/, $l);
return 1 unless defined($req); # skip blank line
$req = $self->can('cmd_'.lc($req));
return res($self, '500 command not recognized') unless $req;
@@ -98,7 +98,7 @@ sub process_line ($$) {
sub cmd_capabilities ($;$) {
my ($self, undef) = @_;
my $res = $CAPABILITIES;
- if (ref($self->{sock}) ne 'IO::Socket::SSL' &&
+ if (!$self->{sock}->can('accept_SSL') &&
$self->{nntpd}->{accept_tls}) {
$res .= "STARTTLS\r\n";
}
@@ -297,7 +297,7 @@ sub newnews_i {
my $msgs = $over->query_ts($ts, $$prev);
if (scalar @$msgs) {
more($self, '<' .
- join(">\r\n<", map { $_->mid } @$msgs ).
+ join(">\r\n<", map { $_->{mid} } @$msgs ).
'>');
$$prev = $msgs->[-1]->{num};
} else {
@@ -335,7 +335,9 @@ sub cmd_newnews ($$$$;$$) {
sub cmd_group ($$) {
my ($self, $group) = @_;
my $no_such = '411 no such news group';
- my $ng = $self->{nntpd}->{groups}->{$group} or return $no_such;
+ my $nntpd = $self->{nntpd};
+ my $ng = $nntpd->{groups}->{$group} or return $no_such;
+ $nntpd->idler_start;
$self->{ng} = $ng;
my ($min, $max) = $ng->mm->minmax;
@@ -384,7 +386,7 @@ sub cmd_quit ($) {
sub header_append ($$$) {
my ($hdr, $k, $v) = @_;
- my @v = $hdr->header($k);
+ my @v = $hdr->header_raw($k);
foreach (@v) {
return if $v eq $_;
}
@@ -407,8 +409,9 @@ sub xref ($$$$) {
$ret;
}
-sub set_nntp_headers ($$$$$) {
- my ($self, $hdr, $ng, $n, $mid) = @_;
+sub set_nntp_headers ($$$) {
+ my ($self, $hdr, $smsg) = @_;
+ my ($mid) = $smsg->{mid};
# why? leafnode requires a Path: header for some inexplicable
# reason. We'll fake the shortest one possible.
@@ -417,18 +420,19 @@ sub set_nntp_headers ($$$$$) {
# leafnode (and maybe other NNTP clients) have trouble dealing
# with v2 messages which have multiple Message-IDs (either due
# to our own content-based dedupe or buggy git-send-email versions).
- my @mids = $hdr->header('Message-ID');
+ my @mids = $hdr->header_raw('Message-ID');
if (scalar(@mids) > 1) {
my $mid0 = "<$mid>";
$hdr->header_set('Message-ID', $mid0);
- my @alt = $hdr->header('X-Alt-Message-ID');
+ my @alt = $hdr->header_raw('X-Alt-Message-ID');
my %seen = map { $_ => 1 } (@alt, $mid0);
push(@alt, grep { !$seen{$_}++ } @mids);
$hdr->header_set('X-Alt-Message-ID', @alt);
}
# clobber some
- my $xref = xref($self, $ng, $n, $mid);
+ my $ng = $self->{ng};
+ my $xref = xref($self, $ng, $smsg->{num}, $mid);
$hdr->header_set('Xref', $xref);
$xref =~ s/:[0-9]+//g;
$hdr->header_set('Newsgroups', (split(/ /, $xref, 2))[1]);
@@ -440,8 +444,8 @@ sub set_nntp_headers ($$$$$) {
}
}
-sub art_lookup ($$$) {
- my ($self, $art, $set_headers) = @_;
+sub art_lookup ($$) {
+ my ($self, $art) = @_;
my $ng = $self->{ng};
my ($n, $mid);
my $err;
@@ -450,7 +454,7 @@ sub art_lookup ($$$) {
$err = '423 no such article number in this group';
$n = int($art);
goto find_mid;
- } elsif ($art =~ /\A<([^>]+)>\z/) {
+ } elsif ($art =~ $ONE_MSGID) {
$mid = $1;
$err = r430;
$n = $ng->mm->num_for($mid) if $ng;
@@ -477,14 +481,7 @@ find_mid:
}
found:
my $smsg = $ng->over->get_art($n) or return $err;
- my $msg = $ng->msg_by_smsg($smsg) or return $err;
-
- # Email::Simple->new will modify $msg in-place as documented
- # in its manpage, so what's left is the body and we won't need
- # to call Email::Simple::body(), later
- my $hdr = Email::Simple->new($msg)->header_obj;
- set_nntp_headers($self, $hdr, $ng, $n, $mid) if $set_headers;
- [ $n, $mid, $msg, $hdr ];
+ $smsg;
}
sub msg_body_write ($$) {
@@ -495,7 +492,6 @@ sub msg_body_write ($$) {
$$msg =~ s/(?<!\r)\n/\r\n/sg; # Alpine barfs without this
$$msg .= "\r\n" unless $$msg =~ /\r\n\z/s;
$self->msg_more($$msg);
- '.'
}
sub set_art {
@@ -504,55 +500,88 @@ sub set_art {
}
sub msg_hdr_write ($$$) {
- my ($self, $hdr, $body_follows) = @_;
- $hdr = $hdr->as_string;
- utf8::encode($hdr);
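- $hdr =~ s/(?<!\r)\n/\r\n/sg; # Alpine barfs without this
+ my ($self, $eml, $smsg) = @_;
+ set_nntp_headers($self, $eml, $smsg);
+
+ my $hdr = $eml->{hdr} // \(my $x = '');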
+ # fixup old bug from import (pre-a0c07cba0e5d8b6a)
+ $$hdr =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
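+ $$hdr =~ s/(?<!\r)\n/\r\n/sg; # Alpine barfs without this
# for leafnode compatibility, we need to ensure Message-ID headers
- # are only a single line.  We can't subclass Email::Simple::Header
- # and override _default_fold_at in here, either; since that won't
- # affect messages already in the archive.
- $hdr =~ s/^(Message-ID:)[ \t]*\r\n[ \t]+([^\r]+)\r\n/$1 $2\r\n/igsm;
- $hdr .= "\r\n" if $body_follows;
- $self->msg_more($hdr);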
+ # are only a single line.
+ $$hdr =~ s/^(Message-ID:)[ \t]*\r\n[ \t]+([^\r]+)\r\n/$1 $2\r\n/igsm;
+ $self->msg_more($$hdr);
+}
+
+sub blob_cb { # called by git->cat_async via git_async_cat
+ my ($bref, $oid, $type, $size, $smsg) = @_;
+ my $self = $smsg->{nntp};
+ my $code = $smsg->{nntp_code} // 220;
+ if (!defined($oid)) {
+ # it's possible to have TOCTOU if an admin runs
+ # public-inbox-(edit|purge), just move onto the next message
+ return $self->requeue;
+ } elsif ($smsg->{blob} ne $oid) {
+ $self->close;
+ die "BUG: $smsg->{blob} != $oid";
+ }
+ my $r = "$code $smsg->{num} <$smsg->{mid}> article retrieved - ";
+ my $eml = PublicInbox::Eml->new($bref);
+ if ($code == 220) {
+ more($self, $r .= 'head and body follow');
+ msg_hdr_write($self, $eml, $smsg);
+ $self->msg_more("\r\n");
+ msg_body_write($self, $bref);
+ } elsif ($code == 221) {
+ more($self, $r .= 'head follows');
+ msg_hdr_write($self, $eml, $smsg);
+ } elsif ($code == 222) {
+ more($self, $r .= 'body follows');
+ msg_body_write($self, $bref);
+ } else {
+ $self->close;
+ die "BUG: bad code: $r";
+ }
+ $self->write(\".\r\n"); # flushes (includes ->zflush)
+ $self->requeue;
}
sub cmd_article ($;$) {
my ($self, $art) = @_;
- my $r = art_lookup($self, $art, 1);
- return $r unless ref $r;
- my ($n, $mid, $msg, $hdr) = @$r;
+ my $smsg = art_lookup($self, $art);
+ return $smsg unless ref $smsg;
set_art($self, $art);
- more($self, "220 $n <$mid> article retrieved - head and body follow");
- msg_hdr_write($self, $hdr, 1);
- msg_body_write($self, $msg);
+ $smsg->{nntp} = $self;
+ git_async_cat($self->{ng}->git, $smsg->{blob}, \&blob_cb, $smsg);
+ undef;
}
sub cmd_head ($;$) {
my ($self, $art) = @_;
- my $r = art_lookup($self, $art, 2);
- return $r unless ref $r;
- my ($n, $mid, undef, $hdr) = @$r;
+ my $smsg = art_lookup($self, $art);
+ return $smsg unless ref $smsg;
set_art($self, $art);
- more($self, "221 $n <$mid> article retrieved - head follows");
- msg_hdr_write($self, $hdr, 0);
- '.'
+ $smsg->{nntp} = $self;
+ $smsg->{nntp_code} = 221;
+ git_async_cat($self->{ng}->git, $smsg->{blob}, \&blob_cb, $smsg);
+ undef;
}
sub cmd_body ($;$) {
my ($self, $art) = @_;
- my $r = art_lookup($self, $art, 0);
- return $r unless ref $r;
- my ($n, $mid, $msg) = @$r;
+ my $smsg = art_lookup($self, $art);
+ return $smsg unless ref $smsg;
set_art($self, $art);
- more($self, "222 $n <$mid> article retrieved - body follows");
- msg_body_write($self, $msg);
+ $smsg->{nntp} = $self;
+ $smsg->{nntp_code} = 222;
+ git_async_cat($self->{ng}->git, $smsg->{blob}, \&blob_cb, $smsg);
+ undef;
}
sub cmd_stat ($;$) {
my ($self, $art) = @_;
- my $r = art_lookup($self, $art, 0);
+ my $r = art_lookup($self, $art);
return $r unless ref $r;
my ($n, $mid) = @$r;
set_art($self, $art);
@@ -653,7 +682,7 @@ sub hdr_msgid_range_i {
sub hdr_message_id ($$$) { # optimize XHDR Message-ID [range] for slrnpull.
my ($self, $xhdr, $range) = @_;
- if (defined $range && $range =~ /\A<(.+)>\z/) { # Message-ID
+ if (defined $range && $range =~ $ONE_MSGID) {
my ($ng, $n) = mid_lookup($self, $1);
return r430 unless $n;
hdr_mid_response($self, $xhdr, $ng, $n, $range, $range);
@@ -696,7 +725,7 @@ sub xref_range_i {
sub hdr_xref ($$$) { # optimize XHDR Xref [range] for rtin
my ($self, $xhdr, $range) = @_;
- if (defined $range && $range =~ /\A<(.+)>\z/) { # Message-ID
+ if (defined $range && $range =~ $ONE_MSGID) {
my $mid = $1;
my ($ng, $n) = mid_lookup($self, $mid);
return r430 unless $n;
@@ -714,27 +743,35 @@ sub hdr_xref ($$$) { # optimize XHDR Xref [range] for rtin
sub over_header_for {
my ($over, $num, $field) = @_;
my $smsg = $over->get_art($num) or return;
- return PublicInbox::SearchMsg::date($smsg) if $field eq 'date';
+ return PublicInbox::Smsg::date($smsg) if $field eq 'date';
$smsg->{$field};
}
-sub searchmsg_range_i {
+sub smsg_range_i {
my ($self, $beg, $end, $field) = @_;
my $over = $self->{ng}->over;
my $msgs = $over->query_xover($$beg, $end);
scalar(@$msgs) or return;
my $tmp = '';
- foreach my $s (@$msgs) {
- $tmp .= $s->{num} . ' ' . $s->$field . "\r\n";
+
+ # ->{$field} is faster than ->$field invocations, so favor that.
+ if ($field eq 'date') {
+ for my $s (@$msgs) {
+ $tmp .= "$s->{num} ".PublicInbox::Smsg::date($s)."\r\n"
+ }
+ } else {
+ for my $s (@$msgs) {
+ $tmp .= "$s->{num} $s->{$field}\r\n";
+ }
}
utf8::encode($tmp);
$self->msg_more($tmp);
$$beg = $msgs->[-1]->{num} + 1;
}
-sub hdr_searchmsg ($$$$) {
+sub hdr_smsg ($$$$) {
my ($self, $xhdr, $field, $range) = @_;
- if (defined $range && $range =~ /\A<(.+)>\z/) { # Message-ID
+ if (defined $range && $range =~ $ONE_MSGID) {
my ($ng, $n) = mid_lookup($self, $1);
return r430 unless defined $n;
my $v = over_header_for($ng->over, $n, $field);
@@ -744,7 +781,7 @@ sub hdr_searchmsg ($$$$) {
my $r = get_range($self, $range);
return $r unless ref $r;
more($self, $xhdr ? r221 : r225);
- long_response($self, \&searchmsg_range_i, @$r, $field);
+ long_response($self, \&smsg_range_i, @$r, $field);
}
}
@@ -757,9 +794,9 @@ sub do_hdr ($$$;$) {
hdr_xref($self, $xhdr, $range);
} elsif ($sub =~ /\A(?:subject|references|date|from|to|cc|
bytes|lines)\z/x) {
- hdr_searchmsg($self, $xhdr, $sub, $range);
+ hdr_smsg($self, $xhdr, $sub, $range);
} elsif ($sub =~ /\A:(bytes|lines)\z/) {
- hdr_searchmsg($self, $xhdr, $1, $range);
+ hdr_smsg($self, $xhdr, $1, $range);
} else {
$xhdr ? (r221 . "\r\n.") : "503 HDR not permitted on $header";
}
@@ -831,7 +868,7 @@ sub over_line ($$$$) {
my $s = join("\t", $num,
$smsg->{subject},
$smsg->{from},
- PublicInbox::SearchMsg::date($smsg),
+ PublicInbox::Smsg::date($smsg),
"<$smsg->{mid}>",
$smsg->{references},
$smsg->{bytes},
@@ -843,7 +880,7 @@ sub over_line ($$$$) {
sub cmd_over ($;$) {
my ($self, $range) = @_;
- if ($range && $range =~ /\A<(.+)>\z/) {
+ if ($range && $range =~ $ONE_MSGID) {
my ($ng, $n) = mid_lookup($self, $1);
defined $n or return r430;
my $smsg = $ng->over->get_art($n) or return r430;
@@ -888,7 +925,7 @@ sub cmd_starttls ($) {
my ($self) = @_;
my $sock = $self->{sock} or return;
# RFC 4642 2.2.1
- return r502 if (ref($sock) eq 'IO::Socket::SSL' || $self->compressed);
+ return r502 if ($sock->can('accept_SSL') || $self->compressed);
my $opt = $self->{nntpd}->{accept_tls} or
return '580 can not initiate TLS negotiation';
res($self, '382 Continue with TLS negotiation');
@@ -901,7 +938,7 @@ sub cmd_starttls ($) {
sub cmd_compress ($$) {
my ($self, $alg) = @_;
return '503 Only DEFLATE is supported' if uc($alg) ne 'DEFLATE';
- return r502 if $self->compressed || !$have_deflate;
+ return r502 if $self->compressed;
PublicInbox::NNTPdeflate->enable($self);
$self->requeue;
undef
@@ -911,7 +948,7 @@ sub zflush {} # overridden by NNTPdeflate
sub cmd_xpath ($$) {
my ($self, $mid) = @_;
- return r501 unless $mid =~ /\A<(.+)>\z/;
+ return r501 unless $mid =~ $ONE_MSGID;
$mid = $1;
my @paths;
foreach my $ng (values %{$self->{nntpd}->{groups}}) {
@@ -948,38 +985,35 @@ sub out ($$;@) {
sub event_step {
my ($self) = @_;
- return unless $self->flush_write && $self->{sock};
+ return unless $self->flush_write && $self->{sock} && !$self->{long_cb};
$self->update_idle_time;
# only read more requests if we've drained the write buffer,
# otherwise we can be buffering infinitely w/o backpressure
- my $rbuf = $self->{rbuf} // (\(my $x = ''));
- my $r = 1;
-
- if (index($$rbuf, "\n") < 0) {
- my $off = bytes::length($$rbuf);
- $r = $self->do_read($rbuf, LINE_MAX, $off) or return;
- }
- while ($r > 0 && $$rbuf =~ s/\A[ \t]*([^\n]*?)\r?\n//) {
- my $line = $1;
- return $self->close if $line =~ /[[:cntrl:]]/s;
- my $t0 = now();
- my $fd = fileno($self->{sock});
- $r = eval { process_line($self, $line) };
- my $pending = $self->{wbuf} ? ' pending' : '';
- out($self, "[$fd] %s - %0.6f$pending", $line, now() - $t0);
+ my $rbuf = $self->{rbuf} // \(my $x = '');
+ my $line = index($$rbuf, "\n");
+ while ($line < 0) {
+ return $self->close if length($$rbuf) >= LINE_MAX;
+ $self->do_read($rbuf, LINE_MAX, length($$rbuf)) or return;
+ $line = index($$rbuf, "\n");
}
-
+ $line = substr($$rbuf, 0, $line + 1, '');
+ $line =~ s/\r?\n\z//s;
+ return $self->close if $line =~ /[[:cntrl:]]/s;
+
+ my $t0 = now();
+ my $fd = fileno($self->{sock});
+ my $r = eval { process_line($self, $line) };
+ my $pending = $self->{wbuf} ? ' pending' : '';
+ out($self, "[$fd] %s - %0.6f$pending", $line, now() - $t0);
return $self->close if $r < 0;
- my $len = bytes::length($$rbuf);
- return $self->close if ($len >= LINE_MAX);
$self->rbuf_idle($rbuf);
$self->update_idle_time;
# maybe there's more pipelined data, or we'll have
# to register it for socket-readiness notifications
- $self->requeue unless $self->{wbuf};
+ $self->requeue unless $pending;
}
# for graceful shutdown in PublicInbox::Daemon:
@@ -988,10 +1022,4 @@ sub busy {
($self->{rbuf} || $self->{wbuf} || $self->not_idle_long($now));
}
-# this is an import to prevent "perl -c" from complaining about fields
-sub import {
- $have_deflate = eval { require PublicInbox::NNTPdeflate } and
- $CAPABILITIES .= "COMPRESS DEFLATE\r\n";
-}
-
1;