#
# This license differs from the rest of public-inbox
#
-# This is a fork of the (for now) unmaintained Danga::Socket 1.61.
-# Unused features will be removed, and updates will be made to take
-# advantage of newer kernels.
+# This is a fork of the unmaintained Danga::Socket (1.61) with
+# significant changes. See Documentation/technical/ds.txt in our
+# source for details.
#
-# API changes to diverge from Danga::Socket will happen to better
-# accomodate new features and improve scalability. Do not expect
-# this to be a stable API like Danga::Socket.
-# Bugs encountered (and likely fixed) are reported to
-# bug-Danga-Socket@rt.cpan.org and visible at:
+# Do not expect this to be a stable API like Danga::Socket,
+# but it will evolve to suit our needs and to take advantage of
+# newer Linux and *BSD features.
+# Bugs encountered were reported to bug-Danga-Socket@rt.cpan.org,
+# fixed in Danga::Socket 1.62 and visible at:
# https://rt.cpan.org/Public/Dist/Display.html?Name=Danga-Socket
package PublicInbox::DS;
use strict;
use bytes;
-use POSIX ();
+use POSIX qw(WNOHANG);
use IO::Handle qw();
-use Fcntl qw(FD_CLOEXEC F_SETFD F_GETFD SEEK_SET);
+use Fcntl qw(SEEK_SET :DEFAULT);
use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC);
use parent qw(Exporter);
our @EXPORT_OK = qw(now msg_more);
use warnings;
use 5.010_001;
+use Scalar::Util qw(blessed);
use PublicInbox::Syscall qw(:epoll);
+use PublicInbox::Tmpfile;
use fields ('sock', # underlying socket
+ 'rbuf', # scalarref, usually undef
'wbuf', # arrayref of coderefs or GLOB refs
'wbuf_off', # offset into first element of wbuf to start writing at
);
-use Errno qw(EAGAIN EINVAL);
-use Carp qw(croak confess carp);
-use File::Temp qw(tempfile);
-
-our $HAVE_KQUEUE = eval { require IO::KQueue; IO::KQueue->import; 1 };
+use Errno qw(EAGAIN EINVAL);
+use Carp qw(confess carp);
+my $nextq; # queue for next_tick
+my $WaitPids; # list of [ pid, callback, callback_arg ]
+my $later_queue; # callbacks
+my $EXPMAP; # fd -> [ idle_time, $self ]
+our $EXPTIME = 180; # 3 minutes
+my ($later_timer, $reap_timer, $exp_timer);
our (
- $HaveEpoll, # Flag -- is epoll available? initially undefined.
- $HaveKQueue,
%DescriptorMap, # fd (num) -> PublicInbox::DS object
- $Epoll, # Global epoll fd (for epoll mode only)
- $KQueue, # Global kqueue fd ref (for kqueue mode only)
+ $Epoll, # Global epoll fd (or DSKQXS ref)
$_io, # IO::Handle for Epoll
@ToClose, # sockets to close when event loop is done
$LoopTimeout, # timeout of event loop in milliseconds
$DoneInit, # if we've done the one-time module init yet
@Timers, # timers
+ $in_loop,
);
Reset();
=cut
sub Reset {
%DescriptorMap = ();
+ $nextq = [];
+ $WaitPids = [];
+ $later_queue = [];
+ $EXPMAP = {};
+ $reap_timer = $later_timer = $exp_timer = undef;
@ToClose = ();
$LoopTimeout = -1; # no timeout by default
@Timers = ();
$PostLoopCallback = undef;
$DoneInit = 0;
- # NOTE kqueue is close-on-fork, and we don't account for it, yet
- # OTOH, we (public-inbox) don't need this sub outside of tests...
- POSIX::close($$KQueue) if !$_io && $KQueue && $$KQueue >= 0;
- $KQueue = undef;
-
- $_io = undef; # close $Epoll
- $Epoll = undef;
+ $_io = undef; # closes real $Epoll FD
+ $Epoll = undef; # may call DSKQXS::DESTROY
*EventLoop = *FirstTimeEventLoop;
}
return $LoopTimeout = $_[1] + 0;
}
-=head2 C<< CLASS->AddTimer( $seconds, $coderef ) >>
+=head2 C<< PublicInbox::DS::add_timer( $seconds, $coderef ) >>
Add a timer to occur $seconds from now. $seconds may be fractional, but timers
are not guaranteed to fire at the exact time you ask for.
-Returns a timer object which you can call C<< $timer->cancel >> on if you need to.
+Returns a timer object which you can call C<< $timer->cancel >> on if you need
+to.
=cut
-sub AddTimer {
- my ($class, $secs, $coderef) = @_;
-
- if (!$secs) {
- my $timer = bless([0, $coderef], 'PublicInbox::DS::Timer');
- unshift(@Timers, $timer);
- return $timer;
- }
+sub add_timer ($$) {
+ my ($secs, $coderef) = @_;
my $fire_time = now() + $secs;
return if $DoneInit;
$DoneInit = 1;
- if ($HAVE_KQUEUE) {
- $KQueue = IO::KQueue->new();
- $HaveKQueue = defined $KQueue;
- if ($HaveKQueue) {
- *EventLoop = *KQueueEventLoop;
- }
- }
- elsif (PublicInbox::Syscall::epoll_defined()) {
- $Epoll = eval { epoll_create(1024); };
- $HaveEpoll = defined $Epoll && $Epoll >= 0;
- if ($HaveEpoll) {
- set_cloexec($Epoll);
- *EventLoop = *EpollEventLoop;
+ if (PublicInbox::Syscall::epoll_defined()) {
+ $Epoll = epoll_create();
+ set_cloexec($Epoll) if (defined($Epoll) && $Epoll >= 0);
+ } else {
+ my $cls;
+ for (qw(DSKQXS DSPoll)) {
+ $cls = "PublicInbox::$_";
+ last if eval "require $cls";
}
+ $cls->import(qw(epoll_ctl epoll_wait));
+ $Epoll = $cls->new;
}
+ *EventLoop = *EpollEventLoop;
}
=head2 C<< CLASS->EventLoop() >>
_InitPoller();
- if ($HaveEpoll) {
- EpollEventLoop($class);
- } elsif ($HaveKQueue) {
- KQueueEventLoop($class);
- }
+ EventLoop($class);
}
sub now () { clock_gettime(CLOCK_MONOTONIC) }
+# Run everything currently in the next-tick queue exactly once.
+# The queue is swapped for a fresh array before anything runs, so items
+# enqueued by the callbacks themselves wait for the following call.
+sub next_tick () {
+	my $pending = $nextq;
+	$nextq = [];
+	foreach my $item (@$pending) {
+		# blessed() is used instead of "ref" on blessed refs to work
+		# around a Perl 5.16.3 leak:
+		# https://rt.perl.org/Public/Bug/Display.html?id=114340
+		unless (blessed($item)) {
+			$item->(); # plain coderef
+			next;
+		}
+		$item->event_step; # blessed item: invoke its event_step method
+	}
+}
+
# runs timers and returns milliseconds for next one, or next event loop
sub RunTimers {
- return $LoopTimeout unless @Timers;
+ next_tick();
+
+ return ((@$nextq || @ToClose) ? 0 : $LoopTimeout) unless @Timers;
my $now = now();
$to_run->[1]->($now) if $to_run->[1];
}
+ # timers may enqueue into nextq:
+ return 0 if (@$nextq || @ToClose);
+
return $LoopTimeout unless @Timers;
# convert time to an even number of milliseconds, adding 1
return $timeout;
}
-### The epoll-based event loop. Gets installed as EventLoop if IO::Epoll loads
-### okay.
-sub EpollEventLoop {
- my $class = shift;
+# We can't use waitpid(-1) safely here since it can hit ``, system(),
+# and other things. So we scan the $WaitPids list, which is hopefully
+# not too big.
+#
+# Each $WaitPids entry is [ pid, callback, callback_arg ].  PIDs which
+# are still running are kept for the next pass; for every other PID the
+# callback (if any) is invoked as $cb->($arg, $pid).  Not reentrant.
+sub reap_pids {
+	my $tmp = $WaitPids;
+	$WaitPids = [];
+	$reap_timer = undef;
+	foreach my $ary (@$tmp) {
+		my ($pid, $cb, $arg) = @$ary;
+		my $ret = waitpid($pid, WNOHANG);
+		if ($ret == 0) {
+			# child has not exited, yet; check again next time
+			push @$WaitPids, $ary;
+		} elsif ($cb) {
+			# a failing callback must not stop us from reaping
+			# the remaining PIDs, but don't hide the failure
+			eval { $cb->($arg, $pid) };
+			warn "E: reap_pids callback for PID $pid: $@" if $@;
+		}
+	}
+	if (@$WaitPids) {
+		# we may not be done, and we may have missed a SIGCHLD,
+		# so scan again in one second
+		$reap_timer = add_timer(1, \&reap_pids);
+	}
+}
- while (1) {
+# SIGCHLD handler; safe to re-enter because it only appends reap_pids
+# (which is not reentrant itself) to the next-tick queue
+sub enqueue_reap ($) {
+	push(@$nextq, \&reap_pids);
+}
+
+# accessor for the $in_loop flag
+sub in_loop () {
+	return $in_loop;
+}
+
+sub EpollEventLoop {
+ local $in_loop = 1;
+ do {
my @events;
my $i;
my $timeout = RunTimers();
# in that event.
$DescriptorMap{$events[$i]->[0]}->event_step;
}
- return unless PostEventLoop();
- }
-}
-
-### The kqueue-based event loop. Gets installed as EventLoop if IO::KQueue works
-### okay.
-sub KQueueEventLoop {
- my $class = shift;
-
- while (1) {
- my $timeout = RunTimers();
- my @ret = eval { $KQueue->kevent($timeout) };
- if (my $err = $@) {
- # workaround https://rt.cpan.org/Ticket/Display.html?id=116615
- if ($err =~ /Interrupted system call/) {
- @ret = ();
- } else {
- die $err;
- }
- }
-
- foreach my $kev (@ret) {
- $DescriptorMap{$kev->[0]}->event_step;
- }
- return unless PostEventLoop();
- }
+ } while (PostEventLoop());
+ _run_later();
}
=head2 C<< CLASS->SetPostLoopCallback( CODEREF ) >>
# now we can close sockets that wanted to close during our event processing.
# (we didn't want to close them during the loop, as we didn't want fd numbers
# being reused and confused during the event loop)
- while (my $sock = shift @ToClose) {
- my $fd = fileno($sock);
-
- # close the socket. (not a PublicInbox::DS close)
- CORE::close($sock);
-
- # and now we can finally remove the fd from the map. see
- # comment above in ->close.
- delete $DescriptorMap{$fd};
- }
-
+ delete($DescriptorMap{fileno($_)}) for @ToClose;
+ @ToClose = (); # let refcounting drop everything all at once
# by default we keep running, unless a postloop callback (either per-object
# or global) cancels it
return $keep_running;
}
-# map EPOLL* bits to kqueue EV_* flags for EV_SET
-sub kq_flag ($$) {
- my ($bit, $ev) = @_;
- if ($ev & $bit) {
- my $fl = EV_ADD() | EV_ENABLE();
- ($ev & EPOLLONESHOT) ? ($fl|EV_ONESHOT()) : $fl;
- } else {
- EV_DISABLE();
- }
-}
-
#####################################################################
### PublicInbox::DS-the-object code
#####################################################################
$self->{sock} = $sock;
my $fd = fileno($sock);
- Carp::cluck("undef sock and/or fd in PublicInbox::DS->new. sock=" . ($sock || "") . ", fd=" . ($fd || ""))
- unless $sock && $fd;
-
_InitPoller();
- if ($HaveEpoll) {
-retry:
- if (epoll_ctl($Epoll, EPOLL_CTL_ADD, $fd, $ev)) {
- if ($! == EINVAL && ($ev & EPOLLEXCLUSIVE)) {
- $ev &= ~EPOLLEXCLUSIVE;
- goto retry;
- }
- die "couldn't add epoll watch for $fd: $!\n";
+	# Register $fd with the poller.  If the add fails with EINVAL while
+	# EPOLLEXCLUSIVE is requested (presumably the kernel/poller lacks
+	# support for that flag), clear the flag and try again.  A loop is
+	# used rather than "goto retry" since no "retry" label is in scope.
+	while (epoll_ctl($Epoll, EPOLL_CTL_ADD, $fd, $ev)) {
+		if ($! == EINVAL && ($ev & EPOLLEXCLUSIVE)) {
+			$ev &= ~EPOLLEXCLUSIVE;
+			next;
+		}
+		die "couldn't add epoll watch for $fd: $!\n";
+	}
- elsif ($HaveKQueue) {
- $KQueue->EV_SET($fd, EVFILT_READ(), EV_ADD() | kq_flag(EPOLLIN, $ev));
- $KQueue->EV_SET($fd, EVFILT_WRITE(), EV_ADD() | kq_flag(EPOLLOUT, $ev));
- }
-
- Carp::cluck("PublicInbox::DS::new blowing away existing descriptor map for fd=$fd ($DescriptorMap{$fd})")
- if $DescriptorMap{$fd};
+ confess("DescriptorMap{$fd} defined ($DescriptorMap{$fd})")
+ if defined($DescriptorMap{$fd});
$DescriptorMap{$fd} = $self;
- return $self;
}
### I N S T A N C E M E T H O D S
#####################################################################
+# append an item (object or coderef) to the next-tick queue
+sub requeue ($) {
+	my ($item) = @_;
+	push(@$nextq, $item);
+}
+
=head2 C<< $obj->close >>
Close the socket.
# if we're using epoll, we have to remove this from our epoll fd so we stop getting
# notifications about it
- if ($HaveEpoll) {
- my $fd = fileno($sock);
- epoll_ctl($Epoll, EPOLL_CTL_DEL, $fd, 0) and
- confess("EPOLL_CTL_DEL: $!");
- }
+ my $fd = fileno($sock);
+ epoll_ctl($Epoll, EPOLL_CTL_DEL, $fd, 0) and
+ confess("EPOLL_CTL_DEL: $!");
# we explicitly don't delete from DescriptorMap here until we
# actually close the socket, as we might be in the middle of
$written;
}
+# Choose the epoll event bit to wait on: (sock, default).
+# IO::Socket::SSL sockets ask PublicInbox::TLS::epollbit(); any other
+# socket gets the caller-supplied default.
+sub epbit ($$) {
+	my ($sock, $default) = @_;
+	return $default if ref($sock) ne 'IO::Socket::SSL';
+	PublicInbox::TLS::epollbit();
+}
+
# returns 1 if done, 0 if incomplete
sub flush_write ($) {
my ($self) = @_;
my $wbuf = $self->{wbuf} or return 1;
- my $sock = $self->{sock} or return 1;
+ my $sock = $self->{sock};
next_buf:
while (my $bref = $wbuf->[0]) {
if (ref($bref) ne 'CODE') {
my $off = delete($self->{wbuf_off}) // 0;
- while (1) {
+ while ($sock) {
my $w = psendfile($sock, $bref, \$off);
if (defined $w) {
if ($w == 0) {
goto next_buf;
}
} elsif ($! == EAGAIN) {
+ epwait($sock, epbit($sock, EPOLLOUT) | EPOLLONESHOT);
$self->{wbuf_off} = $off;
- watch($self, EPOLLOUT|EPOLLONESHOT);
return 0;
} else {
return $self->close;
1; # all done
}
-sub do_read ($$$$) {
+# Decide what to do with a read buffer once no more data is available:
+# an empty buffer is dropped from the object, a non-empty one is stashed
+# in $self->{rbuf}.
+sub rbuf_idle ($$) {
+	my ($self, $rbuf) = @_;
+
+	# who knows how long till we can read again
+	return delete $self->{rbuf} if $$rbuf eq '';
+
+	$self->{rbuf} = $rbuf;
+}
+
+sub do_read ($$$;$) {
my ($self, $rbuf, $len, $off) = @_;
- my $r = sysread($self->{sock}, $$rbuf, $len, $off);
+ my $r = sysread(my $sock = $self->{sock}, $$rbuf, $len, $off // 0);
return ($r == 0 ? $self->close : $r) if defined $r;
# common for clients to break connections without warning,
# would be too noisy to log here:
- if (ref($self) eq 'IO::Socket::SSL') {
- my $ev = PublicInbox::TLS::epollbit() or return $self->close;
- watch($self, $ev | EPOLLONESHOT);
- } elsif ($! == EAGAIN) {
- watch($self, EPOLLIN | EPOLLONESHOT);
+ if ($! == EAGAIN) {
+ epwait($sock, epbit($sock, EPOLLIN) | EPOLLONESHOT);
+ rbuf_idle($self, $rbuf);
+ 0;
} else {
$self->close;
}
# PerlIO::mmap or PerlIO::scalar if needed
sub tmpio ($$$) {
my ($self, $bref, $off) = @_;
- # open(my $fh, '+>>', undef) doesn't set O_APPEND
- my ($fh, $path) = eval { tempfile('wbuf-XXXXXXX', TMPDIR => 1) };
- $fh or return drop($self, "tempfile: $@");
- open($fh, '+>>', $path) or return drop($self, "open: $!");
+ my $fh = tmpfile('wbuf', $self->{sock}, 1) or
+ return drop($self, "tmpfile $!");
$fh->autoflush(1);
- unlink($path) or return drop($self, "unlink: $!");
my $len = bytes::length($$bref) - $off;
$fh->write($$bref, $len, $off) or return drop($self, "write ($len): $!");
$fh
my $sock = $self->{sock} or return 1;
my $ref = ref $data;
my $bref = $ref ? $data : \$data;
- if (my $wbuf = $self->{wbuf}) { # already buffering, can't write more...
+ my $wbuf = $self->{wbuf};
+ if ($wbuf && scalar(@$wbuf)) { # already buffering, can't write more...
if ($ref eq 'CODE') {
push @$wbuf, $bref;
} else {
if (defined $written) {
return 1 if $written == $to_write;
+ requeue($self); # runs: event_step -> flush_write
} elsif ($! == EAGAIN) {
+ epwait($sock, epbit($sock, EPOLLOUT) | EPOLLONESHOT);
$written = 0;
} else {
return $self->close;
}
+
+ # deal with EAGAIN or partial write:
my $tmpio = tmpio($self, $bref, $written) or return 0;
- $self->{wbuf} = [ $tmpio ];
- watch($self, EPOLLOUT|EPOLLONESHOT);
+
+ # wbuf may be an empty array if we're being called inside
+ # ->flush_write via CODE bref:
+ push @{$self->{wbuf} ||= []}, $tmpio;
return 0;
}
}
sub msg_more ($$) {
my $self = $_[0];
my $sock = $self->{sock} or return 1;
+ my $wbuf = $self->{wbuf};
- if (MSG_MORE && !$self->{wbuf} && ref($sock) ne 'IO::Socket::SSL') {
+ if (MSG_MORE && (!defined($wbuf) || !scalar(@$wbuf)) &&
+ ref($sock) ne 'IO::Socket::SSL') {
my $n = send($sock, $_[1], MSG_MORE);
if (defined $n) {
my $nlen = bytes::length($_[1]) - $n;
return 1 if $nlen == 0; # all done!
# queue up the unwritten substring:
my $tmpio = tmpio($self, \($_[1]), $n) or return 0;
- $self->{wbuf} = [ $tmpio ];
- watch($self, EPOLLOUT|EPOLLONESHOT);
+ $self->{wbuf} //= $wbuf //= [];
+ push @$wbuf, $tmpio;
+ epwait($sock, EPOLLOUT|EPOLLONESHOT);
return 0;
}
}
- $self->write(\($_[1]));
-}
-sub watch ($$) {
- my ($self, $ev) = @_;
- my $sock = $self->{sock} or return;
- my $fd = fileno($sock);
- if ($HaveEpoll) {
- epoll_ctl($Epoll, EPOLL_CTL_MOD, $fd, $ev) and
- confess("EPOLL_CTL_MOD $!");
- } elsif ($HaveKQueue) {
- $KQueue->EV_SET($fd, EVFILT_READ(), kq_flag(EPOLLIN, $ev));
- $KQueue->EV_SET($fd, EVFILT_WRITE(), kq_flag(EPOLLOUT, $ev));
- }
- 0;
+ # don't redispatch into NNTPdeflate::write
+ PublicInbox::DS::write($self, \($_[1]));
}
-sub watch_in1 ($) { watch($_[0], EPOLLIN | EPOLLONESHOT) }
+# Change the event mask the poller watches for on $sock (EPOLL_CTL_MOD);
+# confess()es if epoll_ctl reports failure.
+sub epwait ($$) {
+	my ($sock, $events) = @_;
+	my $fd = fileno($sock);
+	epoll_ctl($Epoll, EPOLL_CTL_MOD, $fd, $events) and
+		confess("EPOLL_CTL_MOD $!");
+}
# return true if complete, false if incomplete (or failure)
sub accept_tls_step ($) {
my $sock = $self->{sock} or return;
return 1 if $sock->accept_SSL;
return $self->close if $! != EAGAIN;
- if (my $ev = PublicInbox::TLS::epollbit()) {
- unshift @{$self->{wbuf} ||= []}, \&accept_tls_step;
- return watch($self, $ev | EPOLLONESHOT);
+ epwait($sock, PublicInbox::TLS::epollbit() | EPOLLONESHOT);
+ unshift @{$self->{wbuf} ||= []}, \&accept_tls_step;
+ 0;
+}
+
+# One step of a TLS shutdown on $self->{sock}.
+# return true if complete, false if incomplete (or failure)
+sub shutdn_tls_step ($) {
+	my ($self) = @_;
+	my $sock = $self->{sock} or return;
+	if ($sock->stop_SSL(SSL_fast_shutdown => 1) || $! != EAGAIN) {
+		# either stop_SSL succeeded, or it failed with something
+		# other than EAGAIN; close in both cases
+		return $self->close;
+	}
+	# EAGAIN: wait for the event TLS asks for and put ourselves at
+	# the front of wbuf
+	epwait($sock, PublicInbox::TLS::epollbit() | EPOLLONESHOT);
+	unshift @{$self->{wbuf} ||= []}, \&shutdn_tls_step;
+	0;
+}
+
+# don't bother with shutdown($sock, 2), we don't fork+exec w/o CLOEXEC
+# or fork w/o exec, so no inadvertent socket sharing
+# Shut the connection down: IO::Socket::SSL sockets go through
+# shutdn_tls_step, anything else is closed right away.
+sub shutdn ($) {
+	my ($self) = @_;
+	my $sock = $self->{sock} or return;
+	return shutdn_tls_step($self) if ref($sock) eq 'IO::Socket::SSL';
+	$self->close;
+}
+
+# Register ($pid, $cb, $arg) so reap_pids can waitpid() on $pid and then
+# invoke the callback.  Dies unless the event loop is running.
+# must be called with eval, PublicInbox::DS may not be loaded (see t/qspawn.t)
+sub dwaitpid ($$$) {
+	my ($pid, $cb, $arg) = @_;
+	die "Not in EventLoop\n" unless $in_loop;
+	push @$WaitPids, [ $pid, $cb, $arg ];
+
+	# We could've just missed our SIGCHLD, cover it, here:
+	requeue(\&reap_pids);
+}
+
+# Run every callback queued via later().  The queue and its timer are
+# reset first, so callbacks may call later() again.
+sub _run_later () {
+	my $callbacks = $later_queue;
+	$later_queue = [];
+	$later_timer = undef;
+	foreach my $cb (@$callbacks) {
+		$cb->();
+	}
+}
+
+# Queue $cb to be run by _run_later; a single 60 second timer is armed
+# if none is pending.  Returns the pending timer.
+sub later ($) {
+	my ($cb) = @_;
+	push(@$later_queue, $cb);
+	$later_timer = add_timer(60, \&_run_later)
+		unless defined $later_timer;
+	$later_timer;
+}
+
+# Walk $EXPMAP (fd -> [ idle_time, object ]) and call ->shutdn on every
+# object whose idle_time is more than $EXPTIME seconds in the past.
+# Entries which are recent enough, or whose ->shutdn returned false,
+# are carried over; the check is rescheduled via later() while any
+# entries remain.
+sub expire_old () {
+	my $cutoff = now() - $EXPTIME;
+	my %keep;
+	while (my ($fd, $ent) = each %$EXPMAP) {
+		my ($idle_time, $ds_obj) = @$ent;
+		# forget the entry only when it is old AND shutdn was true
+		next if $idle_time < $cutoff && $ds_obj->shutdn;
+		$keep{$fd} = $ent;
}
- drop($self, 'BUG? EAGAIN but '.PublicInbox::TLS::err());
+	$EXPMAP = \%keep;
+	$exp_timer = scalar(keys %keep) ? later(\&expire_old) : undef;
+}
+
+# Record the current time as the idle timestamp for this object's fd
+# and make sure an expire_old run is scheduled.
+sub update_idle_time {
+	my ($self) = @_;
+	my $sock = $self->{sock} or return;
+	my $fd = fileno($sock);
+	$EXPMAP->{$fd} = [ now(), $self ];
+	$exp_timer //= later(\&expire_old);
+}
+
+# True if this object's recorded idle timestamp plus $EXPTIME is still
+# later than $now; returns nothing when there is no socket or no
+# $EXPMAP entry for it.
+sub not_idle_long {
+	my ($self, $now) = @_;
+	my $sock = $self->{sock} or return;
+	my $ent = $EXPMAP->{fileno($sock)} or return;
+	my ($idle_time) = @$ent;
+	($idle_time + $EXPTIME) > $now;
}
package PublicInbox::DS::Timer;