1 # This library is free software; you can redistribute it and/or modify
2 # it under the same terms as Perl itself.
4 # This license differs from the rest of public-inbox
6 # This is a fork of the (for now) unmaintained Danga::Socket 1.61.
7 # Unused features will be removed, and updates will be made to take
8 # advantage of newer kernels.
10 # API changes to diverge from Danga::Socket will happen to better
11 # accommodate new features and improve scalability. Do not expect
12 # this to be a stable API like Danga::Socket.
13 # Bugs encountered (and likely fixed) are reported to
14 # bug-Danga-Socket@rt.cpan.org and visible at:
15 # https://rt.cpan.org/Public/Dist/Display.html?Name=Danga-Socket
16 package PublicInbox::DS;
22 use Fcntl qw(FD_CLOEXEC F_SETFD F_GETFD);
26 use PublicInbox::Syscall qw(:epoll);
28 use fields ('sock', # underlying socket
29 'fd', # numeric file descriptor
30 'wbuf', # arrayref of scalars, scalarrefs, or coderefs to write
31 'wbuf_off', # offset into first element of wbuf to start writing at
32 'closed', # bool: socket is closed
33 'event_watch', # bitmask of events the client is interested in (POLLIN,OUT,etc.)
36 use Errno qw(EPIPE EAGAIN ECONNRESET EINVAL);
37 use Carp qw(croak confess);
39 use constant DebugLevel => 0;
41 use constant POLLIN => 1; # "readable" bit: toggled by watch_read, dispatches event_read
42 use constant POLLOUT => 4; # "writable" bit: toggled by watch_write, dispatches event_write
43 use constant POLLERR => 8; # error bit: dispatches event_err; part of the default event_watch
44 use constant POLLHUP => 16; # hangup bit: dispatches event_hup; part of the default event_watch
45 use constant POLLNVAL => 32; # part of the default event_watch mask set in new()
47 our $HAVE_KQUEUE = eval { require IO::KQueue; 1 };
50 $HaveEpoll, # Flag -- is epoll available? initially undefined.
52 %DescriptorMap, # fd (num) -> PublicInbox::DS object
53 $Epoll, # Global epoll fd (for epoll mode only)
54 $KQueue, # Global kqueue fd ref (for kqueue mode only)
55 $_io, # IO::Handle for Epoll
56 @ToClose, # sockets to close when event loop is done
58 $PostLoopCallback, # subref to call at the end of each loop, if defined (global)
60 $LoopTimeout, # timeout of event loop in milliseconds
61 $DoneInit, # if we've done the one-time module init yet
65 # this may be set to zero with old kernels
66 our $EPOLLEXCLUSIVE = EPOLLEXCLUSIVE;
69 #####################################################################
70 ### C L A S S M E T H O D S
71 #####################################################################
73 =head2 C<< CLASS->Reset() >>
81 $LoopTimeout = -1; # no timeout by default
84 $PostLoopCallback = undef;
87 # NOTE kqueue is close-on-fork, and we don't account for it, yet
88 # OTOH, we (public-inbox) don't need this sub outside of tests...
89 POSIX::close($$KQueue) if !$_io && $KQueue && $$KQueue >= 0;
92 $_io = undef; # close $Epoll
95 *EventLoop = *FirstTimeEventLoop;
98 =head2 C<< CLASS->SetLoopTimeout( $timeout ) >>
100 Set the loop timeout for the event loop to some value in milliseconds.
102 A timeout of -1 means poll forever. A timeout of 0 (zero) means poll and return
107 return $LoopTimeout = $_[1] + 0;
110 =head2 C<< CLASS->DebugMsg( $format, @args ) >>
112 Print the debugging message specified by the C<sprintf>-style I<format> and
117 my ( $class, $fmt, @args ) = @_;
119 printf STDERR ">>> $fmt\n", @args;
122 =head2 C<< CLASS->AddTimer( $seconds, $coderef ) >>
124 Add a timer to occur $seconds from now. $seconds may be fractional, but timers
125 are not guaranteed to fire at the exact time you ask for.
127 Returns a timer object which you can call C<< $timer->cancel >> on if you need to.
132 my ($secs, $coderef) = @_;
134 my $fire_time = Time::HiRes::time() + $secs;
136 my $timer = bless [$fire_time, $coderef], "PublicInbox::DS::Timer";
138 if (!@Timers || $fire_time >= $Timers[-1][0]) {
139 push @Timers, $timer;
143 # Now, where do we insert? (NOTE: this appears slow, algorithm-wise,
144 # but it was compared against calendar queues, heaps, naive push/sort,
145 # and a bunch of other versions, and found to be fastest with a large
146 # variety of datasets.)
147 for (my $i = 0; $i < @Timers; $i++) {
148 if ($Timers[$i][0] > $fire_time) {
149 splice(@Timers, $i, 0, $timer);
154 die "Shouldn't get here.";
157 # keeping this around in case we support other FD types for now,
158 # epoll_create1(EPOLL_CLOEXEC) requires Linux 2.6.27+...
159 sub set_cloexec ($) {
162 $_io = IO::Handle->new_from_fd($fd, 'r+') or return;
163 defined(my $fl = fcntl($_io, F_GETFD, 0)) or return;
164 fcntl($_io, F_SETFD, $fl | FD_CLOEXEC);
173 $KQueue = IO::KQueue->new();
174 $HaveKQueue = defined $KQueue;
176 *EventLoop = *KQueueEventLoop;
179 elsif (PublicInbox::Syscall::epoll_defined()) {
180 $Epoll = eval { epoll_create(1024); };
181 $HaveEpoll = defined $Epoll && $Epoll >= 0;
184 *EventLoop = *EpollEventLoop;
188 if (!$HaveEpoll && !$HaveKQueue) {
190 *EventLoop = *PollEventLoop;
194 =head2 C<< CLASS->EventLoop() >>
196 Start processing IO events. In most daemon programs this never exits. See
197 C<PostLoopCallback> below for how to exit the loop.
200 sub FirstTimeEventLoop {
206 EpollEventLoop($class);
207 } elsif ($HaveKQueue) {
208 KQueueEventLoop($class);
210 PollEventLoop($class);
214 # runs timers and returns milliseconds for next one, or next event loop
216 return $LoopTimeout unless @Timers;
218 my $now = Time::HiRes::time();
221 while (@Timers && $Timers[0][0] <= $now) {
222 my $to_run = shift(@Timers);
223 $to_run->[1]->($now) if $to_run->[1];
226 return $LoopTimeout unless @Timers;
228 # convert time to an even number of milliseconds, adding 1
229 # extra, otherwise floating point fun can occur and we'll
230 # call RunTimers like 20-30 times, each returning a timeout
231 # of 0.0000212 seconds
232 my $timeout = int(($Timers[0][0] - $now) * 1000) + 1;
234 # -1 is an infinite timeout, so prefer a real timeout
235 return $timeout if $LoopTimeout == -1;
237 # otherwise pick the lower of our regular timeout and time until
239 return $LoopTimeout if $LoopTimeout < $timeout;
243 ### The epoll-based event loop. Gets installed as EventLoop if IO::Epoll loads
251 my $timeout = RunTimers();
253 # get up to 1000 events
254 my $evcount = epoll_wait($Epoll, 1000, $timeout, \@events);
255 for ($i=0; $i<$evcount; $i++) {
256 my $ev = $events[$i];
258 # it's possible epoll_wait returned many events, including some at the end
259 # that ones in the front triggered unregister-interest actions. if we
260 # can't find the %sock entry, it's because we're no longer interested
262 my PublicInbox::DS $pob = $DescriptorMap{$ev->[0]};
264 my $state = $ev->[1];
266 DebugLevel >= 1 && $class->DebugMsg("Event: fd=%d (%s), state=%d \@ %s\n",
267 $ev->[0], ref($pob), $ev->[1], time);
269 # standard non-profiling codepath
270 $pob->event_read if $state & EPOLLIN && ! $pob->{closed};
271 $pob->event_write if $state & EPOLLOUT && ! $pob->{closed};
272 if ($state & (EPOLLERR|EPOLLHUP)) {
273 $pob->event_err if $state & EPOLLERR && ! $pob->{closed};
274 $pob->event_hup if $state & EPOLLHUP && ! $pob->{closed};
277 return unless PostEventLoop();
282 ### The fallback IO::Poll-based event loop. Gets installed as EventLoop if
283 ### neither epoll nor kqueue is available.
287 my PublicInbox::DS $pob;
290 my $timeout = RunTimers();
292 # the following sets up @poll as a series of ($poll,$event_mask)
293 # items, then uses IO::Poll::_poll, implemented in XS, which
294 # modifies the array in place with the even elements being
295 # replaced with the event masks that occurred.
297 while ( my ($fd, $sock) = each %DescriptorMap ) {
298 push @poll, $fd, $sock->{event_watch};
301 # if nothing to poll, either end immediately (if no timeout)
302 # or just keep calling the callback
304 select undef, undef, undef, ($timeout / 1000);
305 return unless PostEventLoop();
309 my $count = IO::Poll::_poll($timeout, @poll);
310 unless ($count >= 0) {
311 return unless PostEventLoop();
315 # Fetch handles with read events
317 my ($fd, $state) = splice(@poll, 0, 2);
320 $pob = $DescriptorMap{$fd};
322 $pob->event_read if $state & POLLIN && ! $pob->{closed};
323 $pob->event_write if $state & POLLOUT && ! $pob->{closed};
324 $pob->event_err if $state & POLLERR && ! $pob->{closed};
325 $pob->event_hup if $state & POLLHUP && ! $pob->{closed};
328 return unless PostEventLoop();
334 ### The kqueue-based event loop. Gets installed as EventLoop if IO::KQueue works
336 sub KQueueEventLoop {
340 my $timeout = RunTimers();
341 my @ret = eval { $KQueue->kevent($timeout) };
343 # workaround https://rt.cpan.org/Ticket/Display.html?id=116615
344 if ($err =~ /Interrupted system call/) {
351 foreach my $kev (@ret) {
352 my ($fd, $filter, $flags, $fflags) = @$kev;
353 my PublicInbox::DS $pob = $DescriptorMap{$fd};
355 DebugLevel >= 1 && $class->DebugMsg("Event: fd=%d (%s), flags=%d \@ %s\n",
356 $fd, ref($pob), $flags, time);
358 $pob->event_read if $filter == IO::KQueue::EVFILT_READ() && !$pob->{closed};
359 $pob->event_write if $filter == IO::KQueue::EVFILT_WRITE() && !$pob->{closed};
360 if ($flags == IO::KQueue::EV_EOF() && !$pob->{closed}) {
368 return unless PostEventLoop();
374 =head2 C<< CLASS->SetPostLoopCallback( CODEREF ) >>
376 Sets post loop callback function. Pass a subref and it will be
377 called every time the event loop finishes.
379 Return 1 (or any true value) from the sub to make the loop continue, 0 or false
382 The callback function will be passed one parameter: \%DescriptorMap
385 sub SetPostLoopCallback {
386 my ($class, $ref) = @_;
389 $PostLoopCallback = (defined $ref && ref $ref eq 'CODE') ? $ref : undef;
392 # Internal function: run the post-event callback, send read events
393 # for pushed-back data, and close pending connections. returns 1
394 # if event loop should continue, or 0 to shut it all down.
396 # now we can close sockets that wanted to close during our event processing.
397 # (we didn't want to close them during the loop, as we didn't want fd numbers
398 # being reused and confused during the event loop)
399 while (my $sock = shift @ToClose) {
400 my $fd = fileno($sock);
402 # close the socket. (not a PublicInbox::DS close)
405 # and now we can finally remove the fd from the map. see
406 # comment above in _cleanup.
407 delete $DescriptorMap{$fd};
411 # by default we keep running, unless a postloop callback (either per-object
412 # or global) cancels it
413 my $keep_running = 1;
415 # now we're at the very end, call callback if defined
416 if (defined $PostLoopCallback) {
417 $keep_running &&= $PostLoopCallback->(\%DescriptorMap);
420 return $keep_running;
423 #####################################################################
424 ### PublicInbox::DS-the-object code
425 #####################################################################
427 =head2 OBJECT METHODS
429 =head2 C<< CLASS->new( $socket ) >>
431 Create a new PublicInbox::DS subclass object for the given I<socket> which will
432 react to events on it during the C<EventLoop>.
434 This is normally (always?) called from your subclass via:
436 $class->SUPER::new($socket);
440 my ($self, $sock, $exclusive) = @_;
441 $self = fields::new($self) unless ref $self;
443 $self->{sock} = $sock;
444 my $fd = fileno($sock);
446 Carp::cluck("undef sock and/or fd in PublicInbox::DS->new. sock=" . ($sock || "") . ", fd=" . ($fd || ""))
451 $self->{wbuf_off} = 0;
454 my $ev = $self->{event_watch} = POLLERR|POLLHUP|POLLNVAL;
460 $ev = $self->{event_watch} = EPOLLIN|EPOLLERR|EPOLLHUP|$EPOLLEXCLUSIVE;
463 if (epoll_ctl($Epoll, EPOLL_CTL_ADD, $fd, $ev)) {
464 if ($! == EINVAL && ($ev & $EPOLLEXCLUSIVE)) {
465 $EPOLLEXCLUSIVE = 0; # old kernel
466 $ev = $self->{event_watch} = EPOLLIN|EPOLLERR|EPOLLHUP;
469 die "couldn't add epoll watch for $fd: $!\n";
472 elsif ($HaveKQueue) {
473 # Add them to the queue but disabled for now
474 $KQueue->EV_SET($fd, IO::KQueue::EVFILT_READ(),
475 IO::KQueue::EV_ADD() | IO::KQueue::EV_DISABLE());
476 $KQueue->EV_SET($fd, IO::KQueue::EVFILT_WRITE(),
477 IO::KQueue::EV_ADD() | IO::KQueue::EV_DISABLE());
480 Carp::cluck("PublicInbox::DS::new blowing away existing descriptor map for fd=$fd ($DescriptorMap{$fd})")
481 if $DescriptorMap{$fd};
483 $DescriptorMap{$fd} = $self;
488 #####################################################################
489 ### I N S T A N C E M E T H O D S
490 #####################################################################
492 =head2 C<< $obj->steal_socket() >>
494 Basically returns our socket and makes it so that we don't try to close it,
495 but we do remove it from epoll handlers. THIS CLOSES $self. It is the same
496 thing as calling close, except it gives you the socket to use.
500 my PublicInbox::DS $self = $_[0];
501 return if $self->{closed};
503 # cleanup does most of the work of closing this socket
506 # now undef our internal sock and fd structures so we don't use them
507 my $sock = $self->{sock};
508 $self->{sock} = undef;
512 =head2 C<< $obj->close( [$reason] ) >>
514 Close the socket. The I<reason> argument will be used in debugging messages.
518 my PublicInbox::DS $self = $_[0];
519 return if $self->{closed};
521 # print out debugging info for this close
523 my ($pkg, $filename, $line) = caller;
524 my $reason = $_[1] || "";
525 warn "Closing \#$self->{fd} due to $pkg/$filename/$line ($reason)\n";
528 # this does most of the work of closing us
531 # defer closing the actual socket until the event loop is done
532 # processing this round of events. (otherwise we might reuse fds)
534 push @ToClose, $self->{sock};
535 $self->{sock} = undef;
541 ### METHOD: _cleanup()
542 ### Called by our closers so we can clean internal data structures.
544 my PublicInbox::DS $self = $_[0];
546 # we're effectively closed; we have no fd and sock when we leave here
549 # we need to flush our write buffer, as there may
550 # be self-referential closures (sub { $client->close })
551 # preventing the object from being destroyed
552 @{$self->{wbuf}} = ();
554 # if we're using epoll, we have to remove this from our epoll fd so we stop getting
555 # notifications about it
556 if ($HaveEpoll && $self->{fd}) {
557 if (epoll_ctl($Epoll, EPOLL_CTL_DEL, $self->{fd}, $self->{event_watch}) != 0) {
558 # dump_error prints a backtrace so we can try to figure out why this happened
559 $self->dump_error("epoll_ctl(): failure deleting fd=$self->{fd} during _cleanup(); $! (" . ($!+0) . ")");
563 # we explicitly don't delete from DescriptorMap here until we
564 # actually close the socket, as we might be in the middle of
565 # processing an epoll_wait/etc that returned hundreds of fds, one
566 # of which is not yet processed and is what we're closing. if we
567 # keep it in DescriptorMap, then the event harnesses can just
568 # look at $pob->{closed} and ignore it. but if it's an
569 # un-accounted for fd, then they (understandably) freak out a bit
570 # and emit warnings, thinking their state got off.
572 # and finally get rid of our fd so we can't use it anywhere else
576 =head2 C<< $obj->sock() >>
578 Returns the underlying IO::Handle for the object.
582 my PublicInbox::DS $self = shift;
583 return $self->{sock};
586 =head2 C<< $obj->write( $data ) >>
588 Write the specified data to the underlying handle. I<data> may be scalar,
589 scalar ref, code ref (to run when there), or undef just to kick-start.
590 Returns 1 if writes all went through, or 0 if there are writes in queue. If
591 it returns 1, caller should stop waiting for 'writable' events.
595 my PublicInbox::DS $self;
599 # nobody should be writing to closed sockets, but caller code can
600 # do two writes within an event, have the first fail and
601 # disconnect the other side (whose destructor then closes the
602 # calling object, but it's still in a method), and then the
603 # now-dead object does its second write. that is this case. we
604 # just lie and say it worked. it'll be dead soon and won't be
606 return 1 if $self->{closed};
610 # just queue data if there's already a wait
612 my $wbuf = $self->{wbuf};
615 $bref = ref $data ? $data : \$data;
621 # this flag says we're bypassing the queue system, knowing we're the
622 # only outstanding write, and hoping we don't ever need to use it.
623 # if so later, though, we'll need to queue
629 return 1 unless $bref ||= $wbuf->[0];
633 $len = length($$bref); # this will die if $bref is a code ref, caught below
636 if (UNIVERSAL::isa($bref, "CODE")) {
637 unless ($need_queue) {
642 # code refs are just run and never get reenqueued
643 # (they're one-shot), so turn off the flag indicating the
644 # outstanding data needs queueing.
650 die "Write error: $@ <$bref>";
653 my $to_write = $len - $self->{wbuf_off};
654 my $written = syswrite($self->{sock}, $$bref, $to_write,
657 if (! defined $written) {
659 return $self->close("EPIPE");
660 } elsif ($! == EAGAIN) {
661 # since connection has stuff to write, it should now be
662 # interested in pending writes:
666 $self->watch_write(1);
668 } elsif ($! == ECONNRESET) {
669 return $self->close("ECONNRESET");
672 DebugLevel >= 1 && $self->debugmsg("Closing connection ($self) due to write error: $!\n");
674 return $self->close("write_error");
675 } elsif ($written != $to_write) {
676 DebugLevel >= 2 && $self->debugmsg("Wrote PARTIAL %d bytes to %d",
677 $written, $self->{fd});
681 # since connection has stuff to write, it should now be
682 # interested in pending writes:
683 $self->{wbuf_off} += $written;
684 $self->on_incomplete_write;
686 } elsif ($written == $to_write) {
687 DebugLevel >= 2 && $self->debugmsg("Wrote ALL %d bytes to %d (nq=%d)",
688 $written, $self->{fd}, $need_queue);
689 $self->{wbuf_off} = 0;
690 $self->watch_write(0);
692 # this was our only write, so we can return immediately
693 # since we avoided incrementing the buffer size or
694 # putting it in the buffer. we also know there
695 # can't be anything else to write.
696 return 1 if $need_queue;
705 sub on_incomplete_write {
706 my PublicInbox::DS $self = shift;
707 $self->watch_write(1);
710 =head2 C<< $obj->read( $bytecount ) >>
712 Read at most I<bytecount> bytes from the underlying handle; returns scalar
713 ref on read, or undef on connection closed.
717 my PublicInbox::DS $self = shift;
718 return if $self->{closed};
721 my $sock = $self->{sock};
723 # if this is too high, perl quits(!!). reports on mailing lists
724 # don't seem to point to a universal answer. 5MB worked for some,
725 # crashed for others. 1MB works for more people. let's go with 1MB
727 my $req_bytes = $bytes > 1048576 ? 1048576 : $bytes;
729 my $res = sysread($sock, $buf, $req_bytes, 0);
730 DebugLevel >= 2 && $self->debugmsg("sysread = %d; \$! = %d", $res, $!);
732 if (! $res && $! != EAGAIN) {
733 # catches 0=conn closed or undef=error
734 DebugLevel >= 2 && $self->debugmsg("Fd \#%d read hit the end of the road.", $self->{fd});
741 =head2 (VIRTUAL) C<< $obj->event_read() >>
743 Readable event handler. Concrete derivatives of PublicInbox::DS should
744 provide an implementation of this. The default implementation will die if
748 sub event_read { die "Base class event_read called for $_[0]\n"; } # placeholder: always dies, naming the receiver; subclasses provide the real handler
750 =head2 (VIRTUAL) C<< $obj->event_err() >>
752 Error event handler. Concrete derivatives of PublicInbox::DS should
753 provide an implementation of this. The default implementation will die if
757 sub event_err { die "Base class event_err called for $_[0]\n"; } # placeholder: always dies, naming the receiver; subclasses provide the real handler
759 =head2 (VIRTUAL) C<< $obj->event_hup() >>
761 'Hangup' event handler. Concrete derivatives of PublicInbox::DS should
762 provide an implementation of this. The default implementation will die if
766 sub event_hup { die "Base class event_hup called for $_[0]\n"; } # placeholder: always dies, naming the receiver; subclasses provide the real handler
768 =head2 C<< $obj->event_write() >>
770 Writable event handler. Concrete derivatives of PublicInbox::DS may wish to
771 provide an implementation of this. The default implementation calls
772 C<write()> with an C<undef>.
780 =head2 C<< $obj->watch_read( $boolean ) >>
782 Turn 'readable' event notification on or off.
786 my PublicInbox::DS $self = shift;
787 return if $self->{closed} || !$self->{sock};
790 my $event = $self->{event_watch};
792 $event &= ~POLLIN if ! $val;
793 $event |= POLLIN if $val;
795 # If it changed, set it
796 if ($event != $self->{event_watch}) {
798 $KQueue->EV_SET($self->{fd}, IO::KQueue::EVFILT_READ(),
799 $val ? IO::KQueue::EV_ENABLE() : IO::KQueue::EV_DISABLE());
802 epoll_ctl($Epoll, EPOLL_CTL_MOD, $self->{fd}, $event)
803 and $self->dump_error("couldn't modify epoll settings for $self->{fd} " .
804 "from $self->{event_watch} -> $event: $! (" . ($!+0) . ")");
806 $self->{event_watch} = $event;
810 =head2 C<< $obj->watch_write( $boolean ) >>
812 Turn 'writable' event notification on or off.
816 my PublicInbox::DS $self = shift;
817 return if $self->{closed} || !$self->{sock};
820 my $event = $self->{event_watch};
822 $event &= ~POLLOUT if ! $val;
823 $event |= POLLOUT if $val;
825 # If it changed, set it
826 if ($event != $self->{event_watch}) {
828 $KQueue->EV_SET($self->{fd}, IO::KQueue::EVFILT_WRITE(),
829 $val ? IO::KQueue::EV_ENABLE() : IO::KQueue::EV_DISABLE());
832 epoll_ctl($Epoll, EPOLL_CTL_MOD, $self->{fd}, $event)
833 and $self->dump_error("couldn't modify epoll settings for $self->{fd} " .
834 "from $self->{event_watch} -> $event: $! (" . ($!+0) . ")");
836 $self->{event_watch} = $event;
840 =head2 C<< $obj->dump_error( $message ) >>
842 Prints to STDERR a backtrace with information about this socket and what led
843 up to the dump_error call.
849 while (my ($file, $line, $sub) = (caller($i++))[1..3]) {
850 push @list, "\t$file:$line called $sub\n";
853 warn "ERROR: $_[1]\n" .
854 "\t$_[0] = " . $_[0]->as_string . "\n" .
858 =head2 C<< $obj->debugmsg( $format, @args ) >>
860 Print the debugging message specified by the C<sprintf>-style I<format> and
865 my ( $self, $fmt, @args ) = @_;
866 confess "Not an object" unless ref $self;
869 printf STDERR ">>> $fmt\n", @args;
872 =head2 C<< $obj->as_string() >>
874 Returns a string describing this socket.
878 my PublicInbox::DS $self = shift;
879 my $rw = "(" . ($self->{event_watch} & POLLIN ? 'R' : '') .
880 ($self->{event_watch} & POLLOUT ? 'W' : '') . ")";
881 my $ret = ref($self) . "$rw: " . ($self->{closed} ? "closed" : "open");
885 package PublicInbox::DS::Timer;
886 # [$abs_float_firetime, $coderef];
893 =head1 AUTHORS (Danga::Socket)
895 Brad Fitzpatrick <brad@danga.com> - author
897 Michael Granger <ged@danga.com> - docs, testing
899 Mark Smith <junior@danga.com> - contributor, heavy user, testing
901 Matt Sergeant <matt@sergeant.org> - kqueue support, docs, timers, other bits