X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FSyscall.pm;h=c00385b94db84b63facf7a8d57296ac76b3b1421;hb=4cd7a78f3b8c03670e2d77675229472506eee1eb;hp=cf7004548684c857607b7a46ac8a726d5733a932;hpb=cd50d183273c105a7f08b1875ba6f7a51d9f8e9a;p=public-inbox.git diff --git a/lib/PublicInbox/Syscall.pm b/lib/PublicInbox/Syscall.pm index cf700454..c00385b9 100644 --- a/lib/PublicInbox/Syscall.pm +++ b/lib/PublicInbox/Syscall.pm @@ -5,7 +5,7 @@ # This license differs from the rest of public-inbox # # This module is Copyright (c) 2005 Six Apart, Ltd. -# Copyright (C) 2019 all contributors +# Copyright (C) 2019-2021 all contributors # # All rights reserved. # @@ -13,31 +13,36 @@ # License or the Artistic License, as specified in the Perl README file. package PublicInbox::Syscall; use strict; -use POSIX qw(ENOSYS SEEK_CUR); +use v5.10.1; +use parent qw(Exporter); +use POSIX qw(ENOENT EEXIST ENOSYS O_NONBLOCK); use Config; -require Exporter; -use vars qw(@ISA @EXPORT_OK %EXPORT_TAGS $VERSION); - -$VERSION = "0.25"; -@ISA = qw(Exporter); -@EXPORT_OK = qw(sendfile epoll_ctl epoll_create epoll_wait - EPOLLIN EPOLLOUT EPOLLERR EPOLLHUP EPOLLRDBAND - EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD); -%EXPORT_TAGS = (epoll => [qw(epoll_ctl epoll_create epoll_wait - EPOLLIN EPOLLOUT EPOLLERR EPOLLHUP EPOLLRDBAND - EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD)], - sendfile => [qw(sendfile)], +# $VERSION = '0.25'; # Sys::Syscall version +our @EXPORT_OK = qw(epoll_ctl epoll_create epoll_wait + EPOLLIN EPOLLOUT EPOLLET + EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD + EPOLLONESHOT EPOLLEXCLUSIVE + signalfd rename_noreplace); +our %EXPORT_TAGS = (epoll => [qw(epoll_ctl epoll_create epoll_wait + EPOLLIN EPOLLOUT + EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD + EPOLLONESHOT EPOLLEXCLUSIVE)], ); -use constant EPOLLIN => 1; -use constant EPOLLOUT => 4; -use constant EPOLLERR => 8; -use constant EPOLLHUP => 16; -use constant EPOLLRDBAND => 128; -use constant EPOLL_CTL_ADD => 1; -use constant EPOLL_CTL_DEL => 2; -use constant EPOLL_CTL_MOD => 3; +use constant { + EPOLLIN => 1, + EPOLLOUT => 4, + # EPOLLERR => 8, + # EPOLLHUP => 16, + # EPOLLRDBAND => 128, + EPOLLEXCLUSIVE => (1 << 28), + EPOLLONESHOT => (1 << 30), + EPOLLET => (1 << 31), + EPOLL_CTL_ADD => 1, + EPOLL_CTL_DEL => 2, + EPOLL_CTL_MOD => 3, +}; our $loaded_syscall = 0; @@ -51,30 +56,33 @@ sub _load_syscall { $clean->(); # don't trust modules before us my $rv = eval { require 'syscall.ph'; 1 } || eval { require 'sys/syscall.ph'; 1 }; $clean->(); # don't require modules after us trust us - return $rv; + $rv; } -our ($sysname, $nodename, $release, $version, $machine) = POSIX::uname(); our ( $SYS_epoll_create, $SYS_epoll_ctl, $SYS_epoll_wait, - $SYS_sendfile, - $SYS_readahead, + $SYS_signalfd4, + $SYS_renameat2, ); +my $SFD_CLOEXEC = 02000000; # Perl does not expose O_CLOEXEC our $no_deprecated = 0; if ($^O eq "linux") { + my (undef, undef, $release, undef, $machine) = POSIX::uname(); + my ($maj, $min) = ($release =~ /\A([0-9]+)\.([0-9]+)/); + $SYS_renameat2 = 0 if "$maj.$min" < 3.15; # whether the machine requires 64-bit numbers to be on 8-byte # boundaries. my $u64_mod_8 = 0; # if we're running on an x86_64 kernel, but a 32-bit process, - # we need to use the i386 syscall numbers. + # we need to use the x32 or i386 syscall numbers. if ($machine eq "x86_64" && $Config{ptrsize} == 4) { - $machine = "i386"; + $machine = $Config{cppsymbols} =~ /\b__ILP32__=1\b/ ? 'x32' : 'i386'; } # Similarly for mips64 vs mips @@ -86,87 +94,99 @@ if ($^O eq "linux") { $SYS_epoll_create = 254; $SYS_epoll_ctl = 255; $SYS_epoll_wait = 256; - $SYS_sendfile = 187; # or 64: 239 - $SYS_readahead = 225; + $SYS_signalfd4 = 327; + $SYS_renameat2 //= 353; } elsif ($machine eq "x86_64") { $SYS_epoll_create = 213; $SYS_epoll_ctl = 233; $SYS_epoll_wait = 232; - $SYS_sendfile = 40; - $SYS_readahead = 187; + $SYS_signalfd4 = 289; + $SYS_renameat2 //= 316; + } elsif ($machine eq 'x32') { + $SYS_epoll_create = 1073742037; + $SYS_epoll_ctl = 1073742057; + $SYS_epoll_wait = 1073742056; + $SYS_signalfd4 = 1073742113; + $SYS_renameat2 //= 0x40000000 + 316; + } elsif ($machine eq 'sparc64') { + $SYS_epoll_create = 193; + $SYS_epoll_ctl = 194; + $SYS_epoll_wait = 195; + $u64_mod_8 = 1; + $SYS_signalfd4 = 317; + $SYS_renameat2 //= 345; + $SFD_CLOEXEC = 020000000; } elsif ($machine =~ m/^parisc/) { $SYS_epoll_create = 224; $SYS_epoll_ctl = 225; $SYS_epoll_wait = 226; - $SYS_sendfile = 122; # sys_sendfile64=209 - $SYS_readahead = 207; $u64_mod_8 = 1; + $SYS_signalfd4 = 309; } elsif ($machine =~ m/^ppc64/) { $SYS_epoll_create = 236; $SYS_epoll_ctl = 237; $SYS_epoll_wait = 238; - $SYS_sendfile = 186; # (sys32_sendfile). sys32_sendfile64=226 (64 bit processes: sys_sendfile64=186) - $SYS_readahead = 191; # both 32-bit and 64-bit vesions $u64_mod_8 = 1; + $SYS_signalfd4 = 313; + $SYS_renameat2 //= 357; } elsif ($machine eq "ppc") { $SYS_epoll_create = 236; $SYS_epoll_ctl = 237; $SYS_epoll_wait = 238; - $SYS_sendfile = 186; # sys_sendfile64=226 - $SYS_readahead = 191; $u64_mod_8 = 1; + $SYS_signalfd4 = 313; + $SYS_renameat2 //= 357; } elsif ($machine =~ m/^s390/) { $SYS_epoll_create = 249; $SYS_epoll_ctl = 250; $SYS_epoll_wait = 251; - $SYS_sendfile = 187; # sys_sendfile64=223 - $SYS_readahead = 222; $u64_mod_8 = 1; + $SYS_signalfd4 = 322; + $SYS_renameat2 //= 347; } elsif ($machine eq "ia64") { $SYS_epoll_create = 1243; $SYS_epoll_ctl = 1244; $SYS_epoll_wait = 1245; - $SYS_sendfile = 1187; - $SYS_readahead = 1216; $u64_mod_8 = 1; + $SYS_signalfd4 = 289; } elsif ($machine eq "alpha") { # natural alignment, ints are 32-bits - $SYS_sendfile = 370; # (sys_sendfile64) $SYS_epoll_create = 407; $SYS_epoll_ctl = 408; $SYS_epoll_wait = 409; - $SYS_readahead = 379; $u64_mod_8 = 1; + $SYS_signalfd4 = 484; + $SFD_CLOEXEC = 010000000; } elsif ($machine eq "aarch64") { $SYS_epoll_create = 20; # (sys_epoll_create1) $SYS_epoll_ctl = 21; $SYS_epoll_wait = 22; # (sys_epoll_pwait) - $SYS_sendfile = 71; # (sys_sendfile64) - $SYS_readahead = 213; $u64_mod_8 = 1; $no_deprecated = 1; + $SYS_signalfd4 = 74; + $SYS_renameat2 //= 276; } elsif ($machine =~ m/arm(v\d+)?.*l/) { # ARM OABI $SYS_epoll_create = 250; $SYS_epoll_ctl = 251; $SYS_epoll_wait = 252; - $SYS_sendfile = 187; - $SYS_readahead = 225; $u64_mod_8 = 1; + $SYS_signalfd4 = 355; + $SYS_renameat2 //= 382; } elsif ($machine =~ m/^mips64/) { - $SYS_sendfile = 5039; $SYS_epoll_create = 5207; $SYS_epoll_ctl = 5208; $SYS_epoll_wait = 5209; - $SYS_readahead = 5179; $u64_mod_8 = 1; + $SYS_signalfd4 = 5283; + $SYS_renameat2 //= 5311; } elsif ($machine =~ m/^mips/) { - $SYS_sendfile = 4207; $SYS_epoll_create = 4248; $SYS_epoll_ctl = 4249; $SYS_epoll_wait = 4250; - $SYS_readahead = 4223; $u64_mod_8 = 1; + $SYS_signalfd4 = 4324; + $SYS_renameat2 //= 4351; } else { # as a last resort, try using the *.ph files which may not # exist or may be wrong @@ -174,7 +194,11 @@ if ($^O eq "linux") { $SYS_epoll_create = eval { &SYS_epoll_create; } || 0; $SYS_epoll_ctl = eval { &SYS_epoll_ctl; } || 0; $SYS_epoll_wait = eval { &SYS_epoll_wait; } || 0; - $SYS_readahead = eval { &SYS_readahead; } || 0; + + # Note: do NOT add new syscalls to depend on *.ph, here. + # Better to miss syscalls (so we can fallback to IO::Poll) + # than to use wrong ones, since the names are not stable + # (at least not on FreeBSD), if the actual numbers are. } if ($u64_mod_8) { @@ -185,85 +209,18 @@ if ($^O eq "linux") { *epoll_ctl = \&epoll_ctl_mod4; } } - -elsif ($^O eq "freebsd") { - if ($ENV{FREEBSD_SENDFILE}) { - # this is still buggy and in development - $SYS_sendfile = 393; # old is 336 - } -} - -############################################################################ -# sendfile functions -############################################################################ - -unless ($SYS_sendfile) { - _load_syscall(); - $SYS_sendfile = eval { &SYS_sendfile; } || 0; -} - -sub sendfile_defined { return $SYS_sendfile ? 1 : 0; } - -if ($^O eq "linux" && $SYS_sendfile) { - *sendfile = \&sendfile_linux; -} elsif ($^O eq "freebsd" && $SYS_sendfile) { - *sendfile = \&sendfile_freebsd; -} else { - *sendfile = \&sendfile_noimpl; -} - -sub sendfile_noimpl { - $! = ENOSYS; - return -1; -} - -# C: ssize_t sendfile(int out_fd, int in_fd, off_t *offset, size_t count) -# Perl: sendfile($write_fd, $read_fd, $max_count) --> $actually_sent -sub sendfile_linux { - return syscall( - $SYS_sendfile, - $_[0] + 0, # fd - $_[1] + 0, # fd - 0, # don't keep track of offset. callers can lseek and keep track. - $_[2] + 0 # count - ); -} - -sub sendfile_freebsd { - my $offset = POSIX::lseek($_[1]+0, 0, SEEK_CUR) + 0; - my $ct = $_[2] + 0; - my $sbytes_buf = "\0" x 8; - my $rv = syscall( - $SYS_sendfile, - $_[1] + 0, # fd (from) - $_[0] + 0, # socket (to) - $offset, - $ct, - 0, # struct sf_hdtr *hdtr - $sbytes_buf, # off_t *sbytes - 0); # flags - return $rv if $rv < 0; - - - my $set = unpack("L", $sbytes_buf); - POSIX::lseek($_[1]+0, SEEK_CUR, $set); - return $set; -} - +# use Inline::C for *BSD-only or general POSIX stuff. +# Linux guarantees stable syscall numbering, BSDs only offer a stable libc +# use scripts/syscall-list on Linux to detect new syscall numbers ############################################################################ # epoll functions ############################################################################ -sub epoll_defined { return $SYS_epoll_create ? 1 : 0; } +sub epoll_defined { $SYS_epoll_create ? 1 : 0; } -# ARGS: (size) -- but in modern Linux 2.6, the -# size doesn't even matter (radix tree now, not hash) sub epoll_create { - return -1 unless defined $SYS_epoll_create; - my $epfd = eval { syscall($SYS_epoll_create, $no_deprecated ? 0 : ($_[0]||100)+0) }; - return -1 if $@; - return $epfd; + syscall($SYS_epoll_create, $no_deprecated ? 0 : 100); } # epoll_ctl wrapper @@ -278,41 +235,92 @@ sub epoll_ctl_mod8 { # epoll_wait wrapper # ARGS: (epfd, maxevents, timeout (milliseconds), arrayref) # arrayref: values modified to be [$fd, $event] -our $epoll_wait_events; +our $epoll_wait_events = ''; our $epoll_wait_size = 0; sub epoll_wait_mod4 { - # resize our static buffer if requested size is bigger than we've ever done - if ($_[1] > $epoll_wait_size) { - $epoll_wait_size = $_[1]; - $epoll_wait_events = "\0" x 12 x $epoll_wait_size; - } - my $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0); - for (0..$ct-1) { - @{$_[3]->[$_]}[1,0] = unpack("LL", substr($epoll_wait_events, 12*$_, 8)); - } - return $ct; + my ($epfd, $maxevents, $timeout_msec, $events) = @_; + # resize our static buffer if maxevents bigger than we've ever done + if ($maxevents > $epoll_wait_size) { + $epoll_wait_size = $maxevents; + vec($epoll_wait_events, $maxevents * 12 * 8 - 1, 1) = 0; + } + @$events = (); + my $ct = syscall($SYS_epoll_wait, $epfd, $epoll_wait_events, + $maxevents, $timeout_msec); + for (0..$ct - 1) { + # 12-byte struct epoll_event + # 4 bytes uint32_t events mask (skipped, useless to us) + # 8 bytes: epoll_data_t union (first 4 bytes are the fd) + # So we skip the first 4 bytes and take the middle 4: + $events->[$_] = unpack('L', substr($epoll_wait_events, + 12 * $_ + 4, 4)); + } } sub epoll_wait_mod8 { - # resize our static buffer if requested size is bigger than we've ever done - if ($_[1] > $epoll_wait_size) { - $epoll_wait_size = $_[1]; - $epoll_wait_events = "\0" x 16 x $epoll_wait_size; - } - my $ct; - if ($no_deprecated) { - $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0, undef); - } else { - $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0); - } - for (0..$ct-1) { - # 16 byte epoll_event structs, with format: - # 4 byte mask [idx 1] - # 4 byte padding (we put it into idx 2, useless) - # 8 byte data (first 4 bytes are fd, into idx 0) - @{$_[3]->[$_]}[1,2,0] = unpack("LLL", substr($epoll_wait_events, 16*$_, 12)); - } - return $ct; + my ($epfd, $maxevents, $timeout_msec, $events) = @_; + + # resize our static buffer if maxevents bigger than we've ever done + if ($maxevents > $epoll_wait_size) { + $epoll_wait_size = $maxevents; + vec($epoll_wait_events, $maxevents * 16 * 8 - 1, 1) = 0; + } + @$events = (); + my $ct = syscall($SYS_epoll_wait, $epfd, $epoll_wait_events, + $maxevents, $timeout_msec, + $no_deprecated ? undef : ()); + for (0..$ct - 1) { + # 16-byte struct epoll_event + # 4 bytes uint32_t events mask (skipped, useless to us) + # 4 bytes padding (skipped, useless) + # 8 bytes epoll_data_t union (first 4 bytes are the fd) + # So skip the first 8 bytes, take 4, and ignore the last 4: + $events->[$_] = unpack('L', substr($epoll_wait_events, + 16 * $_ + 8, 4)); + } +} + +sub signalfd ($$) { + my ($signos, $nonblock) = @_; + if ($SYS_signalfd4) { + my $set = POSIX::SigSet->new(@$signos); + syscall($SYS_signalfd4, -1, "$$set", + # $Config{sig_count} is NSIG, so this is NSIG/8: + int($Config{sig_count}/8), + # SFD_NONBLOCK == O_NONBLOCK for every architecture + ($nonblock ? O_NONBLOCK : 0) |$SFD_CLOEXEC); + } else { + $! = ENOSYS; + undef; + } +} + +sub _rename_noreplace_racy ($$) { + my ($old, $new) = @_; + if (link($old, $new)) { + warn "unlink $old: $!\n" if !unlink($old) && $! != ENOENT; + 1 + } else { + undef; + } +} + +# TODO: support FD args? +sub rename_noreplace ($$) { + my ($old, $new) = @_; + if ($SYS_renameat2) { # RENAME_NOREPLACE = 1, AT_FDCWD = -100 + my $ret = syscall($SYS_renameat2, -100, $old, -100, $new, 1); + if ($ret == 0) { + 1; # like rename() perlop + } elsif ($! == ENOSYS) { + undef $SYS_renameat2; + _rename_noreplace_racy($old, $new); + } else { + undef + } + } else { + _rename_noreplace_racy($old, $new); + } } 1;