# specifically the Debian libsys-syscall-perl 0.25-6 version to
# fix upstream regressions in 0.25.
#
+# See devel/syscall-list in the public-inbox source tree for maintenance
+# <https://80x24.org/public-inbox.git>, and machines from the GCC Farm:
+# <https://cfarm.tetaneutral.net/>
+#
# This license differs from the rest of public-inbox
#
# This module is Copyright (c) 2005 Six Apart, Ltd.
-# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
#
# All rights reserved.
#
# You may distribute under the terms of either the GNU General Public
# License or the Artistic License, as specified in the Perl README file.
package PublicInbox::Syscall;
-use strict;
-use POSIX qw(ENOSYS SEEK_CUR);
+use v5.12;
+use parent qw(Exporter);
+use POSIX qw(ENOENT ENOSYS EINVAL O_NONBLOCK);
+use Socket qw(SOL_SOCKET SCM_RIGHTS);
use Config;
+our %SIGNUM = (WINCH => 28); # most Linux, {Free,Net,Open}BSD, *Darwin
-require Exporter;
-use vars qw(@ISA @EXPORT_OK %EXPORT_TAGS $VERSION);
-
-$VERSION = "0.25";
-@ISA = qw(Exporter);
-@EXPORT_OK = qw(sendfile epoll_ctl epoll_create epoll_wait
- EPOLLIN EPOLLOUT EPOLLERR EPOLLHUP EPOLLRDBAND
+# $VERSION = '0.25'; # Sys::Syscall version
+our @EXPORT_OK = qw(epoll_ctl epoll_create epoll_wait
+ EPOLLIN EPOLLOUT EPOLLET
EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD
- EPOLLEXCLUSIVE);
-%EXPORT_TAGS = (epoll => [qw(epoll_ctl epoll_create epoll_wait
- EPOLLIN EPOLLOUT EPOLLERR EPOLLHUP EPOLLRDBAND
+ EPOLLONESHOT EPOLLEXCLUSIVE
+ signalfd rename_noreplace %SIGNUM);
+our %EXPORT_TAGS = (epoll => [qw(epoll_ctl epoll_create epoll_wait
+ EPOLLIN EPOLLOUT
EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD
- EPOLLEXCLUSIVE)],
- sendfile => [qw(sendfile)],
+ EPOLLONESHOT EPOLLEXCLUSIVE)],
);
-use constant EPOLLIN => 1;
-use constant EPOLLOUT => 4;
-use constant EPOLLERR => 8;
-use constant EPOLLHUP => 16;
-use constant EPOLLRDBAND => 128;
-use constant EPOLLEXCLUSIVE => (1 << 28);
-use constant EPOLL_CTL_ADD => 1;
-use constant EPOLL_CTL_DEL => 2;
-use constant EPOLL_CTL_MOD => 3;
-
-our $loaded_syscall = 0;
-
-sub _load_syscall {
- # props to Gaal for this!
- return if $loaded_syscall++;
- my $clean = sub {
- delete @INC{qw<syscall.ph asm/unistd.ph bits/syscall.ph
- _h2ph_pre.ph sys/syscall.ph>};
- };
- $clean->(); # don't trust modules before us
- my $rv = eval { require 'syscall.ph'; 1 } || eval { require 'sys/syscall.ph'; 1 };
- $clean->(); # don't require modules after us trust us
- return $rv;
-}
-
-our ($sysname, $nodename, $release, $version, $machine) = POSIX::uname();
+use constant {
+ EPOLLIN => 1,
+ EPOLLOUT => 4,
+ # EPOLLERR => 8,
+ # EPOLLHUP => 16,
+ # EPOLLRDBAND => 128,
+ EPOLLEXCLUSIVE => (1 << 28),
+ EPOLLONESHOT => (1 << 30),
+ EPOLLET => (1 << 31),
+ EPOLL_CTL_ADD => 1,
+ EPOLL_CTL_DEL => 2,
+ EPOLL_CTL_MOD => 3,
+ SIZEOF_int => $Config{intsize},
+ SIZEOF_size_t => $Config{sizesize},
+ NUL => "\0",
+};
+
+use constant {
+ TMPL_size_t => SIZEOF_size_t == 8 ? 'Q' : 'L',
+ BYTES_4_hole => SIZEOF_size_t == 8 ? 'L' : '',
+ # cmsg_len, cmsg_level, cmsg_type
+ SIZEOF_cmsghdr => SIZEOF_int * 2 + SIZEOF_size_t,
+};
+
+my @BYTES_4_hole = BYTES_4_hole ? (0) : ();
our (
$SYS_epoll_create,
$SYS_epoll_ctl,
$SYS_epoll_wait,
- $SYS_sendfile,
- $SYS_readahead,
+ $SYS_signalfd4,
+ $SYS_renameat2,
);
+my ($SYS_sendmsg, $SYS_recvmsg);
+my $SYS_fstatfs; # don't need fstatfs64, just statfs.f_type
+my ($FS_IOC_GETFLAGS, $FS_IOC_SETFLAGS);
+my $SFD_CLOEXEC = 02000000; # Perl does not expose O_CLOEXEC
our $no_deprecated = 0;
if ($^O eq "linux") {
+ my (undef, undef, $release, undef, $machine) = POSIX::uname();
+ my ($maj, $min) = ($release =~ /\A([0-9]+)\.([0-9]+)/);
+ $SYS_renameat2 = 0 if "$maj.$min" < 3.15;
# whether the machine requires 64-bit numbers to be on 8-byte
# boundaries.
my $u64_mod_8 = 0;
- # if we're running on an x86_64 kernel, but a 32-bit process,
- # we need to use the i386 syscall numbers.
- if ($machine eq "x86_64" && $Config{ptrsize} == 4) {
- $machine = "i386";
- }
-
- # Similarly for mips64 vs mips
- if ($machine eq "mips64" && $Config{ptrsize} == 4) {
- $machine = "mips";
+ if ($Config{ptrsize} == 4) {
+ # if we're running on an x86_64 kernel, but a 32-bit process,
+ # we need to use the x32 or i386 syscall numbers.
+ if ($machine eq 'x86_64') {
+ my $s = $Config{cppsymbols};
+ $machine = ($s =~ /\b__ILP32__=1\b/ && $s =~ /\b__x86_64__=1\b/) ?
+ 'x32' : 'i386'
+ } elsif ($machine eq 'mips64') { # similarly for mips64 vs mips
+ $machine = 'mips';
+ }
}
if ($machine =~ m/^i[3456]86$/) {
$SYS_epoll_create = 254;
$SYS_epoll_ctl = 255;
$SYS_epoll_wait = 256;
- $SYS_sendfile = 187; # or 64: 239
- $SYS_readahead = 225;
+ $SYS_signalfd4 = 327;
+ $SYS_renameat2 //= 353;
+ $SYS_fstatfs = 100;
+ $SYS_sendmsg = 370;
+ $SYS_recvmsg = 372;
+ $FS_IOC_GETFLAGS = 0x80046601;
+ $FS_IOC_SETFLAGS = 0x40046602;
} elsif ($machine eq "x86_64") {
$SYS_epoll_create = 213;
$SYS_epoll_ctl = 233;
$SYS_epoll_wait = 232;
- $SYS_sendfile = 40;
- $SYS_readahead = 187;
+ $SYS_signalfd4 = 289;
+ $SYS_renameat2 //= 316;
+ $SYS_fstatfs = 138;
+ $SYS_sendmsg = 46;
+ $SYS_recvmsg = 47;
+ $FS_IOC_GETFLAGS = 0x80086601;
+ $FS_IOC_SETFLAGS = 0x40086602;
+ } elsif ($machine eq 'x32') {
+ $SYS_epoll_create = 1073742037;
+ $SYS_epoll_ctl = 1073742057;
+ $SYS_epoll_wait = 1073742056;
+ $SYS_signalfd4 = 1073742113;
+ $SYS_renameat2 //= 0x40000000 + 316;
+ $SYS_fstatfs = 138;
+ $SYS_sendmsg = 0x40000206;
+ $SYS_recvmsg = 0x40000207;
+ $FS_IOC_GETFLAGS = 0x80046601;
+ $FS_IOC_SETFLAGS = 0x40046602;
+ } elsif ($machine eq 'sparc64') {
+ $SYS_epoll_create = 193;
+ $SYS_epoll_ctl = 194;
+ $SYS_epoll_wait = 195;
+ $u64_mod_8 = 1;
+ $SYS_signalfd4 = 317;
+ $SYS_renameat2 //= 345;
+ $SFD_CLOEXEC = 020000000;
+ $SYS_fstatfs = 158;
+ $SYS_sendmsg = 114;
+ $SYS_recvmsg = 113;
+ $FS_IOC_GETFLAGS = 0x40086601;
+ $FS_IOC_SETFLAGS = 0x80086602;
} elsif ($machine =~ m/^parisc/) {
$SYS_epoll_create = 224;
$SYS_epoll_ctl = 225;
$SYS_epoll_wait = 226;
- $SYS_sendfile = 122; # sys_sendfile64=209
- $SYS_readahead = 207;
$u64_mod_8 = 1;
+ $SYS_signalfd4 = 309;
+ $SIGNUM{WINCH} = 23;
} elsif ($machine =~ m/^ppc64/) {
$SYS_epoll_create = 236;
$SYS_epoll_ctl = 237;
$SYS_epoll_wait = 238;
- $SYS_sendfile = 186; # (sys32_sendfile). sys32_sendfile64=226 (64 bit processes: sys_sendfile64=186)
- $SYS_readahead = 191; # both 32-bit and 64-bit vesions
$u64_mod_8 = 1;
+ $SYS_signalfd4 = 313;
+ $SYS_renameat2 //= 357;
+ $SYS_fstatfs = 100;
+ $SYS_sendmsg = 341;
+ $SYS_recvmsg = 342;
+ $FS_IOC_GETFLAGS = 0x40086601;
+ $FS_IOC_SETFLAGS = 0x80086602;
} elsif ($machine eq "ppc") {
$SYS_epoll_create = 236;
$SYS_epoll_ctl = 237;
$SYS_epoll_wait = 238;
- $SYS_sendfile = 186; # sys_sendfile64=226
- $SYS_readahead = 191;
$u64_mod_8 = 1;
- } elsif ($machine =~ m/^s390/) {
+ $SYS_signalfd4 = 313;
+ $SYS_renameat2 //= 357;
+ $SYS_fstatfs = 100;
+ $FS_IOC_GETFLAGS = 0x40086601;
+ $FS_IOC_SETFLAGS = 0x80086602;
+ } elsif ($machine =~ m/^s390/) { # untested, no machine on cfarm
$SYS_epoll_create = 249;
$SYS_epoll_ctl = 250;
$SYS_epoll_wait = 251;
- $SYS_sendfile = 187; # sys_sendfile64=223
- $SYS_readahead = 222;
$u64_mod_8 = 1;
- } elsif ($machine eq "ia64") {
+ $SYS_signalfd4 = 322;
+ $SYS_renameat2 //= 347;
+ $SYS_fstatfs = 100;
+ $SYS_sendmsg = 370;
+ $SYS_recvmsg = 372;
+ } elsif ($machine eq 'ia64') { # untested, no machine on cfarm
$SYS_epoll_create = 1243;
$SYS_epoll_ctl = 1244;
$SYS_epoll_wait = 1245;
- $SYS_sendfile = 1187;
- $SYS_readahead = 1216;
$u64_mod_8 = 1;
- } elsif ($machine eq "alpha") {
+ $SYS_signalfd4 = 289;
+ } elsif ($machine eq "alpha") { # untested, no machine on cfarm
# natural alignment, ints are 32-bits
- $SYS_sendfile = 370; # (sys_sendfile64)
$SYS_epoll_create = 407;
$SYS_epoll_ctl = 408;
$SYS_epoll_wait = 409;
- $SYS_readahead = 379;
$u64_mod_8 = 1;
- } elsif ($machine eq "aarch64") {
+ $SYS_signalfd4 = 484;
+ $SFD_CLOEXEC = 010000000;
+ } elsif ($machine =~ /\A(?:loong)?aarch64\z/ || $machine eq 'riscv64') {
$SYS_epoll_create = 20; # (sys_epoll_create1)
$SYS_epoll_ctl = 21;
$SYS_epoll_wait = 22; # (sys_epoll_pwait)
- $SYS_sendfile = 71; # (sys_sendfile64)
- $SYS_readahead = 213;
$u64_mod_8 = 1;
$no_deprecated = 1;
- } elsif ($machine =~ m/arm(v\d+)?.*l/) {
- # ARM OABI
+ $SYS_signalfd4 = 74;
+ $SYS_renameat2 //= 276;
+ $SYS_fstatfs = 44;
+ $SYS_sendmsg = 211;
+ $SYS_recvmsg = 212;
+ $FS_IOC_GETFLAGS = 0x80086601;
+ $FS_IOC_SETFLAGS = 0x40086602;
+ } elsif ($machine =~ m/arm(v\d+)?.*l/) { # ARM OABI (untested on cfarm)
$SYS_epoll_create = 250;
$SYS_epoll_ctl = 251;
$SYS_epoll_wait = 252;
- $SYS_sendfile = 187;
- $SYS_readahead = 225;
$u64_mod_8 = 1;
- } elsif ($machine =~ m/^mips64/) {
- $SYS_sendfile = 5039;
+ $SYS_signalfd4 = 355;
+ $SYS_renameat2 //= 382;
+ $SYS_fstatfs = 100;
+ $SYS_sendmsg = 296;
+ $SYS_recvmsg = 297;
+ } elsif ($machine =~ m/^mips64/) { # cfarm only has 32-bit userspace
$SYS_epoll_create = 5207;
$SYS_epoll_ctl = 5208;
$SYS_epoll_wait = 5209;
- $SYS_readahead = 5179;
$u64_mod_8 = 1;
- } elsif ($machine =~ m/^mips/) {
- $SYS_sendfile = 4207;
+ $SYS_signalfd4 = 5283;
+ $SYS_renameat2 //= 5311;
+ $SYS_fstatfs = 5135;
+ $SYS_sendmsg = 5045;
+ $SYS_recvmsg = 5046;
+ $FS_IOC_GETFLAGS = 0x40046601;
+ $FS_IOC_SETFLAGS = 0x80046602;
+ } elsif ($machine =~ m/^mips/) { # 32-bit, tested on mips64 cfarm machine
$SYS_epoll_create = 4248;
$SYS_epoll_ctl = 4249;
$SYS_epoll_wait = 4250;
- $SYS_readahead = 4223;
$u64_mod_8 = 1;
+ $SYS_signalfd4 = 4324;
+ $SYS_renameat2 //= 4351;
+ $SYS_fstatfs = 4100;
+ $SYS_sendmsg = 4179;
+ $SYS_recvmsg = 4177;
+ $FS_IOC_GETFLAGS = 0x40046601;
+ $FS_IOC_SETFLAGS = 0x80046602;
+ $SIGNUM{WINCH} = 20;
} else {
- # as a last resort, try using the *.ph files which may not
- # exist or may be wrong
- _load_syscall();
- $SYS_epoll_create = eval { &SYS_epoll_create; } || 0;
- $SYS_epoll_ctl = eval { &SYS_epoll_ctl; } || 0;
- $SYS_epoll_wait = eval { &SYS_epoll_wait; } || 0;
- $SYS_readahead = eval { &SYS_readahead; } || 0;
+ warn <<EOM;
+machine=$machine ptrsize=$Config{ptrsize} has no syscall definitions
+git clone https://80x24.org/public-inbox.git and
+Send the output of ./devel/syscall-list to meta\@public-inbox.org
+EOM
}
-
if ($u64_mod_8) {
*epoll_wait = \&epoll_wait_mod8;
*epoll_ctl = \&epoll_ctl_mod8;
*epoll_ctl = \&epoll_ctl_mod4;
}
}
-
-elsif ($^O eq "freebsd") {
- if ($ENV{FREEBSD_SENDFILE}) {
- # this is still buggy and in development
- $SYS_sendfile = 393; # old is 336
- }
-}
-
-############################################################################
-# sendfile functions
-############################################################################
-
-unless ($SYS_sendfile) {
- _load_syscall();
- $SYS_sendfile = eval { &SYS_sendfile; } || 0;
-}
-
-sub sendfile_defined { return $SYS_sendfile ? 1 : 0; }
-
-if ($^O eq "linux" && $SYS_sendfile) {
- *sendfile = \&sendfile_linux;
-} elsif ($^O eq "freebsd" && $SYS_sendfile) {
- *sendfile = \&sendfile_freebsd;
-} else {
- *sendfile = \&sendfile_noimpl;
-}
-
-sub sendfile_noimpl {
- $! = ENOSYS;
- return -1;
-}
-
-# C: ssize_t sendfile(int out_fd, int in_fd, off_t *offset, size_t count)
-# Perl: sendfile($write_fd, $read_fd, $max_count) --> $actually_sent
-sub sendfile_linux {
- return syscall(
- $SYS_sendfile,
- $_[0] + 0, # fd
- $_[1] + 0, # fd
- 0, # don't keep track of offset. callers can lseek and keep track.
- $_[2] + 0 # count
- );
-}
-
-sub sendfile_freebsd {
- my $offset = POSIX::lseek($_[1]+0, 0, SEEK_CUR) + 0;
- my $ct = $_[2] + 0;
- my $sbytes_buf = "\0" x 8;
- my $rv = syscall(
- $SYS_sendfile,
- $_[1] + 0, # fd (from)
- $_[0] + 0, # socket (to)
- $offset,
- $ct,
- 0, # struct sf_hdtr *hdtr
- $sbytes_buf, # off_t *sbytes
- 0); # flags
- return $rv if $rv < 0;
-
-
- my $set = unpack("L", $sbytes_buf);
- POSIX::lseek($_[1]+0, SEEK_CUR, $set);
- return $set;
-}
-
+# use Inline::C for *BSD-only or general POSIX stuff.
+# Linux guarantees stable syscall numbering, BSDs only offer a stable libc
+# use devel/syscall-list on Linux to detect new syscall numbers
############################################################################
# epoll functions
############################################################################
-sub epoll_defined { return $SYS_epoll_create ? 1 : 0; }
-
-# ARGS: (size) -- but in modern Linux 2.6, the
-# size doesn't even matter (radix tree now, not hash)
sub epoll_create {
- return -1 unless defined $SYS_epoll_create;
- my $epfd = eval { syscall($SYS_epoll_create, $no_deprecated ? 0 : ($_[0]||100)+0) };
- return -1 if $@;
- return $epfd;
+ syscall($SYS_epoll_create, $no_deprecated ? 0 : 100);
}
# epoll_ctl wrapper
# epoll_wait wrapper
# ARGS: (epfd, maxevents, timeout (milliseconds), arrayref)
# arrayref: values modified to be [$fd, $event]
-our $epoll_wait_events;
+our $epoll_wait_events = '';
our $epoll_wait_size = 0;
sub epoll_wait_mod4 {
- # resize our static buffer if requested size is bigger than we've ever done
- if ($_[1] > $epoll_wait_size) {
- $epoll_wait_size = $_[1];
- $epoll_wait_events = "\0" x 12 x $epoll_wait_size;
- }
- my $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0);
- for (0..$ct-1) {
- @{$_[3]->[$_]}[1,0] = unpack("LL", substr($epoll_wait_events, 12*$_, 8));
- }
- return $ct;
+ my ($epfd, $maxevents, $timeout_msec, $events) = @_;
+ # resize our static buffer if maxevents bigger than we've ever done
+ if ($maxevents > $epoll_wait_size) {
+ $epoll_wait_size = $maxevents;
+ vec($epoll_wait_events, $maxevents * 12 * 8 - 1, 1) = 0;
+ }
+ @$events = ();
+ my $ct = syscall($SYS_epoll_wait, $epfd, $epoll_wait_events,
+ $maxevents, $timeout_msec);
+ for (0..$ct - 1) {
+ # 12-byte struct epoll_event
+ # 4 bytes uint32_t events mask (skipped, useless to us)
+ # 8 bytes: epoll_data_t union (first 4 bytes are the fd)
+ # So we skip the first 4 bytes and take the middle 4:
+ $events->[$_] = unpack('L', substr($epoll_wait_events,
+ 12 * $_ + 4, 4));
+ }
}
sub epoll_wait_mod8 {
- # resize our static buffer if requested size is bigger than we've ever done
- if ($_[1] > $epoll_wait_size) {
- $epoll_wait_size = $_[1];
- $epoll_wait_events = "\0" x 16 x $epoll_wait_size;
- }
- my $ct;
- if ($no_deprecated) {
- $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0, undef);
- } else {
- $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0);
- }
- for (0..$ct-1) {
- # 16 byte epoll_event structs, with format:
- # 4 byte mask [idx 1]
- # 4 byte padding (we put it into idx 2, useless)
- # 8 byte data (first 4 bytes are fd, into idx 0)
- @{$_[3]->[$_]}[1,2,0] = unpack("LLL", substr($epoll_wait_events, 16*$_, 12));
- }
- return $ct;
+ my ($epfd, $maxevents, $timeout_msec, $events) = @_;
+
+ # resize our static buffer if maxevents bigger than we've ever done
+ if ($maxevents > $epoll_wait_size) {
+ $epoll_wait_size = $maxevents;
+ vec($epoll_wait_events, $maxevents * 16 * 8 - 1, 1) = 0;
+ }
+ @$events = ();
+ my $ct = syscall($SYS_epoll_wait, $epfd, $epoll_wait_events,
+ $maxevents, $timeout_msec,
+ $no_deprecated ? undef : ());
+ for (0..$ct - 1) {
+ # 16-byte struct epoll_event
+ # 4 bytes uint32_t events mask (skipped, useless to us)
+ # 4 bytes padding (skipped, useless)
+ # 8 bytes epoll_data_t union (first 4 bytes are the fd)
+ # So skip the first 8 bytes, take 4, and ignore the last 4:
+ $events->[$_] = unpack('L', substr($epoll_wait_events,
+ 16 * $_ + 8, 4));
+ }
+}
+
+sub signalfd ($$) {
+ my ($signos, $nonblock) = @_;
+ if ($SYS_signalfd4) {
+ my $set = POSIX::SigSet->new(@$signos);
+ syscall($SYS_signalfd4, -1, "$$set",
+ # $Config{sig_count} is NSIG, so this is NSIG/8:
+ int($Config{sig_count}/8),
+ # SFD_NONBLOCK == O_NONBLOCK for every architecture
+ ($nonblock ? O_NONBLOCK : 0) |$SFD_CLOEXEC);
+ } else {
+ $! = ENOSYS;
+ undef;
+ }
+}
+
+sub _rename_noreplace_racy ($$) {
+ my ($old, $new) = @_;
+ if (link($old, $new)) {
+ warn "unlink $old: $!\n" if !unlink($old) && $! != ENOENT;
+ 1
+ } else {
+ undef;
+ }
+}
+
+# TODO: support FD args?
+sub rename_noreplace ($$) {
+ my ($old, $new) = @_;
+ if ($SYS_renameat2) { # RENAME_NOREPLACE = 1, AT_FDCWD = -100
+ my $ret = syscall($SYS_renameat2, -100, $old, -100, $new, 1);
+ if ($ret == 0) {
+ 1; # like rename() perlop
+ } elsif ($! == ENOSYS || $! == EINVAL) {
+ undef $SYS_renameat2;
+ _rename_noreplace_racy($old, $new);
+ } else {
+ undef
+ }
+ } else {
+ _rename_noreplace_racy($old, $new);
+ }
+}
+
+sub nodatacow_fh ($) {
+ my ($fh) = @_;
+ my $buf = "\0" x 120;
+ syscall($SYS_fstatfs // return, fileno($fh), $buf) == 0 or
+ return warn("fstatfs: $!\n");
+ my $f_type = unpack('l!', $buf); # statfs.f_type is a signed word
+ return if $f_type != 0x9123683E; # BTRFS_SUPER_MAGIC
+
+ $FS_IOC_GETFLAGS //
+ return warn('FS_IOC_GETFLAGS undefined for platform');
+ ioctl($fh, $FS_IOC_GETFLAGS, $buf) //
+ return warn("FS_IOC_GETFLAGS: $!\n");
+ my $attr = unpack('l!', $buf);
+ return if ($attr & 0x00800000); # FS_NOCOW_FL;
+ ioctl($fh, $FS_IOC_SETFLAGS, pack('l', $attr | 0x00800000)) //
+ warn("FS_IOC_SETFLAGS: $!\n");
+}
+
+sub nodatacow_dir {
+ if (open my $fh, '<', $_[0]) { nodatacow_fh($fh) }
+}
+
+sub CMSG_ALIGN ($) { ($_[0] + SIZEOF_size_t - 1) & ~(SIZEOF_size_t - 1) }
+use constant CMSG_ALIGN_SIZEOF_cmsghdr => CMSG_ALIGN(SIZEOF_cmsghdr);
+sub CMSG_SPACE ($) { CMSG_ALIGN($_[0]) + CMSG_ALIGN_SIZEOF_cmsghdr }
+sub CMSG_LEN ($) { CMSG_ALIGN_SIZEOF_cmsghdr + $_[0] }
+use constant msg_controllen => CMSG_SPACE(10 * SIZEOF_int) + 16; # 10 FDs
+
+if (defined($SYS_sendmsg) && defined($SYS_recvmsg)) {
+no warnings 'once';
+*send_cmd4 = sub ($$$$) {
+ my ($sock, $fds, undef, $flags) = @_;
+ my $iov = pack('P'.TMPL_size_t,
+ $_[2] // NUL, length($_[2] // NUL) || 1);
+ my $cmsghdr = pack(TMPL_size_t . # cmsg_len
+ 'LL' . # cmsg_level, cmsg_type,
+ ('i' x scalar(@$fds)),
+ CMSG_LEN(scalar(@$fds) * SIZEOF_int), # cmsg_len
+ SOL_SOCKET, SCM_RIGHTS, # cmsg_{level,type}
+ @$fds); # CMSG_DATA
+ my $mh = pack('PL' . # msg_name, msg_namelen (socklen_t (U32))
+ BYTES_4_hole . # 4-byte padding on 64-bit
+ 'P'.TMPL_size_t . # msg_iov, msg_iovlen,
+ 'P'.TMPL_size_t . # msg_control, msg_controllen,
+ 'i', # msg_flags
+ NUL, 0, # msg_name, msg_namelen (unused)
+ @BYTES_4_hole,
+ $iov, 1, # msg_iov, msg_iovlen
+ $cmsghdr, # msg_control
+ CMSG_SPACE(scalar(@$fds) * SIZEOF_int), # msg_controllen
+ 0); # msg_flags
+ my $sent;
+ my $try = 0;
+ do {
+ $sent = syscall($SYS_sendmsg, fileno($sock), $mh, $flags);
+ } while ($sent < 0 &&
+ ($!{ENOBUFS} || $!{ENOMEM} || $!{ETOOMANYREFS}) &&
+ (++$try < 50) &&
+ warn "sleeping on sendmsg: $! (#$try)\n" &&
+ select(undef, undef, undef, 0.1) == 0);
+ $sent >= 0 ? $sent : undef;
+};
+
+*recv_cmd4 = sub ($$$) {
+ my ($sock, undef, $len) = @_;
+ vec($_[1] //= '', ($len + 1) * 8, 1) = 0;
+ my $cmsghdr = "\0" x msg_controllen; # 10 * sizeof(int)
+ my $iov = pack('P'.TMPL_size_t, $_[1], $len);
+ my $mh = pack('PL' . # msg_name, msg_namelen (socklen_t (U32))
+ BYTES_4_hole . # 4-byte padding on 64-bit
+ 'P'.TMPL_size_t . # msg_iov, msg_iovlen,
+ 'P'.TMPL_size_t . # msg_control, msg_controllen,
+ 'i', # msg_flags
+ NUL, 0, # msg_name, msg_namelen (unused)
+ @BYTES_4_hole,
+ $iov, 1, # msg_iov, msg_iovlen
+ $cmsghdr, # msg_control
+ msg_controllen,
+ 0); # msg_flags
+ my $r = syscall($SYS_recvmsg, fileno($sock), $mh, 0);
+ return (undef) if $r < 0; # $! set
+ substr($_[1], $r, length($_[1]), '');
+ my @ret;
+ if ($r > 0) {
+ my ($len, $lvl, $type, @fds) = unpack(TMPL_size_t . # cmsg_len
+ 'LLi*', # cmsg_level, cmsg_type, @fds
+ $cmsghdr);
+ if ($lvl == SOL_SOCKET && $type == SCM_RIGHTS) {
+ $len -= CMSG_ALIGN_SIZEOF_cmsghdr;
+ @ret = @fds[0..(($len / SIZEOF_int) - 1)];
+ }
+ }
+ @ret;
+};
}
1;