1 # This is a fork of the (for now) unmaintained Sys::Syscall 0.25,
2 # specifically the Debian libsys-syscall-perl 0.25-6 version to
3 # fix upstream regressions in 0.25.
5 # See devel/syscall-list in the public-inbox source tree for maintenance
6 # <https://80x24.org/public-inbox.git>, and machines from the GCC Farm:
7 # <https://cfarm.tetaneutral.net/>
9 # This license differs from the rest of public-inbox
11 # This module is Copyright (c) 2005 Six Apart, Ltd.
12 # Copyright (C) all contributors <meta@public-inbox.org>
14 # All rights reserved.
16 # You may distribute under the terms of either the GNU General Public
17 # License or the Artistic License, as specified in the Perl README file.
18 package PublicInbox::Syscall;
20 use parent qw(Exporter);
21 use POSIX qw(ENOENT ENOSYS EINVAL O_NONBLOCK);
22 use Socket qw(SOL_SOCKET SCM_RIGHTS);
24 our %SIGNUM = (WINCH => 28); # most Linux, {Free,Net,Open}BSD, *Darwin
26 # $VERSION = '0.25'; # Sys::Syscall version
27 our @EXPORT_OK = qw(epoll_ctl epoll_create epoll_wait
28 EPOLLIN EPOLLOUT EPOLLET
29 EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD
30 EPOLLONESHOT EPOLLEXCLUSIVE
31 signalfd rename_noreplace %SIGNUM);
32 our %EXPORT_TAGS = (epoll => [qw(epoll_ctl epoll_create epoll_wait
34 EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD
35 EPOLLONESHOT EPOLLEXCLUSIVE)],
44 EPOLLEXCLUSIVE => (1 << 28),
45 EPOLLONESHOT => (1 << 30),
50 SIZEOF_int => $Config{intsize},
51 SIZEOF_size_t => $Config{sizesize},
56 TMPL_size_t => SIZEOF_size_t == 8 ? 'Q' : 'L',
57 BYTES_4_hole => SIZEOF_size_t == 8 ? 'L' : '',
58 # cmsg_len, cmsg_level, cmsg_type
59 SIZEOF_cmsghdr => SIZEOF_int * 2 + SIZEOF_size_t,
62 my @BYTES_4_hole = BYTES_4_hole ? (0) : ();
63 our $loaded_syscall = 0;
66 # props to Gaal for this!
67 return if $loaded_syscall++;
69 delete @INC{qw<syscall.ph asm/unistd.ph bits/syscall.ph
70 _h2ph_pre.ph sys/syscall.ph>};
72 $clean->(); # don't trust modules before us
73 my $rv = eval { require 'syscall.ph'; 1 } || eval { require 'sys/syscall.ph'; 1 };
74 $clean->(); # don't require modules after us trust us
87 my ($SYS_sendmsg, $SYS_recvmsg);
88 my $SYS_fstatfs; # don't need fstatfs64, just statfs.f_type
89 my ($FS_IOC_GETFLAGS, $FS_IOC_SETFLAGS);
90 my $SFD_CLOEXEC = 02000000; # Perl does not expose O_CLOEXEC
91 our $no_deprecated = 0;
# Query the running kernel: $release is its version string and $machine its
# architecture name ($machine selects the syscall number table below).
my (undef, undef, $release, undef, $machine) = POSIX::uname();
# Leading "MAJOR.MINOR" of the release; both stay undef if it does not parse.
my ($maj, $min) = ($release =~ /\A([0-9]+)\.([0-9]+)/);
# renameat2(2) first appeared in Linux 3.15.  Compare major and minor as
# integers: a floating point comparison of "$maj.$min" misorders releases
# such as 3.2 (3.2 > 3.15 numerically, yet 3.2 is the older kernel).
# An unparseable release is treated as too old, as before.  Setting 0 (a
# defined value) keeps the per-architecture `//=' defaults from enabling it.
$SYS_renameat2 = 0 if !defined($maj) || $maj < 3 || ($maj == 3 && $min < 15);
97 # whether the machine requires 64-bit numbers to be on 8-byte
101 if ($Config{ptrsize} == 4) {
102 # if we're running on an x86_64 kernel, but a 32-bit process,
103 # we need to use the x32 or i386 syscall numbers.
104 if ($machine eq 'x86_64') {
105 $machine = $Config{cppsymbols} =~ /\b__ILP32__=1\b/ ? 'x32' : 'i386'
106 } elsif ($machine eq 'mips64') { # similarly for mips64 vs mips
111 if ($machine =~ m/^i[3456]86$/) {
112 $SYS_epoll_create = 254;
113 $SYS_epoll_ctl = 255;
114 $SYS_epoll_wait = 256;
115 $SYS_signalfd4 = 327;
116 $SYS_renameat2 //= 353;
120 $FS_IOC_GETFLAGS = 0x80046601;
121 $FS_IOC_SETFLAGS = 0x40046602;
122 } elsif ($machine eq "x86_64") {
123 $SYS_epoll_create = 213;
124 $SYS_epoll_ctl = 233;
125 $SYS_epoll_wait = 232;
126 $SYS_signalfd4 = 289;
127 $SYS_renameat2 //= 316;
131 $FS_IOC_GETFLAGS = 0x80086601;
132 $FS_IOC_SETFLAGS = 0x40086602;
133 } elsif ($machine eq 'x32') {
134 $SYS_epoll_create = 1073742037;
135 $SYS_epoll_ctl = 1073742057;
136 $SYS_epoll_wait = 1073742056;
137 $SYS_signalfd4 = 1073742113;
138 $SYS_renameat2 //= 0x40000000 + 316;
140 $SYS_sendmsg = 0x40000206;
141 $SYS_recvmsg = 0x40000207;
142 $FS_IOC_GETFLAGS = 0x80046601;
143 $FS_IOC_SETFLAGS = 0x40046602;
144 } elsif ($machine eq 'sparc64') {
145 $SYS_epoll_create = 193;
146 $SYS_epoll_ctl = 194;
147 $SYS_epoll_wait = 195;
149 $SYS_signalfd4 = 317;
150 $SYS_renameat2 //= 345;
151 $SFD_CLOEXEC = 020000000;
155 $FS_IOC_GETFLAGS = 0x40086601;
156 $FS_IOC_SETFLAGS = 0x80086602;
157 } elsif ($machine =~ m/^parisc/) {
158 $SYS_epoll_create = 224;
159 $SYS_epoll_ctl = 225;
160 $SYS_epoll_wait = 226;
162 $SYS_signalfd4 = 309;
164 } elsif ($machine =~ m/^ppc64/) {
165 $SYS_epoll_create = 236;
166 $SYS_epoll_ctl = 237;
167 $SYS_epoll_wait = 238;
169 $SYS_signalfd4 = 313;
170 $SYS_renameat2 //= 357;
174 $FS_IOC_GETFLAGS = 0x40086601;
175 $FS_IOC_SETFLAGS = 0x80086602;
176 } elsif ($machine eq "ppc") {
177 $SYS_epoll_create = 236;
178 $SYS_epoll_ctl = 237;
179 $SYS_epoll_wait = 238;
181 $SYS_signalfd4 = 313;
182 $SYS_renameat2 //= 357;
184 $FS_IOC_GETFLAGS = 0x40086601;
185 $FS_IOC_SETFLAGS = 0x80086602;
186 } elsif ($machine =~ m/^s390/) { # untested, no machine on cfarm
187 $SYS_epoll_create = 249;
188 $SYS_epoll_ctl = 250;
189 $SYS_epoll_wait = 251;
191 $SYS_signalfd4 = 322;
192 $SYS_renameat2 //= 347;
196 } elsif ($machine eq 'ia64') { # untested, no machine on cfarm
197 $SYS_epoll_create = 1243;
198 $SYS_epoll_ctl = 1244;
199 $SYS_epoll_wait = 1245;
201 $SYS_signalfd4 = 289;
202 } elsif ($machine eq "alpha") { # untested, no machine on cfarm
203 # natural alignment, ints are 32-bits
204 $SYS_epoll_create = 407;
205 $SYS_epoll_ctl = 408;
206 $SYS_epoll_wait = 409;
208 $SYS_signalfd4 = 484;
209 $SFD_CLOEXEC = 010000000;
210 } elsif ($machine =~ /\A(?:loong)?aarch64\z/ || $machine eq 'riscv64') {
211 $SYS_epoll_create = 20; # (sys_epoll_create1)
213 $SYS_epoll_wait = 22; # (sys_epoll_pwait)
217 $SYS_renameat2 //= 276;
221 $FS_IOC_GETFLAGS = 0x80086601;
222 $FS_IOC_SETFLAGS = 0x40086602;
223 } elsif ($machine =~ m/arm(v\d+)?.*l/) { # ARM OABI (untested on cfarm)
224 $SYS_epoll_create = 250;
225 $SYS_epoll_ctl = 251;
226 $SYS_epoll_wait = 252;
228 $SYS_signalfd4 = 355;
229 $SYS_renameat2 //= 382;
233 } elsif ($machine =~ m/^mips64/) { # cfarm only has 32-bit userspace
234 $SYS_epoll_create = 5207;
235 $SYS_epoll_ctl = 5208;
236 $SYS_epoll_wait = 5209;
238 $SYS_signalfd4 = 5283;
239 $SYS_renameat2 //= 5311;
243 $FS_IOC_GETFLAGS = 0x40046601;
244 $FS_IOC_SETFLAGS = 0x80046602;
245 } elsif ($machine =~ m/^mips/) { # 32-bit, tested on mips64 cfarm machine
246 $SYS_epoll_create = 4248;
247 $SYS_epoll_ctl = 4249;
248 $SYS_epoll_wait = 4250;
250 $SYS_signalfd4 = 4324;
251 $SYS_renameat2 //= 4351;
255 $FS_IOC_GETFLAGS = 0x40046601;
256 $FS_IOC_SETFLAGS = 0x80046602;
259 # as a last resort, try using the *.ph files which may not
260 # exist or may be wrong
262 $SYS_epoll_create = eval { &SYS_epoll_create; } || 0;
263 $SYS_epoll_ctl = eval { &SYS_epoll_ctl; } || 0;
264 $SYS_epoll_wait = eval { &SYS_epoll_wait; } || 0;
266 # Note: do NOT add new syscalls which depend on *.ph here.
267 # Better to miss syscalls (so we can fall back to IO::Poll)
268 # than to use wrong ones, since the names are not stable
269 # (at least not on FreeBSD), even if the actual numbers are.
273 *epoll_wait = \&epoll_wait_mod8;
274 *epoll_ctl = \&epoll_ctl_mod8;
276 *epoll_wait = \&epoll_wait_mod4;
277 *epoll_ctl = \&epoll_ctl_mod4;
280 # Use Inline::C for *BSD-only or general POSIX stuff.
281 # Linux guarantees stable syscall numbering; BSDs only offer a stable libc.
282 # Use scripts/syscall-list on Linux to detect new syscall numbers.
284 ############################################################################
286 ############################################################################
# Returns 1 when a non-zero epoll_create syscall number is known for this
# platform, 0 otherwise.
sub epoll_defined {
	return 1 if $SYS_epoll_create;
	0;
}
291 syscall($SYS_epoll_create, $no_deprecated ? 0 : 100);
295 # ARGS: (epfd, op, fd, events_mask)
297 syscall($SYS_epoll_ctl, $_[0]+0, $_[1]+0, $_[2]+0, pack("LLL", $_[3], $_[2], 0));
300 syscall($SYS_epoll_ctl, $_[0]+0, $_[1]+0, $_[2]+0, pack("LLLL", $_[3], 0, $_[2], 0));
304 # ARGS: (epfd, maxevents, timeout (milliseconds), arrayref)
305 # arrayref: values modified to be [$fd, $event]
306 our $epoll_wait_events = '';
307 our $epoll_wait_size = 0;
308 sub epoll_wait_mod4 {
309 my ($epfd, $maxevents, $timeout_msec, $events) = @_;
310 # resize our static buffer if maxevents bigger than we've ever done
311 if ($maxevents > $epoll_wait_size) {
312 $epoll_wait_size = $maxevents;
313 vec($epoll_wait_events, $maxevents * 12 * 8 - 1, 1) = 0;
316 my $ct = syscall($SYS_epoll_wait, $epfd, $epoll_wait_events,
317 $maxevents, $timeout_msec);
319 # 12-byte struct epoll_event
320 # 4 bytes uint32_t events mask (skipped, useless to us)
321 # 8 bytes: epoll_data_t union (first 4 bytes are the fd)
322 # So we skip the first 4 bytes and take the middle 4:
323 $events->[$_] = unpack('L', substr($epoll_wait_events,
328 sub epoll_wait_mod8 {
329 my ($epfd, $maxevents, $timeout_msec, $events) = @_;
331 # resize our static buffer if maxevents bigger than we've ever done
332 if ($maxevents > $epoll_wait_size) {
333 $epoll_wait_size = $maxevents;
334 vec($epoll_wait_events, $maxevents * 16 * 8 - 1, 1) = 0;
337 my $ct = syscall($SYS_epoll_wait, $epfd, $epoll_wait_events,
338 $maxevents, $timeout_msec,
339 $no_deprecated ? undef : ());
341 # 16-byte struct epoll_event
342 # 4 bytes uint32_t events mask (skipped, useless to us)
343 # 4 bytes padding (skipped, useless)
344 # 8 bytes epoll_data_t union (first 4 bytes are the fd)
345 # So skip the first 8 bytes, take 4, and ignore the last 4:
346 $events->[$_] = unpack('L', substr($epoll_wait_events,
352 my ($signos, $nonblock) = @_;
353 if ($SYS_signalfd4) {
354 my $set = POSIX::SigSet->new(@$signos);
355 syscall($SYS_signalfd4, -1, "$$set",
356 # $Config{sig_count} is NSIG, so this is NSIG/8:
357 int($Config{sig_count}/8),
358 # SFD_NONBLOCK == O_NONBLOCK for every architecture
359 ($nonblock ? O_NONBLOCK : 0) |$SFD_CLOEXEC);
366 sub _rename_noreplace_racy ($$) {
367 my ($old, $new) = @_;
368 if (link($old, $new)) {
369 warn "unlink $old: $!\n" if !unlink($old) && $! != ENOENT;
376 # TODO: support FD args?
377 sub rename_noreplace ($$) {
378 my ($old, $new) = @_;
379 if ($SYS_renameat2) { # RENAME_NOREPLACE = 1, AT_FDCWD = -100
380 my $ret = syscall($SYS_renameat2, -100, $old, -100, $new, 1);
382 1; # like rename() perlop
383 } elsif ($! == ENOSYS || $! == EINVAL) {
384 undef $SYS_renameat2;
385 _rename_noreplace_racy($old, $new);
390 _rename_noreplace_racy($old, $new);
394 sub nodatacow_fh ($) {
396 my $buf = "\0" x 120;
397 syscall($SYS_fstatfs // return, fileno($fh), $buf) == 0 or
398 return warn("fstatfs: $!\n");
399 my $f_type = unpack('l!', $buf); # statfs.f_type is a signed word
400 return if $f_type != 0x9123683E; # BTRFS_SUPER_MAGIC
403 return warn('FS_IOC_GETFLAGS undefined for platform');
404 ioctl($fh, $FS_IOC_GETFLAGS, $buf) //
405 return warn("FS_IOC_GETFLAGS: $!\n");
406 my $attr = unpack('l!', $buf);
407 return if ($attr & 0x00800000); # FS_NOCOW_FL;
408 ioctl($fh, $FS_IOC_SETFLAGS, pack('l', $attr | 0x00800000)) //
409 warn("FS_IOC_SETFLAGS: $!\n");
413 if (open my $fh, '<', $_[0]) { nodatacow_fh($fh) }
# Perl counterparts of the cmsg(3) size macros; all values are in bytes.
# Declaration order matters: each `use constant' is evaluated at compile
# time and calls the sub declared just above it.

# Round $_[0] up to the next multiple of SIZEOF_size_t.
sub CMSG_ALIGN ($) {
	my $mask = SIZEOF_size_t - 1;
	($_[0] + $mask) & ~$mask;
}

# Size of the cmsghdr after alignment (offset of the payload).
use constant CMSG_ALIGN_SIZEOF_cmsghdr => CMSG_ALIGN(SIZEOF_cmsghdr);

# Aligned header plus a payload of $_[0] bytes padded to alignment.
sub CMSG_SPACE ($) { CMSG_ALIGN_SIZEOF_cmsghdr + CMSG_ALIGN($_[0]) }

# Aligned header plus exactly $_[0] payload bytes (no trailing padding).
sub CMSG_LEN ($) { $_[0] + CMSG_ALIGN_SIZEOF_cmsghdr }

# Control buffer size: room for 10 ints (10 FDs) plus 16 extra bytes
# NOTE(review): the reason for the extra 16 bytes is not visible here.
use constant msg_controllen => 16 + CMSG_SPACE(10 * SIZEOF_int);
422 if (defined($SYS_sendmsg) && defined($SYS_recvmsg)) {
424 *send_cmd4 = sub ($$$$) {
425 my ($sock, $fds, undef, $flags) = @_;
426 my $iov = pack('P'.TMPL_size_t,
427 $_[2] // NUL, length($_[2] // NUL) || 1);
428 my $cmsghdr = pack(TMPL_size_t . # cmsg_len
429 'LL' . # cmsg_level, cmsg_type,
430 ('i' x scalar(@$fds)),
431 CMSG_LEN(scalar(@$fds) * SIZEOF_int), # cmsg_len
432 SOL_SOCKET, SCM_RIGHTS, # cmsg_{level,type}
434 my $mh = pack('PL' . # msg_name, msg_namelen (socklen_t (U32))
435 BYTES_4_hole . # 4-byte padding on 64-bit
436 'P'.TMPL_size_t . # msg_iov, msg_iovlen,
437 'P'.TMPL_size_t . # msg_control, msg_controllen,
439 NUL, 0, # msg_name, msg_namelen (unused)
441 $iov, 1, # msg_iov, msg_iovlen
442 $cmsghdr, # msg_control
443 CMSG_SPACE(scalar(@$fds) * SIZEOF_int), # msg_controllen
448 $sent = syscall($SYS_sendmsg, fileno($sock), $mh, $flags);
449 } while ($sent < 0 &&
450 ($!{ENOBUFS} || $!{ENOMEM} || $!{ETOOMANYREFS}) &&
452 warn "sleeping on sendmsg: $! (#$try)\n" &&
453 select(undef, undef, undef, 0.1) == 0);
454 $sent >= 0 ? $sent : undef;
457 *recv_cmd4 = sub ($$$) {
458 my ($sock, undef, $len) = @_;
459 vec($_[1] //= '', ($len + 1) * 8, 1) = 0;
460 my $cmsghdr = "\0" x msg_controllen; # 10 * sizeof(int)
461 my $iov = pack('P'.TMPL_size_t, $_[1], $len);
462 my $mh = pack('PL' . # msg_name, msg_namelen (socklen_t (U32))
463 BYTES_4_hole . # 4-byte padding on 64-bit
464 'P'.TMPL_size_t . # msg_iov, msg_iovlen,
465 'P'.TMPL_size_t . # msg_control, msg_controllen,
467 NUL, 0, # msg_name, msg_namelen (unused)
469 $iov, 1, # msg_iov, msg_iovlen
470 $cmsghdr, # msg_control
473 my $r = syscall($SYS_recvmsg, fileno($sock), $mh, 0);
474 return (undef) if $r < 0; # $! set
475 substr($_[1], $r, length($_[1]), '');
478 my ($len, $lvl, $type, @fds) = unpack(TMPL_size_t . # cmsg_len
479 'LLi*', # cmsg_level, cmsg_type, @fds
481 if ($lvl == SOL_SOCKET && $type == SCM_RIGHTS) {
482 $len -= CMSG_ALIGN_SIZEOF_cmsghdr;
483 @ret = @fds[0..(($len / SIZEOF_int) - 1)];
494 This is free software. IT COMES WITHOUT WARRANTY OF ANY KIND.
498 Brad Fitzpatrick <brad@danga.com>