#!/usr/bin/perl -w
# Copyright (C) 2019-2021 all contributors
# License: AGPL-3.0+
#
# Parallel test runner which preloads code and reuses worker processes
# to give a nice speedup over prove(1). It also generates per-test
# .log files (similar to automake tests).
#
# *.t files run by this should not rely on global state.
#
# Usage: $PERL -I lib -w t/run.perl -j4
# Or via prove(1): prove -lvw t/run.perl :: -j4
use strict;
use v5.10.1;
use IO::Handle; # ->autoflush
use PublicInbox::TestCommon;
use PublicInbox::Spawn;
use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
use Errno qw(EINTR);
use Fcntl qw(:seek);
use POSIX qw(WNOHANG);
use File::Temp ();
my $jobs = 1;
my $repeat = 1;
$| = 1;
our $log_suffix = '.log';
my ($shuffle, %pids, @err);
GetOptions('j|jobs=i' => \$jobs,
'repeat=i' => \$repeat,
'log=s' => \$log_suffix,
's|shuffle' => \$shuffle,
) or die "Usage: $0 [-j JOBS] [--log=SUFFIX] [--repeat RUNS]";
if (($ENV{TEST_RUN_MODE} // 2) == 0) {
die "$0 is not compatible with TEST_RUN_MODE=0\n";
}
my @tests = scalar(@ARGV) ? @ARGV : glob('t/*.t');
open my $cwd_fh, '<', '.' or die "open .: $!";
open my $OLDOUT, '>&STDOUT' or die "dup STDOUT: $!";
open my $OLDERR, '>&STDERR' or die "dup STDERR: $!";
$OLDOUT->autoflush(1);
$OLDERR->autoflush(1);
my ($run_log, $tmp_rl);
my $rl = $ENV{TEST_RUN_LOG};
unless ($rl) {
$tmp_rl = File::Temp->new(CLEANUP => 1);
$rl = $tmp_rl->filename;
}
open $run_log, '+>>', $rl or die "open $rl: $!";
$run_log->autoflush(1); # one reader, many writers
key2sub($_) for @tests; # precache
my ($for_destroy, $lei_env, $lei_daemon_pid, $owner_pid);
# TEST_LEI_DAEMON_PERSIST is currently broken. I get ECONNRESET from
# lei even with high kern.ipc.soacceptqueue=1073741823 or SOMAXCONN, not
# sure why. Also, testing our internal inotify usage is unreliable
# because lei-daemon uses a single inotify FD for all clients.
if ($ENV{TEST_LEI_DAEMON_PERSIST} && !$ENV{TEST_LEI_DAEMON_PERSIST_DIR} &&
(PublicInbox::Spawn->can('recv_cmd4') ||
eval { require Socket::MsgHdr })) {
$lei_env = {};
($lei_env->{XDG_RUNTIME_DIR}, $for_destroy) = tmpdir;
$ENV{TEST_LEI_DAEMON_PERSIST_DIR} = $lei_env->{XDG_RUNTIME_DIR};
run_script([qw(lei daemon-pid)], $lei_env, { 1 => \$lei_daemon_pid });
chomp $lei_daemon_pid;
$lei_daemon_pid =~ /\A[0-9]+\z/ or die "no daemon pid: $lei_daemon_pid";
kill(0, $lei_daemon_pid) or die "kill $lei_daemon_pid: $!";
if (my $t = $ENV{GNU_TAIL}) {
system("$t --pid=$lei_daemon_pid -F " .
"$lei_env->{XDG_RUNTIME_DIR}/lei/errors.log >&2 &");
}
if (my $strace_cmd = $ENV{STRACE_CMD}) {
system("$strace_cmd -p $lei_daemon_pid &");
}
$owner_pid = $$;
}
if ($shuffle) {
require List::Util;
} elsif (open(my $prove_state, '<', '.prove') && eval { require YAML::XS }) {
# reuse "prove --state=save" data to start slowest tests, first
my $state = YAML::XS::Load(do { local $/; <$prove_state> });
my $t = $state->{tests};
@tests = sort {
($t->{$b}->{elapsed} // 0) <=> ($t->{$a}->{elapsed} // 0)
} @tests;
}
our $tb = Test::More->builder;
sub DIE (;$) {
print $OLDERR @_;
exit(1);
}
our ($worker, $worker_test);
sub test_status () {
$? = 255 if $? == 0 && !$tb->is_passing;
my $status = $? ? 'not ok' : 'ok';
chdir($cwd_fh) or DIE "fchdir: $!";
if ($log_suffix ne '') {
my $log = $worker_test;
$log =~ s/\.t\z/$log_suffix/;
my $skip = '';
if (open my $fh, '<', $log) {
my @not_ok = grep(!/^(?:ok |[ \t]*#)/ms, <$fh>);
my $last = $not_ok[-1] // '';
pop @not_ok if $last =~ /^[0-9]+\.\.[0-9]+$/;
my $pfx = "# $log: ";
print $OLDERR map { $pfx.$_ } @not_ok;
seek($fh, 0, SEEK_SET) or die "seek: $!";
# show unique skip texts and the number of times
# each text was skipped
local $/;
my @sk = (<$fh> =~ m/^ok [0-9]+ (# skip [^\n]+)/mgs);
if (@sk) {
my %nr;
my @err = grep { !$nr{$_}++ } @sk;
print $OLDERR "$pfx$_ ($nr{$_})\n" for @err;
$skip = ' # total skipped: '.scalar(@sk);
}
} else {
print $OLDERR "could not open: $log: $!\n";
}
print $OLDOUT "$status $worker_test$skip\n";
}
}
# Test::Builder or Test2::Hub may call exit() from plan(skip_all => ...)
END { test_status() if (defined($worker_test) && $worker == $$) }
sub run_test ($) {
my ($test) = @_;
syswrite($run_log, "$$ $test\n");
my $log_fh;
if ($log_suffix ne '') {
my $log = $test;
$log =~ s/\.[^\.]+\z/$log_suffix/ or DIE "can't log for $test";
open $log_fh, '>', $log or DIE "open $log: $!";
$log_fh->autoflush(1);
$tb->output($log_fh);
$tb->failure_output($log_fh);
$tb->todo_output($log_fh);
open STDOUT, '>&', $log_fh or DIE "1>$log: $!";
open STDERR, '>&', $log_fh or DIE "2>$log: $!";
}
$worker_test = $test;
run_script([$test]);
test_status();
$worker_test = undef;
push @err, "$test ($?)" if $?;
}
sub UINT_SIZE () { 4 }
# worker processes will SIGUSR1 the producer process when it
# sees EOF on the pipe. On FreeBSD 11.2 and Perl 5.30.0,
# sys/ioctl.ph gives the wrong value for FIONREAD().
my $producer = $$;
my $eof; # we stop respawning if true
my $start_worker = sub {
my ($j, $rd, $wr, $todo) = @_;
my $pid = fork // DIE "fork: $!";
if ($pid == 0) {
close $wr if $wr;
$SIG{USR1} = undef; # undo parent $SIG{USR1}
$worker = $$;
while (1) {
my $r = sysread($rd, my $buf, UINT_SIZE);
if (!defined($r)) {
next if $! == EINTR;
DIE "sysread: $!";
}
last if $r == 0;
DIE "short read $r" if $r != UINT_SIZE;
my $t = unpack('I', $buf);
run_test($todo->[$t]);
$tb->reset;
}
kill 'USR1', $producer if !$eof; # sets $eof in $producer
if (@err) { # write to run_log for $sigchld handler
syswrite($run_log, "$$ @err\n");
DIE join('', map { "E: $_\n" } @err);
}
exit(0);
} else {
$pids{$pid} = $j;
}
};
# negative $repeat means loop forever:
for (my $i = $repeat; $i != 0; $i--) {
my @todo = $shuffle ? List::Util::shuffle(@tests) : @tests;
# single-producer, multi-consumer queue relying on POSIX pipe semantics
# POSIX.1-2008 stipulates a regular file should work, but Linux <3.14
# had broken read(2) semantics according to the read(2) manpage
pipe(my ($rd, $wr)) or DIE "pipe: $!";
# fill the queue before forking so children can start earlier
my $n = (POSIX::PIPE_BUF / UINT_SIZE);
if ($n >= $#todo) {
print $wr join('', map { pack('I', $_) } (0..$#todo)) or DIE;
undef $wr;
} else { # write what we can...
$wr->autoflush(1);
print $wr join('', map { pack('I', $_) } (0..$n)) or DIE;
$n += 1; # and send more ($n..$#todo), later
}
$eof = undef;
local $SIG{USR1} = sub { $eof = 1 };
my $sigchld = sub {
my ($sig) = @_;
my $flags = $sig ? WNOHANG : 0;
while (1) {
my $pid = waitpid(-1, $flags) or return;
return if $pid < 0;
my $j = delete $pids{$pid};
if (!defined($j)) {
push @err, "reaped unknown $pid ($?)";
next;
}
if ($?) {
seek($run_log, 0, SEEK_SET);
chomp(my @t = grep(/^$pid /, <$run_log>));
$t[0] //= "$pid unknown";
push @err, "job[$j] ($?) PID=$t[-1]";
}
# skip_all can exit(0), respawn if needed:
if (!$eof) {
print $OLDERR "# respawning job[$j]\n";
$start_worker->($j, $rd, $wr, \@todo);
}
}
};
# start the workers to consume the queue
for (my $j = 0; $j < $jobs; $j++) {
$start_worker->($j, $rd, $wr, \@todo);
}
if ($wr) {
local $SIG{CHLD} = $sigchld;
# too many tests to fit in the pipe before starting workers,
# send the rest now the workers are running
print $wr join('', map { pack('I', $_) } ($n..$#todo)) or DIE;
undef $wr;
}
$sigchld->(0) while scalar(keys(%pids));
DIE join('', map { "E: $_\n" } @err) if @err;
}
print $OLDOUT "1..".($repeat * scalar(@tests))."\n" if $repeat >= 0;
if ($lei_env && $$ == $owner_pid) {
my $opt = { 1 => $OLDOUT, 2 => $OLDERR };
my $cur_daemon_pid;
run_script([qw(lei daemon-pid)], $lei_env, { 1 => \$cur_daemon_pid });
run_script([qw(lei daemon-kill)], $lei_env, $opt);
DIE "lei daemon restarted\n" if $cur_daemon_pid != $lei_daemon_pid;
}