#!/usr/bin/perl -w
-# Copyright (C) 2016-2019 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Parallel WWW checker
# Usage synopsis; $0 interpolates to the name this script was invoked as.
# Advertises two optional switches (-j JOBS, -s SLOW_THRESHOLD) followed by
# the URL of the inbox.
my $usage = "$0 [-j JOBS] [-s SLOW_THRESHOLD] URL_OF_INBOX\n";
2 => fileno($err_fh),
};
my $pid = spawn($cmd, undef, $rdr);
- defined $pid or die "spawn failure: $!";
while (waitpid($pid, 0) != $pid) {
next if $!{EINTR};
warn "waitpid(xmlstarlet, $pid) $!";
$SIG{CHLD} = 'DEFAULT';
my $m = WWW::Mechanize->new(autocheck => 0);
my $cc = LWP::ConnCache->new;
+ $m->stack_depth(0); # no history
$m->conn_cache($cc);
while (1) {
$todo_rd->recv(my $u, 65535, 0);
my $s;
# blocking
foreach my $l (@links, "DONE\t$u") {
- next if $l eq '';
+ next if $l eq '' || $l =~ /\.mbox(?:\.gz)\z/;
do {
$s = $done_wr->send($l, MSG_EOR);
} while (!defined $s && $!{EINTR});