#!/usr/bin/perl -w
-# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2018 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Parallel WWW checker
my $usage = "$0 [-j JOBS] [-s SLOW_THRESHOLD] URL_OF_INBOX\n";
use POSIX qw(:sys_wait_h);
use Time::HiRes qw(gettimeofday tv_interval);
use WWW::Mechanize;
+use Data::Dumper;
# Defaults for the two tunables named in $usage (-j JOBS, -s SLOW_THRESHOLD);
# presumably overridden by option parsing not visible here -- TODO confirm.
my $nproc = 4;
# NOTE(review): looks like a threshold in seconds (Time::HiRes is imported
# above) -- confirm against the code that compares against it.
my $slow = 0.5;
my %opts = (
warn "W: ".$r->code . " $u\n"
}
- # check bad links
- my @at = grep(/@/, @links);
- print "BAD: $u ", join("\n", @at), "\n" if @at;
-
my $s;
# blocking
foreach my $l (@links, "DONE\t$u") {
my $n = length($l);
die "$$ send truncated $s < $n\n" if $s != $n;
}
+
+ # make sure the HTML source doesn't screw up terminals
+ # when people curl the source (not remotely an expert
+ # on languages or encodings, here).
+ next if $r->header('Content-Type') !~ m!\btext/html\b!;
+ my $dc = $r->decoded_content;
+ if ($dc =~ /([\x00-\x08\x0d-\x1f\x7f-\x{99999999}]+)/s) {
+ my $o = $1;
+ my $c = Dumper($o);
+ warn "bad: $u $c\n";
+ }
}
}