Sergey Matveev's repositories - public-inbox.git/blobdiff - t/check-www-inbox.perl
tests: deal with the removal of '.' from @INC in newer Perl
[public-inbox.git] / t / check-www-inbox.perl
index 7cfe19328b7266d93200c434cb410833e9c0c97d..4319049c8a1c8e7ec5fae84a9e3349be28c30cba 100644 (file)
@@ -13,6 +13,7 @@ use LWP::ConnCache;
 use POSIX qw(:sys_wait_h);
 use Time::HiRes qw(gettimeofday tv_interval);
 use WWW::Mechanize;
+use Data::Dumper;
 my $nproc = 4;
 my $slow = 0.5;
 my %opts = (
@@ -130,10 +131,6 @@ sub worker_loop {
                        warn "W: ".$r->code . " $u\n"
                }
 
-               # check bad links
-               my @at = grep(/@/, @links);
-               print "BAD: $u ", join("\n", @at), "\n" if @at;
-
                my $s;
                # blocking
                foreach my $l (@links, "DONE\t$u") {
@@ -145,5 +142,16 @@ sub worker_loop {
                        my $n = length($l);
                        die "$$ send truncated $s < $n\n" if $s != $n;
                }
+
+               # make sure the HTML source doesn't screw up terminals
+               # when people curl the source (not remotely an expert
+               # on languages or encodings, here).
+               next if $r->header('Content-Type') !~ m!\btext/html\b!;
+               my $dc = $r->decoded_content;
+               if ($dc =~ /([\x00-\x08\x0d-\x1f\x7f-\x{99999999}]+)/s) {
+                       my $o = $1;
+                       my $c = Dumper($o);
+                       warn "bad: $u $c\n";
+               }
        }
 }