}
} elsif (m!\Ainclude=(.+)\z!) {
parse_cgitrc($self, $1, $nesting + 1);
- } elsif (m!\A(scan-hidden-path|remove-suffix)=(\d+)\z!) {
+ } elsif (m!\A(scan-hidden-path|remove-suffix)=([0-9]+)\z!) {
my ($k, $v) = ($1, $2);
$k =~ tr/-/_/;
$self->{"-cgit_$k"} = $v;
my $hex = '[a-f0-9]';
my $addmsg = qr!^:000000 100644 \S+ (\S+) A\t${hex}{2}/${hex}{38}$!;
my $delmsg = qr!^:100644 000000 (\S+) \S+ D\t(${hex}{2}/${hex}{38})$!;
- my $refhex = qr/(?:HEAD|${hex}{4,40})(?:~\d+)?/;
+ my $refhex = qr/(?:HEAD|${hex}{4,40})(?:~[0-9]+)?/;
# revision ranges may be specified
my $range = 'HEAD';
my @v = $hdr->header_raw('X-Mail-Count');
my $n;
foreach (@v) {
- /\A\s*(\d+)\s*\z/ or next;
+ /\A\s*([0-9]+)\s*\z/ or next;
$n = $1;
last;
}
# Documentation/technical/http-protocol.txt in git.git
# requires one and exactly one query parameter:
- if ($env->{QUERY_STRING} =~ /\Aservice=git-\w+-pack\z/ ||
- $path =~ /\Agit-\w+-pack\z/) {
+ if ($env->{QUERY_STRING} =~ /\Aservice=git-[A-Za-z0-9_]+-pack\z/ ||
+ $path =~ /\Agit-[A-Za-z0-9_]+-pack\z/) {
my $ok = serve_smart($env, $git, $path);
return $ok if $ok;
}
my $len = $size;
my $code = 200;
push @$h, 'Content-Type', $type;
- if (($env->{HTTP_RANGE} || '') =~ /\bbytes=(\d*)-(\d*)\z/) {
+ if (($env->{HTTP_RANGE} || '') =~ /\bbytes=([0-9]*)-([0-9]*)\z/) {
($code, $len) = prepare_range($env, $in, $h, $1, $2, $size);
if ($code == 416) {
push @$h, 'Content-Range', "bytes */$size";
foreach my $l (split(/\r?\n/, $h)) {
my ($k, $v) = split(/:\s*/, $l, 2);
if ($k =~ /\AStatus\z/i) {
- ($code) = ($v =~ /\b(\d+)\b/);
+ ($code) = ($v =~ /\b([0-9]+)\b/);
} else {
push @h, $k, $v;
}
$env->{REMOTE_ADDR} = $self->{remote_addr};
$env->{REMOTE_PORT} = $self->{remote_port};
if (my $host = $env->{HTTP_HOST}) {
- $host =~ s/:(\d+)\z// and $env->{SERVER_PORT} = $1;
+ $host =~ s/:([0-9]+)\z// and $env->{SERVER_PORT} = $1;
$env->{SERVER_NAME} = $host;
}
if (defined $input) {
to_attr from_attr/;
my $enc_ascii = find_encoding('us-ascii');
+# safe-ish acceptable filename pattern for portability
+our $FN = '[a-zA-Z0-9][a-zA-Z0-9_\-\.]+[a-zA-Z0-9]'; # needs \z anchor
+
sub new {
my ($class, $raw, $href) = @_;
my $val = $opts->{$field};
if (defined $val) {
$val = $val->[-1] if ref($val) eq 'ARRAY';
- $val = undef if $val !~ /\A\d+\z/;
+ $val = undef if $val !~ /\A[0-9]+\z/;
}
$opts->{$field} = $val || $default;
}
my $mkey = $pfx.'max';
my $val = $self->{$mkey} or return;
my $lim;
- if ($val =~ /\A\d+\z/) {
+ if ($val =~ /\A[0-9]+\z/) {
require PublicInbox::Qspawn;
$lim = PublicInbox::Qspawn::Limiter->new($val);
} elsif ($val =~ /\A[a-z][a-z0-9]*\z/) {
if (opendir my $dh, $gits) {
my $max = -1;
while (defined(my $git_dir = readdir($dh))) {
- $git_dir =~ m!\A(\d+)\.git\z! or next;
+ $git_dir =~ m!\A([0-9]+)\.git\z! or next;
$max = $1 if $1 > $max;
}
$part = $self->{-max_git_part} = $max if $max >= 0;
use strict;
use warnings;
use Digest::SHA qw/sha1_hex/;
+use PublicInbox::Hval qw(ascii_html);
my $SALT = rand;
my $LINK_RE = qr{([\('!])?\b((?:ftps?|https?|nntps?|gopher)://
$end = ')';
}
+ $url = ascii_html($url); # for IDN
+
# salt this, as this could be exploited to show
# links in the HTML which don't show up in the raw mail.
my $key = sha1_hex($url . $SALT);
- # only escape ampersands, others do not match LINK_RE
- $url =~ s/&/&/g;
$_[0]->{$key} = $url;
$beg . 'PI-LINK-'. $key . $end;
^ge;
$mid;
}
-# this is idempotent
+# this is idempotent, used for HTML anchor/ids and such
sub id_compress {
my ($id, $force) = @_;
- if ($force || $id =~ /[^\w\-]/ || length($id) > MID_MAX) {
+ if ($force || $id =~ /[^a-zA-Z0-9_\-]/ || length($id) > MID_MAX) {
utf8::encode($id);
return sha1_hex($id);
}
my @recvd = $hdr->header_raw('Received');
my ($ts);
foreach my $r (@recvd) {
- $r =~ /\s*(\d+\s+[[:alpha:]]+\s+\d{2,4}\s+
- \d+\D\d+(?:\D\d+)\s+([\+\-]\d+))/sx or next;
+ $r =~ /\s*([0-9]+\s+[a-zA-Z]+\s+[0-9]{2,4}\s+
+ [0-9]+[^0-9][0-9]+(?:[^0-9][0-9]+)
+ \s+([\+\-][0-9]+))/sx or next;
$ts = eval { str2date_zone($1) } and return $ts;
my $mid = $hdr->header_raw('Message-ID');
warn "no date in $mid Received: $r\n";
my ($ts);
foreach my $d (@date) {
# Y2K problems: 3-digit years
- $d =~ s!([A-Za-z]{3}) (\d{3}) (\d\d:\d\d:\d\d)!
+ $d =~ s!([A-Za-z]{3}) ([0-9]{3}) ([0-9]{2}:[0-9]{2}:[0-9]{2})!
my $yyyy = $2 + 1900; "$1 $yyyy $3"!e;
$ts = eval { str2date_zone($d) } and return $ts;
if ($@) {
# returns 1 if we can continue, 0 if not due to buffered writes or disconnect
sub process_line ($$) {
my ($self, $l) = @_;
- my ($req, @args) = split(/\s+/, $l);
+ my ($req, @args) = split(/[ \t]/, $l);
return 1 unless defined($req); # skip blank line
$req = lc($req);
$req = eval {
# clobber some
my $xref = xref($self, $ng, $n, $mid);
$hdr->header_set('Xref', $xref);
- $xref =~ s/:\d+//g;
+ $xref =~ s/:[0-9]+//g;
$hdr->header_set('Newsgroups', (split(/ /, $xref, 2))[1]);
header_append($hdr, 'List-Post', "<mailto:$ng->{-primary_address}>");
if (my $url = $ng->base_url) {
my ($n, $mid);
my $err;
if (defined $art) {
- if ($art =~ /\A\d+\z/o) {
+ if ($art =~ /\A[0-9]+\z/) {
$err = '423 no such article number in this group';
$n = int($art);
goto find_mid;
sub set_art {
my ($self, $art) = @_;
- $self->{article} = $art if defined $art && $art =~ /\A\d+\z/;
+ $self->{article} = $art if defined $art && $art =~ /\A[0-9]+\z/;
}
sub _header ($) {
defined $range or return '420 No article(s) selected';
my ($beg, $end);
my ($min, $max) = $ng->mm->minmax;
- if ($range =~ /\A(\d+)\z/) {
+ if ($range =~ /\A([0-9]+)\z/) {
$beg = $end = $1;
- } elsif ($range =~ /\A(\d+)-\z/) {
+ } elsif ($range =~ /\A([0-9]+)-\z/) {
($beg, $end) = ($1, $max);
- } elsif ($range =~ /\A(\d+)-(\d+)\z/) {
+ } elsif ($range =~ /\A([0-9]+)-([0-9]+)\z/) {
($beg, $end) = ($1, $2);
} else {
return r501;
$self->{rbuf} .= $$buf;
}
my $r = 1;
- while ($r > 0 && $self->{rbuf} =~ s/\A\s*([^\r\n]*)\r?\n//) {
+ while ($r > 0 && $self->{rbuf} =~ s/\A[ \t\r\n]*([^\r\n]*)\r?\n//) {
my $line = $1;
return $self->close if $line =~ /[[:cntrl:]]/s;
my $t0 = now();
if (my $ibx = $pi_config->lookup_newsgroup($ng)) {
my $url = PublicInbox::Hval::prurl($env, $ibx->{url});
my $code = 301;
- if (defined $article && $article =~ /\A\d+\z/) {
+ if (defined $article && $article =~ /\A[0-9]+\z/) {
my $mid = eval { $ibx->mm->mid_for($article) };
if (defined $mid) {
# article IDs are not stable across clones,
my $qpf = \($self->{qp_flags} ||= $QP_FLAGS);
if ($self->{version} >= 2) {
foreach my $part (<$dir/*>) {
- -d $part && $part =~ m!/\d+\z! or next;
+ -d $part && $part =~ m!/[0-9]+\z! or next;
my $sub = Search::Xapian::Database->new($part);
if ($xdb) {
$xdb->add_database($sub);
my ($class, $qp) = @_;
my $r = $qp->{r};
- my ($l) = (($qp->{l} || '') =~ /(\d+)/);
+ my ($l) = (($qp->{l} || '') =~ /([0-9]+)/);
$l = $LIM if !$l || $l > $LIM;
bless {
q => $qp->{'q'},
x => $qp->{x} || '',
- o => (($qp->{o} || '0') =~ /(\d+)/),
+ o => (($qp->{o} || '0') =~ /([0-9]+)/),
l => $l,
r => (defined $r && $r ne '0'),
}, $class;
}
my $msgs = $srch->query($q, { relevance => 1 });
- my $re = qr/\Aindex ($pre[a-f0-9]*)\.\.($post[a-f0-9]*)(?: (\d+))?/;
+ my $re = qr/\Aindex ($pre[a-f0-9]*)\.\.($post[a-f0-9]*)(?: ([0-9]+))?/;
my $di;
foreach my $smsg (@$msgs) {
# due to -compact
if (-d $xpfx) {
foreach my $part (<$xpfx/*>) {
- -d $part && $part =~ m!/\d+\z! or next;
+ -d $part && $part =~ m!/[0-9]+\z! or next;
eval {
Search::Xapian::Database->new($part)->close;
$nparts++;
my $latest;
opendir my $dh, $pfx or die "opendir $pfx: $!\n";
while (defined(my $git_dir = readdir($dh))) {
- $git_dir =~ m!\A(\d+)\.git\z! or next;
+ $git_dir =~ m!\A([0-9]+)\.git\z! or next;
if ($1 > $$max) {
$$max = $1;
$latest = "$pfx/$git_dir";
$desc = $fn unless defined $desc;
$desc = '' unless defined $desc;
my $sfn;
- if (defined $fn && $fn =~ /\A[[:alnum:]][\w\.-]+[[:alnum:]]\z/) {
+ if (defined $fn && $fn =~ /\A$PublicInbox::Hval::FN\z/o) {
$sfn = $fn;
} elsif ($ct eq 'text/plain') {
$sfn = 'a.txt';
# Xapian uses '..' but '-' is perhaps friendier to URL linkifiers
# if only $after exists "YYYYMMDD.." because "." could be skipped
# if interpreted as an end-of-sentence
- $t =~ s/\A(\d{8,14})-// and $after = str2ts($1);
- $t =~ /\A(\d{8,14})\z/ and $before = str2ts($1);
+ $t =~ s/\A([0-9]{8,14})-// and $after = str2ts($1);
+ $t =~ /\A([0-9]{8,14})\z/ and $before = str2ts($1);
my $ibx = $ctx->{-inbox};
my $msgs = $ibx->recent($opts, $after, $before);
(defined($spfx) && defined($oid_a) && defined($oid_b)) or
return "@@ $ca $cb @@";
- my ($n) = ($ca =~ /^-(\d+)/);
+ my ($n) = ($ca =~ /^-([0-9]+)/);
$n = defined($n) ? do { ++$n; "#n$n" } : '';
my $rv = qq(@@ <a\nhref="$spfx$oid_a/s/$dctx->{Q}$n">$ca</a>);
- ($n) = ($cb =~ /^\+(\d+)/);
+ ($n) = ($cb =~ /^\+([0-9]+)/);
$n = defined($n) ? do { ++$n; "#n$n" } : '';
$rv .= qq( <a\nhref="$spfx$oid_b/s/$dctx->{Q}$n">$cb</a> @@);
our $INBOX_RE = qr!\A/([\w\-][\w\.\-]*)!;
our $MID_RE = qr!([^/]+)!;
our $END_RE = qr!(T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
-our $ATTACH_RE = qr!(\d[\.\d]*)-([[:alnum:]][\w\.-]+[[:alnum:]])!i;
+our $ATTACH_RE = qr!([0-9][0-9\.]*)-($PublicInbox::Hval::FN)!;
our $OID_RE = qr![a-f0-9]{7,40}!;
sub new {
my $method = $env->{REQUEST_METHOD};
if ($method eq 'POST') {
- if ($path_info =~ m!$INBOX_RE/(?:(\d+)/)?(git-upload-pack)\z!) {
+ if ($path_info =~ m!$INBOX_RE/(?:([0-9]+)/)?
+ (git-upload-pack)\z!x) {
my ($part, $path) = ($2, $3);
return invalid_inbox($ctx, $1) ||
serve_git($ctx, $part, $path);
invalid_inbox($ctx, $1) || get_atom($ctx);
} elsif ($path_info =~ m!$INBOX_RE/new\.html\z!o) {
invalid_inbox($ctx, $1) || get_new($ctx);
- } elsif ($path_info =~ m!$INBOX_RE/(?:(\d+)/)?
+ } elsif ($path_info =~ m!$INBOX_RE/(?:([0-9]+)/)?
($PublicInbox::GitHTTPBackend::ANY)\z!ox) {
my ($part, $path) = ($2, $3);
invalid_inbox($ctx, $1) || serve_git($ctx, $part, $path);
- } elsif ($path_info =~ m!$INBOX_RE/([\w-]+).mbox\.gz\z!o) {
+ } elsif ($path_info =~ m!$INBOX_RE/([a-zA-Z0-9_\-]+).mbox\.gz\z!o) {
serve_mbox_range($ctx, $1, $2);
} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$END_RE\z!o) {
msg_page($ctx, $1, $2, $3);
r301($ctx, $1, $2);
} elsif ($path_info =~ m!$INBOX_RE/_/text(?:/(.*))?\z!o) {
get_text($ctx, $1, $2);
- } elsif ($path_info =~ m!$INBOX_RE/([\w\-\.]+)\.css\z!o) {
+ } elsif ($path_info =~ m!$INBOX_RE/([a-zA-Z0-9_\-\.]+)\.css\z!o) {
get_css($ctx, $1, $2);
} elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/\z!o) {
get_vcs_object($ctx, $1, $2);
- } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/([\w\.\-]+)\z!o) {
+ } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/
+ ($PublicInbox::Hval::FN)\z!ox) {
get_vcs_object($ctx, $1, $2, $3);
} elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s\z!o) {
r301($ctx, $1, $2, 's/');
$inline_ok = 0;
} else {
my $fn = $_;
+ my ($key) = (m!([^/]+?)(?:\.css)?\z!i);
+ if ($key !~ /\A[a-zA-Z0-9_\-\.]+\z/) {
+ warn "ignoring $fn, non-ASCII word character\n";
+ next;
+ }
open(my $fh, '<', $fn) or do {
warn "failed to open $fn: $!\n";
next;
};
- my ($key) = (m!([^/]+?)(?:\.css)?\z!i);
my $ctime = 0;
my $local = do { local $/; <$fh> };
if ($local =~ /\S/) {
if ($ct && (($ct->{discrete} || '') eq 'text')) {
# display all text as text/plain:
my $cset = $ct->{attributes}->{charset};
- if ($cset && ($cset =~ /\A[\w-]+\z/)) {
+ if ($cset && ($cset =~ /\A[a-zA-Z0-9_\-]+\z/)) {
$res->[1]->[1] .= qq(; charset=$cset);
}
} else { # TODO: allow user to configure safe types
my ($self, $env) = @_;
my @list;
my $host = $env->{HTTP_HOST} // $env->{SERVER_NAME};
- $host =~ s/:\d+\z//;
- my $re = qr!\A(?:https?:)?//\Q$host\E(?::\d+)?/!i;
+ $host =~ s/:[0-9]+\z//;
+ my $re = qr!\A(?:https?:)?//\Q$host\E(?::[0-9]+)?/!i;
$self->{pi_config}->each_inbox(sub {
my ($ibx) = @_;
push @list, $ibx if !$ibx->{-hide}->{www} && $ibx->{url} =~ $re;
} else {
opendir my $dh, $old or die "Failed to opendir $old: $!\n";
while (defined(my $dn = readdir($dh))) {
- if ($dn =~ /\A\d+\z/) {
+ if ($dn =~ /\A[0-9]+\z/) {
my $tmpl = "$dn-XXXXXXXX";
my $dst = tempdir($tmpl, DIR => $old);
same_fs_or_die($old, $dst);
my @p = split('/', $_[0]);
# return "xap15/0" for v2, or "xapian15" for v1:
- ($p[-1] =~ /\A\d+\z/) ? "$p[-2]/$p[-1]" : $p[-1];
+ ($p[-1] =~ /\A[0-9]+\z/) ? "$p[-2]/$p[-1]" : $p[-1];
}
# xapian-compact wrapper
$dst->set_metadata('last_commit', $lc) if $lc;
# only the first xapian partition (0) gets 'indexlevel'
- if ($old =~ m!(?:xapian\d+|xap\d+/0)\z!) {
+ if ($old =~ m!(?:xapian[0-9]+|xap[0-9]+/0)\z!) {
my $l = $src->get_metadata('indexlevel');
if ($l eq 'medium') {
$dst->set_metadata('indexlevel', $l);
my $xdir_ro = $ibx->{search}->xdir(1);
my $npart = 0;
foreach my $part (<$xdir_ro/*>) {
- if (-d $part && $part =~ m!/\d+\z!) {
+ if (-d $part && $part =~ m!/[0-9]+\z!) {
my $bytes = 0;
$bytes += -s $_ foreach glob("$part/*");
$npart++ if $bytes;
'punctuation with unpaired ) OK')
}
+if ('IDN example: <ACDB98F4-178C-43C3-99C4-A1D03DD6A8F5@sb.org>') {
+ my $hc = '月';
+ my $u = "http://www.\x{6708}.example.com/";
+ my $s = $u;
+ my $l = PublicInbox::Linkify->new;
+ $s = $l->linkify_1($s);
+ $s = $l->linkify_2($s);
+ my $expect = qq{<a
+href="http://www.$hc.example.com/">http://www.$hc.example.com/</a>};
+ is($s, $expect, 'IDN message escaped properly');
+}
+
done_testing();