lib/PublicInbox/Linkify.pm | 3 ++- lib/PublicInbox/MID.pm | 12 +++++++----- lib/PublicInbox/NNTP.pm | 16 ++++++++-------- lib/PublicInbox/SearchThread.pm | 3 ++- lib/PublicInbox/View.pm | 5 +++-- scripts/ssoma-replay | 2 +- diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm index 2bd8f64a142b1152ea1acf96f989ffa4dbd3c621..b85bedfed9aa6f3bfdf2102e2311b52c573b7786 100644 --- a/lib/PublicInbox/Linkify.pm +++ b/lib/PublicInbox/Linkify.pm @@ -14,6 +14,7 @@ use strict; use warnings; use Digest::SHA qw/sha1_hex/; use PublicInbox::Hval qw(ascii_html mid_href); +use PublicInbox::MID qw($MID_EXTRACT); my $SALT = rand; my $LINK_RE = qr{([\('!])?\b((?:ftps?|https?|nntps?|gopher):// @@ -93,7 +94,7 @@ # single pass linkification of within $str # with $pfx being the URL prefix sub linkify_mids { my ($self, $pfx, $str, $raw) = @_; - $$str =~ s!<([^>]+)>! + $$str =~ s!$MID_EXTRACT! my $mid = $1; my $html = ascii_html($mid); my $href = mid_href($mid); diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm index d2bbaec71e9ef8b86b35d5f6e54e27da78b9d6fb..dddde092f2581ea1d54b1749804642dbccf89a48 100644 --- a/lib/PublicInbox/MID.pm +++ b/lib/PublicInbox/MID.pm @@ -6,8 +6,8 @@ package PublicInbox::MID; use strict; use warnings; use base qw/Exporter/; -our @EXPORT_OK = qw/mid_clean id_compress mid2path mid_mime mid_escape MID_ESC - mids references mids_for_index/; +our @EXPORT_OK = qw(mid_clean id_compress mid2path mid_mime mid_escape MID_ESC + mids references mids_for_index $MID_EXTRACT); use URI::Escape qw(uri_escape_utf8); use Digest::SHA qw/sha1_hex/; require PublicInbox::Address; @@ -15,12 +15,14 @@ use constant { MID_MAX => 40, # SHA-1 hex length # TODO: get rid of this MAX_MID_SIZE => 244, # max term size (Xapian limitation) - length('Q') }; + +our $MID_EXTRACT = qr/<([^>]+)>/s; sub mid_clean { my ($mid) = @_; defined($mid) or die "no Message-ID"; # MDA->precheck did more checking for us - if ($mid =~ /<([^>]+)>/) { + if ($mid =~ $MID_EXTRACT) { $mid = $1; } $mid; @@ -58,7 +60,7 @@ # only intended for Message-ID and X-Alt-Message-ID sub extract_mids { my @mids; for my $v (@_) { - my @cur = ($v =~ /<([^>]+)>/sg); + my @cur = ($v =~ /$MID_EXTRACT/g); if (@cur) { push(@mids, @cur); } else { @@ -92,7 +94,7 @@ my @mids; foreach my $f (qw(References In-Reply-To)) { my @v = $hdr->header_raw($f); foreach my $v (@v) { - push(@mids, ($v =~ /<([^>]+)>/sg)); + push(@mids, ($v =~ /$MID_EXTRACT/g)); } } diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index 277657e602cb3feba30310d715608bf771747379..39e2f88e5135d4e9bb08ed65e1a54b96a722c6f2 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -7,7 +7,7 @@ use strict; use warnings; use base qw(PublicInbox::DS); use fields qw(nntpd article ng long_cb); -use PublicInbox::MID qw(mid_escape); +use PublicInbox::MID qw(mid_escape $MID_EXTRACT); use Email::Simple; use POSIX qw(strftime); use PublicInbox::DS qw(now); @@ -24,7 +24,7 @@ r430 => '430 No article with that message-id', }; use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT); use Errno qw(EAGAIN); - +my $ONE_MSGID = qr/\A$MID_EXTRACT\z/; my @OVERVIEW = qw(Subject From Date Message-ID References); my $OVERVIEW_FMT = join(":\r\n", @OVERVIEW, qw(Bytes Lines), '') . "Xref:full\r\n"; @@ -450,7 +450,7 @@ if ($art =~ /\A[0-9]+\z/) { $err = '423 no such article number in this group'; $n = int($art); goto find_mid; - } elsif ($art =~ /\A<([^>]+)>\z/) { + } elsif ($art =~ $ONE_MSGID) { $mid = $1; $err = r430; $n = $ng->mm->num_for($mid) if $ng; @@ -653,7 +653,7 @@ sub hdr_message_id ($$$) { # optimize XHDR Message-ID [range] for slrnpull. my ($self, $xhdr, $range) = @_; - if (defined $range && $range =~ /\A<(.+)>\z/) { # Message-ID + if (defined $range && $range =~ $ONE_MSGID) { my ($ng, $n) = mid_lookup($self, $1); return r430 unless $n; hdr_mid_response($self, $xhdr, $ng, $n, $range, $range); @@ -696,7 +696,7 @@ sub hdr_xref ($$$) { # optimize XHDR Xref [range] for rtin my ($self, $xhdr, $range) = @_; - if (defined $range && $range =~ /\A<(.+)>\z/) { # Message-ID + if (defined $range && $range =~ $ONE_MSGID) { my $mid = $1; my ($ng, $n) = mid_lookup($self, $mid); return r430 unless $n; @@ -734,7 +734,7 @@ } sub hdr_smsg ($$$$) { my ($self, $xhdr, $field, $range) = @_; - if (defined $range && $range =~ /\A<(.+)>\z/) { # Message-ID + if (defined $range && $range =~ $ONE_MSGID) { my ($ng, $n) = mid_lookup($self, $1); return r430 unless defined $n; my $v = over_header_for($ng->over, $n, $field); @@ -843,7 +843,7 @@ } sub cmd_over ($;$) { my ($self, $range) = @_; - if ($range && $range =~ /\A<(.+)>\z/) { + if ($range && $range =~ $ONE_MSGID) { my ($ng, $n) = mid_lookup($self, $1); defined $n or return r430; my $smsg = $ng->over->get_art($n) or return r430; @@ -911,7 +911,7 @@ sub zflush {} # overridden by NNTPdeflate sub cmd_xpath ($$) { my ($self, $mid) = @_; - return r501 unless $mid =~ /\A<(.+)>\z/; + return r501 unless $mid =~ $ONE_MSGID; $mid = $1; my @paths; foreach my $ng (values %{$self->{nntpd}->{groups}}) { diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm index ab2f1a84adfb990a55395deccb8b749763b10336..8b2cb8059db94043affbbf82abbfbea2eddb57ba 100644 --- a/lib/PublicInbox/SearchThread.pm +++ b/lib/PublicInbox/SearchThread.pm @@ -20,6 +20,7 @@ # - http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=833479 package PublicInbox::SearchThread; use strict; use warnings; +use PublicInbox::MID qw($MID_EXTRACT); sub thread { my ($msgs, $ordersub, $ctx) = @_; @@ -67,7 +68,7 @@ # messages. It is not needed in a perfect world where # everything is perfectly referenced, only the last ref # matters. my $prev; - foreach my $ref ($refs =~ m/<([^>]+)>/g) { + foreach my $ref ($refs =~ m/$MID_EXTRACT/go) { # Find a Container object for the given Message-ID my $cont = _get_cont_for_id($id_table, $ref); diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 5baaffaf5deda70395a5078d47c09f9adff4eac7..89174296335ab594cfac100db4d87664d506afc4 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -10,7 +10,8 @@ use bytes (); # only for bytes::length use PublicInbox::MsgTime qw(msg_datestamp); use PublicInbox::Hval qw(ascii_html obfuscate_addrs prurl mid_href); use PublicInbox::Linkify; -use PublicInbox::MID qw/id_compress mids mids_for_index references/; +use PublicInbox::MID qw(id_compress mids mids_for_index references + $MID_EXTRACT); use PublicInbox::MsgIter; use PublicInbox::Address; use PublicInbox::WwwStream; @@ -299,7 +300,7 @@ my $siblings; if (my $smsg = $node->{smsg}) { # delete saves about 200KB on a 1K message thread if (my $refs = delete $smsg->{references}) { - ($$irt) = ($refs =~ m/<([^>]+)>\z/); + ($$irt) = ($refs =~ m/$MID_EXTRACT\z/o); } } my $irt_map = $mapping->{$$irt} if defined $$irt; diff --git a/scripts/ssoma-replay b/scripts/ssoma-replay index 46b15d7eb185d26d98304424fc33567dcdda2e34..3e928084bb90dcba67712bc3d8129224651ad7da 100755 --- a/scripts/ssoma-replay +++ b/scripts/ssoma-replay @@ -52,7 +52,7 @@ if (defined $domain) { $archive_url = "https://$domain/$user/"; my $mid = $header_obj->header('Message-Id'); - if ($mid =~ /\A<(.+)>\z/) { + if ($mid =~ /<[ \t]*([^>]+)?[ \t]*>/s) { $mid = $1; } $mid = uri_escape_utf8($mid,