From: Eric Wong Date: Sat, 9 May 2020 08:27:37 +0000 (+0000) Subject: eml: speed up common LF-only emails X-Git-Tag: v1.5.0~6 X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=commitdiff_plain;h=e07a58aa1a1118b2aeb75e674c3542ae5d93b2c9 eml: speed up common LF-only emails Emails from a *nix MTA are typically LF-only, so we don't need the complexity of the RE engine when a simple index() works. We still need to ensure there's no "\r\n\r\n" before the first "\n\n", but two calls to index() are still faster than a RE match. This gives a 2-5% speedup in some informal tests and saves ~30MB when scanning a 30MB spam message on newer versions of Perl. I'll have to diagnose why Perl wastes so much memory doing RE matches on giant strings, though. --- diff --git a/lib/PublicInbox/Eml.pm b/lib/PublicInbox/Eml.pm index 80e7c1af..f022516c 100644 --- a/lib/PublicInbox/Eml.pm +++ b/lib/PublicInbox/Eml.pm @@ -71,10 +71,18 @@ sub re_memo ($) { # compatible with our uses of Email::MIME sub new { my $ref = ref($_[1]) ? $_[1] : \(my $cpy = $_[1]); - if ($$ref =~ /\r?\n(\r?\n)/s) { # likely - # This can modify $$ref in-place and to avoid memcpy/memmove - # on a potentially large $$ref. It does need to make a - # copy for $hdr, though. Idea stolen from Email::Simple + # substr() can modify the first arg in-place and to avoid + # memcpy/memmove on a potentially large scalar. It does need + # to make a copy for $hdr, though. Idea stolen from Email::Simple. 
+ + # We also prefer index() on common LFLF emails since it's faster + # and re scan can bump RSS by length($$ref) on big strings + if (index($$ref, "\r\n") < 0 && (my $pos = index($$ref, "\n\n")) >= 0) { + # likely on *nix + my $hdr = substr($$ref, 0, $pos + 2, ''); # sv_chop on $$ref + chop($hdr); # lower SvCUR + bless { hdr => \$hdr, crlf => "\n", bdy => $ref }, __PACKAGE__; + } elsif ($$ref =~ /\r?\n(\r?\n)/s) { my $hdr = substr($$ref, 0, $+[0], ''); # sv_chop on $$ref substr($hdr, -(length($1))) = ''; # lower SvCUR bless { hdr => \$hdr, crlf => $1, bdy => $ref }, __PACKAGE__;