From: Eric Wong Date: Wed, 10 Jun 2020 07:04:19 +0000 (+0000) Subject: imap: speed up HEADER.FIELDS[.NOT] range fetches X-Git-Tag: v1.6.0~445 X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=commitdiff_plain;h=05978869826c50252d49a7977470ee3107eb2d16 imap: speed up HEADER.FIELDS[.NOT] range fetches While we can't memoize the regexp forever like we do with other Eml users, we can still benefit from caching regexp compilation on a per-request basis. A FETCH request from mutt on a 4K message inbox is around 8% faster after this. Since regexp compilation via qr// isn't unbearably slow, a shared cache probably isn't worth the trouble of implementing. A per-request cache seems enough. --- diff --git a/lib/PublicInbox/IMAP.pm b/lib/PublicInbox/IMAP.pm index 0852ffab..39667199 100644 --- a/lib/PublicInbox/IMAP.pm +++ b/lib/PublicInbox/IMAP.pm @@ -544,25 +544,23 @@ sub hdrs_regexp ($) { # BODY[($SECTION_IDX.)?HEADER.FIELDS.NOT ($HDRS)]<$offset.$bytes> sub partial_hdr_not { - my ($eml, $section_idx, $hdrs) = @_; + my ($eml, $section_idx, $hdrs_re) = @_; if (defined $section_idx) { $eml = eml_body_idx($eml, $section_idx) or return; } my $str = $eml->header_obj->as_string; - my $re = hdrs_regexp($hdrs); - $str =~ s/$re//g; + $str =~ s/$hdrs_re//g; $str .= "\r\n"; } # BODY[($SECTION_IDX.)?HEADER.FIELDS ($HDRS)]<$offset.$bytes> sub partial_hdr_get { - my ($eml, $section_idx, $hdrs) = @_; + my ($eml, $section_idx, $hdrs_re) = @_; if (defined $section_idx) { $eml = eml_body_idx($eml, $section_idx) or return; } my $str = $eml->header_obj->as_string; - my $re = hdrs_regexp($hdrs); - join('', ($str =~ m/($re)/g), "\r\n"); + join('', ($str =~ m/($hdrs_re)/g), "\r\n"); } sub partial_prepare ($$$) { @@ -583,9 +581,10 @@ sub partial_prepare ($$$) { (?:HEADER\.FIELDS(\.NOT)?)\x20 # 2 \(([A-Z0-9\-\x20]+)\) # 3 - hdrs \](?:<([0-9]+)(?:\.([0-9]+))?>)?\z/sx) { # 4 5 - $partial->{$att} = [ $2 ? \&partial_hdr_not - : \&partial_hdr_get, - $1, $3, $4, $5 ]; + my $tmp = $partial->{$att} = [ $2 ? \&partial_hdr_not + : \&partial_hdr_get, + $1, undef, $4, $5 ]; + $tmp->[2] = hdrs_regexp($3); } else { undef; } diff --git a/t/imap.t b/t/imap.t index fe6352b6..451b6596 100644 --- a/t/imap.t +++ b/t/imap.t @@ -46,17 +46,21 @@ use PublicInbox::IMAPD; my $partial_body = \&PublicInbox::IMAP::partial_body; my $partial_hdr_get = \&PublicInbox::IMAP::partial_hdr_get; my $partial_hdr_not = \&PublicInbox::IMAP::partial_hdr_not; + my $hdrs_regexp = \&PublicInbox::IMAP::hdrs_regexp; is_deeply($x, { 'BODY[9]' => [ $partial_body, 9, undef, undef, undef ], 'BODY[9]<5>' => [ $partial_body, 9, undef, 5, undef ], 'BODY[9]<5.1>' => [ $partial_body, 9, undef, 5, 1 ], 'BODY[1.1]' => [ $partial_body, '1.1', undef, undef, undef ], 'BODY[HEADER.FIELDS (DATE FROM)]' => [ $partial_hdr_get, - undef, 'DATE FROM', undef, undef ], + undef, $hdrs_regexp->('DATE FROM'), + undef, undef ], 'BODY[HEADER.FIELDS.NOT (TO)]' => [ $partial_hdr_not, - undef, 'TO', undef, undef ], + undef, $hdrs_regexp->('TO'), + undef, undef ], 'BODY[1.1.HEADER.FIELDS (TO)]' => [ $partial_hdr_get, - '1.1', 'TO', undef, undef ], + '1.1', $hdrs_regexp->('TO'), + undef, undef ], }, 'structure matches expected'); }