From fec19e492eacb10f990091592f423542ab4249bd Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 5 Apr 2020 07:53:47 +0000 Subject: [PATCH] release large (non ref) scalars using `undef $sv' Using `undef EXPR' like a function call actually frees the heap memory associated with the scalar, whereas `$sv = undef' or `$sv = ""' will hold the buffer around until $sv goes out of scope. The `sv_set_undef' documentation in the perlapi(1) manpage explicitly states this: The perl equivalent is "$sv = undef;". Note that it doesn't free any string buffer, unlike "undef $sv". And I've confirmed by reading Dump() output from Devel::Peek. We'll also inline the old index_body sub in SearchIdx.pm to make the scope of the scalar more obvious. This change saves several hundred kB RSS on both -index and -httpd when hitting large emails with thousands of lines. --- lib/PublicInbox/SearchIdx.pm | 33 ++++++++++++++++----------------- lib/PublicInbox/View.pm | 4 ++-- lib/PublicInbox/ViewDiff.pm | 2 +- 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 89d8bc2b..9a5484e3 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -275,22 +275,8 @@ sub index_diff ($$$) { index_text($self, join("\n", @xnq), 1, 'XNQ'); } -sub index_body ($$$) { - my ($self, $txt, $doc) = @_; - if ($doc) { - # does it look like a diff? - if ($txt =~ /^(?:diff|---|\+\+\+) /ms) { - index_diff($self, $txt, $doc); - } else { - index_text($self, $txt, 1, 'XNQ'); - } - } else { - index_text($self, $txt, 0, 'XQUOT'); - } -} - sub index_xapian { # msg_iter callback - my ($part, $depth, @idx) = @{$_[0]}; + my $part = $_[0]->[0]; # ignore $depth and @idx my ($self, $doc) = @{$_[1]}; my $ct = $part->content_type || 'text/plain'; my $fn = $part->filename; @@ -300,11 +286,24 @@ sub index_xapian { # msg_iter callback my ($s, undef) = msg_part_text($part, $ct); defined $s or return; + $_[0]->[0] = $part = undef; # free memory # split off quoted and unquoted blocks: my @sections = PublicInbox::MsgIter::split_quotes($s); - $part = $s = undef; - index_body($self, $_, /\A>/ ? 0 : $doc) for @sections; + undef $s; # free memory + for my $txt (@sections) { + if ($txt =~ /\A>/) { + index_text($self, $txt, 0, 'XQUOT'); + } else { + # does it look like a diff? + if ($txt =~ /^(?:diff|---|\+\+\+) /ms) { + index_diff($self, $txt, $doc); + } else { + index_text($self, $txt, 1, 'XNQ'); + } + } + undef $txt; # free memory + } } sub add_xapian ($$$$) { diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 1e53d8dc..ddd94e48 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -566,7 +566,7 @@ sub add_text_body { # callback for msg_iter # split off quoted and unquoted blocks: my @sections = PublicInbox::MsgIter::split_quotes($s); - $s = ''; + undef $s; # free memory my $rv = $ctx->{obuf}; if (defined($fn) || $depth > 0 || $err) { # badly-encoded message with $err? tell the world about it! @@ -587,7 +587,7 @@ sub add_text_body { # callback for msg_iter # regular lines, OK $$rv .= $l->to_html($cur); } - $cur = undef; + undef $cur; # free memory } obfuscate_addrs($ibx, $$rv) if $ibx->{obfuscate}; diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm index f7422712..3d6058a9 100644 --- a/lib/PublicInbox/ViewDiff.pm +++ b/lib/PublicInbox/ViewDiff.pm @@ -192,7 +192,7 @@ sub flush_diff ($$) { my ($ctx, $cur) = @_; my @top = split($EXTRACT_DIFFS, $$cur); - $$cur = undef; + undef $$cur; # free memory my $linkify = $ctx->{-linkify}; my $dst = $ctx->{obuf}; -- 2.44.0