Calling Compress::Raw::Zlib::deflate is fairly expensive.
Relying on the `.=' (concat) operator inside the ->zadd method is
faster, but the method dispatch overhead is noticeable compared
to the original code where we had bare `.=' littered throughout.
Fortunately, `print' and `say' with the PerlIO::scalar IO layer
appear to offer better performance without high method dispatch
overhead. This doesn't allow us to save as much memory as I
originally hoped, but does allow us to rely less on concat
operators in other places and just pass a list of args to
`print' and `say' as appropriate.
This does reduce scratchpad use, however, allowing for large
memory savings, and we still ->deflate every single $eml.
#!/usr/bin/perl -w
-# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Generates NEWS, NEWS.atom, and NEWS.html files using release emails
# this uses unstable internal APIs of public-inbox, and this script
ibx => $ibx,
-upfx => "$base_url/",
-hr => 1,
+ zfh => $out,
};
if ($dst eq 'NEWS.html') {
html_start($out, $ctx);
http_out($_[0])->write(translate($_[0], $_[1]));
}
-sub zadd {
- my $self = shift;
- $self->{pbuf} .= $_ for @_; # perl internal pad memory use here
+sub zfh {
+ $_[0]->{zfh} // do {
+ open($_[0]->{zfh}, '>>', \($_[0]->{pbuf} //= '')) or
+ die "open: $!";
+ $_[0]->{zfh}
+ };
}
# similar to ->translate; use this when we're sure we know we have
# more data to buffer after this
sub zmore {
- my $self = shift; # $_[1] => input
+ my $self = shift;
+ my $zfh = delete $self->{zfh};
+ if (@_ > 1 || $zfh) {
+ print { $zfh // zfh($self) } @_;
+ @_ = (delete $self->{pbuf});
+ delete $self->{zfh};
+ };
http_out($self);
- my $x;
- defined($x = delete($self->{pbuf})) and unshift(@_, $x);
- for (@_) {
- ($x = $self->{gz}->deflate($_, $self->{zbuf})) == Z_OK or
- die "gzip->deflate: $x";
- }
- undef;
+ my $err;
+ ($err = $self->{gz}->deflate($_[0], $self->{zbuf})) == Z_OK or
+ die "gzip->deflate: $err";
}
# flushes and returns the final bit of gzipped data
sub zflush ($;@) {
my $self = shift; # $_[1..Inf] => final input (optional)
+ zmore($self, @_) if scalar(@_) || $self->{zfh};
+ # not a bug, recursing on DS->write failure
+ my $gz = delete $self->{gz} // return '';
+ my $err;
my $zbuf = delete $self->{zbuf};
- my $gz = delete $self->{gz};
- my $x;
- defined($x = delete($self->{pbuf})) and unshift(@_, $x);
- for (@_) { # it's a bug iff $gz is undef if @_ isn't empty, here:
- ($x = $gz->deflate($_, $zbuf)) == Z_OK or
- die "gzip->deflate: $x";
- }
- $gz // return ''; # not a bug, recursing on DS->write failure
- ($x = $gz->flush($zbuf)) == Z_OK or die "gzip->flush: $x";
+ ($err = $gz->flush($zbuf)) == Z_OK or die "gzip->flush: $err";
$zbuf;
}
my $ibx = $ctx->{ibx};
my $eml = delete($ctx->{eml}) // $ibx->smsg_eml($smsg) // return;
my $n = $ctx->{smsg} = $ibx->over->next_by_mid(@{$ctx->{next_arg}});
- $ctx->zadd(msg_hdr($ctx, $eml));
+ $ctx->zmore(msg_hdr($ctx, $eml));
if ($n) {
$ctx->translate(msg_body($eml));
} else { # last message
# callback for PublicInbox::WwwStream::getline
sub mset_thread_i {
my ($ctx, $eml) = @_;
- $ctx->zadd($ctx->html_top) if exists $ctx->{-html_tip};
+ print { $ctx->zfh } $ctx->html_top if exists $ctx->{-html_tip};
$eml and return PublicInbox::View::eml_entry($ctx, $eml);
my $smsg = shift @{$ctx->{msgs}} or
- $ctx->zmore(${delete($ctx->{skel})});
+ print { $ctx->zfh } ${delete($ctx->{skel})};
$smsg;
}
"../${\mid_href($smsg->{mid})}/" : '';
if (_msg_page_prepare($eml, $ctx)) {
$eml->each_part(\&add_text_body, $ctx, 1);
- $ctx->zadd('</pre><hr>');
+ print { $ctx->{zfh} } '</pre><hr>';
}
html_footer($ctx, $ctx->{first_hdr}) if !$ctx->{smsg};
''; # XXX TODO cleanup
PublicInbox::WwwStream::init($ctx);
if (_msg_page_prepare($eml, $ctx)) { # sets {-title_html}
$eml->each_part(\&add_text_body, $ctx, 1);
- $ctx->zadd('</pre><hr>');
+ print { $ctx->{zfh} } '</pre><hr>';
}
html_footer($ctx, $eml);
$ctx->html_done;
my $html = ascii_html($irt);
$rv .= qq(In-Reply-To: <<a\nhref="$href">$html</a>>\n)
}
- $rv .= "\n";
+ say { $ctx->zfh } $rv;
# scan through all parts, looking for displayable text
$ctx->{mhref} = $mhref;
$ctx->{changed_href} = "#e$id"; # for diffstat "files? changed,"
- $ctx->zadd($rv); # XXX $rv is small, reuse below
$eml->each_part(\&add_text_body, $ctx, 1); # expensive
# add the footer
sub thread_eml_entry {
my ($ctx, $eml) = @_;
my ($beg, $end) = thread_adj_level($ctx, $ctx->{level});
- $ctx->zadd($beg.'<pre>');
- eml_entry($ctx, $eml) . '</pre>' . $end;
+ print { $ctx->zfh } $beg, '<pre>';
+ print { $ctx->{zfh} } eml_entry($ctx, $eml), '</pre>', $end;
}
sub next_in_queue ($$) {
if (!$ghost_ok) { # first non-ghost
$ctx->{-title_html} =
ascii_html($smsg->{subject});
- $ctx->zadd($ctx->html_top);
+ print { $ctx->zfh } $ctx->html_top;
}
return $smsg;
}
# buffer the ghost entry and loop
- $ctx->zadd(ghost_index_entry($ctx, $lvl, $smsg));
+ print { $ctx->zfh } ghost_index_entry($ctx, $lvl, $smsg)
} else { # all done
- $ctx->zadd(join('', thread_adj_level($ctx, 0)));
- $ctx->zadd(${delete($ctx->{skel})});
+ print { $ctx->zfh } thread_adj_level($ctx, 0),
+ ${delete($ctx->{skel})};
return;
}
}
my $smsg = $ctx->{smsg};
if (exists $ctx->{-html_tip}) {
$ctx->{-title_html} = ascii_html($smsg->{subject});
- $ctx->zadd($ctx->html_top);
+ print { $ctx->zfh } $ctx->html_top;
}
return eml_entry($ctx, $eml);
} else {
return $smsg if exists($smsg->{blob});
}
my $skel = delete($ctx->{skel}) or return; # all done
- $ctx->zadd($$skel);
+ print { $ctx->zfh } $$skel;
undef;
}
}
my $ct = $part->content_type || 'text/plain';
my $fn = $part->filename;
my ($s, $err) = msg_part_text($part, $ct);
- $s // return $ctx->zadd(attach_link($ctx, $ct, $p, $fn) // '');
- my $buf = $part->{is_submsg} ? submsg_hdr($ctx, $part)."\n" : '';
+ my $zfh = $ctx->zfh;
+ $s // return print $zfh (attach_link($ctx, $ct, $p, $fn) // '');
+ say $zfh submsg_hdr($ctx, $part) if $part->{is_submsg};
# makes no difference to browsers, and don't screw up filename
# link generation in diffs with the extra '%0D'
undef $s; # free memory
if (defined($fn) || ($depth > 0 && !$part->{is_submsg}) || $err) {
# badly-encoded message with $err? tell the world about it!
- $buf .= attach_link($ctx, $ct, $p, $fn, $err) . "\n";
+ say $zfh attach_link($ctx, $ct, $p, $fn, $err);
}
delete $part->{bdy}; # save memory
- $ctx->zadd($buf);
- undef $buf;
for my $cur (@sections) { # $cur may be huge
if ($cur =~ /\A>/) {
# we use a <span> here to allow users to specify
# their own color for quoted text
- $ctx->zadd(qq(<span\nclass="q">),
- $l->to_html($cur), '</span>');
+ print $zfh qq(<span\nclass="q">),
+ $l->to_html($cur), '</span>';
} elsif ($diff) {
flush_diff($ctx, \$cur);
} else { # regular lines, OK
- $ctx->zadd($l->to_html($cur));
+ print $zfh $l->to_html($cur);
}
undef $cur; # free memory
}
$hbuf .= qq[Message-ID: <$x> (<a href="raw">raw</a>)\n];
}
if (!$nr) { # first (and only) message, common case
- $ctx->zadd($ctx->html_top, $hbuf);
+ print { $ctx->zfh } $ctx->html_top, $hbuf;
} else {
delete $ctx->{-title_html};
- $ctx->zadd($ctx->{-html_tip}, $hbuf);
+ print { $ctx->zfh } $ctx->{-html_tip}, $hbuf;
}
$ctx->{-linkify} //= PublicInbox::Linkify->new;
$hbuf = '';
$hbuf .= "$h: $_\n" for ($eml->header_raw($h));
}
$ctx->{-linkify}->linkify_mids('..', \$hbuf, 1); # escapes HTML
- $ctx->zadd($hbuf);
+ print { $ctx->{zfh} } $hbuf;
$hbuf = '';
}
my @irt = $eml->header_raw('In-Reply-To');
$hbuf .= 'References: <'.join(">\n\t<", @$refs).">\n" if @$refs;
}
$ctx->{-linkify}->linkify_mids('..', \$hbuf); # escapes HTML
- $ctx->zadd($hbuf .= "\n");
+ say { $ctx->{zfh} } $hbuf;
1;
}
sub html_footer {
my ($ctx, $hdr) = @_;
my $upfx = '../';
- my ($related, $skel);
+ my (@related, $skel);
my $foot = '<pre>';
my $qry = delete $ctx->{-qry};
if ($qry && $ctx->{ibx}->isrch) {
$q = wrap('', '', $q);
my $rows = ($q =~ tr/\n/\n/) + 1;
$q = ascii_html($q);
- $related = <<EOM;
+ $related[0] = <<EOM;
<form id=related
action=$upfx
><pre>find likely ancestor, descendant, or conflicting patches for <a
if ($ctx->{ibx}->over) {
my $t = ts2str($ctx->{-t_max});
my $t_fmt = fmt_ts($ctx->{-t_max});
- my $fallback = $related ? "\t" : "<a id=related>\t</a>";
+ my $fallback = @related ? "\t" : "<a id=related>\t</a>";
$skel = <<EOF;
${fallback}other threads:[<a
href="$upfx?t=$t">~$t_fmt UTC</a>|<a
} else { # unindexed inboxes w/o over
$skel = qq( <a\nhref="$upfx">latest</a>);
}
- $foot .= qq(<a\nhref="#R">reply</a>);
# $skel may be big for big threads, don't append it to $foot
- $skel .= '</pre>' . ($related // '');
- $ctx->zadd($foot, $skel .= msg_reply($ctx, $hdr));
+ print { $ctx->zfh } $foot, qq(<a\nhref="#R">reply</a>),
+ $skel, '</pre>', @related,
+ msg_reply($ctx, $hdr);
}
sub ghost_parent {
}
# returns true if diffstat anchor written, false otherwise
-sub anchor0 ($$$$) {
- my ($dst, $ctx, $fn, $rest) = @_;
+sub anchor0 ($$$) {
+ my ($ctx, $fn, $rest) = @_;
my $orig = $fn;
# long filenames will require us to check in anchor1()
push(@{$ctx->{-long_path}}, $fn) if $fn =~ s!\A\.\.\./?!!;
- if (defined(my $attr = to_attr($ctx->{-apfx}.$fn))) {
- $ctx->{-anchors}->{$attr} = 1;
- my $spaces = ($orig =~ s/( +)\z//) ? $1 : '';
- $$dst .= " <a\nid=i$attr\nhref=#$attr>" .
- ascii_html($orig) . '</a>' . $spaces .
+ my $attr = to_attr($ctx->{-apfx}.$fn) // return;
+ $ctx->{-anchors}->{$attr} = 1;
+ my $spaces = ($orig =~ s/( +)\z//) ? $1 : '';
+ print { $ctx->{zfh} } " <a\nid=i$attr\nhref=#$attr>",
+ ascii_html($orig), '</a>', $spaces,
$ctx->{-linkify}->to_html($rest);
- return 1;
- }
- undef;
}
# returns "diff --git" anchor destination, undef otherwise
warn "BUG? <$$x> had no ^index line";
}
$$x =~ s!^diff --git!anchor1($ctx, $pb) // 'diff --git'!ems;
- $ctx->zadd(qq(<span\nclass="head">$$x</span>));
+ print { $ctx->{zfh} } qq(<span\nclass="head">), $$x, '</span>';
$dctx;
}
sub diff_before_or_after ($$) {
my ($ctx, $x) = @_;
- if (exists $ctx->{-anchors} && $$x =~ /\A(.*?) # likely "---\n"
+ if (exists $ctx->{-anchors} && $$x =~ /\A(.*?) # likely "---\n" # \$1
# diffstat lines:
((?:^\x20(?:[^\n]+?)(?:\x20+\|\x20[^\n]*\n))+)
(\x20[0-9]+\x20files?\x20)changed,([^\n]+\n)
(.*?)\z/msx) { # notes, commit message, etc
my @x = ($5, $4, $3, $2, $1);
+ undef $$x;
my $lnk = $ctx->{-linkify};
- $$x = $lnk->to_html(pop @x); # uninteresting prefix
- for my $l (split(/^/m, pop(@x))) { # per-file diffstat lines
+ my $zfh = $ctx->{zfh};
+ print $zfh $lnk->to_html(pop @x); # $1 uninteresting prefix
+ for my $l (split(/^/m, pop(@x))) { # $2 per-file stat lines
$l =~ /^ (.+)( +\| .*\z)/s and
- anchor0($x, $ctx, $1, $2) and next;
- $$x .= $lnk->to_html($l);
+ anchor0($ctx, $1, $2) and next;
+ print $zfh $lnk->to_html($l);
}
- $$x .= pop @x; # $3 /^ \d+ files? /
my $ch = $ctx->{changed_href} // '#related';
- $$x .= qq(<a href="$ch">changed</a>,);
- $$x .= ascii_html(pop @x); # $4: insertions/deletions
- # notes, commit message, etc
- $ctx->zadd($$x .= $lnk->to_html(pop @x));
+ print $zfh pop(@x), # $3 /^ \d+ files? /
+ qq(<a href="$ch">changed</a>,),
+ ascii_html(pop @x), # insertions/deletions
+ $lnk->to_html(@x); # notes, commit message, etc
} else {
- $ctx->zadd($ctx->{-linkify}->to_html($$x));
+ print { $ctx->{zfh} } $ctx->{-linkify}->to_html($$x);
}
}
my $lnk = $ctx->{-linkify};
my $dctx; # {}, keys: Q, oid_a, oid_b
+ my $zfh = $ctx->zfh;
while (defined(my $x = shift @top)) {
if (scalar(@top) >= 4 &&
$top[0] =~ $IS_OID) {
$dctx = diff_header(\$x, $ctx, \@top);
} elsif ($dctx) {
- my $after = '';
+ open(my $afh, '>>', \(my $after='')) or die "open: $!";
# Quiet "Complex regular subexpression recursion limit"
# warning. Perl will truncate matches upon hitting
(?:(?:^-[^\n]*\n)+)|
(?:^@@ [^\n]+\n))/xsm, $x)) {
if (!defined($dctx)) {
- $after .= $s;
+ print $afh $s;
} elsif ($s =~ s/\A@@ (\S+) (\S+) @@//) {
- $ctx->zadd(qq(<span\nclass="hunk">) .
- diff_hunk($dctx, $1, $2) .
- $lnk->to_html($s) .
- '</span>');
+ print $zfh qq(<span\nclass="hunk">),
+ diff_hunk($dctx, $1, $2),
+ $lnk->to_html($s),
+ '</span>';
} elsif ($s =~ /\A\+/) { # $s may be huge
- $ctx->zadd(qq(<span\nclass="add">),
+ print $zfh qq(<span\nclass="add">),
$lnk->to_html($s),
- '</span>');
+ '</span>';
} elsif ($s =~ /\A-- $/sm) { # email sig starts
$dctx = undef;
- $after .= $s;
+ print $afh $s;
} elsif ($s =~ /\A-/) { # $s may be huge
- $ctx->zadd(qq(<span\nclass="del">),
- $lnk->to_html($s),
- '</span>');
+ print $zfh qq(<span\nclass="del">),
+ $lnk->to_html($s),
+ '</span>';
} else { # $s may be huge
- $ctx->zadd($lnk->to_html($s));
+ print $zfh $lnk->to_html($s);
}
}
diff_before_or_after($ctx, \$after) if !$dctx;
my $name = ascii_html(join(', ', PublicInbox::Address::names($from)));
$email = ascii_html($email // $ctx->{ibx}->{-primary_address});
- $ctx->zadd(
- (delete($ctx->{emit_header}) ? atom_header($ctx, $title) : '').
+ print { $ctx->zfh }
+ (delete($ctx->{emit_header}) ? atom_header($ctx, $title) : ''),
"<entry><author><name>$name</name><email>$email</email>" .
"</author>$title$updated" .
- qq(<link\nhref="$href"/>).
+ qq(<link\nhref="$href"/>) .
"<id>$uuid</id>$irt" .
qq{<content\ntype="xhtml">} .
qq{<div\nxmlns="http://www.w3.org/1999/xhtml">} .
- qq(<pre\nstyle="white-space:pre-wrap">));
+ qq(<pre\nstyle="white-space:pre-wrap">);
$ctx->{mhref} = $href;
$ctx->{changed_href} = "${href}#related";
$eml->each_part(\&PublicInbox::View::add_text_body, $ctx, 1);
'Content-Length' => undef ];
bless $ctx, __PACKAGE__;
$ctx->{gz} = PublicInbox::GzipFilter::gz_or_noop($res_hdr, $ctx->{env});
+ my @top;
$ctx->{base_url} // do {
- $ctx->zadd(html_top($ctx));
+ @top = html_top($ctx);
$ctx->{base_url} = base_url($ctx);
};
- my $bdy = $ctx->zflush(@_[2..$#_], _html_end($ctx));
+ my $bdy = $ctx->zflush(@top, @_[2..$#_], _html_end($ctx));
$res_hdr->[3] = length($bdy);
[ $code, $res_hdr, [ $bdy ] ]
}
my $h = $ctx->{-res_hdr} = ['Content-Type', 'text/html; charset=UTF-8'];
$ctx->{gz} = PublicInbox::GzipFilter::gz_or_noop($h, $ctx->{env});
bless $ctx, __PACKAGE__;
- $ctx->zadd(html_top($ctx));
+ print { $ctx->zfh } html_top($ctx);
}
1;