From 205599c0814e1031760e54cce9d8880e747cbb08 Mon Sep 17 00:00:00 2001
From: Eric Wong <e@80x24.org>
Date: Tue, 28 Sep 2021 07:53:49 +0000
Subject: [PATCH] www+httpd: lower priority of large mbox downloads

While each git blob request is treated fairly w.r.t. other git
blob requests, responses triggering thousands of git blob
requests can still noticeably increase latency for
less-expensive responses.

Move large mbox results and the nasty all.mbox endpoint to
a low-priority queue which only fires once per event loop
iteration.  This reduces the response time of short HTTP
responses while many gigantic mboxes are being downloaded
simultaneously, but still maximizes use of available I/O
when there are no inexpensive HTTP responses happening.

This only affects PublicInbox::WWW users who use
public-inbox-httpd, not generic PSGI servers.
---
 lib/PublicInbox/GzipFilter.pm | 7 ++++++-
 lib/PublicInbox/Mbox.pm       | 7 +++++--
 lib/PublicInbox/WWW.pm        | 9 +++++++++
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index 334d6581..c50c26c5 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -175,7 +175,12 @@ sub async_blob_cb { # git->cat_async callback
 	$smsg->{blob} eq $oid or bail($self, "BUG: $smsg->{blob} != $oid");
 	eval { $self->async_eml(PublicInbox::Eml->new($bref)) };
 	bail($self, "E: async_eml: $@") if $@;
-	$http->next_step($self->can('async_next'));
+	if ($self->{-low_prio}) {
+		push(@{$self->{www}->{-low_prio_q}}, $self) == 1 and
+				PublicInbox::DS::requeue($self->{www});
+	} else {
+		$http->next_step($self->can('async_next'));
+	}
 }
 
 sub smsg_blob {
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index f72af26b..cec76182 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -174,6 +174,7 @@ sub mbox_all_ids {
 		[404, [qw(Content-Type text/plain)], ["No results found\n"]];
 	$ctx->{ids} = $ids;
 	$ctx->{prev} = $prev;
+	$ctx->{-low_prio} = 1;
 	require PublicInbox::MboxGz;
 	PublicInbox::MboxGz::mbox_gz($ctx, \&all_ids_cb, 'all');
 }
@@ -192,12 +193,13 @@ sub results_cb {
 			my $smsg = $over->get_art($num) or next;
 			return $smsg;
 		}
-		# refill result set
+		# refill result set, deprioritize since there's many results
 		my $srch = $ctx->{ibx}->isrch or return gone($ctx, 'search');
 		my $mset = $srch->mset($ctx->{query}, $ctx->{qopts});
 		my $size = $mset->size or return;
 		$ctx->{qopts}->{offset} += $size;
 		$ctx->{ids} = $srch->mset_to_artnums($mset, $ctx->{qopts});
+		$ctx->{-low_prio} = 1;
 	}
 }
 
@@ -214,12 +216,13 @@ sub results_thread_cb {
 		# refills ctx->{xids}
 		next if $over->expand_thread($ctx);
 
-		# refill result set
+		# refill result set, deprioritize since there's many results
 		my $srch = $ctx->{ibx}->isrch or return gone($ctx, 'search');
 		my $mset = $srch->mset($ctx->{query}, $ctx->{qopts});
 		my $size = $mset->size or return;
 		$ctx->{qopts}->{offset} += $size;
 		$ctx->{ids} = $srch->mset_to_artnums($mset, $ctx->{qopts});
+		$ctx->{-low_prio} = 1;
 	}
 
 }
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 570e690e..a7c961f4 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -659,4 +659,13 @@ sub get_description {
 	};
 }
 
+sub event_step { # called via PublicInbox::DS::requeue, drains -low_prio_q
+	my ($self) = @_;
+	# gzf = PublicInbox::GzipFilter == $ctx, only one is dequeued per call
+	my $gzf = shift(@{$self->{-low_prio_q}}) // return;
+	PublicInbox::DS::requeue($self) if scalar(@{$self->{-low_prio_q}});
+	my $http = $gzf->{env}->{'psgix.io'}; # PublicInbox::HTTP
+	$http->next_step($gzf->can('async_next'));
+}
+
 1;
-- 
2.50.0