1 # Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
4 # Streaming (via getline) interface for formatting messages as an mboxrd.
5 # Used by the PSGI web interface.
7 # public-inbox-httpd favors "getline" response bodies to take a
8 # "pull"-based approach to feeding slow clients (as opposed to a
9 # more common "push" model)
10 package PublicInbox::Mbox;
12 use parent 'PublicInbox::GzipFilter';
13 use PublicInbox::MID qw/mid_escape/;
14 use PublicInbox::Hval qw/to_filename/;
15 use PublicInbox::Smsg;
18 # called by PSGI server as body response
19 # this gets called twice for every message, once to return the header,
20 # once to retrieve the body
# Produce the mbox output for the current message and queue up the next one.
# Returns nothing when no message is pending in $ctx->{smsg} or when the
# message cannot be loaded from the inbox.
# NOTE(review): header and body are both emitted in this single pass, so
# the "called twice for every message" remark above looks stale -- confirm.
23 my $smsg = $ctx->{smsg} or return;
24 my $ibx = $ctx->{-inbox};
25 my $eml = $ibx->smsg_eml($smsg) or return;
# Look up the following message before emitting this one, using the cursor
# arguments saved in $ctx->{next_arg}; the result replaces $ctx->{smsg}.
26 my $n = $ctx->{smsg} = $ibx->over->next_by_mid(@{$ctx->{next_arg}});
# zmore/translate are presumably inherited from PublicInbox::GzipFilter
# (the declared parent) -- confirm their buffering semantics there.
27 $ctx->zmore(msg_hdr($ctx, $eml, $smsg->{mid}));
# NOTE(review): the test opening this if/else is not visible in this
# excerpt; presumably it checks $n (more messages follow) -- confirm.
29 $ctx->translate(msg_body($eml));
30 } else { # last message
31 $ctx->zmore(msg_body($eml));
36 # called by PublicInbox::DS::write
# Start loading the blob for the pending message, or close the response
# when no message remains.
# $http: the HTTP client object; its {forward} slot holds the per-request
# context and is empty once the client has gone away.
38 my ($http) = @_; # PublicInbox::HTTP
39 my $ctx = $http->{forward} or return; # client aborted
# no pending message: finish the response via $ctx->close
41 my $smsg = $ctx->{smsg} or return $ctx->close;
# NOTE(review): presumably smsg_blob eventually invokes async_eml with the
# parsed message -- confirm in the code that provides smsg_blob.
42 $ctx->smsg_blob($smsg);
# Write one loaded message to the client and advance the cursor.
# NOTE(review): the line unpacking the arguments is not visible in this
# excerpt; the body uses $ctx and $eml -- confirm their order.
47 sub async_eml { # ->{async_eml} for async_blob_cb
# take ownership of the current message; the slot is refilled just below
49 my $smsg = delete $ctx->{smsg};
# prefetch the following message using the saved cursor arguments
51 $ctx->{smsg} = $ctx->{-inbox}->over->next_by_mid(@{$ctx->{next_arg}});
# header goes through zmore, body through translate, and the value returned
# by translate is what gets written to the client
53 $ctx->zmore(msg_hdr($ctx, $eml, $smsg->{mid}));
54 $ctx->{http_out}->write($ctx->translate(msg_body($eml)));
# Build the response header list (Content-Type, Content-Disposition) for a
# raw message download.
# $ctx: request context; $ctx->{-inbox}->{obfuscate} selects the media type.
# $subject: message subject used to derive the filename; may be undef.
58 my ($ctx, $subject) = @_;
59 my $fn = $subject // 'no-subject';
# an empty subject also falls back to 'no-subject'; anything else is
# passed through to_filename (imported from PublicInbox::Hval)
61 $fn = $fn eq '' ? 'no-subject' : to_filename($fn);
62 my @hdr = ('Content-Type');
63 if ($ctx->{-inbox}->{obfuscate}) {
64 # obfuscation is stupid, but maybe scrapers are, too...
65 push @hdr, 'application/mbox';
# NOTE(review): the else line is not visible in this excerpt; presumably
# text/plain is the non-obfuscated case -- confirm.
68 push @hdr, 'text/plain';
71 push @hdr, 'Content-Disposition', "inline; filename=$fn";
75 # for rare cases where v1 inboxes aren't indexed w/ ->over at all
# Fallback used when the inbox has no ->over index: look the message up
# by Message-ID and return it as one complete, non-streamed response.
# Returns nothing when the message is not found.
78 my $mref = $ctx->{-inbox}->msg_by_mid($ctx->{mid}) or return;
79 my $eml = PublicInbox::Eml->new($mref);
# PSGI-style triple: status, header list from res_hdr, and a one-element
# body holding the mbox header immediately followed by the escaped body
80 [ 200, res_hdr($ctx, $eml->header_str('Subject')),
81 [ msg_hdr($ctx, $eml, $ctx->{mid}) . msg_body($eml) ] ]
84 # /$INBOX/$MESSAGE_ID/raw
# Serve the raw message(s) matching $ctx->{mid} as a streamed response.
# Falls back to no_over_raw when the inbox has no ->over index, and
# returns nothing when the first lookup finds no message.
87 $ctx->{base_url} = $ctx->{-inbox}->base_url($ctx->{env});
88 my $over = $ctx->{-inbox}->over or return no_over_raw($ctx);
# Cursor arguments for next_by_mid; they are saved in $ctx so later calls
# can continue from the same position.
# NOTE(review): the declaration of $id and $prev is not visible in this
# excerpt.
90 my $mip = $ctx->{next_arg} = [ $ctx->{mid}, \$id, \$prev ];
91 my $smsg = $ctx->{smsg} = $over->next_by_mid(@$mip) or return;
92 my $res_hdr = res_hdr($ctx, $smsg->{subject});
# rebless the context into this package so the methods called on $ctx
# below and elsewhere in this file resolve through it
93 bless $ctx, __PACKAGE__;
# async_next and async_eml are passed as the callbacks for the response
94 $ctx->psgi_response(200, $res_hdr, \&async_next, \&async_eml);
# Build the mbox header text for one message: a "From " separator line,
# the message's own headers, and archive-related headers.
# $ctx: request context (reads {-inbox}, {base_url} and {mid}).
# $eml: message object providing header_obj.
# $mid: Message-ID; falls back to $ctx->{mid} when undefined.
# NOTE(review): several lines of this routine (including the @append
# declaration and part of the loop body) are not visible in this excerpt.
98 my ($ctx, $eml, $mid) = @_;
99 my $header_obj = $eml->header_obj;
101 # drop potentially confusing headers, ssoma already should've dropped
102 # Lines and Content-Length
103 foreach my $d (qw(Lines Bytes Content-Length Status)) {
# header_set is called with a name and no value for each listed header
104 $header_obj->header_set($d);
106 my $ibx = $ctx->{-inbox};
107 my $base = $ctx->{base_url};
108 $mid = $ctx->{mid} unless defined $mid;
# escape the Message-ID before embedding it in the Archived-At URL
109 $mid = mid_escape($mid);
# name/value pairs; presumably the contents of @append -- confirm
111 'Archived-At', "<$base$mid/>",
112 'List-Archive', "<$base>",
113 'List-Post', "<mailto:$ibx->{-primary_address}>",
# reuse the line ending reported by the header object for every line added
115 my $crlf = $header_obj->crlf;
116 my $buf = $header_obj->as_string;
117 # fixup old bug from import (pre-a0c07cba0e5d8b6a)
# strips one leading "From " line (and any blank lines before it)
118 $buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
# fixed separator line placed in front of the headers
119 $buf = "From mboxrd\@z Thu Jan 1 00:00:00 1970" . $crlf . $buf;
# walk @append two elements at a time (name, value)
121 for (my $i = 0; $i < @append; $i += 2) {
123 my $v = $append[$i + 1];
# existing raw values of header $k in the message
# NOTE(review): presumably $v is cleared when the message already has
# that value; the lines doing so are not visible here -- confirm.
124 my @v = $header_obj->header_raw($k);
131 $buf .= "$k: $v$crlf" if defined $v;
# Escape a message body for mboxrd output.
# $_[0]: message object whose {bdy} slot holds a reference to the body.
# Returns "\n" straight away when {bdy} is undefined.
137 my $bdy = $_[0]->{bdy} // return "\n";
138 # mboxrd quoting style
139 # https://en.wikipedia.org/wiki/Mbox#Modified_mbox
140 # https://www.loc.gov/preservation/digital/formats/fdd/fdd000385.shtml
141 # https://web.archive.org/http://www.qmail.org/man/man5/mbox.html
# Prefix one more ">" to every line that starts with zero or more ">"
# followed by "From "; the referenced body is modified in place.
142 $$bdy =~ s/^(>*From )/>$1/gm;
# Iterator callback for a thread mbox: hand out the next message of the
# thread, fetching another batch when the current one is used up.
# NOTE(review): the sub header, the code inside the first branch and the
# end of the get_thread call are not visible in this excerpt.
148 my $msgs = $ctx->{msgs};
# consume the current batch from the front
150 if (my $smsg = shift @$msgs) {
# batch exhausted: ask the overview index for the next part of the thread
154 $ctx->{msgs} = $msgs = $ctx->{over}->get_thread($ctx->{mid},
# an empty batch ends the iteration
156 return unless @$msgs;
# remember the last message of this batch in $ctx->{prev}
157 $ctx->{prev} = $msgs->[-1];
# Entry point for downloading a whole thread as a gzipped mbox.
# $ctx: request context ($ctx->{mid} identifies the thread).
# $over: overview index object providing get_thread.
# $sfx: not used in the lines visible here.
# Returns a 404 response when the thread has no messages.
162 my ($ctx, $over, $sfx) = @_;
163 my $msgs = $ctx->{msgs} = $over->get_thread($ctx->{mid}, {});
164 return [404, [qw(Content-Type text/plain)], []] if !@$msgs;
# remember the last message of the first batch in $ctx->{prev}
165 $ctx->{prev} = $msgs->[-1];
166 $ctx->{over} = $over; # bump refcnt
# load the gzip mbox writer only when it is needed
167 require PublicInbox::MboxGz;
# thread_cb supplies the messages; the first message's subject is passed
# as the third argument (presumably used for the filename -- confirm)
168 PublicInbox::MboxGz::mbox_gz($ctx, \&thread_cb, $msgs->[0]->{subject});
# Dispatch a range download request.
# $ctx: request context; $range: requested range string.
# NOTE(review): the lines that set $query and the else around the 404
# are not visible in this excerpt.
172 my ($ctx, $range) = @_;
# 'all' is the only range handled in the lines visible here
175 if ($range eq 'all') { # TODO: YYYY[-MM]
# 404 response with an empty body
178 return [404, [qw(Content-Type text/plain)], []];
180 mbox_all($ctx, $query);
# Iterator callback for the "all messages" mbox: walk article numbers in
# batches and look each one up in the overview index.
# NOTE(review): the sub header, the outer loop and the return of $smsg
# are not visible in this excerpt.
185 my $ids = $ctx->{ids};
# NOTE(review): the loop stops on any false value, so an article number
# of 0 would end it early; presumably numbers start at 1 -- confirm.
187 while ((my $num = shift @$ids)) {
# skip numbers that have no overview entry
188 my $smsg = $ctx->{over}->get_art($num) or next;
# refill the batch; ids_after is given a reference to $ctx->{prev},
# presumably so it can advance that position in place -- confirm
191 $ctx->{ids} = $ids = $ctx->{mm}->ids_after(\($ctx->{prev}));
# Stream every message of the inbox as a gzipped mbox, iterating by
# article number.
# Returns a 404 response when ids_after yields nothing, or the result of
# PublicInbox::WWW::need when the inbox has no overview index.
# NOTE(review): the declaration of $prev and the line storing the ids in
# $ctx are not visible in this excerpt.
197 my $ibx = $ctx->{-inbox};
199 my $mm = $ctx->{mm} = $ibx->mm;
# first batch of article numbers
200 my $ids = $mm->ids_after(\$prev) or return
201 [404, [qw(Content-Type text/plain)], ["No results found\n"]];
202 $ctx->{over} = $ibx->over or
203 return PublicInbox::WWW::need($ctx, 'Overview');
# keep the position in $ctx so all_ids_cb can continue from it
205 $ctx->{prev} = $prev;
206 require PublicInbox::MboxGz;
# all_ids_cb supplies the messages; 'all' is passed as the third argument
207 PublicInbox::MboxGz::mbox_gz($ctx, \&all_ids_cb, 'all');
# Iterator callback for a search-results mbox: step through the current
# result set, then run the query again for the next page.
# NOTE(review): the sub header, the outer loop, the rest of the
# from_mitem/query calls and the return of $smsg are not visible here.
212 my $mset = $ctx->{mset};
213 my $srch = $ctx->{srch};
# $ctx->{iter} is the index of the next item within the current result set
215 while (my $mi = (($mset->items)[$ctx->{iter}++])) {
216 my $smsg = PublicInbox::Smsg::from_mitem($mi,
# current page exhausted: run the stored query again
221 $mset = $ctx->{mset} = $srch->query($ctx->{query},
# an empty page ends the iteration
223 my $size = $mset->size or return;
# advance the offset by the size of the page just fetched
224 $ctx->{qopts}->{offset} += $size;
# Stream messages matching $query as a gzipped mbox; an empty query means
# every message in the inbox.
# $ctx: request context; $query: search query string.
# Returns a 404 response when the first page of results is empty, or the
# result of PublicInbox::WWW::need when the inbox has no search index.
230 my ($ctx, $query) = @_;
# an empty query is handled by mbox_all_ids instead of a search
232 return mbox_all_ids($ctx) if $query eq '';
# NOTE(review): the meaning of mset => 2 is defined by the search API and
# is not visible here -- confirm.
233 my $qopts = $ctx->{qopts} = { mset => 2 };
234 my $srch = $ctx->{srch} = $ctx->{-inbox}->search or
# NOTE(review): the doubled semicolon below is a harmless empty statement.
235 return PublicInbox::WWW::need($ctx, 'Search');;
236 my $mset = $ctx->{mset} = $srch->query($query, $qopts);
# the size of the first page becomes the offset for the next query; a
# size of zero means there were no results at all
237 $qopts->{offset} = $mset->size or
238 return [404, [qw(Content-Type text/plain)],
239 ["No results found\n"]];
# keep the query in $ctx so results_cb can fetch further pages
241 $ctx->{query} = $query;
242 require PublicInbox::MboxGz;
# results_cb supplies the messages; 'results-' plus the query is passed
# as the third argument
243 PublicInbox::MboxGz::mbox_gz($ctx, \&results_cb, 'results-'.$query);