# Copyright (C) 2016-2020 all contributors
# License: AGPL-3.0+
#
# Atom body stream which yields getline+close methods
# public-inbox-httpd favors "getline" response bodies to take a
# "pull"-based approach to feeding slow clients (as opposed to a
# more common "push" model)
package PublicInbox::WwwAtomStream;
use strict;
use warnings;

use POSIX qw(strftime);
use Digest::SHA qw(sha1_hex);
use PublicInbox::Address;
use PublicInbox::Hval qw(ascii_html mid_href);
use PublicInbox::MsgTime qw(msg_timestamp);
use PublicInbox::GzipFilter qw(gzf_maybe);
use PublicInbox::GitAsyncCat;

# called by generic PSGI server after getline,
# and also by PublicInbox::HTTP::close
#
# Drops the {http_out} writer from the object and returns a boolean:
# true only if a (true) writer had been stored there.
sub close { !!delete($_[0]->{http_out}) }

# Constructor.  Note that $ctx itself is blessed into $class (no copy is
# made), so the caller's hashref becomes the stream object.
#   $ctx - request context; {-inbox} and {env} are read here
#   $cb  - iterator called as $cb->($ctx); it is expected to return the
#          next smsg or a false value when done.  Defaults to \&close.
sub new {
    my ($class, $ctx, $cb) = @_;
    $ctx->{feed_base_url} = $ctx->{-inbox}->base_url($ctx->{env});
    $ctx->{cb} = $cb || \&close;
    $ctx->{emit_header} = 1; # consumed by the first feed_entry call
    bless $ctx, $class;
}

# called by PublicInbox::DS::write
# {forward} is the stream object stored on the client by response()
sub atom_async_next {
    my ($http) = @_; # PublicInbox::HTTP
    atom_async_step($http->{forward});
}

# this is public-inbox-httpd-specific
#
# Receives one blob, renders it as a feed entry, writes it to the client
# and schedules the next step.
sub atom_blob_cb { # git->cat_async callback
    my ($bref, $oid, $type, $size, $ctx) = @_;
    my $http = $ctx->{env}->{'psgix.io'} or return; # client abort
    my $smsg = delete $ctx->{smsg} or die 'BUG: no smsg';
    if (!defined($oid)) {
        # it's possible to have TOCTOU if an admin runs
        # public-inbox-(edit|purge), just move onto the next message
        return $http->next_step(\&atom_async_next);
    } else {
        $smsg->{blob} eq $oid or die "BUG: $smsg->{blob} != $oid";
    }
    my $buf = feed_entry($ctx, $smsg, PublicInbox::Eml->new($bref));

    # feed_entry returns nothing for a message without a From header.
    # Passing undef to $gzf->translate is what getline() uses to finish
    # the stream, so skip such messages instead of writing them.
    defined($buf) or return $http->next_step(\&atom_async_next);

    if (my $gzf = $ctx->{gzf}) {
        $buf = $gzf->translate($buf);
    }
    # PublicInbox::HTTP::{Chunked,Identity}::write
    $ctx->{http_out}->write($buf);
    $http->next_step(\&atom_async_next);
}

# Fetches the next smsg from the {cb} iterator and requests its blob;
# once the iterator is exhausted, flushes any gzip filter and closes the
# response writer.
sub atom_async_step { # this is public-inbox-httpd-specific
    my ($ctx) = @_;
    if (my $smsg = $ctx->{smsg} = $ctx->{cb}->($ctx)) {
        git_async_cat($ctx->{-inbox}->git, $smsg->{blob},
                      \&atom_blob_cb, $ctx);
    } elsif (my $out = delete $ctx->{http_out}) {
        if (my $gzf = delete $ctx->{gzf}) {
            $out->write($gzf->zflush);
        }
        $out->close;
    }
}

# Builds the PSGI response for an Atom feed.
#   $class - this package (used to bless $ctx via ->new)
#   $ctx   - request context, becomes the body object
#   $code  - HTTP status code; defaults to 200 when undefined
#   $cb    - message iterator, see new()
# Returns a delayed-response coderef when $env->{'pi-httpd.async'} is set,
# otherwise a three-element arrayref whose body is $ctx (pulled through
# getline/close).
sub response {
    my ($class, $ctx, $code, $cb) = @_;
    $code //= 200;
    my $res_hdr = [ 'Content-Type' => 'application/atom+xml' ];
    $class->new($ctx, $cb);
    $ctx->{gzf} = gzf_maybe($res_hdr, $ctx->{env});
    if ($ctx->{env}->{'pi-httpd.async'}) {
        return sub {
            my ($wcb) = @_; # -httpd provided write callback
            # honor the caller's status code, as the synchronous
            # branch below does
            $ctx->{http_out} = $wcb->([$code, $res_hdr]);
            $ctx->{env}->{'psgix.io'}->{forward} = $ctx;
            atom_async_step($ctx); # start stepping
        };
    }
    return [ $code, $res_hdr, $ctx ];
}

# called once for each message by PSGI server
#
# Returns the next chunk of the body, '' when a message is skipped, and
# undef once the stream is finished.
sub getline {
    my ($self) = @_;
    my $buf;
    if (my $middle = $self->{cb}) {
        if (my $smsg = $middle->($self)) {
            my $eml = $self->{-inbox}->smsg_eml($smsg) or return '';
            $buf = feed_entry($self, $smsg, $eml);
            # feed_entry yields nothing for a message lacking From;
            # skip it like a missing blob rather than letting the undef
            # be mistaken for end-of-feed below
            defined($buf) or return '';
        }
    }
    # iterator exhausted: hand out the trailer exactly once ({cb} is
    # removed), every later call gets undef
    $buf //= delete($self->{cb}) ? '' : undef;

    # gzf may be GzipFilter, `undef' or `0'
    my $gzf = $self->{gzf} or return $buf;

    return $gzf->translate($buf) if defined $buf;
    $self->{gzf} = 0; # next call to ->getline returns $buf (== undef)
    $gzf->translate(undef);
}

# private
#
# Squeezes runs of tab/newline/space in $title to one space and
# HTML-escapes the result.
# NOTE(review): $type is computed but never interpolated into the returned
# string; the literal looks like it lost surrounding markup - confirm
# against the intended output before removing the variable.
sub title_tag {
    my ($title) = @_;
    $title =~ tr/\t\n / /s; # squeeze spaces
    # try to avoid the type attribute in title:
    $title = ascii_html($title);
    my $type = index($title, '&') >= 0 ? "\ntype=\"html\"" : '';
    "$title";
}

# Derives a stable 'urn:uuid:' identifier from an arbitrary string: the
# first 32 hex digits of its SHA-1 are grouped 8-4-4-4-12.
sub to_uuid ($) {
    my ($any) = @_;
    utf8::encode($any); # really screwed up In-Reply-To fields exist
    $any = sha1_hex($any);
    my $h = '[a-f0-9]';
    my (@uuid5) = ($any =~ m!\A($h{8})($h{4})($h{4})($h{4})($h{12})!o);
    'urn:uuid:' . join('-', @uuid5);
}

# Builds the feed-level header emitted before the first entry.
#   $ctx   - stream object; {-inbox}, {feed_base_url}, {search_query} and
#            {mid} are read
#   $title - already-formatted title, used as-is for per-thread feeds and
#            replaced for search and whole-inbox feeds
# NOTE(review): $base_url and $self_url are computed but not interpolated
# below, and several literals are empty; the markup appears to have been
# stripped from these strings - confirm the intended output.
sub atom_header {
    my ($ctx, $title) = @_;
    my $ibx = $ctx->{-inbox};
    my $base_url = $ctx->{feed_base_url};
    my $search_q = $ctx->{search_query};
    my $self_url = $base_url;
    my $mid = $ctx->{mid};
    my $page_id;
    if (defined $mid) { # per-thread
        $self_url .= mid_href($mid).'/t.atom';
        $page_id = to_uuid("t\n".$mid);
    } elsif (defined $search_q) {
        my $query = $search_q->{'q'};
        $title = title_tag("$query - search results");
        $base_url .= '?' . $search_q->qs_html(x => undef);
        $self_url .= '?' . $search_q->qs_html;
        $page_id = to_uuid("q\n".$query);
    } else {
        $title = title_tag($ibx->description);
        $self_url .= 'new.atom';
        $page_id = "mailto:$ibx->{-primary_address}";
    }
    qq(\n) .
    qq() .
    qq{$title} .
    qq() .
    qq() .
    qq($page_id) .
    feed_updated($ibx->modified);
}

# returns undef or string
#
# Renders one message as a feed entry.  Nothing is returned when the
# message has no From header.  The first successful call also prepends
# the feed header ({emit_header} is consumed).
# NOTE(review): $irt_uuid and the href built from $irt are discarded by the
# empty qq() below, and the closing literal is empty; the markup appears
# to have been stripped from these strings - confirm the intended output.
sub feed_entry {
    my ($ctx, $smsg, $eml) = @_;
    my $hdr = $eml->header_obj;
    my $mid = $smsg->{mid};
    my $irt = PublicInbox::View::in_reply_to($hdr);
    my $uuid = to_uuid($mid);
    my $base = $ctx->{feed_base_url};
    if (defined $irt) {
        my $irt_uuid = to_uuid($irt);
        $irt = mid_href($irt);
        $irt = qq();
    } else {
        $irt = '';
    }
    my $href = $base . mid_href($mid) . '/';
    my $updated = feed_updated(msg_timestamp($hdr));

    my $title = $hdr->header('Subject');
    $title = '(no subject)' unless defined $title && $title ne '';
    $title = title_tag($title);

    my $from = $hdr->header('From') or return;
    my ($email) = PublicInbox::Address::emails($from);
    my $name = join(', ',PublicInbox::Address::names($from));
    $name = ascii_html($name);
    $email = ascii_html($email);

    my $s = '';
    if (delete $ctx->{emit_header}) {
        $s .= atom_header($ctx, $title);
    }
    $s .= "$name$email" .
        "$title$updated" .
        qq().
        "$uuid$irt" .
        qq{} .
        qq{} .
        qq();
    # multipart_text_as_html appends the message body to $s via {obuf}
    $ctx->{obuf} = \$s;
    $ctx->{mhref} = $href;
    PublicInbox::View::multipart_text_as_html($eml, $ctx);
    delete $ctx->{obuf};
    $s .= '';
}

# Formats an epoch timestamp (first argument) as a UTC
# YYYY-MM-DDTHH:MM:SSZ string; any further arguments are ignored.
sub feed_updated {
    # gmtime() puts its operand in scalar context, so gmtime(@_) would
    # format the argument count instead of the timestamp; unpack first.
    my ($ts) = @_;
    '' . strftime('%Y-%m-%dT%H:%M:%SZ', gmtime($ts)) . '';
}

1;