X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FWwwAtomStream.pm;h=c494fa22679add475dd98f7d698ead5a8c98a30d;hb=c447bbbd;hp=5a10034ba8ecfb33eb90f5210b43af648998fd97;hpb=f63ea68e457f9e2618eac1d3d62227d2b605651b;p=public-inbox.git diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm index 5a10034b..c494fa22 100644 --- a/lib/PublicInbox/WwwAtomStream.pm +++ b/lib/PublicInbox/WwwAtomStream.pm @@ -1,42 +1,54 @@ -# Copyright (C) 2016 all contributors +# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ # # Atom body stream for which yields getline+close methods +# public-inbox-httpd favors "getline" response bodies to take a +# "pull"-based approach to feeding slow clients (as opposed to a +# more common "push" model) package PublicInbox::WwwAtomStream; use strict; use warnings; use POSIX qw(strftime); -use Date::Parse qw(strptime); use Digest::SHA qw(sha1_hex); use PublicInbox::Address; -use PublicInbox::Hval qw(ascii_html); -use PublicInbox::MID qw/mid_clean mid_escape/; +use PublicInbox::Hval qw(ascii_html mid_href); +use PublicInbox::MsgTime qw(msg_timestamp); +use PublicInbox::GzipFilter qw(gzf_maybe); # called by PSGI server after getline: sub close {} sub new { my ($class, $ctx, $cb) = @_; - $ctx->{emit_header} = 1; $ctx->{feed_base_url} = $ctx->{-inbox}->base_url($ctx->{env}); - bless { cb => $cb || *close, ctx => $ctx }, $class; + bless { cb => $cb || \&close, ctx => $ctx, emit_header => 1 }, $class; } sub response { my ($class, $ctx, $code, $cb) = @_; - [ $code, [ 'Content-Type', 'application/atom+xml' ], - $class->new($ctx, $cb) ] + my $h = [ 'Content-Type' => 'application/atom+xml' ]; + my $self = $class->new($ctx, $cb); + $self->{gzf} = gzf_maybe($h, $ctx->{env}); + [ $code, $h, $self ] } # called once for each message by PSGI server sub getline { my ($self) = @_; - if (my $middle = $self->{cb}) { - my $mime = $middle->(); - return feed_entry($self, $mime) if $mime; - } - delete $self->{cb} ? '' : undef; + my $buf = do { + if (my $middle = $self->{cb}) { + my $smsg = $middle->($self->{ctx}); + feed_entry($self, $smsg) if $smsg; + } + } // (delete($self->{cb}) ? '' : undef); + + # gzf may be GzipFilter, `undef' or `0' + my $gzf = $self->{gzf} or return $buf; + + return $gzf->translate($buf) if defined $buf; + $self->{gzf} = 0; # next call to ->getline returns $buf (== undef) + $gzf->translate(undef); } # private @@ -50,6 +62,15 @@ sub title_tag { "$title"; } +sub to_uuid ($) { + my ($any) = @_; + utf8::encode($any); # really screwed up In-Reply-To fields exist + $any = sha1_hex($any); + my $h = '[a-f0-9]'; + my (@uuid5) = ($any =~ m!\A($h{8})($h{4})($h{4})($h{4})($h{12})!o); + 'urn:uuid:' . join('-', @uuid5); +} + sub atom_header { my ($ctx, $title) = @_; my $ibx = $ctx->{-inbox}; @@ -57,18 +78,22 @@ sub atom_header { my $search_q = $ctx->{search_query}; my $self_url = $base_url; my $mid = $ctx->{mid}; + my $page_id; if (defined $mid) { # per-thread - $self_url .= mid_escape($mid).'/t.atom'; + $self_url .= mid_href($mid).'/t.atom'; + $page_id = to_uuid("t\n".$mid) } elsif (defined $search_q) { my $query = $search_q->{'q'}; $title = title_tag("$query - search results"); $base_url .= '?' . $search_q->qs_html(x => undef); $self_url .= '?' . $search_q->qs_html; + $page_id = to_uuid("q\n".$query); } else { $title = title_tag($ibx->description); $self_url .= 'new.atom'; + $page_id = "mailto:$ibx->{-primary_address}"; } - my $mtime = (stat($ibx->{mainrepo}))[9] || time; + my $mtime = (stat($ibx->{inboxdir}))[9] || time; qq(\n) . qq() . qq() . - qq(mailto:$ibx->{-primary_address}) . - feed_updated(gmtime($mtime)); -} - -sub mid2uuid ($) { - my ($mid) = @_; - utf8::encode($mid); # really screwed up In-Reply-To fields exist - $mid = sha1_hex($mid); - my $h = '[a-f0-9]'; - my (@uuid5) = ($mid =~ m!\A($h{8})($h{4})($h{4})($h{4})($h{12})!o); - 'urn:uuid:' . join('-', @uuid5); + qq($page_id) . + feed_updated($mtime); } # returns undef or string sub feed_entry { - my ($self, $mime) = @_; + my ($self, $smsg) = @_; my $ctx = $self->{ctx}; - my $hdr = $mime->header_obj; - my $mid = mid_clean($hdr->header_raw('Message-ID')); + my $eml = $ctx->{-inbox}->smsg_eml($smsg) or return ''; + my $hdr = $eml->header_obj; + my $mid = $smsg->{mid}; my $irt = PublicInbox::View::in_reply_to($hdr); - my $uuid = mid2uuid($mid); + my $uuid = to_uuid($mid); my $base = $ctx->{feed_base_url}; if (defined $irt) { - my $irt_uuid = mid2uuid($irt); - $irt = mid_escape($irt); + my $irt_uuid = to_uuid($irt); + $irt = mid_href($irt); $irt = qq(); } else { $irt = ''; } - my $href = $base . mid_escape($mid) . '/'; - my $date = $hdr->header('Date'); - my @t = eval { strptime($date) } if defined $date; - @t = gmtime(time) unless scalar @t; - my $updated = feed_updated(@t); + my $href = $base . mid_href($mid) . '/'; + my $updated = feed_updated(msg_timestamp($hdr)); my $title = $hdr->header('Subject'); $title = '(no subject)' unless defined $title && $title ne ''; @@ -124,22 +138,25 @@ sub feed_entry { $email = ascii_html($email); my $s = ''; - if (delete $ctx->{emit_header}) { + if (delete $self->{emit_header}) { $s .= atom_header($ctx, $title); } $s .= "$name$email" . "$title$updated" . + qq(). + "$uuid$irt" . qq{} . qq{} . - qq() . - PublicInbox::View::multipart_text_as_html($mime, $href) . - '' . - qq!!. - "$uuid$irt"; + qq(); + $ctx->{obuf} = \$s; + $ctx->{mhref} = $href; + PublicInbox::View::multipart_text_as_html($eml, $ctx); + delete $ctx->{obuf}; + $s .= ''; } sub feed_updated { - '' . strftime('%Y-%m-%dT%H:%M:%SZ', @_) . ''; + '' . strftime('%Y-%m-%dT%H:%M:%SZ', gmtime(@_)) . ''; } 1;