1 # Copyright (C) 2013, Eric Wong <normalperson@yhbt.net> and all contributors
2 # License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
3 package PublicInbox::Feed;
6 use XML::Atom::SimpleFeed;
9 use URI::Escape qw/uri_escape/;
10 use Encode qw/encode decode/;
11 use Encode::MIME::Header;
12 use DateTime::Format::Mail;
13 use CGI qw(escapeHTML);
14 use POSIX qw(strftime);
# strftime pattern for the timestamps this file emits. The trailing "Z" is a
# literal character in the pattern, not a conversion, so callers are
# responsible for passing a UTC time.
15 use constant DATEFMT => '%Y-%m-%dT%H:%M:%SZ';
# Package-level mail-date parser, constructed with loose => 1.
16 our $dt_parser = DateTime::Format::Mail->new(loose => 1);
# Builds an XML::Atom::SimpleFeed object and fills it from `git log` output.
# Arguments: $class, $git_dir (exported as GIT_DIR below) and $max.
# NOTE(review): the use of $max, and the declarations of %deleted, $add and
# $nr, are on lines not shown here.
20 my ($class, $git_dir, $max) = @_;
# `local` scopes the GIT_DIR override to this dynamic scope; the previous
# value is restored on exit. The git commands run below inherit it.
23 local $ENV{GIT_DIR} = $git_dir;
24 my $feed_opts = get_feedopts();
# Feed-level metadata; each setting falls back to an example.com placeholder
# when the configured value is unset or false.
26 my $feed = XML::Atom::SimpleFeed->new(
27 title => $feed_opts->{title},
28 link => $feed_opts->{url} || "http://example.com/",
31 href => $feed_opts->{atomUrl} ||
32 "http://example.com/atom",
34 id => $feed_opts->{email} || 'public-inbox@example.com',
# "updated" is stamped with the current time in UTC (gmtime).
35 updated => strftime(DATEFMT, gmtime),
41 # we could use git log -z, but we already know ssoma will not
42 # leave us with filenames with spaces in them.
# The trailing "|" makes this two-arg open a read pipe from the command.
# NOTE(review): $cmd is a constant string here, so nothing external is
# interpolated; the list form open(my $log, '-|', qw(git log ...)) would
# still avoid the shell entirely.
43 my $cmd = "git log --no-color --raw -r --no-abbrev HEAD |";
44 my $pid = open my $log, $cmd or die "open `$cmd' pipe failed: $!\n";
# NOTE(review): <$log> in foreach's list context reads the whole log into
# memory before iterating; while (my $line = <$log>) would stream it.
47 foreach my $line (<$log>) {
# Raw-diff line for an added file: old mode 000000, new mode 100644, old
# blob id all zeros; the capture is the new blob id.
48 if ($line =~ /^:000000 100644 0{40} ([a-f0-9]{40})/) {
# Skip blobs already recorded in %deleted.
50 next if $deleted{$add};
# add_to_feed returns 0 (skipped) or 1 (added), so $nr counts entries added.
51 $nr += add_to_feed($feed_opts, $feed, $add);
# Raw-diff line for a deleted file: mode 100644 -> 000000, new blob id all
# zeros; the capture is the removed blob id.
53 } elsif ($line =~ /^:100644 000000 ([a-f0-9]{40}) 0{40}/) {
62 # private functions below
# Reads each publicInboxFeed.<key> setting by running `git config` once per
# key. $key comes only from the fixed list below, so no external data reaches
# the shell command.
# NOTE(review): what is done with $tmp (chomp, storage) is on lines not shown.
65 foreach my $key (qw(title url atomUrl email)) {
66 my $tmp = `git config publicInboxFeed.$key`;
# Takes a message object ($mime) and a header name ($name).
74 my ($mime, $name) = @_;
# Decode the header's MIME encoded-words into Perl characters, then encode
# the result as UTF-8 bytes.
# NOTE(review): presumably $mime->header($name) yields undef when the header
# is absent, which is then passed to decode() -- confirm this cannot warn or
# produce a true value, since callers test the result for truth.
75 encode('utf8', decode('MIME-Header', $mime->header($name)));
# Parse the mail date string with the package-level loose parser.
# NOTE(review): confirm whether parse_datetime returns false or throws on
# unparseable input; only the false case is handled below.
80 my $dt = $dt_parser->parse_datetime($date);
# Yields the date formatted with DATEFMT, or 0 when $dt is false.
# NOTE(review): DATEFMT ends in a literal "Z" but no conversion to UTC is
# visible here; if the parsed object keeps the header's own offset, the
# output would be mislabelled as UTC -- confirm.
81 $dt ? $dt->strftime(DATEFMT) : 0;
# Loads the blob $add from git, parses it as an email and extracts the fields
# for one feed entry.
84 # returns 0 (skipped) or 1 (added)
86 my ($feed_opts, $feed, $add) = @_;
88 # we can use git cat-file --batch if performance becomes a
89 # problem, but I doubt it...
# NOTE(review): $add is interpolated into a shell command; the visible call
# site passes a value taken next to a 40-hex-digit regex match -- confirm no
# other caller passes unchecked input.
90 my $str = `git cat-file blob $add`;
92 my $mime = Email::MIME->new($str);
# Skip the message when msg_content found nothing to display (undef).
94 my $content = msg_content($mime);
95 defined($content) or return 0;
# sprintf template for the per-message URL; %s receives the escaped
# Message-ID below.
# NOTE(review): "mid_url" is not among the keys read in the config loop
# (title url atomUrl email), so this fallback may always apply -- confirm.
97 my $mid_url = $feed_opts->{mid_url} || "http://example.com/mid/%s";
98 my $mid = utf8_header($mime, "Message-ID") or return 0;
# The || "" guarantees $subject is defined, so only the length test on the
# next line can trigger the skip.
102 my $subject = utf8_header($mime, "Subject") || "";
103 defined($subject) && length($subject) or return 0;
105 my $from = utf8_header($mime, "From") or return 0;
# Only the first parsed address is used for the entry's author.
# NOTE(review): if parse() returns an empty list, $from[0] is undef and the
# ->name call below would die -- confirm and guard if so.
107 my @from = Email::Address->parse($from);
108 my $name = $from[0]->name;
109 defined $name or $name = "";
110 my $email = $from[0]->address;
111 defined $email or $email = "";
# Percent-escape the Message-ID before substituting it into the template.
113 my $url = sprintf($mid_url, uri_escape($mid));
114 my $date = utf8_header($mime, "Date");
# feed_date yields 0 when the date cannot be formatted; skip in that case.
116 $date = feed_date($date) or return 0;
# Entry fields; presumably arguments to a feed-entry call whose opening line
# is not shown. The content is declared as HTML.
118 author => { name => $name, email => $email },
121 content => { type => "html", content => $content },
128 # returns the message body text, HTML-escaped, without quoted text
129 # returns undef if there was nothing
# NOTE(review): the declarations of @body and $part are on lines not shown.
134 # scan through all parts, looking for displayable text
135 $mime->walk_parts(sub {
138 return if $part->subparts; # walk_parts already recurses
# A part with no content type is treated as text/plain.
139 my $ct = $part->content_type || 'text/plain';
# Skip parts that are not text/*.
# NOTE(review): text/html parts also pass this filter -- confirm intended.
140 return if $ct !~ m!\btext/[a-z0-9\+\._-]+\b!i;
142 my $killed_wrote; # omit "So-and-so wrote:" line
144 # no quoted text in Atom feed summary
145 # $part->body should already be decoded for us (no QP)
147 my $state = 0; # 0: beginning, 1: keep, 2: quoted
# Split the body into lines on LF or CRLF.
148 foreach my $l (split(/\r?\n/, $part->body)) {
150 # drop leading blank lines
151 next if $l =~ /\A\s*\z/;
# A line starting with ">" is quoted text (state 2); anything else is kept
# (state 1).
153 $state = ($l =~ /\A>/) ? 2 : 1; # fall-through
155 if ($state == 2) { # quoted text, drop it
# A placeholder marker is pushed in place of the quoted text.
157 push @body, "<quoted text snipped>";
164 if ($state == 1) { # stuff we may keep
166 # drop "So-and-so wrote:" line
# Applies only when the previously kept line ends in ":" and no such line
# has been dropped yet.
167 if (@body && !$killed_wrote &&
168 $body[-1] =~ /:\z/) {
# Each kept line is HTML-escaped and the lines are joined with newlines.
# NOTE(review): the operand after the trailing "." is not shown.
179 join("\n", map { escapeHTML($_) } @body) .