Sergey Matveev's repositories - feeder.git/blob - cmd/feed2mdir/main.go
Use media:group/media:description if it exists
[feeder.git] / cmd / feed2mdir / main.go
1 /*
2 feeder  -- newsfeeds aggregator
3 Copyright (C) 2022-2023 Sergey Matveev <stargrave@stargrave.org>
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, version 3 of the License.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 package main
19
20 import (
21         "crypto/sha512"
22         "encoding/base64"
23         "encoding/hex"
24         "flag"
25         "fmt"
26         "log"
27         "mime"
28         "os"
29         "path"
30         "path/filepath"
31         "strings"
32         "time"
33
34         "github.com/mmcdole/gofeed"
35 )
36
37 func main() {
38         maxEntries := flag.Uint("max-entries", 0, "Max entries to process (0=unlimited)")
39         flag.Parse()
40         mdir := flag.Arg(0)
41         fp := gofeed.NewParser()
42         feed, err := fp.Parse(os.Stdin)
43         if err != nil {
44                 log.Fatalln(err)
45         }
46
47         guids := make(map[string]struct{}, len(feed.Items))
48         useGUID := true
49         for _, item := range feed.Items {
50                 if _, exists := guids[item.GUID]; exists {
51                         useGUID = false
52                         break
53                 } else {
54                         guids[item.GUID] = struct{}{}
55                 }
56         }
57
58         feedTitle := feed.Title
59         if len(feedTitle) == 0 {
60                 feedTitle, err = filepath.Abs(mdir)
61                 if err != nil {
62                         log.Fatalln(err)
63                 }
64                 feedTitle = path.Base(feedTitle)
65         }
66
67         h := sha512.New()
68         news := 0
69         var when *time.Time
70         now := time.Now()
71         latest := &time.Time{}
72         for n, item := range feed.Items {
73                 if *maxEntries > 0 && n == int(*maxEntries) {
74                         break
75                 }
76                 when = nil
77                 if item.PublishedParsed != nil {
78                         when = item.PublishedParsed
79                 } else if item.UpdatedParsed != nil {
80                         when = item.UpdatedParsed
81                 } else {
82                         when = &now
83                 }
84                 if latest.Before(*when) {
85                         latest = when
86                 }
87                 var what string
88                 if len(item.Content) > len(item.Description) {
89                         what = item.Content
90                 } else {
91                         what = item.Description
92                 }
93                 if media, ok := item.Extensions["media"]; ok {
94                         if mediagroups, ok := media["group"]; ok {
95                                 if len(mediagroups) == 1 {
96                                         if mediadescription, ok := mediagroups[0].Children["description"]; ok {
97                                                 if len(mediadescription[0].Value) > len(what) {
98                                                         what = mediadescription[0].Value
99                                                 }
100                                         }
101                                 }
102                         }
103                 }
104                 what = strings.TrimPrefix(what, "<![CDATA[")
105                 what = strings.TrimSuffix(what, "]]>")
106                 h.Reset()
107                 if useGUID {
108                         h.Write([]byte(item.GUID))
109                 } else {
110                         h.Write([]byte(item.Title))
111                         h.Write([]byte{0})
112                         h.Write([]byte(what))
113                 }
114                 fn := hex.EncodeToString(h.Sum(nil)[:sha512.Size/2])
115                 exists := false
116                 for _, d := range []string{"cur", "new"} {
117                         entries, err := os.ReadDir(path.Join(mdir, d))
118                         if err != nil {
119                                 log.Fatalln(err)
120                         }
121                         for _, entry := range entries {
122                                 if strings.HasPrefix(entry.Name(), fn) {
123                                         exists = true
124                                         break
125                                 }
126                         }
127                 }
128                 if exists {
129                         continue
130                 }
131                 fn = path.Join(mdir, "new", fn)
132                 fd, err := os.OpenFile(fn, os.O_WRONLY|os.O_CREATE|os.O_EXCL, os.FileMode(0666))
133                 if err != nil {
134                         log.Fatalln(err)
135                 }
136                 fd.WriteString("From: \"" + feedTitle + "\" <feeder@localhost>\n")
137                 fd.WriteString("Date: " + when.UTC().Format(time.RFC1123Z) + "\n")
138                 fd.WriteString("Subject: " + mime.BEncoding.Encode("UTF-8", item.Title) + "\n")
139                 fd.WriteString("MIME-Version: 1.0\n")
140                 fd.WriteString("Content-Type: text/html; charset=utf-8\n")
141                 fd.WriteString("Content-Transfer-Encoding: base64\n")
142                 for _, author := range item.Authors {
143                         if len(author.Name) > 0 {
144                                 fd.WriteString("X-Author: " + author.Name + "\n")
145                         }
146                 }
147                 for _, link := range item.Links {
148                         fd.WriteString("X-URL: " + link + "\n")
149                 }
150                 for _, enc := range item.Enclosures {
151                         fd.WriteString("X-Enclosure: " + enc.URL + "\n")
152                 }
153                 if len(item.Categories) > 0 {
154                         fd.WriteString("X-Categories: " + strings.Join(item.Categories, ", ") + "\n")
155                 }
156                 fd.WriteString("\n")
157                 what = base64.StdEncoding.EncodeToString([]byte(what))
158                 for i := 0; i < len(what); i += 72 {
159                         b := i + 72
160                         if b > len(what) {
161                                 b = len(what)
162                         }
163                         fd.WriteString(what[i:b] + "\n")
164                 }
165                 fd.Close()
166                 if err = os.Chtimes(fn, *when, *when); err != nil {
167                         log.Fatalln(err)
168                 }
169                 news++
170         }
171         for _, d := range []string{"cur", "new"} {
172                 if err = os.Chtimes(path.Join(mdir, d), *latest, *latest); err != nil {
173                         log.Fatalln(err)
174                 }
175         }
176         fmt.Println(feedTitle)
177 }