Sergey Matveev's repositories - feeder.git/blobdiff - cmd/feed2mdir/main.go
Raise copyright years
[feeder.git] / cmd / feed2mdir / main.go
index 9074cefab80bc14b375d4d787d3d2741715b5d67..26cd80a8701a7045db9505129e38dc93d28579be 100644 (file)
@@ -1,19 +1,17 @@
-/*
-go.stargrave.org/feeder  -- newsfeeds aggregator
-Copyright (C) 2022 Sergey Matveev <stargrave@stargrave.org>
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation, version 3 of the License.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
+// feeder -- newsfeeds aggregator
+// Copyright (C) 2022-2024 Sergey Matveev <stargrave@stargrave.org>
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 3 of the License.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 package main
 
@@ -27,6 +25,7 @@ import (
        "mime"
        "os"
        "path"
+       "path/filepath"
        "strings"
        "time"
 
@@ -34,7 +33,7 @@ import (
 )
 
 func main() {
-       maxEntries := flag.Uint("max-entries", 100, "Max entries to process")
+       maxEntries := flag.Uint("max-entries", 0, "Max entries to process (0=unlimited)")
        flag.Parse()
        mdir := flag.Arg(0)
        fp := gofeed.NewParser()
@@ -42,12 +41,34 @@ func main() {
        if err != nil {
                log.Fatalln(err)
        }
+
+       guids := make(map[string]struct{}, len(feed.Items))
+       useGUID := true
+       for _, item := range feed.Items {
+               if _, exists := guids[item.GUID]; exists {
+                       useGUID = false
+                       break
+               } else {
+                       guids[item.GUID] = struct{}{}
+               }
+       }
+
+       feedTitle := feed.Title
+       if len(feedTitle) == 0 {
+               feedTitle, err = filepath.Abs(mdir)
+               if err != nil {
+                       log.Fatalln(err)
+               }
+               feedTitle = path.Base(feedTitle)
+       }
+
+       h := sha512.New()
        news := 0
        var when *time.Time
        now := time.Now()
-       max := int(*maxEntries) - 1
+       latest := &time.Time{}
        for n, item := range feed.Items {
-               if n == max {
+               if *maxEntries > 0 && n == int(*maxEntries) {
                        break
                }
                when = nil
@@ -58,15 +79,37 @@ func main() {
                } else {
                        when = &now
                }
-               fn := hex.EncodeToString(h.Sum(nil)[:sha512.Size/2])
+               if latest.Before(*when) {
+                       latest = when
+               }
                var what string
-               if len(item.Content) == 0 {
-                       what = item.Description
-               } else {
+               if len(item.Content) > len(item.Description) {
                        what = item.Content
+               } else {
+                       what = item.Description
+               }
+               if media, ok := item.Extensions["media"]; ok {
+                       if mediagroups, ok := media["group"]; ok {
+                               if len(mediagroups) == 1 {
+                                       if mediadescription, ok := mediagroups[0].Children["description"]; ok {
+                                               if len(mediadescription[0].Value) > len(what) {
+                                                       what = mediadescription[0].Value
+                                               }
+                                       }
+                               }
+                       }
                }
                what = strings.TrimPrefix(what, "<![CDATA[")
                what = strings.TrimSuffix(what, "]]>")
+               h.Reset()
+               if useGUID {
+                       h.Write([]byte(item.GUID))
+               } else {
+                       h.Write([]byte(item.Title))
+                       h.Write([]byte{0})
+                       h.Write([]byte(what))
+               }
+               fn := hex.EncodeToString(h.Sum(nil)[:sha512.Size/2])
                exists := false
                for _, d := range []string{"cur", "new"} {
                        entries, err := os.ReadDir(path.Join(mdir, d))
@@ -88,20 +131,25 @@ func main() {
                if err != nil {
                        log.Fatalln(err)
                }
-               fd.WriteString("From: \"" + feed.Title + "\" <feeder@go.stargrave.org>\n")
+               fd.WriteString("From: \"" + feedTitle + "\" <feeder@localhost>\n")
                fd.WriteString("Date: " + when.UTC().Format(time.RFC1123Z) + "\n")
                fd.WriteString("Subject: " + mime.BEncoding.Encode("UTF-8", item.Title) + "\n")
                fd.WriteString("MIME-Version: 1.0\n")
                fd.WriteString("Content-Type: text/html; charset=utf-8\n")
                fd.WriteString("Content-Transfer-Encoding: base64\n")
+               for _, author := range item.Authors {
+                       if len(author.Name) > 0 {
+                               fd.WriteString("X-Author: " + author.Name + "\n")
+                       }
+               }
                for _, link := range item.Links {
                        fd.WriteString("X-URL: " + link + "\n")
                }
-               for _, author := range item.Authors {
-                       fd.WriteString("X-Author: " + author.Name + "\n")
+               for _, enc := range item.Enclosures {
+                       fd.WriteString("X-Enclosure: " + enc.URL + "\n")
                }
-               for _, cat := range item.Categories {
-                       fd.WriteString("X-Category: " + cat + "\n")
+               if len(item.Categories) > 0 {
+                       fd.WriteString("X-Categories: " + strings.Join(item.Categories, ", ") + "\n")
                }
                fd.WriteString("\n")
                what = base64.StdEncoding.EncodeToString([]byte(what))
@@ -118,18 +166,10 @@ func main() {
                }
                news++
        }
-       when = nil
-       if feed.PublishedParsed != nil {
-               when = feed.PublishedParsed
-       } else if feed.UpdatedParsed != nil {
-               when = feed.UpdatedParsed
-       }
-       if when != nil {
-               for _, d := range []string{"cur", "new"} {
-                       if err = os.Chtimes(path.Join(mdir, d), *when, *when); err != nil {
-                               log.Fatalln(err)
-                       }
+       for _, d := range []string{"cur", "new"} {
+               if err = os.Chtimes(path.Join(mdir, d), *latest, *latest); err != nil {
+                       log.Fatalln(err)
                }
        }
-       fmt.Println(feed.Title)
+       fmt.Println(feedTitle)
 }