Sergey Matveev's repositories - feeder.git/blobdiff - cmd/feed2mdir/main.go
Use media:group/media:description if it exists
[feeder.git] / cmd / feed2mdir / main.go
index cac223700db4a7dae8ec28300db276e7f7460945..3089a232355796ea41f287902c70a726646d7ce7 100644 (file)
@@ -1,6 +1,6 @@
 /*
-go.stargrave.org/feeder  -- newsfeeds aggregator
-Copyright (C) 2022 Sergey Matveev <stargrave@stargrave.org>
+feeder  -- newsfeeds aggregator
+Copyright (C) 2022-2023 Sergey Matveev <stargrave@stargrave.org>
 
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -27,6 +27,7 @@ import (
        "mime"
        "os"
        "path"
+       "path/filepath"
        "strings"
        "time"
 
@@ -34,7 +35,7 @@ import (
 )
 
 func main() {
-       maxEntries := flag.Uint("max-entries", 100, "Max entries to process")
+       maxEntries := flag.Uint("max-entries", 0, "Max entries to process (0=unlimited)")
        flag.Parse()
        mdir := flag.Arg(0)
        fp := gofeed.NewParser()
@@ -42,12 +43,34 @@ func main() {
        if err != nil {
                log.Fatalln(err)
        }
+
+       guids := make(map[string]struct{}, len(feed.Items))
+       useGUID := true
+       for _, item := range feed.Items {
+               if _, exists := guids[item.GUID]; exists {
+                       useGUID = false
+                       break
+               } else {
+                       guids[item.GUID] = struct{}{}
+               }
+       }
+
+       feedTitle := feed.Title
+       if len(feedTitle) == 0 {
+               feedTitle, err = filepath.Abs(mdir)
+               if err != nil {
+                       log.Fatalln(err)
+               }
+               feedTitle = path.Base(feedTitle)
+       }
+
+       h := sha512.New()
        news := 0
        var when *time.Time
        now := time.Now()
-       max := int(*maxEntries) - 1
+       latest := &time.Time{}
        for n, item := range feed.Items {
-               if n == max {
+               if *maxEntries > 0 && n == int(*maxEntries) {
                        break
                }
                when = nil
@@ -58,13 +81,37 @@ func main() {
                } else {
                        when = &now
                }
-               fn := hex.EncodeToString(h.Sum(nil)[:sha512.Size/2])
+               if latest.Before(*when) {
+                       latest = when
+               }
                var what string
-               if len(item.Content) == 0 {
+               if len(item.Content) > len(item.Description) {
+                       what = item.Content
+               } else {
                        what = item.Description
+               }
+               if media, ok := item.Extensions["media"]; ok {
+                       if mediagroups, ok := media["group"]; ok {
+                               if len(mediagroups) == 1 {
+                                       if mediadescription, ok := mediagroups[0].Children["description"]; ok {
+                                               if len(mediadescription[0].Value) > len(what) {
+                                                       what = mediadescription[0].Value
+                                               }
+                                       }
+                               }
+                       }
+               }
+               what = strings.TrimPrefix(what, "<![CDATA[")
+               what = strings.TrimSuffix(what, "]]>")
+               h.Reset()
+               if useGUID {
+                       h.Write([]byte(item.GUID))
                } else {
-                       what = item.Content
+                       h.Write([]byte(item.Title))
+                       h.Write([]byte{0})
+                       h.Write([]byte(what))
                }
+               fn := hex.EncodeToString(h.Sum(nil)[:sha512.Size/2])
                exists := false
                for _, d := range []string{"cur", "new"} {
                        entries, err := os.ReadDir(path.Join(mdir, d))
@@ -86,20 +133,25 @@ func main() {
                if err != nil {
                        log.Fatalln(err)
                }
-               fd.WriteString("From: \"" + feed.Title + "\" <feeder@go.stargrave.org>\n")
+               fd.WriteString("From: \"" + feedTitle + "\" <feeder@localhost>\n")
                fd.WriteString("Date: " + when.UTC().Format(time.RFC1123Z) + "\n")
                fd.WriteString("Subject: " + mime.BEncoding.Encode("UTF-8", item.Title) + "\n")
                fd.WriteString("MIME-Version: 1.0\n")
                fd.WriteString("Content-Type: text/html; charset=utf-8\n")
                fd.WriteString("Content-Transfer-Encoding: base64\n")
+               for _, author := range item.Authors {
+                       if len(author.Name) > 0 {
+                               fd.WriteString("X-Author: " + author.Name + "\n")
+                       }
+               }
                for _, link := range item.Links {
                        fd.WriteString("X-URL: " + link + "\n")
                }
-               for _, author := range item.Authors {
-                       fd.WriteString("X-Author: " + author.Name + "\n")
+               for _, enc := range item.Enclosures {
+                       fd.WriteString("X-Enclosure: " + enc.URL + "\n")
                }
-               for _, cat := range item.Categories {
-                       fd.WriteString("X-Category: " + cat + "\n")
+               if len(item.Categories) > 0 {
+                       fd.WriteString("X-Categories: " + strings.Join(item.Categories, ", ") + "\n")
                }
                fd.WriteString("\n")
                what = base64.StdEncoding.EncodeToString([]byte(what))
@@ -116,18 +168,10 @@ func main() {
                }
                news++
        }
-       when = nil
-       if feed.PublishedParsed != nil {
-               when = feed.PublishedParsed
-       } else if feed.UpdatedParsed != nil {
-               when = feed.UpdatedParsed
-       }
-       if when != nil {
-               for _, d := range []string{"cur", "new"} {
-                       if err = os.Chtimes(path.Join(mdir, d), *when, *when); err != nil {
-                               log.Fatalln(err)
-                       }
+       for _, d := range []string{"cur", "new"} {
+               if err = os.Chtimes(path.Join(mdir, d), *latest, *latest); err != nil {
+                       log.Fatalln(err)
                }
        }
-       fmt.Println(feed.Title)
+       fmt.Println(feedTitle)
 }