]> Sergey Matveev's repositories - feeder.git/commitdiff
Try to use GUID
author Sergey Matveev <stargrave@stargrave.org>
Fri, 18 Feb 2022 09:53:44 +0000 (12:53 +0300)
committer Sergey Matveev <stargrave@stargrave.org>
Fri, 18 Feb 2022 09:53:47 +0000 (12:53 +0300)
Some fucking feeds change their content every time the feed is downloaded.
But at least they try to preserve the GUIDs of all their entries.

cmd/feed2mdir/main.go

index 5783d1f024401c4ee9bbbaa5ba0e1e1127bc84d4..8ee07d0cab392dbf1fc7727b810735075a4e1ebf 100644 (file)
@@ -42,12 +42,24 @@ func main() {
        if err != nil {
                log.Fatalln(err)
        }
+
+       guids := make(map[string]struct{}, len(feed.Items))
+       useGUID := true
+       for _, item := range feed.Items {
+               if _, exists := guids[item.GUID]; exists {
+                       useGUID = false
+                       break
+               } else {
+                       guids[item.GUID] = struct{}{}
+               }
+       }
+
+       h := sha512.New()
        news := 0
        var when *time.Time
        now := time.Now()
-       max := int(*maxEntries) - 1
        for n, item := range feed.Items {
-               if n == max {
+               if n == int(*maxEntries) {
                        break
                }
                when = nil
@@ -66,10 +78,14 @@ func main() {
                }
                what = strings.TrimPrefix(what, "<![CDATA[")
                what = strings.TrimSuffix(what, "]]>")
-               h := sha512.New()
-               h.Write([]byte(item.Title))
-               h.Write([]byte{0})
-               h.Write([]byte(what))
+               h.Reset()
+               if useGUID {
+                       h.Write([]byte(item.GUID))
+               } else {
+                       h.Write([]byte(item.Title))
+                       h.Write([]byte{0})
+                       h.Write([]byte(what))
+               }
                fn := hex.EncodeToString(h.Sum(nil)[:sha512.Size/2])
                exists := false
                for _, d := range []string{"cur", "new"} {