From 012ff296ee4bf9ddfa766b18f561d8ffa308c1cc Mon Sep 17 00:00:00 2001
From: Sergey Matveev <stargrave@stargrave.org>
Date: Fri, 18 Feb 2022 20:01:45 +0300
Subject: [PATCH] Per-feed max number of entries

---
 cmd/feed2mdir/main.go |  4 ++--
 cmd/parse.sh          |  3 ++-
 doc/storage.texi      |  4 ++++
 doc/usage.texi        | 16 ++++++++++------
 feeds-clear.zsh       |  8 ++++----
 5 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/cmd/feed2mdir/main.go b/cmd/feed2mdir/main.go
index 2699d6c..815b7d1 100644
--- a/cmd/feed2mdir/main.go
+++ b/cmd/feed2mdir/main.go
@@ -34,7 +34,7 @@ import (
 )
 
 func main() {
-	maxEntries := flag.Uint("max-entries", 100, "Max entries to process")
+	maxEntries := flag.Uint("max-entries", 0, "Max entries to process (0=unlimited)")
 	flag.Parse()
 	mdir := flag.Arg(0)
 	fp := gofeed.NewParser()
@@ -60,7 +60,7 @@ func main() {
 	now := time.Now()
 	latest := &time.Time{}
 	for n, item := range feed.Items {
-		if n == int(*maxEntries) {
+		if *maxEntries > 0 && n == int(*maxEntries) {
 			break
 		}
 		when = nil
diff --git a/cmd/parse.sh b/cmd/parse.sh
index 9ba9d7c..3af3258 100755
--- a/cmd/parse.sh
+++ b/cmd/parse.sh
@@ -5,6 +5,7 @@ cd "$1"
 [ -s parse.hash ] && hash_our=`cat parse.hash` || :
 [ -s download.hash ] && hash_their=`cat download.hash` || :
 [ "$hash_our" != "$hash_their" ] || exit 0
-zstd -d < feed.zst | $cmds/feed2mdir/feed2mdir . > title.tmp
+[ -s max ] && max=`cat max` || max=${FEEDER_MAX_ITEMS:-100}
+zstd -d < feed.zst | $cmds/feed2mdir/feed2mdir -max-entries $max . > title.tmp
 mv title.tmp title
 echo $hash_their > parse.hash
diff --git a/doc/storage.texi b/doc/storage.texi
index e015254..162c23d 100644
--- a/doc/storage.texi
+++ b/doc/storage.texi
@@ -11,6 +11,10 @@ contains:
 File with the URL of the feed. This is the only file you have to
 manually deal with.
 
+@item max
+It may contain the maximal number of messages for the current feed to
+keep and process.
+
 @item etag, hdr, out
 Those files are used by @command{curl} to keep the content, its proper
 @code{mtime} (for @code{If-Modified-Since} header generation),
diff --git a/doc/usage.texi b/doc/usage.texi
index 5fa9f80..b59afea 100644
--- a/doc/usage.texi
+++ b/doc/usage.texi
@@ -157,16 +157,20 @@ message flags display and adding name of the feed in parenthesis.
 
 @item Cleanup excess number of messages
 
+By default (@env{$FEEDER_MAX_ITEMS}) only 100 entries are processed.
+Parser only appends them, but does not remove obsolete ones.
+
 @example
 $ ./feeds-clear.zsh
 @end example
 
-That will remove all messages in all feeds @file{cur/} directory that is
-not first hundred of ones, ordered by @code{mtime}. Pay attention that
-@file{new/} directory is not touched, so you won't loose completely new
-and unread messages when you are on vacation and left @command{cron}-ed
-workers. @command{cmd/feed2mdir/feed2mdir} command by default has
-@option{-max-entries 100} option set.
+will clear everything exceeding the quantity limit. You can set that
+limit on per-feed basis. For example @code{echo 50 > feed/FEED/max}.
+0 means no limit and keep all the messages.
+
+Pay attention that @file{new/} directory is not touched, so you won't
+lose completely new and unread messages when you are on vacation and
+left @command{cron}-ed workers.
 
 @item If you want to clean download state
 
diff --git a/feeds-clear.zsh b/feeds-clear.zsh
index 2e5002f..c1a55aa 100755
--- a/feeds-clear.zsh
+++ b/feeds-clear.zsh
@@ -1,8 +1,8 @@
 #!/usr/bin/env zsh
 set -e
 setopt EXTENDED_GLOB
-for f (feeds/**/cur) {
-    pushd $f
-    rm -fv *(Nom[101,-1]) || :
-    popd
+for f (feeds/*) {
+    [[ -s $f/max ]] && max=`cat $f/max` || max=${FEEDER_MAX_ITEMS:-100}
+    (( max++ ))
+    [[ $max -eq 1 ]] || rm -fv $f/cur/*(Nom[$max,-1])
 }
-- 
2.50.0