Sergey Matveev's repositories - feeder.git/commitdiff
Various refactoring
author Sergey Matveev <stargrave@stargrave.org>
Fri, 18 Feb 2022 08:20:51 +0000 (11:20 +0300)
committer Sergey Matveev <stargrave@stargrave.org>
Fri, 18 Feb 2022 09:39:40 +0000 (12:39 +0300)
* No excess redo
* parallel is used, with failed jobs printing
* All commands can be run outside the project
* feed.zst -- it is multiple times smaller
* Various small fixes

15 files changed:
cmd/download-clean.sh [new file with mode: 0755]
cmd/download.sh [new file with mode: 0755]
cmd/muttrc-gen.sh [moved from mutt.rc.do with 94% similarity, mode: 0755]
cmd/parse.sh [new file with mode: 0755]
cmd/print-failed.zsh.rc [new file with mode: 0644]
default.clean.do [deleted file]
default.download.do [deleted file]
default.parse.do [deleted file]
doc/index.texi
doc/storage.texi
doc/usage.texi
feeds-browse.sh [deleted file]
feeds-browse.zsh [new file with mode: 0755]
feeds-download.zsh
feeds-parse.zsh

diff --git a/cmd/download-clean.sh b/cmd/download-clean.sh
new file mode 100755 (executable)
index 0000000..084d5ee
--- /dev/null
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+
+cd "$1"
+rm -fv download.hash etag feed hdr out
diff --git a/cmd/download.sh b/cmd/download.sh
new file mode 100755 (executable)
index 0000000..b3a42a6
--- /dev/null
@@ -0,0 +1,28 @@
+#!/bin/sh -e
+
+PROXY="--proxy http://localhost:8080/"
+cd "$1"
+read url < url
+[ -s etag ] && etag_compare="--etag-compare etag" || etag_compare=""
+[ -r out ] && time_cond="--time-cond out" || time_cond=""
+[ -z "$FEEDER_CURL_VERBOSE" ] && silent="--silent" || silent="--verbose"
+curl --fail \
+    --user-agent "go.stargrave.org-feeder/0.1.0" \
+    --compressed \
+    --location --max-redirs 2 \
+    --dump-header hdr \
+    --output out \
+    --remote-time \
+    --etag-save etag \
+    $PROXY \
+    $etag_compare \
+    $time_cond \
+    $silent \
+    "$url" >&2
+if [ -s out ] ; then
+    zstdmt -19 < out > feed.zst
+    touch -r out feed.zst
+    truncate -s 0 out
+    touch -r feed.zst out
+fi
+sha512 < feed.zst > download.hash
old mode 100644 (file)
new mode 100755 (executable)
similarity index 94%
rename from mutt.rc.do
rename to cmd/muttrc-gen.sh
index d980ab1..9c3c432
@@ -1,4 +1,5 @@
-find feeds -name title | sort | xargs redo-ifchange
+#!/bin/sh -e
+
 cat <<EOF
 set mail_check_stats
 set mail_check_stats_interval=5
@@ -24,6 +25,7 @@ unmailboxes *
 mailboxes search
 
 EOF
+
 for f in feeds/* ; do
     [ -s $f/title ] || {
         echo unreadable $f/title >&2
diff --git a/cmd/parse.sh b/cmd/parse.sh
new file mode 100755 (executable)
index 0000000..ca3bf2a
--- /dev/null
@@ -0,0 +1,10 @@
+#!/bin/sh -e
+
+cmds="$(dirname "$(realpath "$0")")"
+cd "$1"
+[ -s parse.hash ] && hash_our=`cat parse.hash` || hash_our=""
+[ -s download.hash ] && hash_their=`cat download.hash` || hash_their=""
+[ "$hash_our" != "$hash_their" ] || exit 0
+zstd -d < feed.zst | $cmds/feed2mdir/feed2mdir . > title.tmp
+mv title.tmp title
+echo $hash_their > parse.hash
diff --git a/cmd/print-failed.zsh.rc b/cmd/print-failed.zsh.rc
new file mode 100644 (file)
index 0000000..ea52596
--- /dev/null
@@ -0,0 +1,11 @@
+print-joglog-failed() {
+    local row
+    read row
+    local cols=(${(s:  :)row})
+    local exitvalI=${cols[(i)Exitval]}
+    while read row ; do
+        cols=(${(s:    :)row})
+        [[ ${cols[$exitvalI]} -ne 0 ]] || continue
+        print "Failed: ${cols[$#cols]}"
+    done
+}
diff --git a/default.clean.do b/default.clean.do
deleted file mode 100644 (file)
index 931bb9a..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-d=${1%/*}
-cd $d
-rm etag feed* hdr out
diff --git a/default.download.do b/default.download.do
deleted file mode 100644 (file)
index 6591f7d..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-PROXY="--proxy http://localhost:8080/"
-d=${1%/*}
-[ -z "$FEEDER_SKIP_DOWNLOAD" ] || {
-    echo temporarily skipping feed download >&2
-    sha512 < $d/feed
-    exit
-}
-read url < $d/url
-[ -s $d/etag ] && etag_compare="--etag-compare $d/etag" || etag_compare=""
-[ -z "$FEEDER_CURL_VERBOSE" ] && silent="--silent" || silent="--verbose"
-curl --fail \
-    --user-agent "go.stargrave.org-feeder/0.1.0" \
-    --compressed \
-    --location --max-redirs 2 \
-    --dump-header $d/hdr \
-    --output $d/out \
-    --remote-time \
-    --time-cond $d/out \
-    --etag-save $d/etag \
-    $PROXY \
-    $silent \
-    $etag_compare \
-    "$url" >&2
-if [ -s $d/out ] ; then
-    cp -a $d/out $d/feed
-    truncate -s 0 $d/out
-    touch -r $d/feed $d/out
-fi
-sha512 < $d/feed
diff --git a/default.parse.do b/default.parse.do
deleted file mode 100644 (file)
index 8c44c45..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-d=${1%/*}
-FEEDER_SKIP_DOWNLOAD=1 redo-ifchange $d/feed.download
-cmd/feed2mdir/feed2mdir $d < $d/feed > $d/title
index 808bd330f5207636167558e2ad7fa85011f9bde9..6a26bf9c07eaf33ff90c4f1f3145de20b57f3dd2 100644 (file)
@@ -1,6 +1,6 @@
 \input texinfo
 @documentencoding UTF-8
-@settitle feeder
+@settitle go.stargrave.org/feeder
 
 @copying
 Copyright @copyright{} 2022 @email{stargrave@@stargrave.org, Sergey Matveev}
@@ -33,17 +33,18 @@ Its architecture is very simple:
     @url{https://en.wikipedia.org/wiki/Maildir, Maildir}.
 @item Automatically generated @url{http://www.mutt.org/, Mutt} source
     file contains convenient options and human-readable mailboxes list.
-@item @url{http://cr.yp.to/redo.html, redo} is used for parallel
-    invocation of download/parsing jobs and skipping parsing of
-    unchanged feeds.
+@item @url{https://www.gnu.org/software/parallel/, GNU parallel} for
+    parallel invocation of download/parsing jobs.
 @item @url{https://www.djcbsoftware.nl/code/mu/, mu} utilities perform
     indexing and searching among the messages. Of course it would be
     trivial to use @url{https://www.lesbonscomptes.com/recoll/, recoll}
     or @url{https://notmuchmail.org/, notmuch} instead. If you need that.
-@item Bunch of @url{https://www.zsh.org/, Zsh} helper scripts, that are
-    completely optional and pretty trivial.
+@item Bunch of optional @url{https://www.zsh.org/, Zsh} helper scripts.
 @end itemize
 
+But of course it has its price: original feed data, its Base64-encoded
+copy in mailbox and another copy in @command{mu}'s Xapian database.
+
 @insertcopying
 
 @include storage.texi
index 8b9ad85f56dee462be164074f29680d534f17c0e..e01525438b6e756c4d49a08202ffd05954f2aa28 100644 (file)
@@ -16,14 +16,13 @@ Those files are used by @command{curl} to keep the content, its proper
 @code{mtime} (for @code{If-Modified-Since} header generation),
 @code{ETag} and response headers for debugging.
 
-@item feed
-It contains the content itself.
-
-@item feed.download
-Used as intermediate target for the @command{redo} build system and
-contains SHA-512 hash of the @file{feed} file. It is used to nearly
-completely skip file copying/moving on filesystem if feed was not
-modified.
+@item feed.zst
+It contains the content itself. Compressed with
+@url{https://facebook.github.io/zstd/, Zstandard}.
+
+@item download.hash, parse.hash
+SHA-512 hash of the @file{feed.zst}, used to determine if feed was
+updated and parser has to do the job.
 
 @item title
 Automatically generated file with the title of the feed.
index c99e5a8e1f306c2b9ffc5c61699cf891d3a6ed60..280cce0e80e82491046af4fb150c27b6fd89fe93 100644 (file)
@@ -38,42 +38,21 @@ http://blog.stargrave.org/russian/feed.atom
 
 @item Download your feed(s) data
 
-Downloading is implemented in @command{redo}'s
-@file{default.download.do} file. Probably you want to change its default
-@env{$PROXY} value. It uses @command{curl}, that is aware of
-@code{If-Modified-Since} and @code{ETag} headers, compressed content
-encodings and HTTP redirections.
-
-You can invoke feed downloading like that:
-
 @example
-$ redo feeds/blog.stargrave.org_russian_feed.atom/feed.download
+$ cmd/download.sh feeds/blog.stargrave.org_russian_feed.atom
+$ ./feeds-download.zsh # to invoke parallel downloading of everything
 @end example
 
+Probably you want to change its default @env{$PROXY} value. It uses
+@command{curl}, that is aware of @code{If-Modified-Since} and
+@code{ETag} headers, compressed content encodings and HTTP redirections.
 If you want to see verbose output, then set @env{FEEDER_CURL_VERBOSE=1}.
 
-As a rule, you wish to run all feeds downloading in parallel. You can
-use @file{feeds-download.zsh}, that just invokes @command{redo-ifchange}
-with @option{-f} option (forceful rebuild). Why that? Because most
-@command{redo} implementations (that forces target building) do not
-parallelize specified targets build. But you can also use @command{parallel}:
-
-@example
-$ parallel "redo @{@}/feed.download" ::: feeds/*
-@end example
-
 @item Parse your feeds
 
-Parsing (and Maildir filling) is implemented in @command{redo}'s
-@file{default.parse.do} file. It calls @command{cmd/feed2mdir/feed2mdir}
-utility, that read @file{feeds/FEED/feed} from stdin, takes Maildir
-directory as its first argument and prints feed's title.
-
-You can use @file{feeds-parse.zsh} helper or invoke @command{parallel}
-as in example above, replacing @code{.download} with @code{.parse}.
-
 @example
-$ ./feeds-parse.zsh
+$ cmd/parse.sh feeds/blog.stargrave.org_russian_feed.atom
+$ ./feeds-parse.zsh # to parse all feeds in parallel
 @end example
 
 @item Run Mutt
@@ -170,7 +149,7 @@ workers. @command{cmd/feed2mdir/feed2mdir} command by default has
 @item If you want to clean download state
 
 @example
-$ redo feeds/FEED/feed.clean
+$ cmd/download-clean.sh feeds/FEED
 @end example
 
 @end table
diff --git a/feeds-browse.sh b/feeds-browse.sh
deleted file mode 100755 (executable)
index dfc3f63..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh -e
-
-redo-ifchange mutt.rc
-mutt -e "source mutt.rc" -y
diff --git a/feeds-browse.zsh b/feeds-browse.zsh
new file mode 100755 (executable)
index 0000000..e1a6af7
--- /dev/null
@@ -0,0 +1,4 @@
+#!/usr/bin/env zsh
+set -e
+$0:h/cmd/muttrc-gen.sh > mutt.rc
+mutt -e "source mutt.rc" -y
index 5a9375bb78dfef2c6227ae46cb5b19caa1911813..d83f1bb5fc87f3558b7a789573f784aea060aed4 100755 (executable)
@@ -1,3 +1,5 @@
 #!/usr/bin/env zsh
-set -e
-redo-ifchange -f -j 10 `for f (feeds/*) print $f/feed.download`
+cmds=$0:h:a/cmd
+parallel --joblog download.log "$cmds/download.sh {}" ::: feeds/*
+. $cmds/print-failed.zsh.rc
+print-joglog-failed < download.log
index 17fbec72ca587ba3c1d6771cc36f7b3ee34cc452..f5af82dccddfb8787287c3d29c1032e6d40cbd0e 100755 (executable)
@@ -1,3 +1,5 @@
 #!/usr/bin/env zsh
-set -e
-redo-ifchange -j 16 `for f (feeds/*) print $f/feed.parse`
+cmds=$0:h:a/cmd
+parallel --joblog parse.log "$cmds/parse.sh {}" ::: feeds/*
+. $cmds/print-failed.zsh.rc
+print-joglog-failed < parse.log