--- /dev/null
+#!/usr/bin/env zsh
+# Trim a single feed's maildir to its maximum number of entries.
+# Usage: clear.zsh feeds/FEED
+set -e
+cmds=$0:h:a
+. $cmds/env.rc
+setopt EXTENDED_GLOB
+# A non-empty FEED/max file overrides the global $FEEDER_MAX_ITEMS.
+[[ -s $1/max ]] && max=`cat $1/max` || max=$FEEDER_MAX_ITEMS
+# NOTE(review): when max is 0, (( max++ )) evaluates to 0 and returns a
+# non-zero status, which set -e turns fatal -- so the "max==1 keeps
+# everything" branch below may be unreachable; confirm intended.
+(( max++ ))
+# om = newest first by mtime; N = nullglob; delete entries past the limit.
+[[ $max -eq 1 ]] || rm -fv $1/cur/*(Nom[$max,-1])
--- /dev/null
+#!/usr/bin/env zsh
+# Run a command over every feeds/* directory with GNU parallel.
+# Usage: do-in-parallel.zsh LOGNAME JOBS-VAR-NAME COMMAND
+#   $1 -- basename for the joblog ($1.log)
+#   $2 -- NAME of the variable holding the job count; ${(P)2} dereferences it
+#   $3 -- command template; parallel substitutes {} with each feeds/* path
+set -e
+cmds=$0:h:a
+. $cmds/env.rc
+log=$1.log
+# ${=PARALLEL} word-splits, so $PARALLEL may carry options (default: --bar).
+${=PARALLEL} --jobs ${(P)2} --joblog $log $3 ::: feeds/*
+fpath=($cmds/functions.zsh $fpath)
+autoload print-joblog-failed
+# Report any jobs the joblog recorded as failed.
+print-joblog-failed < $log
#!/bin/sh -e
-PROXY="--proxy http://localhost:8080/"
+# Resolve the cmd/ directory and load the overridable tool/option settings.
+cmds="$(dirname "$(realpath -- "$0")")"
+. "$cmds/env.rc"
cd "$1"
read url < url
+# Conditional fetch: reuse the saved ETag and the mtime of the previous body.
[ -s etag ] && etag_compare="--etag-compare etag" || :
[ -r out ] && time_cond="--time-cond out" || :
[ -z "$FEEDER_CURL_VERBOSE" ] && silent="--silent" || silent="--verbose"
-curl --fail \
-	--user-agent "go.stargrave.org-feeder/0.1.0" \
+${CURL:-curl} --fail \
+	--user-agent "$FEEDER_USER_AGENT" \
	--compressed \
	--location --max-redirs 2 \
	--dump-header hdr \
	--output out \
	--remote-time \
	--etag-save etag \
-	$PROXY \
	$etag_compare \
	$time_cond \
	$silent \
	"$url" >&2
if [ -s out ] ; then
+	# Compress the fresh body, then empty "out" while keeping its mtime so
+	# the next --time-cond still sees the server-reported modification time.
-	zstdmt -19 < out > feed.zst
+	$ZSTD < out > feed.zst
	touch -r out feed.zst
	truncate -s 0 out
	touch -r feed.zst out
fi
+# NOTE(review): runs even when feed.zst was never created (a first fetch
+# that produced no body) -- relies on set -e to abort then; confirm intended.
-sha512 < feed.zst > download.hash
+$SHA512 < feed.zst > download.hash
#!/usr/bin/env zsh
set -e
-fpath=($0:h:a/functions.zsh $fpath)
+cmds=$0:h:a
+. $cmds/env.rc
+fpath=($cmds/functions.zsh $fpath)
dst=$2:a
cd $1
[[ -n "$dst" ]] || { dst=encs ; dst=$dst:a }
url=$cols[2]
[[ -n "$url" ]]
fn=$(strftime %Y%m%d-%H%M%S)-$(url-to-filename $url)
-	wget --timestamping --output-document=$dst/$fn $url 2>&2 2>enc.log
+	# NOTE(review): "2>&2" redirects stderr to itself (a no-op) before the
+	# real 2>encs.log -- was 2>&1 intended? --timestamping was also dropped
+	# and the log renamed enc.log -> encs.log; confirm all are intentional.
+	${=WGET} --user-agent=$FEEDER_USER_AGENT \
+		--output-document=$dst/$fn $url 2>&2 2>encs.log
	print $dst/$fn
done < $new
}
--- /dev/null
+# Overridable commands and tunables shared by all cmd/ scripts.
+# Values already set in the environment win over the defaults below.
+CURL="${CURL:-curl}"
+ZSTD="${ZSTD:-zstdmt -19}"
+WGET="${WGET:-wget}"
+PARALLEL="${PARALLEL:-parallel --bar}"
+
+FEEDER_USER_AGENT="${FEEDER_USER_AGENT:-go.stargrave.org-feeder/0.1.0}"
+# Uncomment to make download.sh run curl with --verbose instead of --silent.
+#FEEDER_CURL_VERBOSE=1
+FEEDER_MAX_ITEMS=${FEEDER_MAX_ITEMS:-100}
+FEEDER_DOWNLOAD_JOBS=${FEEDER_DOWNLOAD_JOBS:-10}
+# --jobs 0 tells GNU parallel to run as many jobs as possible.
+FEEDER_PARSE_JOBS=${FEEDER_PARSE_JOBS:-0}
+
+# BSD sha512 when available, GNU coreutils sha512sum otherwise.
+command -v sha512 >/dev/null && SHA512="sha512" || SHA512="sha512sum --binary"
+
+#MAILCAPS="${MAILCAPS:-$cmds/../contrib/mailcap}"
unignore X-Author X-URL X-Enclosure X-Categories
+# Prefer the text/plain part; auto-render text/html through mailcap when it
+# is all the message has (requires a mailcap with a text/html entry).
+alternative_order text/plain text/html
+auto_view text/html
+
set folder = \`pwd\`
unmailboxes *
mailboxes search
#!/bin/sh -e
cmds="$(dirname "$(realpath -- "$0")")"
+. "$cmds/env.rc"
cd "$1"
+# Skip reparsing when the downloaded feed's hash matches the last parsed one.
-[ -s parse.hash ] && hash_our=`cat parse.hash` || :
-[ -s download.hash ] && hash_their=`cat download.hash` || :
+[ -s parse.hash ] && hash_our="`cat parse.hash`" || :
+[ -s download.hash ] && hash_their="`cat download.hash`" || :
[ "$hash_our" != "$hash_their" ] || exit 0
-[ -s max ] && max=`cat max` || max=${FEEDER_MAX_ITEMS:-100}
-zstd -d < feed.zst | $cmds/feed2mdir/feed2mdir -max-entries $max . > title.tmp
+[ -s max ] && max=`cat max` || max=$FEEDER_MAX_ITEMS
+# NOTE(review): $ZSTD defaults to "zstdmt -19"; the level is ignored with
+# -d, but confirm user-overridden $ZSTD values also accept -d.
+$ZSTD -d < feed.zst | $cmds/feed2mdir/feed2mdir -max-entries $max . > title.tmp
mv title.tmp title
-echo $hash_their > parse.hash
+echo "$hash_their" > parse.hash
#!/usr/bin/env zsh
set -e
-fpath=($0:h:a/functions.zsh $fpath)
+cmds=$0:h:a
+. $cmds/env.rc
+fpath=($cmds/functions.zsh $fpath)
dst=$2:a
cd $1
[[ -n "$dst" ]] || { dst=warcs ; dst=$dst:a }
zmodload -F zsh/datetime b:strftime
setopt EXTENDED_GLOB
wget_opts=(
+	--user-agent="$FEEDER_USER_AGENT"
	--page-requisites
	--compression=auto
	--no-warc-keep-log
wget_opts=(--output-document=$tmp $wget_opts)
}
fn=$(strftime %Y%m%d-%H%M%S)-$(url-to-filename $url)
-	wget $wget_opts --output-file=warcs.log --warc-file=$dst/$fn $url
+	# ${=WGET} word-splits, so $WGET may carry extra options (see env.rc).
+	${=WGET} $wget_opts --output-file=warcs.log --warc-file=$dst/$fn $url
	print $dst/$fn.warc
done < $new
}
--- /dev/null
+# Example mailcap for Mutt's auto_view of text/html: enable exactly one
+# dump-capable renderer (w3m or lynx).
+# text/html; w3m -T text/html -I %{charset} -dump %s; copiousoutput; nametemplate=%s.html
+text/html; lynx -assume_charset=%{charset} -dump %s; copiousoutput; nametemplate=%s.html
@command{urls2feeds.zsh} won't touch already existing directories and will
warn if some of them disappeared from @file{urls}.
+@item Check configuration options
+
+@file{cmd/env.rc} contains a list of various options you can override
+through environment variables, like the @command{curl}, @command{wget},
+@command{zstd} and @command{parallel} command invocations, the
+@code{User-Agent}, the number of download/parse jobs run in parallel and so on.
+
@item Download your feed(s) data
@example
$ ./feeds-parse.zsh # to parse all feeds in parallel
@end example
+@item Download-n-parse
+
+You can also download and parse the feeds immediately:
+
+@example
+$ ./feeds-dnp.zsh
+@end example
+
@item Quick overview of the news:
@example
@item Run Mutt
@example
-$ ./feeds-browse.zsh
+$ ./feeds-browse.sh
@end example
That will read all feeds titles and create @file{mutt.rc} sourceable
configuration file with predefined helpers and @code{mailboxes}
-commands. Mutt will be started in mailboxes browser mode (I will skip
-many entries):
+commands.
+
+That configuration contains @code{auto_view text/html}, which expects a
+proper @file{mailcap} configuration file with a @code{text/html} entry to
+exist. Mutt has some built-in default search paths for it, but you can
+override them with the @env{$MAILCAPS} environment variable. There is an
+example @file{contrib/mailcap}.
+
+Mutt will be started in mailboxes browser mode (I will skip many entries):
@verbatim
1 N [ 1|101] 2021-02-17 20:41 Cryptology ePrint Archive/
@example
$ ./feeds-clear.zsh
+$ cmd/clear.zsh feeds/FEED # to clear single feed
@end example
will clear everything exceeding the quantity limit. You can set that
Of course you can also download only single feed's enclosures:
@example
-$ cmd/download-encs.sh path/to/FEED [optional overriden destination directory]
+$ cmd/encs.zsh path/to/FEED [optional overridden destination directory]
@end example
@end table
Of course you can also download only single feed's enclosures:
@example
-$ cmd/download-warcs.sh path/to/FEED [optional overriden destination directory]
+$ cmd/warcs.zsh path/to/FEED [optional overridden destination directory]
@end example
--- /dev/null
+#!/bin/sh -e
+# Regenerate mutt.rc from the feed directories -- but only when its content
+# actually changed, keeping the file's mtime stable -- then start Mutt in
+# mailbox browser mode (-y).
+cmds="$(dirname "$(realpath -- "$0")")"/cmd
+muttrc_their="$($cmds/muttrc-gen.sh)"
+[ -r mutt.rc ] && muttrc_our="$(cat mutt.rc)" || :
+[ "$muttrc_our" = "$muttrc_their" ] || cat > mutt.rc <<EOF
+$muttrc_their
+EOF
+mutt -e "source mutt.rc" -y
+++ /dev/null
-#!/usr/bin/env zsh
-set -e
-muttrc_their=`$0:h/cmd/muttrc-gen.sh`
-[[ -r mutt.rc ]] && muttrc_our=`cat mutt.rc` || :
-[[ "$muttrc_our" = "$muttrc_their" ]] || cat > mutt.rc <<EOF
-$muttrc_their
-EOF
-mutt -e "source mutt.rc" -y
#!/usr/bin/env zsh
set -e
-setopt EXTENDED_GLOB
-for f (feeds/*) {
-	[[ -s $f/max ]] && max=`cat $f/max` || max=${FEEDER_MAX_ITEMS:-100}
-	(( max++ ))
-	[[ $max -eq 1 ]] || rm -fv $f/cur/*(Nom[$max,-1])
-}
+# Per-feed trimming logic now lives in cmd/clear.zsh; apply it to every feed.
+cmds=$0:h:a/cmd
+for f (feeds/*) $cmds/clear.zsh $f
--- /dev/null
+#!/usr/bin/env zsh
+# Download and parse each feed in one parallel pass ("dnp" = download-n-parse).
+cmds=$0:h:a/cmd
+exec $cmds/do-in-parallel.zsh dnp FEEDER_DOWNLOAD_JOBS "$cmds/download.sh {} ; $cmds/parse.sh {}"
#!/usr/bin/env zsh
cmds=$0:h:a/cmd
-fpath=($cmds/functions.zsh $fpath)
-autoload print-joblog-failed
-parallel --joblog download.log "$cmds/download.sh {}" ::: feeds/*
-print-joblog-failed < download.log
+# Shared parallel-run helper; job count is read from $FEEDER_DOWNLOAD_JOBS.
+exec $cmds/do-in-parallel.zsh download FEEDER_DOWNLOAD_JOBS "$cmds/download.sh {}"
#!/usr/bin/env zsh
-dst=$1
cmds=$0:h:a/cmd
-fpath=($cmds/functions.zsh $fpath)
-autoload print-joblog-failed
-parallel --jobs 1 --joblog encs.log "$cmds/download-encs.zsh {} $dst" ::: feeds/*
-print-joblog-failed < encs.log
+# JOBS=1 is passed in the environment; the helper's ${(P)2} dereferences the
+# name "JOBS", keeping enclosure downloads sequential as before.
+JOBS=1 exec $cmds/do-in-parallel.zsh encs JOBS "$cmds/encs.zsh {} $1"
#!/usr/bin/env zsh
cmds=$0:h:a/cmd
-fpath=($cmds/functions.zsh $fpath)
-autoload print-joblog-failed
-parallel --joblog parse.log "$cmds/parse.sh {}" ::: feeds/*
-print-joblog-failed < parse.log
+# Shared parallel-run helper; job count is read from $FEEDER_PARSE_JOBS.
+exec $cmds/do-in-parallel.zsh parse FEEDER_PARSE_JOBS "$cmds/parse.sh {}"
#!/usr/bin/env zsh
-dst=$1
cmds=$0:h:a/cmd
-fpath=($cmds/functions.zsh $fpath)
-autoload print-joblog-failed
-parallel --joblog warcs.log "$cmds/download-warcs.zsh {} $dst" ::: feeds/*
-print-joblog-failed < warcs.log
+# Shared parallel-run helper; job count is read from $FEEDER_DOWNLOAD_JOBS.
+exec $cmds/do-in-parallel.zsh warcs FEEDER_DOWNLOAD_JOBS "$cmds/warcs.zsh {} $1"