#!/usr/bin/env zsh set -e cmds=$0:h:a . $cmds/env.rc fpath=($cmds/functions.zsh $fpath) dst=$2:a cd $1 [[ -n "$dst" ]] || { dst=warcs ; dst=$dst:a } mkdir -p $dst autoload url-to-filename zmodload -F zsh/datetime b:strftime setopt EXTENDED_GLOB wget_opts=( --user-agent="$FEEDER_USER_AGENT" --page-requisites --compression=auto --no-warc-keep-log --no-warc-digests --no-warc-compression ) for new (new/*(N)) { while read line ; do [[ "$line" != "" ]] || break cols=(${(s: :)line}) [[ $cols[1] = "X-URL:" ]] || continue url=$cols[2] [[ -n "$url" ]] [[ -n "$tmp" ]] || { # Lazy temporary file creation tmp=`mktemp` trap "rm -f $tmp" HUP PIPE INT QUIT TERM EXIT wget_opts=(--output-document=$tmp $wget_opts) } fn=$(strftime %Y%m%d-%H%M%S)-$(url-to-filename $url) ${=WGET} $wget_opts --output-file=warcs.log --warc-file=$dst/$fn $url $FEEDER_WARC_COMPRESS $dst/$fn.warc print $dst/$fn.warc* done < $new }