#!/usr/bin/env zsh set -e fpath=($0:h:a/functions.zsh $fpath) dst=$2:a cd $1 [[ -n "$dst" ]] || { dst=warcs ; dst=$dst:a } mkdir -p $dst autoload url-to-filename zmodload -F zsh/datetime b:strftime setopt EXTENDED_GLOB wget_opts=( --page-requisites --compression=auto --no-warc-keep-log --no-warc-digests --no-warc-compression ) for new (new/*(N)) { while read line ; do [[ "$line" != "" ]] || break cols=(${(s: :)line}) [[ $cols[1] = "X-URL:" ]] || continue url=$cols[2] [[ -n "$url" ]] [[ -n "$tmp" ]] || { # Lazy temporary file creation tmp=`mktemp` trap "rm -f $tmp" HUP PIPE INT QUIT TERM EXIT wget_opts=(--output-document=$tmp $wget_opts) } fn=$(strftime %Y%m%d-%H%M%S)-$(url-to-filename $url) wget $wget_opts --output-file=warcs.log --warc-file=$dst/$fn $url print $dst/$fn.warc done < $new }