#!/usr/bin/env zsh set -e fpath=($0:h:a/functions.zsh $fpath) dst=$2:a cd $1 autoload url-to-filename zmodload -F zsh/datetime b:strftime setopt EXTENDED_GLOB wget_opts=( --page-requisites --compression=auto --no-warc-keep-log --no-warc-digests --no-warc-compression ) for new (new/*(N)) { while read line ; do [[ "$line" != "" ]] || break cols=(${(s: :)line}) [[ $cols[1] = "X-URL:" ]] || continue url=$cols[2] [[ -n "$url" ]] [[ -n "$tmp" ]] || { # Lazy temporary file creation tmp=`mktemp` trap "rm -f $tmp" HUP PIPE INT QUIT TERM EXIT wget_opts=(--output-document=$tmp $wget_opts) } fn=$(url-to-filename $url)-$(strftime %Y%m%d-%H%M%S) wget $wget_opts --output-file=warc.log --warc-file=$dst/$fn $url print $fn done < $new }