--- /dev/null
+/*
+tofuproxy -- flexible HTTP/WARC proxy with TLS certificates management
+Copyright (C) 2021 Sergey Matveev <stargrave@stargrave.org>
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, version 3 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package main
+
+import (
+ "flag"
+ "fmt"
+ "io"
+ "log"
+ "os"
+
+ "github.com/dustin/go-humanize"
+ "go.stargrave.org/tofuproxy/warc"
+)
+
+// main adds every WARC path given as a positional argument via warc.Add and
+// then works in one of two modes.
+//
+// When -uri is empty it prints one tab-separated line per known record:
+// WARC path, URI and the record's total size in human-readable form.
+// Otherwise it copies to stdout the record stored under that URI in each
+// loaded WARC that has one.
+//
+// With -idx, warc.SaveIndexes is called after each successfully added WARC.
+// The -hdr flag is passed negated to rec.Reader; per the flag's help text it
+// requests that the WARC header is extracted as well.
+//
+// Every error, including a failed copy to stdout, terminates the program
+// through log.Fatalln.
+func main() {
+	uri := flag.String("uri", "", "URI to extract, if specified")
+	hdr := flag.Bool("hdr", false, "Also extract WARC's header")
+	idx := flag.Bool("idx", false, "Save WARC indexes")
+	flag.Parse()
+	log.SetFlags(log.Lshortfile)
+
+	for _, p := range flag.Args() {
+		log.Println("adding", p)
+		if err := warc.Add(p); err != nil {
+			log.Fatalln(err)
+		}
+		log.Println("added", p, len(warc.WARCs[p]), "URIs")
+		if *idx {
+			// Saved after each file, so a failure on a later path still
+			// leaves the save for the earlier ones done.
+			if err := warc.SaveIndexes(); err != nil {
+				log.Fatalln(err)
+			}
+		}
+	}
+
+	if *uri == "" {
+		// Listing mode. Go does not specify map iteration order, so the
+		// order of the printed lines may differ between runs.
+		for warcPath, uris := range warc.WARCs {
+			for u, rec := range uris {
+				fmt.Printf(
+					"%s\t%s\t%s\n",
+					warcPath, u,
+					humanize.IBytes(uint64(rec.TotalSize())),
+				)
+			}
+		}
+		return
+	}
+
+	// Extraction mode: the URI may be present in several WARCs; each match
+	// is written to stdout in turn.
+	for _, uris := range warc.WARCs {
+		rec := uris[*uri]
+		if rec == nil {
+			continue
+		}
+		r, err := rec.Reader(!*hdr)
+		if err != nil {
+			log.Fatalln(err)
+		}
+		// A failed read or write would otherwise leave silently truncated
+		// output with a successful exit status.
+		if _, err := io.Copy(os.Stdout, r); err != nil {
+			log.Fatalln(err)
+		}
+	}
+}