/* tofuproxy -- flexible HTTP/WARC proxy with TLS certificates management Copyright (C) 2021 Sergey Matveev This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 3 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ package main import ( "flag" "fmt" "io" "log" "os" "github.com/dustin/go-humanize" "go.stargrave.org/tofuproxy/warc" ) func main() { uri := flag.String("uri", "", "URI to extract, if specified") hdr := flag.Bool("hdr", false, "Also extract WARC's header") idx := flag.Bool("idx", false, "Save WARC indexes") flag.Parse() log.SetFlags(log.Lshortfile) for _, p := range flag.Args() { log.Println("adding", p) if err := warc.Add(p); err != nil { log.Fatalln(err) } log.Println("added", p, len(warc.WARCs[p]), "URIs") if *idx { if err := warc.SaveIndexes(); err != nil { log.Fatalln(err) } } } if *uri == "" { for warcPath, uris := range warc.WARCs { for uri, rec := range uris { fmt.Printf( "%s\t%s\t%s\n", warcPath, uri, humanize.IBytes(uint64(rec.TotalSize())), ) } } } else { for _, uris := range warc.WARCs { rec := uris[*uri] if rec == nil { continue } r, err := rec.Reader(!*hdr) if err != nil { log.Fatalln(err) } io.Copy(os.Stdout, r) } } return }