2 tofuproxy -- flexible HTTP proxy, TLS terminator, X.509 certificates
3 manager, WARC/Gemini browser
4 Copyright (C) 2021 Sergey Matveev <stargrave@stargrave.org>
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, version 3 of the License.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
31 "github.com/dustin/go-humanize"
32 "go.stargrave.org/tofuproxy/warc"
// Command-line flags.
// NOTE(review): the enclosing function header and the flag parsing call are
// not visible in this excerpt.
36 uri := flag.String("uri", "", "URI to extract, if specified")
37 hdr := flag.Bool("hdr", false, "Also extract WARC's header")
38 idx := flag.Bool("idx", false, "Save WARC indexes")
// The "-for-enzstd" option is held in recompress.
39 recompress := flag.Bool("for-enzstd", false, "Output for enzstd utility")
// Log output carries only the short source file name and line number.
41 log.SetFlags(log.Lshortfile)
// Length-prefixed record output.
// NOTE(review): the condition guarding this section is not visible here;
// presumably it is the -for-enzstd mode -- confirm.
// size is an 8-byte scratch buffer that holds each record's length prefix.
45 size := make([]byte, 8)
// Standard output is wrapped in a buffered writer, flushed further below.
46 bw := bufio.NewWriter(os.Stdout)
// Every positional argument is opened with warc.NewReader.
47 for _, p := range flag.Args() {
48 r, err := warc.NewReader(p)
// ReadRecord returns the record (rec) and a reader (rr) that is copied to
// the output below.
53 rec, rr, err := r.ReadRecord()
// NOTE(review): the loop body is not visible; presumably each header line is
// written into hdr -- confirm.
60 for _, line := range rec.HdrLines {
// hdr here is not the -hdr flag: it is used as a buffer (WriteString, Len,
// and passed by address to io.Copy). Its declaration is not visible in this
// excerpt. A bare CRLF is appended to it.
63 hdr.WriteString("\r\n")
// The length prefix is a big-endian 64-bit value: the buffered header bytes,
// plus rec.Size, plus 4 for the trailing "\r\n\r\n" written below.
64 binary.BigEndian.PutUint64(size, uint64(hdr.Len())+uint64(rec.Size)+4)
// Output order per record: length prefix, header buffer, data read from rr,
// then the "\r\n\r\n" terminator.
65 if _, err = bw.Write(size); err != nil {
68 if _, err = io.Copy(bw, &hdr); err != nil {
71 if _, err = io.Copy(bw, rr); err != nil {
75 if _, err = bw.Write([]byte("\r\n\r\n")); err != nil {
// Push whatever is still buffered to standard output.
80 if err := bw.Flush(); err != nil {
// Register every positional argument with the warc package, logging before
// and after each one.
86 for _, p := range flag.Args() {
87 log.Println("adding", p)
88 if err := warc.Add(p); err != nil {
// The count logged is the number of entries in warc.WARCs for this path.
91 log.Println("added", p, len(warc.WARCs[p]), "URIs")
// NOTE(review): the guard around SaveIndexes is not visible; presumably it is
// the -idx flag -- confirm.
93 if err := warc.SaveIndexes(); err != nil {
// Walk warc.WARCs, which is ranged over as path -> (URI -> record).
// NOTE(review): the statement that prints each entry is not visible; only the
// human-readable size argument is shown.
99 for warcPath, uris := range warc.WARCs {
100 for uri, rec := range uris {
104 humanize.IBytes(uint64(rec.TotalSize())),
// NOTE(review): the lookup that selects rec is not visible; presumably it is
// matched against the -uri flag -- confirm.
109 for _, uris := range warc.WARCs {
// The first argument to rec.Reader is the negation of the -hdr flag.
114 r, err := rec.Reader(!*hdr, warc.WARCsOffsets)
// NOTE(review): the byte count and error returned by io.Copy are discarded
// here, so a failed or short write to standard output goes unreported.
118 io.Copy(os.Stdout, r)