/* tofuproxy -- flexible HTTP/WARC proxy with TLS certificates management Copyright (C) 2021 Sergey Matveev This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 3 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ package warc import ( "compress/gzip" "fmt" "io" "os" "path" "github.com/klauspost/compress/zstd" ) type Compressed struct { r io.ReadCloser fd *os.File offset int64 } func (c *Compressed) Read(p []byte) (int, error) { n, err := c.r.Read(p) c.offset += int64(n) return n, err } func (c *Compressed) Close() error { c.r.Close() return c.fd.Close() } func (c *Compressed) Seek(offset int64, whence int) (int64, error) { if whence != io.SeekStart { panic("can only seek from the start") } if _, err := io.CopyN(io.Discard, c, offset-c.offset); err != nil { return 0, err } c.offset = offset return c.offset, nil } func Open(warcPath string) (io.ReadSeekCloser, error) { ext := path.Ext(warcPath) switch ext { case ".warc": return os.Open(warcPath) case ".gz": fd, err := os.Open(warcPath) if err != nil { return nil, err } gzr, err := gzip.NewReader(fd) if err != nil { return nil, err } gzr.Multistream(true) return &Compressed{r: gzr, fd: fd}, nil case ".zst": fd, err := os.Open(warcPath) if err != nil { return nil, err } zstdr, err := zstd.NewReader(fd) if err != nil { return nil, err } return &Compressed{r: zstdr.IOReadCloser(), fd: fd}, nil } return nil, fmt.Errorf("unknown extensions: %s", ext) }