// tofuproxy -- flexible HTTP/HTTPS proxy, TLS terminator, X.509 TOFU // manager, WARC/geminispace browser // Copyright (C) 2021-2024 Sergey Matveev // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, version 3 of the License. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program. If not, see . package warc import ( "bufio" "compress/gzip" "io" "os" ) type CountableReader struct { *bufio.Reader n int64 } func (r *CountableReader) ReadByte() (b byte, err error) { b, err = r.Reader.ReadByte() if err == nil { r.n++ } return b, err } func (r *CountableReader) Read(p []byte) (n int, err error) { n, err = r.Reader.Read(p) r.n += int64(n) return } type GZIPReader struct { fd *os.File r io.Reader offsets []Offset } func (r *GZIPReader) Read(p []byte) (n int, err error) { return r.r.Read(p) } func (r *GZIPReader) Close() error { return r.fd.Close() } func (r *GZIPReader) Offsets() []Offset { return r.offsets } func NewGZIPReader( warcPath string, offsets []Offset, uOffset int64, ) (*GZIPReader, error) { var offZ, offU int64 for _, off := range offsets { if uOffset < offU+off.U { break } offU += off.U offZ += off.Z } fd, err := os.Open(warcPath) if err != nil { return nil, err } if _, err = fd.Seek(offZ, io.SeekStart); err != nil { fd.Close() return nil, err } cr := &CountableReader{Reader: bufio.NewReader(fd)} z, err := gzip.NewReader(cr) if err != nil { fd.Close() return nil, err } r, w := io.Pipe() gr := GZIPReader{r: r} go func() { z.Multistream(false) var offset, offsetPrev int64 for { written, err := io.Copy(w, z) if err != nil { w.CloseWithError(err) return } offset = cr.n gr.offsets = append(gr.offsets, Offset{offset - offsetPrev, written}) offsetPrev = offset err = z.Reset(cr) if err != nil { if err == io.EOF { break } w.CloseWithError(err) return } z.Multistream(false) } w.CloseWithError(io.EOF) }() _, err = io.CopyN(io.Discard, r, uOffset-offU) if err != nil { fd.Close() return nil, err } return &gr, nil }