/* tofuproxy -- flexible HTTP/WARC proxy with TLS certificates management Copyright (C) 2021 Sergey Matveev This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 3 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ package warc import ( "bufio" "bytes" "io" "log" "os" "os/exec" "strconv" "strings" "sync" ) type CompressedReader struct { cmd *exec.Cmd fd *os.File stdout io.ReadCloser offsets []Offset offW *os.File offReader sync.WaitGroup } func NewCompressedReader( warcPath, unCmd string, offsets []Offset, uOffset int64, ) (*CompressedReader, error) { var offZ, offU int64 for _, off := range offsets { if uOffset < offU+off.U { break } offU += off.U offZ += off.Z } fd, err := os.Open(warcPath) if err != nil { return nil, err } var dict []byte if len(offsets) > 0 && offsets[0].U == 0 { dict = make([]byte, offsets[0].Z) if _, err = io.ReadFull(fd, dict); err != nil { fd.Close() return nil, err } } if _, err = fd.Seek(offZ, io.SeekStart); err != nil { fd.Close() return nil, err } cmd := exec.Command(unCmd) stdout, err := cmd.StdoutPipe() if err != nil { fd.Close() return nil, err } if dict == nil { cmd.Stdin = fd } else { cmd.Stdin = io.MultiReader(bytes.NewReader(dict), fd) } if offsets == nil { offR, offW, err := os.Pipe() if err != nil { fd.Close() return nil, err } cmd.ExtraFiles = append(cmd.ExtraFiles, offW) err = cmd.Start() if err != nil { fd.Close() offW.Close() return nil, err } r := CompressedReader{ cmd: cmd, fd: fd, stdout: stdout, offW: offW, } r.offReader.Add(1) go r.offsetsReader(offR) return &r, nil } err = cmd.Start() if err != nil { fd.Close() return nil, err } _, err = io.CopyN(io.Discard, stdout, uOffset-offU) if err != nil { cmd.Process.Kill() fd.Close() return nil, err } return &CompressedReader{cmd: cmd, fd: fd, stdout: stdout}, nil } func (r *CompressedReader) offsetsReader(offsets *os.File) { scanner := bufio.NewScanner(offsets) for scanner.Scan() { l := scanner.Text() cols := strings.Split(l, "\t") if len(cols) != 2 { log.Println("len(cols) != 2:", l) continue } z, err := strconv.ParseUint(cols[0], 10, 64) if err != nil { log.Println(err) continue } u, err := strconv.ParseUint(cols[1], 10, 64) if err != nil { log.Println(err) continue } r.offsets = append(r.offsets, Offset{int64(z), int64(u)}) } err := scanner.Err() if err != nil { log.Println(err) } r.offReader.Done() } func (r *CompressedReader) Read(p []byte) (int, error) { return r.stdout.Read(p) } func (r *CompressedReader) Close() error { err := r.cmd.Process.Kill() r.stdout.Close() r.fd.Close() r.offW.Close() r.offReader.Wait() return err } func (r *CompressedReader) Offsets() []Offset { return r.offsets }