2 tofuproxy -- flexible HTTP/HTTPS proxy, TLS terminator, X.509 TOFU
3 manager, WARC/geminispace browser
4 Copyright (C) 2021-2023 Sergey Matveev <stargrave@stargrave.org>
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, version 3 of the License.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
// NewReader returns a Reader for the file at warcPath. The file is opened
// through Open and all subsequent parsing goes through a bufio.Reader
// layered on top of the opened stream.
40 func NewReader(warcPath string) (*Reader, error) {
// NOTE(review): the nil and 0 arguments presumably mean "no known offsets,
// start at the beginning" — confirm against Open's signature.
41 rrr, err := Open(warcPath, nil, 0)
// Buffered view over rrr; the ReadString/Discard calls in the other methods
// operate on this reader.
48 br: bufio.NewReader(rrr),
// next advances the buffered reader beyond the previous record and updates
// r.offset to account for every byte it skips or consumes.
52 func (r *Reader) next() error {
// Drop prevRec.Size bytes of the previous record from the buffered reader.
// NOTE(review): Size is narrowed to int for Discard; where int is 32 bits a
// value above 2 GiB would not survive the conversion — confirm this is
// acceptable for the supported targets.
56 if _, err := r.br.Discard(int(r.prevRec.Size)); err != nil {
// The offset grows by the previous record's header length plus its size.
59 r.offset += int64(r.prevRec.HdrLen) + r.prevRec.Size
// Two lines are read after the record; the length of each line read is
// added to the offset.
60 for i := 0; i < 2; i++ {
61 line, err := r.br.ReadString('\n')
65 r.offset += int64(len(line))
// NOTE(review): per the message, this is the rejection path for a line that
// is not a bare CRLF — the guarding condition should be confirmed.
67 return fmt.Errorf("non-CRLF: %q", line)
// ReadRecord reads a record header from the buffered reader and returns the
// record together with an io.Reader limited to the length given by the
// header's Content-Length field. On a malformed header it returns a non-nil
// error with nil record and reader.
73 func (r *Reader) ReadRecord() (*Record, io.Reader, error) {
// The first line of a record must begin with "WARC/".
75 line, err := r.br.ReadString('\n')
79 if !strings.HasPrefix(line, "WARC/") {
80 return nil, nil, fmt.Errorf("non-WARC header: %q", line)
// Header lines are accumulated verbatim, beginning with the version line.
82 hdrLines := []string{line}
86 line, err := r.br.ReadString('\n')
94 hdrLines = append(hdrLines, line)
// Content-Length is parsed as an unsigned base-10 integer of up to 64 bits.
97 size, err := strconv.ParseUint(hdr.Get("Content-Length"), 10, 64)
// The returned reader reads directly from r.br, capped at size bytes.
// NOTE(review): size is unsigned; a value above math.MaxInt64 would turn
// negative in the int64 conversion — confirm whether that needs a guard.
110 return rec, &io.LimitedReader{R: r.br, N: int64(size)}, nil
// RecordWasRead takes no arguments and returns nothing.
// NOTE(review): judging by the name, it presumably tells the Reader that the
// caller has already consumed the current record's body so it need not be
// skipped again — confirm against the method body.
113 func (r *Reader) RecordWasRead() {
// Close records the offsets reported by the underlying rrr in r.offsets and
// returns an error value.
// NOTE(review): presumably rrr itself is closed here as well — confirm.
118 func (r *Reader) Close() error {
// Keep the offsets reported by rrr on the Reader itself.
120 r.offsets = r.rrr.Offsets()