-/*
-tofuproxy -- flexible HTTP/WARC proxy with TLS certificates management
-Copyright (C) 2021 Sergey Matveev <stargrave@stargrave.org>
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation, version 3 of the License.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+// tofuproxy -- flexible HTTP/HTTPS proxy, TLS terminator, X.509 TOFU
+// manager, WARC/geminispace browser
+// Copyright (C) 2021-2024 Sergey Matveev <stargrave@stargrave.org>
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 3 of the License.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
package warc
+// Reader reads WARC records one after another from the file named by Path.
type Reader struct {
Path string
- r *bufio.Reader
- rsc io.ReadSeekCloser
+ // rrr is the raw record source returned by Open; br buffers reads from it.
+ rrr RawRecordReader
+ br *bufio.Reader
+ // offset is copied into Record.Offset by ReadRecord.
offset int64
+ // prevRec is the record produced by the last successful ReadRecord.
prevRec *Record
+ // offsets is filled from rrr.Offsets() when the Reader is closed.
+ offsets []Offset
}
+// NewReader opens the WARC file at warcPath with Open and returns a Reader
+// that reads it through a bufio.Reader. An error from Open is returned
+// unchanged together with a nil Reader.
+// NOTE(review): the meaning of the nil and 0 arguments passed to Open is
+// not visible here -- confirm against Open's definition.
func NewReader(warcPath string) (*Reader, error) {
- rsc, err := Open(warcPath)
+ rrr, err := Open(warcPath, nil, 0)
if err != nil {
return nil, err
}
return &Reader{
Path: warcPath,
- rsc: rsc,
- r: bufio.NewReader(rsc),
+ rrr: rrr,
+ br: bufio.NewReader(rrr),
}, nil
}
+ // Nothing to skip when no record has been returned yet.
if r.prevRec == nil {
return nil
}
+ // Skip prevRec.Size bytes of the previous record in the buffered reader.
+ // NOTE(review): int() narrows the int64 Size; on platforms with a 32-bit
+ // int a very large record would be truncated -- confirm this is acceptable.
- if _, err := r.r.Discard(int(r.prevRec.Size)); err != nil {
+ if _, err := r.br.Discard(int(r.prevRec.Size)); err != nil {
return err
}
+ // Advance the running offset past the previous header and body.
r.offset += int64(r.prevRec.HdrLen) + r.prevRec.Size
+ // Read two more lines; presumably the CRLF pair that follows a record
+ // (what is done with each line is not visible in this fragment).
for i := 0; i < 2; i++ {
- line, err := r.r.ReadString('\n')
+ line, err := r.br.ReadString('\n')
if err != nil {
return err
}
return nil
}
+// ReadRecord moves past the previously returned record, reads the header of
+// the following one and returns it together with a reader over its body.
+//
+// The returned io.Reader is an io.LimitedReader over the Reader's own
+// buffered stream, limited to Content-Length bytes, so it is only meaningful
+// until the next ReadRecord call.
+// NOTE(review): the skip step still discards prevRec.Size bytes, so a caller
+// that consumed the body itself presumably has to call RecordWasRead before
+// the next ReadRecord -- confirm.
+//
+// An error is returned when skipping the previous record fails, when a line
+// cannot be read, when the first line does not start with "WARC/", or when
+// Content-Length is not a valid unsigned integer.
-func (r *Reader) ReadRecord() (*Record, error) {
+func (r *Reader) ReadRecord() (*Record, io.Reader, error) {
- r.next()
+ // Do not keep parsing from an unknown position if the skip failed.
+ if err := r.next(); err != nil {
+ return nil, nil, err
+ }
- line, err := r.r.ReadString('\n')
+ line, err := r.br.ReadString('\n')
if err != nil {
- return nil, err
+ return nil, nil, err
}
if !strings.HasPrefix(line, "WARC/") {
- return nil, fmt.Errorf("non-WARC header: %q", line)
+ return nil, nil, fmt.Errorf("non-WARC header: %q", line)
}
+ hdrLines := []string{line}
hdrLen := len(line)
hdr := NewHeader()
for {
- line, err := r.r.ReadString('\n')
+ line, err := r.br.ReadString('\n')
if err != nil {
- return nil, err
+ return nil, nil, err
}
hdrLen += len(line)
if line == CRLF {
break
}
+ hdrLines = append(hdrLines, line)
hdr.AddLine(line)
}
size, err := strconv.ParseUint(hdr.Get("Content-Length"), 10, 64)
if err != nil {
- return nil, err
+ return nil, nil, err
}
rec := &Record{
WARCPath: r.Path,
Offset: r.offset,
+ Size: int64(size),
Hdr: hdr,
HdrLen: hdrLen,
- Size: int64(size),
+ HdrLines: hdrLines,
}
r.prevRec = rec
- return rec, nil
+ return rec, &io.LimitedReader{R: r.br, N: int64(size)}, nil
+}
+
+// RecordWasRead zeroes HdrLen and Size of the record last returned by
+// ReadRecord. That is the same *Record the caller holds, so the caller's
+// copy changes too. It does nothing when no record has been returned yet.
+// NOTE(review): presumably meant to be called after the caller has consumed
+// the record body itself, so that the skip step discards nothing -- confirm.
+func (r *Reader) RecordWasRead() {
+ // Guard against a call made before any successful ReadRecord.
+ if r.prevRec == nil {
+ return
+ }
+ r.prevRec.HdrLen = 0
+ r.prevRec.Size = 0
}
+// Close closes the underlying raw record reader, stores the result of its
+// Offsets() call in r.offsets, and returns the error from Close.
func (r *Reader) Close() error {
- return r.rsc.Close()
+ err := r.rrr.Close()
+ // Offsets() is queried after Close even when Close failed.
+ // NOTE(review): presumably the offsets are only complete once the source
+ // has been closed -- confirm against RawRecordReader.
+ r.offsets = r.rrr.Offsets()
+ return err
}