X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=warc%2Frecord.go;h=38181ab92724bc509f05402a87846624a5efe365;hb=aebff9ec61ff98c73f83f73f081401038699d37f;hp=2dd123d3c972a515fb78a5705b507e7097110178;hpb=0c0a261a6ef4fddfc34a9150005f7964cc69c420;p=tofuproxy.git diff --git a/warc/record.go b/warc/record.go index 2dd123d..38181ab 100644 --- a/warc/record.go +++ b/warc/record.go @@ -1,19 +1,18 @@ -/* -tofuproxy -- flexible HTTP/WARC proxy with TLS certificates management -Copyright (C) 2021 Sergey Matveev - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, version 3 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ +// tofuproxy -- flexible HTTP/HTTPS proxy, TLS terminator, X.509 TOFU +// manager, WARC/geminispace browser +// Copyright (C) 2021-2024 Sergey Matveev +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 3 of the License. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. 
package warc @@ -25,9 +24,11 @@ import ( type Record struct { WARCPath string Offset int64 + Size int64 + Hdr Header HdrLen int - Size int64 + HdrLines []string Continuations []*Record } @@ -45,12 +46,12 @@ func (rec *Record) TotalSize() int64 { } type SelfRecordReader struct { - r *io.LimitedReader - rsc io.ReadSeekCloser + lr *io.LimitedReader + rrr io.ReadCloser } func (srr *SelfRecordReader) Read(p []byte) (n int, err error) { - n, err = srr.r.Read(p) + n, err = srr.lr.Read(p) if err != nil { srr.Close() } @@ -58,23 +59,23 @@ func (srr *SelfRecordReader) Read(p []byte) (n int, err error) { } func (srr *SelfRecordReader) Close() error { - return srr.rsc.Close() + return srr.rrr.Close() } -func (rec *Record) selfReader(noHdr bool) (*SelfRecordReader, error) { - rsc, err := Open(rec.WARCPath) - if err != nil { - return nil, err - } +func (rec *Record) selfReader(noHdr bool, offsets []Offset) (*SelfRecordReader, error) { offset := rec.Offset if noHdr { offset += int64(rec.HdrLen) } - if _, err = rsc.Seek(offset, io.SeekStart); err != nil { - rsc.Close() + rrr, err := Open(rec.WARCPath, offsets, offset) + if err != nil { return nil, err } - return &SelfRecordReader{r: &io.LimitedReader{R: rsc, N: rec.Size}, rsc: rsc}, nil + size := rec.Size + if !noHdr { + size += int64(rec.HdrLen) + } + return &SelfRecordReader{lr: &io.LimitedReader{R: rrr, N: size}, rrr: rrr}, nil } type RecordReader struct { @@ -82,14 +83,17 @@ type RecordReader struct { srrs []*SelfRecordReader } -func (rec *Record) Reader(noHdr bool) (*RecordReader, error) { +func (rec *Record) Reader( + noHdr bool, + warcOffsets map[string][]Offset, +) (*RecordReader, error) { srrs := make([]*SelfRecordReader, 0, 1+len(rec.Continuations)) rs := make([]io.Reader, 0, 1+len(rec.Continuations)) for i, r := range append([]*Record{rec}, rec.Continuations...) 
{ if i > 0 { noHdr = true } - srr, err := r.selfReader(noHdr) + srr, err := r.selfReader(noHdr, warcOffsets[rec.WARCPath]) if err != nil { for _, srr := range srrs { srr.Close()