2 tofuproxy -- flexible HTTP/HTTPS proxy, TLS terminator, X.509 TOFU
3 manager, WARC/geminispace browser
4 Copyright (C) 2021-2023 Sergey Matveev <stargrave@stargrave.org>
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, version 3 of the License.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
// IndexExt is the suffix appended to a WARC file's path to form the path of
// its saved, gob-encoded index file.
34 const IndexExt = ".idx.gob"
// WARCs holds, for each WARC file path, that file's records keyed by URI.
37 WARCs = map[string]map[string]*Record{}
// WARCsOffsets holds, for each WARC file path, the offsets slice that was
// either decoded from the saved index or taken from the reader that scanned
// the file.
38 WARCsOffsets = map[string][]Offset{}
// Incomplete holds segmented records that are still collecting their
// continuation records, keyed by the WARC-Record-ID of the initial record.
// Entries are removed once the final continuation has been attached.
41 Incomplete = map[string]*Record{}
// Add registers the WARC file at warcPath in WARCs and WARCsOffsets.
//
// It first tries to load a previously saved index from warcPath+IndexExt.
// If that index file does not exist, the WARC itself is read record by
// record and the index is built in memory. Continuation records are
// attached to their originating record, which is tracked in Incomplete.
//
// NOTE(review): several lines of this function are not visible in this view
// (returns, case labels, locking), so the comments below describe only the
// statements that are shown.
44 func Add(warcPath string) error {
// Fast path: attempt to open the saved index next to the WARC file.
45 fd, err := os.Open(warcPath + IndexExt)
48 var uris map[string]*Record
// The index file holds two consecutive gob values: the URI map first, then
// the offsets. They must be decoded in that same order.
50 dec := gob.NewDecoder(fd)
51 if err := dec.Decode(&uris); err != nil {
54 if err := dec.Decode(&offsets); err != nil {
58 WARCs[warcPath] = uris
59 WARCsOffsets[warcPath] = offsets
61 log.Println("loaded marshalled index:", warcPath+IndexExt)
// A missing index file is expected and is not an error by itself; any other
// open failure is singled out here (presumably returned -- the branch body is
// not visible).
64 if err != nil && !errors.Is(err, fs.ErrNotExist) {
// Slow path: scan the WARC file itself.
67 r, err := NewReader(warcPath)
72 uris := map[string]*Record{}
74 rec, _, err := r.ReadRecord()
82 segNum := rec.Hdr.Get("WARC-Segment-Number")
// Dispatch on the record type (the case labels are not visible here).
83 switch rec.Hdr.Get("WARC-Type") {
// Remember this record by its ID so later continuation records can find it.
90 Incomplete[rec.Hdr.Get("WARC-Record-ID")] = rec
// Continuation handling: locate the record this segment belongs to.
95 originID := rec.Hdr.Get("WARC-Segment-Origin-ID")
96 incomplete := Incomplete[originID]
97 if incomplete == nil {
98 return fmt.Errorf("can not find WARC-Segment-Origin-ID: %q", originID)
// The expected segment number is the count of continuations already attached
// plus two: one for the initial record and one for this new segment.
100 segNumExpected := strconv.Itoa(len(incomplete.Continuations) + 1 + 1)
101 if segNum != segNumExpected {
103 "unexpected WARC-Segment-Number %s != %s",
104 segNum, segNumExpected,
107 incomplete.Continuations = append(incomplete.Continuations, rec)
// A non-empty WARC-Segment-Total-Length is treated as the end of the
// segmented record: the assembled record is published and no longer tracked.
108 if rec.Hdr.Get("WARC-Segment-Total-Length") != "" {
// If the originating record came from the file being scanned, it goes into
// the local map; otherwise it is stored under the WARC it originated from.
109 if incomplete.WARCPath == warcPath {
110 uris[incomplete.URI()] = incomplete
113 WARCs[incomplete.WARCPath][incomplete.URI()] = incomplete
116 delete(Incomplete, originID)
// Publish the freshly built index for this WARC.
122 WARCs[warcPath] = uris
123 WARCsOffsets[warcPath] = r.offsets
// SaveIndices writes an index file (warcPath+IndexExt) for the entries of
// WARCs. Each index is written to a temporary file first and then renamed
// into place, so a partially written index never appears under the final
// name.
//
// NOTE(review): some lines of this function are not visible in this view
// (error returns, the body of the os.Stat check, the function's end), so the
// comments below describe only the statements that are shown.
128 func SaveIndices() error {
// Releases the read lock on return; presumably paired with a WARCsM.RLock()
// on the preceding line, which is not visible here.
130 defer WARCsM.RUnlock()
131 for warcPath, uris := range WARCs {
132 p := warcPath + IndexExt
// An index file already exists at p (presumably this entry is then skipped --
// the branch body is not visible).
133 if _, err := os.Stat(p); err == nil {
// Temporary-file suffix derived from the current time in nanoseconds plus the
// process ID, formatted in hexadecimal.
136 tmpSuffix := strconv.FormatInt(time.Now().UnixNano()+int64(os.Getpid()), 16)
// O_EXCL makes the open fail if the temporary file already exists.
137 fd, err := os.OpenFile(
139 os.O_WRONLY|os.O_CREATE|os.O_EXCL,
// Write the URI map first and the offsets second, which is the order Add
// decodes them in.
145 enc := gob.NewEncoder(fd)
146 if err = enc.Encode(&uris); err != nil {
150 offsets := WARCsOffsets[warcPath]
151 if err = enc.Encode(&offsets); err != nil {
155 if err = fd.Close(); err != nil {
// Move the completed temporary file to its final name.
158 if err = os.Rename(p+tmpSuffix, p); err != nil {
161 log.Println("saved:", p)