2 tofuproxy -- flexible HTTP/HTTPS proxy, TLS terminator, X.509 TOFU
3 manager, WARC/geminispace browser
4 Copyright (C) 2021-2023 Sergey Matveev <stargrave@stargrave.org>
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, version 3 of the License.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
// IndexExt is the suffix appended to a WARC file path to get the path of
// its gob-encoded index file.
32 const IndexExt = ".idx.gob"
// WARCs maps a WARC file path to that file's index of records, keyed by
// the value returned from Record.URI().
35 WARCs = map[string]map[string]*Record{}
// WARCsOffsets maps a WARC file path to the offsets stored alongside its
// index.
36 WARCsOffsets = map[string][]Offset{}
// Incomplete holds records, keyed by their WARC-Record-ID header, whose
// continuation records are still being collected by Add.
39 Incomplete = map[string]*Record{}
// Add registers the WARC file at warcPath in WARCs and WARCsOffsets.
//
// It opens the saved index at warcPath+IndexExt and gob-decodes the URI map
// and the offsets from it. It also contains a path that reads the WARC file
// itself through NewReader, record by record, building the URI map and
// attaching continuation records to the record they continue.
//
// NOTE(review): many lines of this function are not visible in this excerpt
// (error returns, locking, the loop and case headers, closing braces).
// Presumably the NewReader path runs only when the index file does not
// exist; confirm against the full source. The comments below describe only
// the statements that are shown.
42 func Add(warcPath string) error {
// Try the previously saved index file first.
43 fd, err := os.Open(warcPath + IndexExt)
46 var uris map[string]*Record
// The index file holds two consecutive gob values: the URI map, then the
// offsets.
48 dec := gob.NewDecoder(fd)
49 if err := dec.Decode(&uris); err != nil {
52 if err := dec.Decode(&offsets); err != nil {
56 WARCs[warcPath] = uris
57 WARCsOffsets[warcPath] = offsets
59 log.Println("loaded marshalled index:", warcPath+IndexExt)
// Open errors other than "file does not exist" are singled out here (the
// body of this branch is not shown in this excerpt).
62 if err != nil && !os.IsNotExist(err) {
// Read the WARC file itself.
65 r, err := NewReader(warcPath)
70 uris := map[string]*Record{}
72 rec, _, err := r.ReadRecord()
80 segNum := rec.Hdr.Get("WARC-Segment-Number")
81 switch rec.Hdr.Get("WARC-Type") {
// Remember the record by its WARC-Record-ID so that later continuation
// records can find it through their WARC-Segment-Origin-ID.
88 Incomplete[rec.Hdr.Get("WARC-Record-ID")] = rec
// A continuation must refer to a record already stored in Incomplete.
93 originID := rec.Hdr.Get("WARC-Segment-Origin-ID")
94 incomplete := Incomplete[originID]
95 if incomplete == nil {
96 return fmt.Errorf("can not find WARC-Segment-Origin-ID: %q", originID)
// The expected segment number is the count of continuations collected so
// far plus two; presumably the origin record itself is segment 1.
98 segNumExpected := strconv.Itoa(len(incomplete.Continuations) + 1 + 1)
99 if segNum != segNumExpected {
101 "unexpected WARC-Segment-Number %s != %s",
102 segNum, segNumExpected,
105 incomplete.Continuations = append(incomplete.Continuations, rec)
// A non-empty WARC-Segment-Total-Length header presumably marks the final
// segment. The record is then indexed under the WARC it belongs to: the
// local map when that is this file, the already-registered WARCs entry
// otherwise (NOTE(review): the else branch is not visible; confirm).
106 if rec.Hdr.Get("WARC-Segment-Total-Length") != "" {
107 if incomplete.WARCPath == warcPath {
108 uris[incomplete.URI()] = incomplete
111 WARCs[incomplete.WARCPath][incomplete.URI()] = incomplete
114 delete(Incomplete, originID)
// Publish the freshly built index together with the reader's offsets.
120 WARCs[warcPath] = uris
121 WARCsOffsets[warcPath] = r.offsets
// SaveIndices writes a gob-encoded index file, named warcPath+IndexExt, for
// entries of WARCs. Each index is written to a newly created file and then
// renamed to its final path.
//
// NOTE(review): several lines of this function are not visible in this
// excerpt (the lock acquisition, the OpenFile path and mode arguments, the
// bodies of the error branches, and the end of the function).
126 func SaveIndices() error {
// Release the read lock on WARCsM when the function returns.
128 defer WARCsM.RUnlock()
129 for warcPath, uris := range WARCs {
130 p := warcPath + IndexExt
// Stat succeeding means an index file already exists at p; the body of this
// branch is not shown (presumably the entry is skipped; confirm).
131 if _, err := os.Stat(p); err == nil {
// Suffix for the temporary file name: nanosecond timestamp plus PID, in hex.
134 tmpSuffix := strconv.FormatInt(time.Now().UnixNano()+int64(os.Getpid()), 16)
// O_EXCL makes the open fail if the file already exists.
135 fd, err := os.OpenFile(
137 os.O_WRONLY|os.O_CREATE|os.O_EXCL,
// Encode the URI map first, then the offsets: the same order in which Add
// decodes them.
143 enc := gob.NewEncoder(fd)
144 if err = enc.Encode(&uris); err != nil {
148 offsets := WARCsOffsets[warcPath]
149 if err = enc.Encode(&offsets); err != nil {
// Close the file before renaming it to its final path.
153 if err = fd.Close(); err != nil {
156 if err = os.Rename(p+tmpSuffix, p); err != nil {
159 log.Println("saved:", p)