2 tofuproxy -- flexible HTTP/WARC proxy with TLS certificates management
3 Copyright (C) 2021 Sergey Matveev <stargrave@stargrave.org>
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, version 3 of the License.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
// IndexExt is the suffix appended to a WARC path to form the name of its
// gob-encoded index file (see its use with os.Open and gob in Add).
31 const IndexExt = ".idx.gob"
// WARCs maps a WARC path to that file's records; the inner map is filled
// by Add with entries keyed by Record.URI().
34 WARCs = map[string]map[string]*Record{}
// WARCsOffsets maps a WARC path to its []Offset slice, either decoded from
// the saved index or taken from the reader's offsets field.
35 WARCsOffsets = map[string][]Offset{}
// Incomplete holds records keyed by their WARC-Record-ID header while Add
// is still appending continuation records to them; Add deletes the entry
// once a continuation carrying WARC-Segment-Total-Length has been seen.
38 Incomplete = map[string]*Record{}
// Add registers the WARC file at warcPath in WARCs and WARCsOffsets.
//
// It first tries to open the saved index at warcPath+IndexExt and decode it
// with gob. The remaining visible lines read the WARC itself through
// NewReader, build the URI map record by record and reassemble segmented
// records via Incomplete.
//
// NOTE(review): several lines of this function (branch openers, returns,
// case labels, the loop header) are not visible in this view, so the exact
// control flow between the steps below should be confirmed against the
// full file.
41 func Add(warcPath string) error {
// Try the previously saved index first.
42 fd, err := os.Open(warcPath + IndexExt)
45 var uris map[string]*Record
// The index file holds two consecutive gob values: the URI map, then the
// offsets, the same order in which SaveIndexes encodes them.
47 dec := gob.NewDecoder(fd)
48 if err := dec.Decode(&uris); err != nil {
51 if err := dec.Decode(&offsets); err != nil {
55 WARCs[warcPath] = uris
56 WARCsOffsets[warcPath] = offsets
58 log.Println("loaded marshalled index:", warcPath+IndexExt)
// An open error other than "file does not exist" is singled out here;
// presumably it is returned to the caller (the branch body is not visible).
61 if err != nil && !os.IsNotExist(err) {
// Read the WARC itself.
64 r, err := NewReader(warcPath)
69 uris := map[string]*Record{}
// The second result of ReadRecord is discarded.
71 rec, _, err := r.ReadRecord()
79 segNum := rec.Hdr.Get("WARC-Segment-Number")
// Dispatch on the record type; the case labels are not visible in this view.
80 switch rec.Hdr.Get("WARC-Type") {
// Remember this record under its own ID so that later continuation records
// can be attached to it.
87 Incomplete[rec.Hdr.Get("WARC-Record-ID")] = rec
// A continuation names the record it belongs to through
// WARC-Segment-Origin-ID; that origin must already be in Incomplete.
92 originID := rec.Hdr.Get("WARC-Segment-Origin-ID")
93 incomplete := Incomplete[originID]
94 if incomplete == nil {
95 return fmt.Errorf("can not find WARC-Segment-Origin-ID: %q", originID)
// Expected number is the count of continuations collected so far plus two
// (presumably: the origin record is segment 1 and this record is the next
// one; confirm against the WARC specification).
97 segNumExpected := strconv.Itoa(len(incomplete.Continuations) + 1 + 1)
98 if segNum != segNumExpected {
100 "unexpected WARC-Segment-Number %s != %s",
101 segNum, segNumExpected,
104 incomplete.Continuations = append(incomplete.Continuations, rec)
// A non-empty WARC-Segment-Total-Length presumably marks the final segment.
// The assembled record is then stored under its URI: in the local map when
// it originated in this WARC, and in the global map of its own WARCPath
// (presumably in the else branch, which is not visible here).
105 if rec.Hdr.Get("WARC-Segment-Total-Length") != "" {
106 if incomplete.WARCPath == warcPath {
107 uris[incomplete.URI()] = incomplete
110 WARCs[incomplete.WARCPath][incomplete.URI()] = incomplete
// The record is no longer pending.
113 delete(Incomplete, originID)
// Publish the freshly built URI map together with the reader's offsets.
119 WARCs[warcPath] = uris
120 WARCsOffsets[warcPath] = r.offsets
// SaveIndexes iterates over WARCs and, for each WARC path, writes a
// gob-encoded index file named warcPath+IndexExt containing the URI map
// followed by the offsets from WARCsOffsets. The data is written to a
// temporary file that is renamed to the final name after it has been closed.
//
// NOTE(review): the lock acquisition, the error returns and the end of the
// function are not visible in this view.
125 func SaveIndexes() error {
// Releases the read lock on WARCsM when the function returns; the matching
// lock call is not visible here.
127 defer WARCsM.RUnlock()
128 for warcPath, uris := range WARCs {
129 p := warcPath + IndexExt
// Stat succeeds when an index file already exists at p; presumably such
// entries are skipped (the branch body is not visible).
130 if _, err := os.Stat(p); err == nil {
// Temporary-name suffix: current Unix time in nanoseconds plus the process
// ID, formatted in hexadecimal.
133 tmpSuffix := strconv.FormatInt(time.Now().UnixNano()+int64(os.Getpid()), 16)
134 fd, err := os.OpenFile(
// O_EXCL together with O_CREATE makes the open fail if the file already exists.
136 os.O_WRONLY|os.O_CREATE|os.O_EXCL,
// Encode the URI map first and the offsets second, the same order in which
// Add decodes them.
142 enc := gob.NewEncoder(fd)
143 if err = enc.Encode(&uris); err != nil {
147 offsets := WARCsOffsets[warcPath]
148 if err = enc.Encode(&offsets); err != nil {
152 if err = fd.Close(); err != nil {
// Move the closed temporary file to its final name.
155 if err = os.Rename(p+tmpSuffix, p); err != nil {
158 log.Println("saved:", p)