-/*
-tofuproxy -- flexible HTTP/WARC proxy with TLS certificates management
-Copyright (C) 2021 Sergey Matveev <stargrave@stargrave.org>
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation, version 3 of the License.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+// tofuproxy -- flexible HTTP/HTTPS proxy, TLS terminator, X.509 TOFU
+// manager, WARC/geminispace browser
+// Copyright (C) 2021-2024 Sergey Matveev <stargrave@stargrave.org>
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 3 of the License.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
package warc
import (
"encoding/gob"
+ "errors"
"fmt"
"io"
+ "io/fs"
"log"
"os"
"strconv"
"sync"
+ "time"
)
// IndexExt is the filename suffix appended to a WARC file's path to name
// its gob-encoded index file.
const IndexExt = ".idx.gob"
// Package-level index state, described as it stands after this change:
//
// WARCs maps a WARC file path to that file's index of records keyed by URI.
// WARCsOffsets maps a WARC file path to the offsets stored with its index.
// WARCsM guards WARCs and WARCsOffsets.
// Incomplete holds records that are still collecting continuation segments;
// it appears to be keyed by the originating record's ID -- confirm against
// the code that inserts into it.
var (
- WARCs = map[string]map[string]*Record{}
- WARCsM sync.RWMutex
+ WARCs = map[string]map[string]*Record{}
+ WARCsOffsets = map[string][]Offset{}
+ WARCsM sync.RWMutex
Incomplete = map[string]*Record{}
)
if err == nil {
defer fd.Close()
var uris map[string]*Record
- if err := gob.NewDecoder(fd).Decode(&uris); err != nil {
+ var offsets []Offset
+ dec := gob.NewDecoder(fd)
+ if err := dec.Decode(&uris); err != nil {
+ return err
+ }
+ if err := dec.Decode(&offsets); err != nil {
return err
}
WARCsM.Lock()
WARCs[warcPath] = uris
+ WARCsOffsets[warcPath] = offsets
WARCsM.Unlock()
+ log.Println("loaded marshalled index:", warcPath+IndexExt)
return nil
}
- if err != nil && !os.IsNotExist(err) {
+ if err != nil && !errors.Is(err, fs.ErrNotExist) {
return err
}
r, err := NewReader(warcPath)
defer r.Close()
uris := map[string]*Record{}
for {
- rec, err := r.ReadRecord()
+ rec, _, err := r.ReadRecord()
if err != nil {
if err == io.EOF {
break
}
return err
}
+ rec.HdrLines = nil
segNum := rec.Hdr.Get("WARC-Segment-Number")
switch rec.Hdr.Get("WARC-Type") {
case "response":
}
incomplete.Continuations = append(incomplete.Continuations, rec)
if rec.Hdr.Get("WARC-Segment-Total-Length") != "" {
- WARCsM.Lock()
- WARCs[incomplete.WARCPath][incomplete.URI()] = incomplete
- WARCsM.Unlock()
+ if incomplete.WARCPath == warcPath {
+ uris[incomplete.URI()] = incomplete
+ } else {
+ WARCsM.Lock()
+ WARCs[incomplete.WARCPath][incomplete.URI()] = incomplete
+ WARCsM.Unlock()
+ }
delete(Incomplete, originID)
}
}
}
+ r.Close()
WARCsM.Lock()
WARCs[warcPath] = uris
+ WARCsOffsets[warcPath] = r.offsets
WARCsM.Unlock()
return nil
}
-func SaveIndexes() error {
+func SaveIndices() error {
WARCsM.RLock()
defer WARCsM.RUnlock()
for warcPath, uris := range WARCs {
if _, err := os.Stat(p); err == nil {
continue
}
+ tmpSuffix := strconv.FormatInt(time.Now().UnixNano()+int64(os.Getpid()), 16)
fd, err := os.OpenFile(
- p+".tmp",
- os.O_CREATE|os.O_WRONLY|os.O_EXCL,
+ p+tmpSuffix,
+ os.O_WRONLY|os.O_CREATE|os.O_EXCL,
os.FileMode(0666),
)
if err != nil {
return err
}
- if err = gob.NewEncoder(fd).Encode(&uris); err != nil {
+ enc := gob.NewEncoder(fd)
+ if err = enc.Encode(&uris); err != nil {
fd.Close()
return err
}
- fd.Close()
- if err = os.Rename(p+".tmp", p); err != nil {
+ offsets := WARCsOffsets[warcPath]
+ if err = enc.Encode(&offsets); err != nil {
+ fd.Close()
+ return err
+ }
+ if err = fd.Close(); err != nil {
+ return err
+ }
+ if err = os.Rename(p+tmpSuffix, p); err != nil {
return err
}
log.Println("saved:", p)