X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=warc%2Furis.go;h=2c40dcfbb7f60cde38240edd8309c07749fbd3c1;hb=HEAD;hp=a971ff0edce5f35660c9ec6c4c22dbc4a1c2fed3;hpb=0c0a261a6ef4fddfc34a9150005f7964cc69c420;p=tofuproxy.git diff --git a/warc/uris.go b/warc/uris.go index a971ff0..56cd415 100644 --- a/warc/uris.go +++ b/warc/uris.go @@ -1,37 +1,40 @@ -/* -tofuproxy -- flexible HTTP/WARC proxy with TLS certificates management -Copyright (C) 2021 Sergey Matveev - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, version 3 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ +// tofuproxy -- flexible HTTP/HTTPS proxy, TLS terminator, X.509 TOFU +// manager, WARC/geminispace browser +// Copyright (C) 2021-2024 Sergey Matveev +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 3 of the License. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>.
package warc import ( "encoding/gob" + "errors" "fmt" "io" + "io/fs" "log" "os" "strconv" "sync" + "time" ) const IndexExt = ".idx.gob" var ( - WARCs = map[string]map[string]*Record{} - WARCsM sync.RWMutex + WARCs = map[string]map[string]*Record{} + WARCsOffsets = map[string][]Offset{} + WARCsM sync.RWMutex Incomplete = map[string]*Record{} ) @@ -41,15 +44,22 @@ func Add(warcPath string) error { if err == nil { defer fd.Close() var uris map[string]*Record - if err := gob.NewDecoder(fd).Decode(&uris); err != nil { + var offsets []Offset + dec := gob.NewDecoder(fd) + if err := dec.Decode(&uris); err != nil { + return err + } + if err := dec.Decode(&offsets); err != nil { return err } WARCsM.Lock() WARCs[warcPath] = uris + WARCsOffsets[warcPath] = offsets WARCsM.Unlock() + log.Println("loaded marshalled index:", warcPath+IndexExt) return nil } - if err != nil && !os.IsNotExist(err) { + if err != nil && !errors.Is(err, fs.ErrNotExist) { return err } r, err := NewReader(warcPath) @@ -59,13 +69,14 @@ func Add(warcPath string) error { defer r.Close() uris := map[string]*Record{} for { - rec, err := r.ReadRecord() + rec, _, err := r.ReadRecord() if err != nil { if err == io.EOF { break } return err } + rec.HdrLines = nil segNum := rec.Hdr.Get("WARC-Segment-Number") switch rec.Hdr.Get("WARC-Type") { case "response": @@ -93,20 +104,26 @@ func Add(warcPath string) error { } incomplete.Continuations = append(incomplete.Continuations, rec) if rec.Hdr.Get("WARC-Segment-Total-Length") != "" { - WARCsM.Lock() - WARCs[incomplete.WARCPath][incomplete.URI()] = incomplete - WARCsM.Unlock() + if incomplete.WARCPath == warcPath { + uris[incomplete.URI()] = incomplete + } else { + WARCsM.Lock() + WARCs[incomplete.WARCPath][incomplete.URI()] = incomplete + WARCsM.Unlock() + } delete(Incomplete, originID) } } } + r.Close() WARCsM.Lock() WARCs[warcPath] = uris + WARCsOffsets[warcPath] = r.offsets WARCsM.Unlock() return nil } -func SaveIndexes() error { +func SaveIndices() error { 
WARCsM.RLock() defer WARCsM.RUnlock() for warcPath, uris := range WARCs { @@ -114,20 +131,29 @@ func SaveIndexes() error { if _, err := os.Stat(p); err == nil { continue } + tmpSuffix := strconv.FormatInt(time.Now().UnixNano()+int64(os.Getpid()), 16) fd, err := os.OpenFile( - p+".tmp", - os.O_CREATE|os.O_WRONLY|os.O_EXCL, + p+tmpSuffix, + os.O_WRONLY|os.O_CREATE|os.O_EXCL, os.FileMode(0666), ) if err != nil { return err } - if err = gob.NewEncoder(fd).Encode(&uris); err != nil { + enc := gob.NewEncoder(fd) + if err = enc.Encode(&uris); err != nil { fd.Close() return err } - fd.Close() - if err = os.Rename(p+".tmp", p); err != nil { + offsets := WARCsOffsets[warcPath] + if err = enc.Encode(&offsets); err != nil { + fd.Close() + return err + } + if err = fd.Close(); err != nil { + return err + } + if err = os.Rename(p+tmpSuffix, p); err != nil { return err } log.Println("saved:", p)