/* tofuproxy -- flexible HTTP/WARC proxy with TLS certificates management Copyright (C) 2021 Sergey Matveev This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 3 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ package warc import ( "encoding/gob" "fmt" "io" "log" "os" "strconv" "sync" ) const IndexExt = ".idx.gob" var ( WARCs = map[string]map[string]*Record{} WARCsM sync.RWMutex Incomplete = map[string]*Record{} ) func Add(warcPath string) error { fd, err := os.Open(warcPath + IndexExt) if err == nil { defer fd.Close() var uris map[string]*Record if err := gob.NewDecoder(fd).Decode(&uris); err != nil { return err } WARCsM.Lock() WARCs[warcPath] = uris WARCsM.Unlock() return nil } if err != nil && !os.IsNotExist(err) { return err } r, err := NewReader(warcPath) if err != nil { return err } defer r.Close() uris := map[string]*Record{} for { rec, err := r.ReadRecord() if err != nil { if err == io.EOF { break } return err } segNum := rec.Hdr.Get("WARC-Segment-Number") switch rec.Hdr.Get("WARC-Type") { case "response": uri := rec.URI() if uri == "" { continue } if segNum == "1" { Incomplete[rec.Hdr.Get("WARC-Record-ID")] = rec continue } uris[uri] = rec case "continuation": originID := rec.Hdr.Get("WARC-Segment-Origin-ID") incomplete := Incomplete[originID] if incomplete == nil { return fmt.Errorf("can not find WARC-Segment-Origin-ID: %q", originID) } segNumExpected := strconv.Itoa(len(incomplete.Continuations) + 1 + 1) if segNum != segNumExpected { return fmt.Errorf( "unexpected WARC-Segment-Number %s != %s", segNum, segNumExpected, ) } incomplete.Continuations = append(incomplete.Continuations, rec) if rec.Hdr.Get("WARC-Segment-Total-Length") != "" { WARCsM.Lock() WARCs[incomplete.WARCPath][incomplete.URI()] = incomplete WARCsM.Unlock() delete(Incomplete, originID) } } } WARCsM.Lock() WARCs[warcPath] = uris WARCsM.Unlock() return nil } func SaveIndexes() error { WARCsM.RLock() defer WARCsM.RUnlock() for warcPath, uris := range WARCs { p := warcPath + IndexExt if _, err := os.Stat(p); err == nil { continue } fd, err := os.OpenFile( p+".tmp", os.O_CREATE|os.O_WRONLY|os.O_EXCL, os.FileMode(0666), ) if err != nil { return err } if err = gob.NewEncoder(fd).Encode(&uris); err != nil { fd.Close() return err } fd.Close() if err = os.Rename(p+".tmp", p); err != nil { return err } log.Println("saved:", p) } return nil }