/* tofuproxy -- flexible HTTP/HTTPS proxy, TLS terminator, X.509 TOFU manager, WARC/geminispace browser Copyright (C) 2021 Sergey Matveev This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 3 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ package rounds import ( _ "embed" "fmt" "html/template" "io" "log" "net/http" "path" "sort" "strings" "github.com/dustin/go-humanize" "go.stargrave.org/tofuproxy/fifos" "go.stargrave.org/tofuproxy/warc" ) const WARCEntrypoint = "http://warc/" var ( WARCOnly bool //go:embed warc-index.tmpl TmplWARCIndexRaw string TmplWARCIndex = template.Must(template.New("warc-index").Parse(TmplWARCIndexRaw)) ) type WARCEntry struct { WARC string URI string Size string } type ByDepth []*WARCEntry func (a ByDepth) Len() int { return len(a) } func (a ByDepth) Swap(i, j int) { a[i], a[j] = a[j], a[i] } func (a ByDepth) Less(i, j int) bool { ci := len(a[i].WARC) cj := len(a[j].WARC) if ci != cj { return ci < cj } uriI := strings.TrimSuffix(a[i].URI, "/") uriJ := strings.TrimSuffix(a[j].URI, "/") ci = strings.Count(uriI, "/") cj = strings.Count(uriJ, "/") if ci != cj { return ci < cj } return len(uriI) < len(uriJ) } func RoundWARC( host string, resp *http.Response, w http.ResponseWriter, req *http.Request, ) (bool, error) { if req.URL.String() == WARCEntrypoint { var entries []*WARCEntry warc.WARCsM.RLock() for warcPath, uris := range warc.WARCs { for uri, rec := range uris { entries = append(entries, &WARCEntry{ path.Base(warcPath), uri, humanize.IBytes(uint64(rec.TotalSize())), }) } } warc.WARCsM.RUnlock() sort.Sort(ByDepth(entries)) err := TmplWARCIndex.Execute(w, struct{ Entries []*WARCEntry }{entries}) if err == nil { return false, nil } else { log.Printf("WARC: error during %s: %+v\n", req.URL, err) return false, err } } var rec *warc.Record var warcPath string var uris map[string]*warc.Record hostOrig := req.URL.Host if req.URL.Scheme == "https" { req.URL.Host = strings.TrimSuffix(req.URL.Host, ":443") } warc.WARCsM.RLock() for warcPath, uris = range warc.WARCs { rec = uris[req.URL.String()] if rec != nil { break } } warc.WARCsM.RUnlock() req.URL.Host = hostOrig if rec == nil { if WARCOnly { http.NotFound(w, req) fifos.LogNonOK <- fmt.Sprintf("%s %s\tnot in WARC", req.Method, req.URL) return false, nil } return true, nil } wr, err := rec.Reader(true, warc.WARCsOffsets) if err != nil { log.Printf("WARC: error during %s: %+v\n", req.URL, err) return false, err } defer wr.Close() hj, ok := w.(http.Hijacker) if !ok { http.Error(w, "can not hijack", http.StatusInternalServerError) return false, err } conn, _, err := hj.Hijack() if err != nil { panic(err) } _, err = io.Copy(conn, wr) conn.Close() fifos.LogWARC <- fmt.Sprintf( "%s %s\t%s\t%s\t%s", req.Method, req.URL, strings.TrimSuffix(rec.Hdr.Get("Content-Type"), ";msgtype=response"), warcPath, humanize.IBytes(uint64(rec.TotalSize())), ) return false, err }