1 // tofuproxy -- flexible HTTP/HTTPS proxy, TLS terminator, X.509 TOFU
2 // manager, WARC/geminispace browser
3 // Copyright (C) 2021-2024 Sergey Matveev <stargrave@stargrave.org>
5 // This program is free software: you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, version 3 of the License.
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
14 // You should have received a copy of the GNU General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
30 "github.com/dustin/go-humanize"
31 "go.stargrave.org/tofuproxy/fifos"
32 "go.stargrave.org/tofuproxy/warc"
// WARCEntrypoint is the URL the WARC request handler compares incoming
// requests against: a request whose URL equals it is answered by executing
// TmplWARCIndex over the entries collected from warc.WARCs.
35 const WARCEntrypoint = "http://warc/"
40 //go:embed warc-index.tmpl
41 TmplWARCIndexRaw string
42 TmplWARCIndex = template.Must(template.New("warc-index").Parse(TmplWARCIndexRaw))
45 type WARCEntry struct {
// ByDepth is a slice of WARC entries carrying Len, Swap and Less methods, so
// it can be handed to sort.Sort to order the index listing. Less works on each
// entry's URI with a trailing "/" trimmed, looking at the number of "/"
// separators and at the URI length.
51 type ByDepth []*WARCEntry
53 func (a ByDepth) Len() int {
57 func (a ByDepth) Swap(i, j int) {
58 a[i], a[j] = a[j], a[i]
61 func (a ByDepth) Less(i, j int) bool {
67 uriI := strings.TrimSuffix(a[i].URI, "/")
68 uriJ := strings.TrimSuffix(a[j].URI, "/")
69 ci = strings.Count(uriI, "/")
70 cj = strings.Count(uriJ, "/")
74 return len(uriI) < len(uriJ)
80 w http.ResponseWriter,
83 if req.URL.String() == WARCEntrypoint {
84 var entries []*WARCEntry
86 for warcPath, uris := range warc.WARCs {
87 for uri, rec := range uris {
88 entries = append(entries, &WARCEntry{
91 humanize.IBytes(uint64(rec.TotalSize())),
96 sort.Sort(ByDepth(entries))
97 err := TmplWARCIndex.Execute(w, struct{ Entries []*WARCEntry }{entries})
101 log.Printf("WARC: error during %s: %+v\n", req.URL, err)
108 var uris map[string]*warc.Record
109 hostOrig := req.URL.Host
110 if req.URL.Scheme == "https" {
111 req.URL.Host = strings.TrimSuffix(req.URL.Host, ":443")
114 for warcPath, uris = range warc.WARCs {
115 rec = uris[req.URL.String()]
120 warc.WARCsM.RUnlock()
121 req.URL.Host = hostOrig
124 http.NotFound(w, req)
125 fifos.LogNonOK <- fmt.Sprintf(
126 "%s %s\tnot in WARC", req.Method, req.URL,
133 wr, err := rec.Reader(true, warc.WARCsOffsets)
135 log.Printf("WARC: error during %s: %+v\n", req.URL, err)
139 hj, ok := w.(http.Hijacker)
141 http.Error(w, "can not hijack", http.StatusInternalServerError)
144 conn, _, err := hj.Hijack()
148 _, err = io.Copy(conn, wr)
150 fifos.LogWARC <- fmt.Sprintf(
153 strings.TrimSuffix(rec.Hdr.Get("Content-Type"), ";msgtype=response"),
155 humanize.IBytes(uint64(rec.TotalSize())),