2 tofuproxy -- flexible HTTP/HTTPS proxy, TLS terminator, X.509 TOFU
3 manager, WARC/geminispace browser
4 Copyright (C) 2021-2022 Sergey Matveev <stargrave@stargrave.org>
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, version 3 of the License.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
32 "github.com/dustin/go-humanize"
33 "go.stargrave.org/tofuproxy/fifos"
34 "go.stargrave.org/tofuproxy/warc"
// WARCEntrypoint is the special URL the WARC handler compares the request
// URL against; an exact match is answered by rendering the WARC index
// template instead of looking up a stored record.
37 const WARCEntrypoint = "http://warc/"
// TmplWARCIndexRaw receives the contents of warc-index.tmpl through the
// go:embed directive below.
42 //go:embed warc-index.tmpl
43 TmplWARCIndexRaw string
// TmplWARCIndex is the parsed "warc-index" template built from the embedded
// text; template.Must panics if parsing fails.
44 TmplWARCIndex = template.Must(template.New("warc-index").Parse(TmplWARCIndexRaw))
// WARCEntry is one item of the WARC index: the handler appends one WARCEntry
// per stored URI and hands the resulting slice to TmplWARCIndex.
// NOTE(review): the field list is outside this view; a URI field is read by
// ByDepth.Less -- confirm the remaining fields against the full file.
47 type WARCEntry struct {
// ByDepth is a slice of WARC entries carrying Len, Swap and Less methods; the
// handler passes it to sort.Sort to order the index listing.
53 type ByDepth []*WARCEntry
// Len is the size method ByDepth provides for sort.Sort.
// NOTE(review): the body is outside this view; presumably it returns
// len(a) -- confirm.
55 func (a ByDepth) Len() int {
// Swap exchanges the entries at positions i and j in place.
59 func (a ByDepth) Swap(i, j int) {
60 a[i], a[j] = a[j], a[i]
// Less reports whether entry i should sort before entry j.
// NOTE(review): the declarations of ci/cj and the comparison between them are
// outside this view; presumably fewer "/" separators sorts first -- confirm.
63 func (a ByDepth) Less(i, j int) bool {
// Drop a single trailing "/" so that "x/y/" and "x/y" yield the same count.
69 uriI := strings.TrimSuffix(a[i].URI, "/")
70 uriJ := strings.TrimSuffix(a[j].URI, "/")
// Number of "/" occurrences in each trimmed URI.
71 ci = strings.Count(uriI, "/")
72 cj = strings.Count(uriJ, "/")
// When this return is reached, the shorter trimmed URI sorts first.
76 return len(uriI) < len(uriJ)
// NOTE(review): the enclosing function header is outside this view. The
// visible body serves either the WARC index (for WARCEntrypoint) or a stored
// WARC record matching the request URL; req is presumably the *http.Request
// parameter -- confirm.
82 w http.ResponseWriter,
// An exact match on the entrypoint URL renders the index of known records.
85 if req.URL.String() == WARCEntrypoint {
86 var entries []*WARCEntry
// Walk every loaded WARC (keyed by path) and every URI recorded in it.
88 for warcPath, uris := range warc.WARCs {
89 for uri, rec := range uris {
90 entries = append(entries, &WARCEntry{
// Record size formatted by humanize.IBytes for display.
93 humanize.IBytes(uint64(rec.TotalSize())),
98 sort.Sort(ByDepth(entries))
// The template receives the sorted entries as .Entries.
99 err := TmplWARCIndex.Execute(w, struct{ Entries []*WARCEntry }{entries})
103 log.Printf("WARC: error during %s: %+v\n", req.URL, err)
110 var uris map[string]*warc.Record
// Remember the original host so it can be restored after the lookup.
111 hostOrig := req.URL.Host
// For https requests, strip an explicit ":443" before building the lookup key.
112 if req.URL.Scheme == "https" {
113 req.URL.Host = strings.TrimSuffix(req.URL.Host, ":443")
// Look the (possibly normalised) request URL up in each loaded WARC.
116 for warcPath, uris = range warc.WARCs {
117 rec = uris[req.URL.String()]
// NOTE(review): the matching RLock is outside this view -- confirm it is
// taken before the loop above.
122 warc.WARCsM.RUnlock()
123 req.URL.Host = hostOrig
// Not-found path: reply 404 and report the miss on the non-OK log channel.
126 http.NotFound(w, req)
127 fifos.LogNonOK <- fmt.Sprintf("%s %s\tnot in WARC", req.Method, req.URL)
133 wr, err := rec.Reader(true, warc.WARCsOffsets)
135 log.Printf("WARC: error during %s: %+v\n", req.URL, err)
// The record reader is copied straight onto the client connection, so the
// connection is taken over from net/http via http.Hijacker.
139 hj, ok := w.(http.Hijacker)
141 http.Error(w, "can not hijack", http.StatusInternalServerError)
144 conn, _, err := hj.Hijack()
148 _, err = io.Copy(conn, wr)
150 fifos.LogWARC <- fmt.Sprintf(
// Strip a trailing ";msgtype=response" from the logged Content-Type value.
153 strings.TrimSuffix(rec.Hdr.Get("Content-Type"), ";msgtype=response"),
155 humanize.IBytes(uint64(rec.TotalSize())),