2 tofuproxy -- flexible HTTP/WARC proxy with TLS certificates management
3 Copyright (C) 2021 Sergey Matveev <stargrave@stargrave.org>
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, version 3 of the License.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
31 "github.com/dustin/go-humanize"
32 "go.stargrave.org/tofuproxy/fifos"
33 "go.stargrave.org/tofuproxy/warc"
// WARCEntrypoint is the special URL recognised by the WARC handler: when the
// requested URL string equals it exactly, the handler collects the entries
// found in warc.WARCs and renders them through TmplWARCIndex.
36 const WARCEntrypoint = "http://warc/"
// TmplWARCIndexRaw receives the text of warc-index.tmpl via the go:embed
// directive below. TmplWARCIndex is that text parsed into a template named
// "warc-index"; template.Must panics if the embedded template fails to parse.
41 //go:embed warc-index.tmpl
42 TmplWARCIndexRaw string
43 TmplWARCIndex = template.Must(template.New("warc-index").Parse(TmplWARCIndexRaw))
// WARCEntry describes one item shown on the WARC index page. Entries are
// built while iterating warc.WARCs and are passed to TmplWARCIndex as the
// Entries field of the template data.
46 type WARCEntry struct {
// ByDepth is a slice of *WARCEntry carrying the Len, Swap and Less methods,
// so that the index entries can be ordered with sort.Sort.
52 type ByDepth []*WARCEntry
// Len is the length method used by sort.Sort for ByDepth.
// NOTE(review): presumably returns len(a) — confirm against the body.
54 func (a ByDepth) Len() int {
// Swap exchanges the entries at positions i and j; used by sort.Sort.
58 func (a ByDepth) Swap(i, j int) {
59 a[i], a[j] = a[j], a[i]
// Less is the ordering method used by sort.Sort for ByDepth. It compares the
// URIs of entries i and j after removing one trailing "/" from each, counts
// the "/" separators in both, and finally falls back to comparing URI length.
// NOTE(review): presumably an entry with fewer separators (a shallower URI)
// sorts first and length only breaks ties — confirm against the full body.
62 func (a ByDepth) Less(i, j int) bool {
// Trim a trailing slash so "http://host/dir/" and "http://host/dir" yield
// the same separator count.
68 uriI := strings.TrimSuffix(a[i].URI, "/")
69 uriJ := strings.TrimSuffix(a[j].URI, "/")
70 ci = strings.Count(uriI, "/")
71 cj = strings.Count(uriJ, "/")
// Shorter URI sorts first on this path.
75 return len(uriI) < len(uriJ)
81 w http.ResponseWriter,
84 if req.URL.String() == WARCEntrypoint {
85 var entries []*WARCEntry
87 for warcPath, uris := range warc.WARCs {
88 for uri, rec := range uris {
89 entries = append(entries, &WARCEntry{
92 humanize.IBytes(uint64(rec.TotalSize())),
97 sort.Sort(ByDepth(entries))
98 err := TmplWARCIndex.Execute(w, struct{ Entries []*WARCEntry }{entries})
102 log.Printf("WARC: error during %s: %+v\n", req.URL, err)
109 var uris map[string]*warc.Record
110 hostOrig := req.URL.Host
111 if req.URL.Scheme == "https" {
112 req.URL.Host = strings.TrimSuffix(req.URL.Host, ":443")
115 for warcPath, uris = range warc.WARCs {
116 rec = uris[req.URL.String()]
121 warc.WARCsM.RUnlock()
122 req.URL.Host = hostOrig
125 http.NotFound(w, req)
126 fifos.LogNonOK <- fmt.Sprintf("%s %s\tnot in WARC", req.Method, req.URL)
132 wr, err := rec.Reader(true)
134 log.Printf("WARC: error during %s: %+v\n", req.URL, err)
137 hj, ok := w.(http.Hijacker)
139 http.Error(w, "can not hijack", http.StatusInternalServerError)
142 conn, _, err := hj.Hijack()
146 _, err = io.Copy(conn, wr)
148 fifos.LogWARC <- fmt.Sprintf(
151 strings.TrimSuffix(rec.Hdr.Get("Content-Type"), ";msgtype=response"),
153 humanize.IBytes(uint64(rec.TotalSize())),