+/*
+tofuproxy -- flexible HTTP/WARC proxy with TLS certificates management
+Copyright (C) 2021 Sergey Matveev <stargrave@stargrave.org>
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, version 3 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package rounds
+
+import (
+ _ "embed"
+ "fmt"
+ "html/template"
+ "io"
+ "log"
+ "net/http"
+ "path"
+ "sort"
+ "strings"
+
+ "github.com/dustin/go-humanize"
+ "go.stargrave.org/tofuproxy/fifos"
+ "go.stargrave.org/tofuproxy/warc"
+)
+
+const WARCEntrypoint = "http://warc/"
+
+var (
+ WARCOnly bool
+
+ //go:embed warc-index.tmpl
+ TmplWARCIndexRaw string
+ TmplWARCIndex = template.Must(template.New("warc-index").Parse(TmplWARCIndexRaw))
+)
+
// WARCEntry is one row of the WARC index page: a single archived URI
// together with the WARC it was found in.
type WARCEntry struct {
	WARC string // name of the WARC holding the record
	URI  string // URI the record is stored under
	Size string // record size, already formatted for display
}

// ByDepth implements sort.Interface over index entries.
//
// Entries are ordered by, in decreasing priority:
//
//  1. length of the WARC name;
//  2. number of "/" in the URI (one trailing "/" is ignored);
//  3. length of the URI (one trailing "/" is ignored);
//  4. the URI itself, lexicographically;
//  5. the WARC name, lexicographically.
//
// Keys 4 and 5 only break ties left by the first three, so that sorting the
// same set of entries always produces the same sequence regardless of the
// order they were collected in.
type ByDepth []*WARCEntry

// Len reports the number of entries.
func (a ByDepth) Len() int {
	return len(a)
}

// Swap exchanges the entries at positions i and j.
func (a ByDepth) Swap(i, j int) {
	a[i], a[j] = a[j], a[i]
}

// Less reports whether entry i sorts before entry j; see ByDepth for the
// ordering keys.
func (a ByDepth) Less(i, j int) bool {
	x, y := a[i], a[j]
	if len(x.WARC) != len(y.WARC) {
		return len(x.WARC) < len(y.WARC)
	}
	// "http://host/dir/" and "http://host/dir" are considered equally deep.
	uriX := strings.TrimSuffix(x.URI, "/")
	uriY := strings.TrimSuffix(y.URI, "/")
	if dx, dy := strings.Count(uriX, "/"), strings.Count(uriY, "/"); dx != dy {
		return dx < dy
	}
	if len(uriX) != len(uriY) {
		return len(uriX) < len(uriY)
	}
	// Everything above is equal: fall back to plain string comparison so
	// that distinct entries never compare as equivalent.
	if x.URI != y.URI {
		return x.URI < y.URI
	}
	return x.WARC < y.WARC
}
+
+func RoundWARC(
+ host string,
+ resp *http.Response,
+ w http.ResponseWriter,
+ req *http.Request,
+) (bool, error) {
+ if req.URL.String() == WARCEntrypoint {
+ var entries []*WARCEntry
+ warc.WARCsM.RLock()
+ for warcPath, uris := range warc.WARCs {
+ for uri, rec := range uris {
+ entries = append(entries, &WARCEntry{
+ path.Base(warcPath),
+ uri,
+ humanize.IBytes(uint64(rec.TotalSize())),
+ })
+ }
+ }
+ warc.WARCsM.RUnlock()
+ sort.Sort(ByDepth(entries))
+ err := TmplWARCIndex.Execute(w, struct{ Entries []*WARCEntry }{entries})
+ if err == nil {
+ return false, nil
+ } else {
+ log.Printf("WARC: error during %s: %+v\n", req.URL, err)
+ return false, err
+ }
+ }
+
+ var rec *warc.Record
+ var warcPath string
+ var uris map[string]*warc.Record
+ hostOrig := req.URL.Host
+ if req.URL.Scheme == "https" {
+ req.URL.Host = strings.TrimSuffix(req.URL.Host, ":443")
+ }
+ warc.WARCsM.RLock()
+ for warcPath, uris = range warc.WARCs {
+ rec = uris[req.URL.String()]
+ if rec != nil {
+ break
+ }
+ }
+ warc.WARCsM.RUnlock()
+ req.URL.Host = hostOrig
+ if rec == nil {
+ if WARCOnly {
+ http.NotFound(w, req)
+ fifos.LogNonOK <- fmt.Sprintf("%s %s\tnot in WARC", req.Method, req.URL)
+ return false, nil
+ }
+ return true, nil
+ }
+
+ wr, err := rec.Reader(true)
+ if err != nil {
+ log.Printf("WARC: error during %s: %+v\n", req.URL, err)
+ return false, err
+ }
+ hj, ok := w.(http.Hijacker)
+ if !ok {
+ http.Error(w, "can not hijack", http.StatusInternalServerError)
+ return false, err
+ }
+ conn, _, err := hj.Hijack()
+ if err != nil {
+ panic(err)
+ }
+ _, err = io.Copy(conn, wr)
+ conn.Close()
+ fifos.LogWARC <- fmt.Sprintf(
+ "%s %s\t%s\t%s\t%s",
+ req.Method, req.URL,
+ strings.TrimSuffix(rec.Hdr.Get("Content-Type"), ";msgtype=response"),
+ warcPath,
+ humanize.IBytes(uint64(rec.TotalSize())),
+ )
+ return false, err
+}