Support custom path escaping for WebSeeds

author Matt Joiner <anacrolix@gmail.com>

Tue, 26 Apr 2022 00:57:40 +0000 (10:57 +1000)

committer Matt Joiner <anacrolix@gmail.com>

Tue, 26 Apr 2022 00:57:40 +0000 (10:57 +1000)
author Matt Joiner <anacrolix@gmail.com>
Tue, 26 Apr 2022 00:57:40 +0000 (10:57 +1000)
committer Matt Joiner <anacrolix@gmail.com>
Tue, 26 Apr 2022 00:57:40 +0000 (10:57 +1000)
diff --git a/spec.go b/spec.go

index 332ea139670f9d9f3a2564cde4205794a6f6d157..8cce3cb326f1ff3594b921338804f381d5a7e7cd 100644 (file)
--- a/spec.go
+++ b/spec.go
@@ -19,9 +19,11 @@ type TorrentSpec struct {
         InfoBytes []byte
         // The name to use if the Name field from the Info isn't available.
         DisplayName string
-       Webseeds    []string
-       DhtNodes    []string
-       PeerAddrs   []string
+       // WebSeed URLs. For additional options add the URLs separately with Torrent.AddWebSeeds
+       // instead.
+       Webseeds  []string
+       DhtNodes  []string
+       PeerAddrs []string
         // The combination of the "xs" and "as" fields in magnet links, for now.
         Sources []string
  
diff --git a/torrent.go b/torrent.go

index 79c676ca5d1874515e2fb33517c95910cd117f11..ae3d1c05a6d578a755e84a4a580b80a77ccc69c2 100644 (file)
--- a/torrent.go
+++ b/torrent.go
@@ -2350,15 +2350,24 @@ func (t *Torrent) callbacks() *Callbacks {
         return &t.cl.config.Callbacks
  }
  
-func (t *Torrent) AddWebSeeds(urls []string) {
+type AddWebSeedsOpt func(*webseed.Client)
+
+// WebSeedPathEscaper returns an option that sets the trailing path escaper on a webseed.Client.
+func WebSeedPathEscaper(custom webseed.PathEscaper) AddWebSeedsOpt {
+       return func(c *webseed.Client) {
+               c.PathEscaper = custom
+       }
+}
+
+func (t *Torrent) AddWebSeeds(urls []string, opts ...AddWebSeedsOpt) {
         t.cl.lock()
         defer t.cl.unlock()
         for _, u := range urls {
-               t.addWebSeed(u)
+               t.addWebSeed(u, opts...)
         }
  }
  
-func (t *Torrent) addWebSeed(url string) {
+func (t *Torrent) addWebSeed(url string, opts ...AddWebSeedsOpt) {
         if t.cl.config.DisableWebseeds {
                 return
         }
@@ -2399,6 +2408,9 @@ func (t *Torrent) addWebSeed(url string) {
                 activeRequests: make(map[Request]webseed.Request, maxRequests),
                 maxRequests:    maxRequests,
         }
+       for _, opt := range opts {
+               opt(&ws.client)
+       }
         ws.peer.initUpdateRequestsTimer()
         ws.requesterCond.L = t.cl.locker()
         for i := 0; i < maxRequests; i += 1 {
diff --git a/webseed/client.go b/webseed/client.go

index a04b34300ef025ba08719699591eb9fdd413af06..a86be17aea2df82d32164148b99f11a2342136d3 100644 (file)
--- a/webseed/client.go
+++ b/webseed/client.go
@@ -52,6 +52,7 @@ type Client struct {
         // private in the future, if Client ever starts removing pieces.
         Pieces              roaring.Bitmap
         ResponseBodyWrapper ResponseBodyWrapper
+       PathEscaper         PathEscaper
  }
  
  type ResponseBodyWrapper func(io.Reader) io.Reader
@@ -76,7 +77,10 @@ func (ws *Client) NewRequest(r RequestSpec) Request {
         ctx, cancel := context.WithCancel(context.Background())
         var requestParts []requestPart
         if !ws.fileIndex.Locate(r, func(i int, e segments.Extent) bool {
-               req, err := NewRequest(ws.Url, i, ws.info, e.Start, e.Length)
+               req, err := newRequest(
+                       ws.Url, i, ws.info, e.Start, e.Length,
+                       ws.PathEscaper,
+               )
                 if err != nil {
                         panic(err)
                 }
diff --git a/webseed/request.go b/webseed/request.go

index 4e3ef6091472ffee381219c10d71c84a42ee042d..a38e6372952357a7a8ea9d723bcafa84086bb887 100644 (file)
--- a/webseed/request.go
+++ b/webseed/request.go
@@ -10,30 +10,48 @@ import (
         "github.com/anacrolix/torrent/metainfo"
  )
  
+type PathEscaper func(pathComps []string) string
+
  // Escapes path name components suitable for appending to a webseed URL. This works for converting
  // S3 object keys to URLs too.
+//
+// Contrary to the name, this actually does a QueryEscape, rather than a PathEscape. This works
+// better with most S3 providers.
  func EscapePath(pathComps []string) string {
-       return path.Join(
-               func() (ret []string) {
-                       for _, comp := range pathComps {
-                               ret = append(ret, url.QueryEscape(comp))
-                       }
-                       return
-               }()...,
-       )
+       return defaultPathEscaper(pathComps)
  }
  
-func trailingPath(infoName string, fileComps []string) string {
-       return EscapePath(append([]string{infoName}, fileComps...))
+func defaultPathEscaper(pathComps []string) string {
+       var ret []string
+       for _, comp := range pathComps {
+               ret = append(ret, url.QueryEscape(comp))
+       }
+       return path.Join(ret...)
+}
+
+func trailingPath(
+       infoName string,
+       fileComps []string,
+       pathEscaper PathEscaper,
+) string {
+       if pathEscaper == nil {
+               pathEscaper = defaultPathEscaper
+       }
+       return pathEscaper(append([]string{infoName}, fileComps...))
  }
  
  // Creates a request per BEP 19.
-func NewRequest(url_ string, fileIndex int, info *metainfo.Info, offset, length int64) (*http.Request, error) {
+func newRequest(
+       url_ string, fileIndex int,
+       info *metainfo.Info,
+       offset, length int64,
+       pathEscaper PathEscaper,
+) (*http.Request, error) {
         fileInfo := info.UpvertedFiles()[fileIndex]
         if strings.HasSuffix(url_, "/") {
                 // BEP specifies that we append the file path. We need to escape each component of the path
                 // for things like spaces and '#'.
-               url_ += trailingPath(info.Name, fileInfo.Path)
+               url_ += trailingPath(info.Name, fileInfo.Path, pathEscaper)
         }
         req, err := http.NewRequest(http.MethodGet, url_, nil)
         if err != nil {
diff --git a/webseed/request_test.go b/webseed/request_test.go

index f7c18a03fdb92df6e4649d71019a581dbada6833..7f691e0a04523b16f0e3bc11d9afa32b1ab2b45b 100644 (file)
--- a/webseed/request_test.go
+++ b/webseed/request_test.go
@@ -2,28 +2,71 @@ package webseed
  
  import (
         "net/url"
+       "path"
         "testing"
  
         qt "github.com/frankban/quicktest"
  )
  
-func TestTrailingPath(t *testing.T) {
+func TestEscapePath(t *testing.T) {
         c := qt.New(t)
-       test := func(parts []string, result string) {
-               unescaped, err := url.QueryUnescape(trailingPath(parts[0], parts[1:]))
+       test := func(
+               parts []string, result string,
+               escaper PathEscaper,
+               unescaper func(string) (string, error),
+       ) {
+               unescaped, err := unescaper(escaper(parts))
                 if !c.Check(err, qt.IsNil) {
                         return
                 }
                 c.Check(unescaped, qt.Equals, result)
         }
-       test([]string{"a_b-c", "d + e.f"}, "a_b-c/d + e.f")
-       test([]string{"a_1-b_c2", "d 3. (e, f).g"},
+
+       // Test with the default escaper (always uses url.QueryEscape)
+       // ------
+       test(
+               []string{"a_b-c", "d + e.f"},
+               "a_b-c/d + e.f",
+               defaultPathEscaper,
+               url.QueryUnescape,
+       )
+       test(
+               []string{"a_1-b_c2", "d 3. (e, f).g"},
+               "a_1-b_c2/d 3. (e, f).g",
+               defaultPathEscaper,
+               url.QueryUnescape,
+       )
+
+       // Test with custom escapers
+       // ------
+       test(
+               []string{"a_b-c", "d + e.f"},
+               "a_b-c/d + e.f",
+               func(s []string) string {
+                       var ret []string
+                       for _, comp := range s {
+                               ret = append(ret, url.PathEscape(comp))
+                       }
+                       return path.Join(ret...)
+               },
+               url.PathUnescape,
+       )
+       test(
+               []string{"a_1-b_c2", "d 3. (e, f).g"},
                 "a_1-b_c2/d 3. (e, f).g",
+               func(s []string) string {
+                       var ret []string
+                       for _, comp := range s {
+                               ret = append(ret, url.PathEscape(comp))
+                       }
+                       return path.Join(ret...)
+               },
+               url.PathUnescape,
         )
  }
  
-func TestTrailingPathForEmptyInfoName(t *testing.T) {
-       qt.Check(t, trailingPath("", []string{`ノ┬─┬ノ ︵ ( \o°o)\`}), qt.Equals, "%E3%83%8E%E2%94%AC%E2%94%80%E2%94%AC%E3%83%8E+%EF%B8%B5+%28+%5Co%C2%B0o%29%5C")
-       qt.Check(t, trailingPath("", []string{"hello", "world"}), qt.Equals, "hello/world")
-       qt.Check(t, trailingPath("war", []string{"and", "peace"}), qt.Equals, "war/and/peace")
+func TestEscapePathForEmptyInfoName(t *testing.T) {
+       qt.Check(t, defaultPathEscaper([]string{`ノ┬─┬ノ ︵ ( \o°o)\`}), qt.Equals, "%E3%83%8E%E2%94%AC%E2%94%80%E2%94%AC%E3%83%8E+%EF%B8%B5+%28+%5Co%C2%B0o%29%5C")
+       qt.Check(t, defaultPathEscaper([]string{"hello", "world"}), qt.Equals, "hello/world")
+       qt.Check(t, defaultPathEscaper([]string{"war", "and", "peace"}), qt.Equals, "war/and/peace")
  }
author	Matt Joiner <anacrolix@gmail.com>
	Tue, 26 Apr 2022 00:57:40 +0000 (10:57 +1000)
committer	Matt Joiner <anacrolix@gmail.com>
	Tue, 26 Apr 2022 00:57:40 +0000 (10:57 +1000)
spec.go		patch \| blob \| history
torrent.go		patch \| blob \| history
webseed/client.go		patch \| blob \| history
webseed/request.go		patch \| blob \| history
webseed/request_test.go		patch \| blob \| history