]> Sergey Matveev's repositories - btrtrc.git/blob - webseed/client.go
Drop support for go 1.20
[btrtrc.git] / webseed / client.go
1 package webseed
2
3 import (
4         "bytes"
5         "context"
6         "errors"
7         "fmt"
8         "io"
9         "log"
10         "net/http"
11         "strings"
12
13         "github.com/RoaringBitmap/roaring"
14
15         "github.com/anacrolix/torrent/common"
16         "github.com/anacrolix/torrent/metainfo"
17         "github.com/anacrolix/torrent/segments"
18 )
19
// RequestSpec identifies a contiguous byte extent of the torrent's data to
// fetch from the webseed, as a segments.Extent (start offset and length).
type RequestSpec = segments.Extent
21
// requestPartResult carries the outcome of one part's HTTP round trip:
// either a response, or the error from performing the request.
type requestPartResult struct {
	resp *http.Response
	err  error
}
26
// requestPart is a single HTTP request covering one file's slice of a larger
// webseed request; extents that span files produce one part per file overlapped.
type requestPart struct {
	req *http.Request
	// The extent within the torrent data that this part covers.
	e segments.Extent
	// One-shot buffered channel on which the HTTP outcome is delivered.
	result chan requestPartResult
	// Fires the HTTP request in its own goroutine; assigned when the part is built.
	start func()
	// Wrap http response bodies for such things as download rate limiting.
	responseBodyWrapper ResponseBodyWrapper
}
35
// Request is an in-flight webseed download. The outcome is delivered exactly
// once on Result; Cancel aborts the underlying HTTP requests.
type Request struct {
	// Cancels the context shared by all of this request's HTTP requests.
	cancel func()
	Result chan RequestResult
}
40
// Cancel aborts the request by cancelling the context governing all of its
// constituent HTTP requests.
func (r Request) Cancel() {
	r.cancel()
}
44
// Client performs webseed (HTTP seeding) requests against a single base URL.
// Call SetInfo before issuing requests so extents can be mapped to files.
type Client struct {
	HttpClient *http.Client
	Url        string
	// Maps torrent byte extents to the files they overlap; built in SetInfo.
	fileIndex segments.Index
	info      *metainfo.Info
	// The pieces we can request with the Url. We're more likely to ban/block at the file-level
	// given that's how requests are mapped to webseeds, but the torrent.Client works at the piece
	// level. We can map our file-level adjustments to the pieces here. This probably need to be
	// private in the future, if Client ever starts removing pieces.
	Pieces              roaring.Bitmap
	ResponseBodyWrapper ResponseBodyWrapper
	PathEscaper         PathEscaper
}
58
// ResponseBodyWrapper decorates HTTP response bodies, for such things as
// download rate limiting.
type ResponseBodyWrapper func(io.Reader) io.Reader
60
61 func (me *Client) SetInfo(info *metainfo.Info) {
62         if !strings.HasSuffix(me.Url, "/") && info.IsDir() {
63                 // In my experience, this is a non-conforming webseed. For example the
64                 // http://ia600500.us.archive.org/1/items URLs in archive.org torrents.
65                 return
66         }
67         me.fileIndex = segments.NewIndex(common.LengthIterFromUpvertedFiles(info.UpvertedFiles()))
68         me.info = info
69         me.Pieces.AddRange(0, uint64(info.NumPieces()))
70 }
71
// RequestResult is the terminal outcome of a Request: the downloaded bytes,
// or the first error encountered.
type RequestResult struct {
	Bytes []byte
	Err   error
}
76
// NewRequest begins downloading the extent r from the webseed. The extent is
// split into one HTTP request per file it overlaps; the parts are consumed in
// order and the concatenated bytes (or the first error) are delivered exactly
// once on the returned Request's Result channel.
func (ws *Client) NewRequest(r RequestSpec) Request {
	ctx, cancel := context.WithCancel(context.Background())
	var requestParts []requestPart
	if !ws.fileIndex.Locate(r, func(i int, e segments.Extent) bool {
		req, err := newRequest(
			ws.Url, i, ws.info, e.Start, e.Length,
			ws.PathEscaper,
		)
		if err != nil {
			// NOTE(review): request-construction failure panics instead of
			// surfacing through Result — presumably treated as a programming
			// error in URL building; confirm callers expect this.
			panic(err)
		}
		req = req.WithContext(ctx)
		part := requestPart{
			req:                 req,
			result:              make(chan requestPartResult, 1),
			e:                   e,
			responseBodyWrapper: ws.ResponseBodyWrapper,
		}
		// start is deferred so each part's HTTP request only fires when the
		// reader reaches it. The result channel is buffered (capacity 1) so
		// the goroutine can always complete its send and exit.
		part.start = func() {
			go func() {
				resp, err := ws.HttpClient.Do(req)
				part.result <- requestPartResult{
					resp: resp,
					err:  err,
				}
			}()
		}
		requestParts = append(requestParts, part)
		return true
	}) {
		// Locate reported the extent doesn't fit the indexed files.
		panic("request out of file bounds")
	}
	req := Request{
		cancel: cancel,
		// Buffered so the collector goroutine below never blocks on delivery.
		Result: make(chan RequestResult, 1),
	}
	go func() {
		b, err := readRequestPartResponses(ctx, requestParts)
		req.Result <- RequestResult{
			Bytes: b,
			Err:   err,
		}
	}()
	return req
}
122
// ErrBadResponse reports an HTTP response that the webseed client could not
// use, retaining the response for inspection by callers.
type ErrBadResponse struct {
	Msg      string
	Response *http.Response
}
127
// Error implements the error interface, returning only the message.
func (me ErrBadResponse) Error() string {
	return me.Msg
}
131
132 func recvPartResult(ctx context.Context, buf io.Writer, part requestPart) error {
133         result := <-part.result
134         // Make sure there's no further results coming, it should be a one-shot channel.
135         close(part.result)
136         if result.err != nil {
137                 return result.err
138         }
139         defer result.resp.Body.Close()
140         var body io.Reader = result.resp.Body
141         if part.responseBodyWrapper != nil {
142                 body = part.responseBodyWrapper(body)
143         }
144         // Prevent further accidental use
145         result.resp.Body = nil
146         if ctx.Err() != nil {
147                 return ctx.Err()
148         }
149         switch result.resp.StatusCode {
150         case http.StatusPartialContent:
151                 copied, err := io.Copy(buf, body)
152                 if err != nil {
153                         return err
154                 }
155                 if copied != part.e.Length {
156                         return fmt.Errorf("got %v bytes, expected %v", copied, part.e.Length)
157                 }
158                 return nil
159         case http.StatusOK:
160                 // This number is based on
161                 // https://archive.org/download/BloodyPitOfHorror/BloodyPitOfHorror.asr.srt. It seems that
162                 // archive.org might be using a webserver implementation that refuses to do partial
163                 // responses to small files.
164                 if part.e.Start < 48<<10 {
165                         if part.e.Start != 0 {
166                                 log.Printf("resp status ok but requested range [url=%q, range=%q]",
167                                         part.req.URL,
168                                         part.req.Header.Get("Range"))
169                         }
170                         // Instead of discarding, we could try receiving all the chunks present in the response
171                         // body. I don't know how one would handle multiple chunk requests resulting in an OK
172                         // response for the same file. The request algorithm might be need to be smarter for
173                         // that.
174                         discarded, _ := io.CopyN(io.Discard, body, part.e.Start)
175                         if discarded != 0 {
176                                 log.Printf("discarded %v bytes in webseed request response part", discarded)
177                         }
178                         _, err := io.CopyN(buf, body, part.e.Length)
179                         return err
180                 } else {
181                         return ErrBadResponse{"resp status ok but requested range", result.resp}
182                 }
183         case http.StatusServiceUnavailable:
184                 return ErrTooFast
185         default:
186                 return ErrBadResponse{
187                         fmt.Sprintf("unhandled response status code (%v)", result.resp.StatusCode),
188                         result.resp,
189                 }
190         }
191 }
192
// ErrTooFast is returned when the webseed answers 503 Service Unavailable,
// indicating the client should back off.
var ErrTooFast = errors.New("making requests too fast")
194
195 func readRequestPartResponses(ctx context.Context, parts []requestPart) (_ []byte, err error) {
196         var buf bytes.Buffer
197         for _, part := range parts {
198                 part.start()
199                 err = recvPartResult(ctx, &buf, part)
200                 if err != nil {
201                         err = fmt.Errorf("reading %q at %q: %w", part.req.URL, part.req.Header.Get("Range"), err)
202                         break
203                 }
204         }
205         return buf.Bytes(), err
206 }