webseed/client.go

   1 package webseed
   2
   3 import (
   4         "bytes"
   5         "context"
   6         "errors"
   7         "fmt"
   8         "io"
   9         "log"
  10         "net/http"
  11         "strings"
  12
  13         "github.com/RoaringBitmap/roaring"
  14         "github.com/anacrolix/torrent/common"
  15         "github.com/anacrolix/torrent/metainfo"
  16         "github.com/anacrolix/torrent/segments"
  17 )
  18
  19 type RequestSpec = segments.Extent
  20
  21 type requestPartResult struct {
  22         resp *http.Response
  23         err  error
  24 }
  25
  26 type requestPart struct {
  27         req    *http.Request
  28         e      segments.Extent
  29         result chan requestPartResult
  30         start  func()
  31         // Wrap http response bodies for such things as download rate limiting.
  32         responseBodyWrapper ResponseBodyWrapper
  33 }
  34
  35 type Request struct {
  36         cancel func()
  37         Result chan RequestResult
  38 }
  39
  40 func (r Request) Cancel() {
  41         r.cancel()
  42 }
  43
  44 type Client struct {
  45         HttpClient *http.Client
  46         Url        string
  47         fileIndex  segments.Index
  48         info       *metainfo.Info
  49         // The pieces we can request with the Url. We're more likely to ban/block at the file-level
  50         // given that's how requests are mapped to webseeds, but the torrent.Client works at the piece
  51         // level. We can map our file-level adjustments to the pieces here. This probably need to be
  52         // private in the future, if Client ever starts removing pieces.
  53         Pieces              roaring.Bitmap
  54         ResponseBodyWrapper ResponseBodyWrapper
  55 }
  56
  57 type ResponseBodyWrapper func(io.Reader) io.Reader
  58
  59 func (me *Client) SetInfo(info *metainfo.Info) {
  60         if !strings.HasSuffix(me.Url, "/") && info.IsDir() {
  61                 // In my experience, this is a non-conforming webseed. For example the
  62                 // http://ia600500.us.archive.org/1/items URLs in archive.org torrents.
  63                 return
  64         }
  65         me.fileIndex = segments.NewIndex(common.LengthIterFromUpvertedFiles(info.UpvertedFiles()))
  66         me.info = info
  67         me.Pieces.AddRange(0, uint64(info.NumPieces()))
  68 }
  69
  70 type RequestResult struct {
  71         Bytes []byte
  72         Err   error
  73 }
  74
  75 func (ws *Client) NewRequest(r RequestSpec) Request {
  76         ctx, cancel := context.WithCancel(context.Background())
  77         var requestParts []requestPart
  78         if !ws.fileIndex.Locate(r, func(i int, e segments.Extent) bool {
  79                 req, err := NewRequest(ws.Url, i, ws.info, e.Start, e.Length)
  80                 if err != nil {
  81                         panic(err)
  82                 }
  83                 req = req.WithContext(ctx)
  84                 part := requestPart{
  85                         req:                 req,
  86                         result:              make(chan requestPartResult, 1),
  87                         e:                   e,
  88                         responseBodyWrapper: ws.ResponseBodyWrapper,
  89                 }
  90                 part.start = func() {
  91                         go func() {
  92                                 resp, err := ws.HttpClient.Do(req)
  93                                 part.result <- requestPartResult{
  94                                         resp: resp,
  95                                         err:  err,
  96                                 }
  97                         }()
  98                 }
  99                 requestParts = append(requestParts, part)
 100                 return true
 101         }) {
 102                 panic("request out of file bounds")
 103         }
 104         req := Request{
 105                 cancel: cancel,
 106                 Result: make(chan RequestResult, 1),
 107         }
 108         go func() {
 109                 b, err := readRequestPartResponses(ctx, requestParts)
 110                 req.Result <- RequestResult{
 111                         Bytes: b,
 112                         Err:   err,
 113                 }
 114         }()
 115         return req
 116 }
 117
 118 type ErrBadResponse struct {
 119         Msg      string
 120         Response *http.Response
 121 }
 122
 123 func (me ErrBadResponse) Error() string {
 124         return me.Msg
 125 }
 126
 127 func recvPartResult(ctx context.Context, buf io.Writer, part requestPart) error {
 128         result := <-part.result
 129         // Make sure there's no further results coming, it should be a one-shot channel.
 130         close(part.result)
 131         if result.err != nil {
 132                 return result.err
 133         }
 134         defer result.resp.Body.Close()
 135         var body io.Reader = result.resp.Body
 136         if part.responseBodyWrapper != nil {
 137                 body = part.responseBodyWrapper(body)
 138         }
 139         // Prevent further accidental use
 140         result.resp.Body = nil
 141         if ctx.Err() != nil {
 142                 return ctx.Err()
 143         }
 144         switch result.resp.StatusCode {
 145         case http.StatusPartialContent:
 146                 copied, err := io.Copy(buf, body)
 147                 if err != nil {
 148                         return err
 149                 }
 150                 if copied != part.e.Length {
 151                         return fmt.Errorf("got %v bytes, expected %v", copied, part.e.Length)
 152                 }
 153                 return nil
 154         case http.StatusOK:
 155                 // This number is based on
 156                 // https://archive.org/download/BloodyPitOfHorror/BloodyPitOfHorror.asr.srt. It seems that
 157                 // archive.org might be using a webserver implementation that refuses to do partial
 158                 // responses to small files.
 159                 if part.e.Start < 48<<10 {
 160                         if part.e.Start != 0 {
 161                                 log.Printf("resp status ok but requested range [url=%q, range=%q]",
 162                                         part.req.URL,
 163                                         part.req.Header.Get("Range"))
 164                         }
 165                         // Instead of discarding, we could try receiving all the chunks present in the response
 166                         // body. I don't know how one would handle multiple chunk requests resulting in an OK
 167                         // response for the same file. The request algorithm might be need to be smarter for
 168                         // that.
 169                         discarded, _ := io.CopyN(io.Discard, body, part.e.Start)
 170                         if discarded != 0 {
 171                                 log.Printf("discarded %v bytes in webseed request response part", discarded)
 172                         }
 173                         _, err := io.CopyN(buf, body, part.e.Length)
 174                         return err
 175                 } else {
 176                         return ErrBadResponse{"resp status ok but requested range", result.resp}
 177                 }
 178         case http.StatusServiceUnavailable:
 179                 return ErrTooFast
 180         default:
 181                 return ErrBadResponse{
 182                         fmt.Sprintf("unhandled response status code (%v)", result.resp.StatusCode),
 183                         result.resp,
 184                 }
 185         }
 186 }
 187
 188 var ErrTooFast = errors.New("making requests too fast")
 189
 190 func readRequestPartResponses(ctx context.Context, parts []requestPart) (_ []byte, err error) {
 191         var buf bytes.Buffer
 192         for _, part := range parts {
 193                 part.start()
 194                 err = recvPartResult(ctx, &buf, part)
 195                 if err != nil {
 196                         err = fmt.Errorf("reading %q at %q: %w", part.req.URL, part.req.Header.Get("Range"), err)
 197                         break
 198                 }
 199         }
 200         return buf.Bytes(), err
 201 }