12 "github.com/RoaringBitmap/roaring"
13 "github.com/anacrolix/torrent/common"
14 "github.com/anacrolix/torrent/metainfo"
15 "github.com/anacrolix/torrent/segments"
18 type RequestSpec = segments.Extent

// requestPartResult carries the outcome of a single HTTP request for part of an extent.
type requestPartResult struct {
	resp *http.Response
	err  error
}

// requestPart is one file-aligned HTTP request contributing to a RequestSpec.
type requestPart struct {
	req    *http.Request
	e      segments.Extent
	result chan requestPartResult
}

// Request tracks an in-flight webseed request. The outcome is delivered on Result.
type Request struct {
	cancel func()
	Result chan RequestResult
}

// Cancel aborts all outstanding HTTP requests backing this Request.
func (r Request) Cancel() {
	r.cancel()
}

// Client downloads torrent data from a single webseed URL.
type Client struct {
	HttpClient *http.Client
	Url        string

	fileIndex segments.Index
	info      *metainfo.Info
	// The pieces we can request with the Url. We're more likely to ban/block at the file
	// level, given that's how requests are mapped to webseeds, but the torrent.Client works
	// at the piece level. We can map our file-level adjustments to the pieces here. This
	// probably needs to be private in the future, if Client ever starts removing pieces.
	Pieces roaring.Bitmap
}
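
// As a sketch of the file-level-to-piece-level mapping mentioned above: a caller that
// decided to stop requesting a particular file could clear that file's piece range from
// Pieces. pieceRangeForFile is a hypothetical helper, not part of this package:
//
//	begin, end := pieceRangeForFile(info, fileIndex) // hypothetical
//	client.Pieces.RemoveRange(uint64(begin), uint64(end))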

// SetInfo indexes the torrent's files so extents can be mapped to per-file URLs, and
// marks every piece as requestable from this webseed.
func (me *Client) SetInfo(info *metainfo.Info) {
	if !strings.HasSuffix(me.Url, "/") && info.IsDir() {
		// In my experience, this is a non-conforming webseed. For example the
		// http://ia600500.us.archive.org/1/items URLs in archive.org torrents.
		return
	}
	me.fileIndex = segments.NewIndex(common.LengthIterFromUpvertedFiles(info.UpvertedFiles()))
	me.info = info
	me.Pieces.AddRange(0, uint64(info.NumPieces()))
}
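
// For reference, a conforming multi-file webseed URL ends in "/" so that file paths can
// be joined onto it, e.g. (illustrative values):
//
//	"https://example.com/seeds/" + "MyTorrent/data/file1.bin"
//
// The archive.org URLs mentioned above lack the trailing slash, so per-file paths can't
// be appended to them cleanly.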

// RequestResult is the terminal outcome of a Request: the requested bytes, or an error.
type RequestResult struct {
	Bytes []byte
	Err   error
}

// NewRequest starts fetching the given extent. The extent is split into per-file HTTP
// requests, issued concurrently; the assembled result is delivered on the returned
// Request's Result channel.
func (ws *Client) NewRequest(r RequestSpec) Request {
	ctx, cancel := context.WithCancel(context.Background())
	var requestParts []requestPart
	if !ws.fileIndex.Locate(r, func(i int, e segments.Extent) bool {
		req, err := NewRequest(ws.Url, i, ws.info, e.Start, e.Length)
		if err != nil {
			panic(err)
		}
		req = req.WithContext(ctx)
		part := requestPart{
			req:    req,
			result: make(chan requestPartResult, 1),
			e:      e,
		}
		go func() {
			resp, err := ws.HttpClient.Do(req)
			part.result <- requestPartResult{
				resp: resp,
				err:  err,
			}
		}()
		requestParts = append(requestParts, part)
		return true
	}) {
		panic("request out of file bounds")
	}
	req := Request{
		cancel: cancel,
		Result: make(chan RequestResult, 1),
	}
	go func() {
		b, err := readRequestPartResponses(ctx, requestParts)
		req.Result <- RequestResult{
			Bytes: b,
			Err:   err,
		}
	}()
	return req
}
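
// The package-level NewRequest called above is defined elsewhere in this package. It is
// assumed to resolve file i to its URL under ws.Url and to set a standard HTTP range
// header for the extent, roughly (sketch, not the actual implementation):
//
//	req, err := http.NewRequest(http.MethodGet, fileUrl, nil)
//	req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", start, start+length-1))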

// ErrBadResponse indicates the webseed returned an HTTP response we can't use.
type ErrBadResponse struct {
	Msg      string
	Response *http.Response
}

func (me ErrBadResponse) Error() string {
	return me.Msg
}

// recvPartResult waits for one part's HTTP response and writes that part's bytes to buf,
// tolerating servers that answer a ranged request with a full-content 200.
func recvPartResult(ctx context.Context, buf io.Writer, part requestPart) error {
	result := <-part.result
	if result.err != nil {
		return result.err
	}
	defer result.resp.Body.Close()
	if ctx.Err() != nil {
		return ctx.Err()
	}
	switch result.resp.StatusCode {
	case http.StatusPartialContent:
		copied, err := io.Copy(buf, result.resp.Body)
		if err != nil {
			return err
		}
		if copied != part.e.Length {
			return fmt.Errorf("got %v bytes, expected %v", copied, part.e.Length)
		}
		return nil
	case http.StatusOK:
		// This number is based on
		// https://archive.org/download/BloodyPitOfHorror/BloodyPitOfHorror.asr.srt. It seems
		// that archive.org might be using a webserver implementation that refuses to do
		// partial responses to small files.
		if part.e.Start < 48<<10 { // 48 KiB
			if part.e.Start != 0 {
				log.Printf("resp status ok but requested range [url=%q, range=%q]",
					part.req.URL,
					part.req.Header.Get("Range"))
			}
			// Instead of discarding, we could try receiving all the chunks present in the
			// response body. I don't know how one would handle multiple chunk requests
			// resulting in an OK response for the same file. The request algorithm might
			// need to be smarter for that.
			discarded, _ := io.CopyN(io.Discard, result.resp.Body, part.e.Start)
			if discarded != 0 {
				log.Printf("discarded %v bytes in webseed request response part", discarded)
			}
			_, err := io.CopyN(buf, result.resp.Body, part.e.Length)
			return err
		}
		return ErrBadResponse{"resp status ok but requested range", result.resp}
	default:
		return ErrBadResponse{
			fmt.Sprintf("unhandled response status code (%v)", result.resp.StatusCode),
			result.resp,
		}
	}
}
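
// For reference, a conforming exchange for a part with e.Start = 100, e.Length = 50
// looks like this (values illustrative). Servers that ignore the Range header answer
// "200 OK" with the whole file instead, which the http.StatusOK case above works
// around for small files by discarding the leading bytes:
//
//	GET /seeds/MyTorrent/data/file1.bin HTTP/1.1
//	Range: bytes=100-149
//
//	HTTP/1.1 206 Partial Content
//	Content-Range: bytes 100-149/4096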

// readRequestPartResponses assembles the part responses, in order, into a single buffer.
// The first error cancels any parts still in flight.
func readRequestPartResponses(ctx context.Context, parts []requestPart) ([]byte, error) {
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	var buf bytes.Buffer
	firstErr := make(chan error, 1)
	go func() {
		for _, part := range parts {
			err := recvPartResult(ctx, &buf, part)
			if err != nil {
				// Ensure no further unnecessary response reads occur.
				cancel()
				select {
				case firstErr <- fmt.Errorf("reading %q at %q: %w", part.req.URL, part.req.Header.Get("Range"), err):
				default:
				}
			}
		}
		select {
		case firstErr <- nil:
		default:
		}
	}()
	// This can't be merged into the return statement, because buf.Bytes would be called
	// first: Go evaluates return operands left to right, so the receive that synchronizes
	// with the goroutine above has to happen beforehand.
	err := <-firstErr
	return buf.Bytes(), err
}
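
// Putting it together, a caller might use this package roughly as follows (sketch with
// assumed values; error handling elided):
//
//	client := webseed.Client{
//		HttpClient: http.DefaultClient,
//		Url:        "https://example.com/seeds/",
//	}
//	client.SetInfo(info) // *metainfo.Info for the torrent
//	req := client.NewRequest(webseed.RequestSpec{Start: 0, Length: 1 << 14})
//	res := <-req.Result // or req.Cancel() to abort
//	if res.Err == nil {
//		_ = res.Bytes // the requested extent
//	}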