From: Matt Joiner <anacrolix@gmail.com>
Date: Sat, 2 Mar 2024 02:00:28 +0000 (+1100)
Subject: Improve segment handling for discontiguous extents
X-Git-Tag: v1.56.0~62^2~1
X-Git-Url: http://www.git.stargrave.org/?a=commitdiff_plain;h=188f5bb314d333803ba0ce37f94dc7451ecdd9e1;p=btrtrc.git

Improve segment handling for discontiguous extents
---

diff --git a/metainfo/file-tree.go b/metainfo/file-tree.go
index 0d7e1caf..2b651bd4 100644
--- a/metainfo/file-tree.go
+++ b/metainfo/file-tree.go
@@ -11,12 +11,15 @@ import (
 
 const FileTreePropertiesKey = ""
 
+type FileTreeFile struct {
+	Length     int64  `bencode:"length"`
+	PiecesRoot string `bencode:"pieces root"`
+}
+
+// The fields here don't need bencode tags as the marshalling is done manually.
 type FileTree struct {
-	File struct {
-		Length     int64  `bencode:"length"`
-		PiecesRoot string `bencode:"pieces root"`
-	}
-	Dir map[string]FileTree
+	File FileTreeFile
+	Dir  map[string]FileTree
 }
 
 func (ft *FileTree) UnmarshalBencode(bytes []byte) (err error) {
@@ -107,7 +110,7 @@ func (ft *FileTree) Walk(path []string, f func(path []string, ft *FileTree)) {
 }
 
 func (ft *FileTree) PiecesRootAsByteArray() (ret g.Option[[32]byte]) {
-	if ft.File.Length == 0 {
+	if ft.File.PiecesRoot == "" {
 		return
 	}
 	n := copy(ret.Value[:], ft.File.PiecesRoot)
diff --git a/metainfo/info.go b/metainfo/info.go
index 8b8749c7..5d6300ec 100644
--- a/metainfo/info.go
+++ b/metainfo/info.go
@@ -168,7 +168,13 @@ func (info *Info) UpvertedFiles() (files []FileInfo) {
 			Path: nil,
 		}}
 	}
-	return info.Files
+	var offset int64
+	for _, fi := range info.Files {
+		fi.TorrentOffset = offset
+		offset += fi.Length
+		files = append(files, fi)
+	}
+	return
 }
 
 func (info *Info) Piece(index int) Piece {
diff --git a/segments/index.go b/segments/index.go
index 888e90a8..d7ae744b 100644
--- a/segments/index.go
+++ b/segments/index.go
@@ -1,6 +1,7 @@
 package segments
 
 import (
+	g "github.com/anacrolix/generics"
 	"sort"
 )
 
@@ -21,15 +22,19 @@ func NewIndexFromSegments(segments []Extent) Index {
 	return Index{segments}
 }
 
-func (me Index) iterSegments() func() (Length, bool) {
-	return func() (Length, bool) {
+func (me Index) iterSegments() func() (Extent, bool) {
+	var lastEnd g.Option[Int]
+	return func() (ret Extent, ok bool) {
 		if len(me.segments) == 0 {
-			return 0, false
-		} else {
-			l := me.segments[0].Length
-			me.segments = me.segments[1:]
-			return l, true
+			return
 		}
+		cur := me.segments[0]
+		me.segments = me.segments[1:]
+		ret.Start = cur.Start - lastEnd.UnwrapOr(cur.Start)
+		ret.Length = cur.Length
+		lastEnd.Set(cur.End())
+		ok = true
+		return
 	}
 }
 
@@ -41,11 +46,16 @@ func (me Index) Locate(e Extent, output Callback) bool {
 		return _e.End() > e.Start
 	})
 	if first == len(me.segments) {
-		return false
+		return e.Length == 0
 	}
 	e.Start -= me.segments[first].Start
+	// The extent is before the first segment.
+	if e.Start < 0 {
+		e.Length += e.Start
+		e.Start = 0
+	}
 	me.segments = me.segments[first:]
-	return Scan(me.iterSegments(), e, func(i int, e Extent) bool {
+	return ScanConsecutive(me.iterSegments(), e, func(i int, e Extent) bool {
 		return output(i+first, e)
 	})
 }
diff --git a/segments/segments.go b/segments/segments.go
index 38700425..35e7e4c8 100644
--- a/segments/segments.go
+++ b/segments/segments.go
@@ -13,33 +13,48 @@ func (e Extent) End() Int {
 }
 
 type (
-	Callback   = func(segmentIndex int, segmentBounds Extent) bool
-	LengthIter = func() (Length, bool)
+	Callback              = func(segmentIndex int, segmentBounds Extent) bool
+	LengthIter            = func() (Length, bool)
+	ConsecutiveExtentIter = func() (Extent, bool)
 )
 
 // Returns true if callback returns false early, or all segments in the haystack for the needle are
 // found.
 func Scan(haystack LengthIter, needle Extent, callback Callback) bool {
+	return ScanConsecutive(
+		func() (Extent, bool) {
+			l, ok := haystack()
+			return Extent{0, l}, ok
+		},
+		needle,
+		callback,
+	)
+}
+
+// Returns true if callback returns false early, or all segments in the haystack for the needle are
+// found.
+func ScanConsecutive(haystack ConsecutiveExtentIter, needle Extent, callback Callback) bool {
 	i := 0
+	// Extents have been found in the haystack and we're waiting for the needle to end. This is kind
+	// of for backwards compatibility for some tests that expect to have zero-length extents.
+	startedNeedle := false
 	for needle.Length != 0 {
 		l, ok := haystack()
 		if !ok {
 			return false
 		}
-		if needle.Start < l || needle.Start == l && l == 0 {
-			e1 := Extent{
-				Start:  needle.Start,
-				Length: min(l, needle.End()) - needle.Start,
-			}
-			if e1.Length >= 0 {
-				if !callback(i, e1) {
-					return true
-				}
-				needle.Start = 0
-				needle.Length -= e1.Length
+
+		e1 := Extent{
+			Start: max(needle.Start-l.Start, 0),
+		}
+		e1.Length = max(min(l.Length, needle.End()-l.Start)-e1.Start, 0)
+		needle.Start = max(0, needle.Start-l.End())
+		needle.Length -= e1.Length + l.Start
+		if e1.Length > 0 || (startedNeedle && needle.Length != 0) {
+			if !callback(i, e1) {
+				return true
 			}
-		} else {
-			needle.Start -= l
+			startedNeedle = true
 		}
 		i++
 	}
diff --git a/segments/segments_test.go b/segments/segments_test.go
index 9ce9164b..e757654f 100644
--- a/segments/segments_test.go
+++ b/segments/segments_test.go
@@ -1,9 +1,8 @@
 package segments
 
 import (
+	qt "github.com/frankban/quicktest"
 	"testing"
-
-	"github.com/stretchr/testify/assert"
 )
 
 func LengthIterFromSlice(ls []Length) LengthIter {
@@ -36,14 +35,21 @@ func (me *collectExtents) scanCallback(i int, e Extent) bool {
 
 type newLocater func(LengthIter) Locater
 
-func assertLocate(t *testing.T, nl newLocater, ls []Length, needle Extent, firstExpectedIndex int, expectedExtents []Extent) {
+func assertLocate(
+	t *testing.T,
+	nl newLocater,
+	ls []Length,
+	needle Extent,
+	firstExpectedIndex int,
+	expectedExtents []Extent,
+) {
 	var actual collectExtents
 	var expected collectExtents
 	for i, e := range expectedExtents {
 		expected.scanCallback(firstExpectedIndex+i, e)
 	}
 	nl(LengthIterFromSlice(ls))(needle, actual.scanCallback)
-	assert.EqualValues(t, expected, actual)
+	qt.Check(t, actual, qt.DeepEquals, expected)
 }
 
 func testLocater(t *testing.T, newLocater newLocater) {
diff --git a/storage/file-misc.go b/storage/file-misc.go
index 8966ecbd..67514420 100644
--- a/storage/file-misc.go
+++ b/storage/file-misc.go
@@ -1,34 +1,20 @@
 package storage
 
-import "github.com/anacrolix/torrent/metainfo"
+import (
+	"github.com/anacrolix/torrent/segments"
+)
 
-type requiredLength struct {
-	fileIndex int
-	length    int64
-}
-
-func extentCompleteRequiredLengths(info *metainfo.Info, off, n int64) (ret []requiredLength) {
-	if n == 0 {
-		return
-	}
-	for i, fi := range info.UpvertedFiles() {
-		if off >= fi.Length {
-			off -= fi.Length
-			continue
-		}
-		n1 := n
-		if off+n1 > fi.Length {
-			n1 = fi.Length - off
-		}
-		ret = append(ret, requiredLength{
-			fileIndex: i,
-			length:    off + n1,
-		})
-		n -= n1
-		if n == 0 {
-			return
-		}
-		off = 0
-	}
-	panic("extent exceeds torrent bounds")
+// Returns the minimum file lengths required for the given extent to exist on disk. Returns false if
+// the extent is not covered by the files in the index.
+func minFileLengthsForTorrentExtent(
+	fileSegmentsIndex segments.Index,
+	off, n int64,
+	each func(fileIndex int, length int64) bool,
+) bool {
+	return fileSegmentsIndex.Locate(segments.Extent{
+		Start:  off,
+		Length: n,
+	}, func(fileIndex int, segmentBounds segments.Extent) bool {
+		return each(fileIndex, segmentBounds.Start+segmentBounds.Length)
+	})
 }
diff --git a/storage/file-misc_test.go b/storage/file-misc_test.go
index f74196d0..9e97ce00 100644
--- a/storage/file-misc_test.go
+++ b/storage/file-misc_test.go
@@ -1,6 +1,9 @@
 package storage
 
 import (
+	"github.com/anacrolix/torrent/common"
+	"github.com/anacrolix/torrent/segments"
+	qt "github.com/frankban/quicktest"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
@@ -8,6 +11,60 @@ import (
 	"github.com/anacrolix/torrent/metainfo"
 )
 
+type requiredLength struct {
+	FileIndex int
+	Length    int64
+}
+
+// The required file indices and file lengths for the given extent to be "complete". This is the
+// outdated interface used by some tests.
+func extentCompleteRequiredLengths(info *metainfo.Info, off, n int64) (ret []requiredLength) {
+	index := segments.NewIndexFromSegments(common.TorrentOffsetFileSegments(info))
+	minFileLengthsForTorrentExtent(index, off, n, func(fileIndex int, length int64) bool {
+		ret = append(ret, requiredLength{fileIndex, length})
+		return true
+	})
+	return
+}
+
+func TestExtentCompleteRequiredLengthsV2InfoWithGaps(t *testing.T) {
+	info := &metainfo.Info{
+		MetaVersion: 2,
+		PieceLength: 2,
+		FileTree: metainfo.FileTree{
+			Dir: map[string]metainfo.FileTree{
+				"a": {
+					File: metainfo.FileTreeFile{
+						Length: 2,
+					},
+				},
+				"b": {
+					File: metainfo.FileTreeFile{Length: 3},
+				},
+				// Here there's a gap where v2 torrents piece align, so the next file offset starts
+				// at 6.
+				"c": {
+					File: metainfo.FileTreeFile{Length: 4},
+				},
+			},
+		},
+	}
+	c := qt.New(t)
+	check := func(off, n int64, expected ...requiredLength) {
+		c.Check(extentCompleteRequiredLengths(info, off, n), qt.DeepEquals, expected)
+	}
+	check(0, 0)
+	check(0, 1, requiredLength{FileIndex: 0, Length: 1})
+	check(0, 2, requiredLength{FileIndex: 0, Length: 2})
+	check(0, 3, requiredLength{FileIndex: 0, Length: 2}, requiredLength{FileIndex: 1, Length: 1})
+	check(2, 2, requiredLength{FileIndex: 1, Length: 2})
+	check(4, 1, requiredLength{FileIndex: 1, Length: 3})
+	check(5, 0)
+	check(4, 2, requiredLength{FileIndex: 1, Length: 3})
+	check(5, 1)
+	check(6, 4, requiredLength{FileIndex: 2, Length: 4})
+}
+
 func TestExtentCompleteRequiredLengths(t *testing.T) {
 	info := &metainfo.Info{
 		Files: []metainfo.FileInfo{
@@ -15,23 +72,27 @@ func TestExtentCompleteRequiredLengths(t *testing.T) {
 			{Path: []string{"b"}, Length: 3},
 		},
 	}
+	c := qt.New(t)
+	check := func(off, n int64, expected ...requiredLength) {
+		c.Check(extentCompleteRequiredLengths(info, off, n), qt.DeepEquals, expected)
+	}
 	assert.Empty(t, extentCompleteRequiredLengths(info, 0, 0))
 	assert.EqualValues(t, []requiredLength{
-		{fileIndex: 0, length: 1},
+		{FileIndex: 0, Length: 1},
 	}, extentCompleteRequiredLengths(info, 0, 1))
 	assert.EqualValues(t, []requiredLength{
-		{fileIndex: 0, length: 2},
+		{FileIndex: 0, Length: 2},
 	}, extentCompleteRequiredLengths(info, 0, 2))
 	assert.EqualValues(t, []requiredLength{
-		{fileIndex: 0, length: 2},
-		{fileIndex: 1, length: 1},
+		{FileIndex: 0, Length: 2},
+		{FileIndex: 1, Length: 1},
 	}, extentCompleteRequiredLengths(info, 0, 3))
 	assert.EqualValues(t, []requiredLength{
-		{fileIndex: 1, length: 2},
+		{FileIndex: 1, Length: 2},
 	}, extentCompleteRequiredLengths(info, 2, 2))
 	assert.EqualValues(t, []requiredLength{
-		{fileIndex: 1, length: 3},
+		{FileIndex: 1, Length: 3},
 	}, extentCompleteRequiredLengths(info, 4, 1))
 	assert.Len(t, extentCompleteRequiredLengths(info, 5, 0), 0)
-	assert.Panics(t, func() { extentCompleteRequiredLengths(info, 6, 1) })
+	check(6, 1)
 }