From: Matt Joiner Date: Fri, 1 Aug 2025 08:20:13 +0000 (+1000) Subject: Switch segments to use iterators X-Git-Tag: v1.59.0~2^2~72 X-Git-Url: http://www.git.stargrave.org/?a=commitdiff_plain;h=68f9141f1d7120325de85654e9fbaccbfc67a20d;p=btrtrc.git Switch segments to use iterators Trying to reduce allocations --- diff --git a/segments/index.go b/segments/index.go index acbba430..c897411e 100644 --- a/segments/index.go +++ b/segments/index.go @@ -1,8 +1,9 @@ package segments import ( + "cmp" "iter" - "sort" + "slices" g "github.com/anacrolix/generics" "github.com/anacrolix/missinggo/v2/panicif" @@ -10,7 +11,7 @@ import ( func NewIndex(segments LengthIter) (ret Index) { var start Length - for l, ok := segments(); ok; l, ok = segments() { + for l := range segments { ret.segments = append(ret.segments, Extent{start, l}) start += l } @@ -25,64 +26,48 @@ func NewIndexFromSegments(segments []Extent) Index { return Index{segments} } -func (me Index) iterSegments() func() (Extent, bool) { - var lastEnd g.Option[Int] - return func() (ret Extent, ok bool) { - if len(me.segments) == 0 { - return +// Yields segments as extents with Start relative to the previous segment's end. +func (me Index) iterSegments(startIndex int) iter.Seq[Extent] { + return func(yield func(Extent) bool) { + var lastEnd g.Option[Int] + for _, cur := range me.segments[startIndex:] { + ret := Extent{ + // Why ignore initial start on the first segment? + Start: cur.Start - lastEnd.UnwrapOr(cur.Start), + Length: cur.Length, + } + lastEnd.Set(cur.End()) + if !yield(ret) { + return + } } - cur := me.segments[0] - me.segments = me.segments[1:] - ret.Start = cur.Start - lastEnd.UnwrapOr(cur.Start) - ret.Length = cur.Length - lastEnd.Set(cur.End()) - ok = true - return } } -// Returns true if the callback returns false early, or extents are found in the index for all parts -// of the given extent. TODO: This might not handle discontiguous extents. To be tested. Needed for -// BitTorrent v2 possibly. -func (me Index) Locate(e Extent, output Callback) bool { - first := sort.Search(len(me.segments), func(i int) bool { - _e := me.segments[i] - return _e.End() > e.Start - }) - if first == len(me.segments) { - return e.Length == 0 - } - e.Start -= me.segments[first].Start - // The extent is before the first segment. - if e.Start < 0 { - e.Length += e.Start - e.Start = 0 - } - me.segments = me.segments[first:] - return ScanConsecutive(me.iterSegments(), e, func(i int, e Extent) bool { - return output(i+first, e) - }) -} - func (me Index) LocateIter(e Extent) iter.Seq2[int, Extent] { return func(yield func(int, Extent) bool) { - first := sort.Search(len(me.segments), func(i int) bool { - _e := me.segments[i] - return _e.End() > e.Start + // We find the first segment that ends after the start of the target extent. + first, eq := slices.BinarySearchFunc(me.segments, e.Start, func(elem Extent, target Int) int { + return cmp.Compare(elem.End(), target+1) }) + //fmt.Printf("binary search for %v in %v returned %v\n", e.Start, me.segments, first) if first == len(me.segments) { return } + _ = eq e.Start -= me.segments[first].Start // The extent is before the first segment. if e.Start < 0 { e.Length += e.Start e.Start = 0 } - me.segments = me.segments[first:] - ScanConsecutive(me.iterSegments(), e, func(i int, e Extent) bool { - return yield(i+first, e) - }) + i := first + for cons := range scanConsecutive(me.iterSegments(first), e) { + if !yield(i, cons) { + return + } + i++ + } } } diff --git a/segments/segments.go b/segments/segments.go index 83f1ea5c..eb7fedb2 100644 --- a/segments/segments.go +++ b/segments/segments.go @@ -1,5 +1,9 @@ package segments +import ( + "iter" +) + type Int = int64 type Length = Int @@ -14,56 +18,37 @@ func (e Extent) End() Int { type ( Callback = func(segmentIndex int, segmentBounds Extent) bool - LengthIter = func() (Length, bool) - ConsecutiveExtentIter = func() (Extent, bool) + LengthIter = iter.Seq[Length] + ConsecutiveExtentIter = iter.Seq[Extent] ) -// Returns true if callback returns false early, or all segments in the haystack for the needle are -// found. -func Scan(haystack LengthIter, needle Extent, callback Callback) bool { - return ScanConsecutive( - func() (Extent, bool) { - l, ok := haystack() - return Extent{0, l}, ok - }, - needle, - callback, - ) -} - -// Returns true if callback returns false early, or all segments in the haystack for the needle are -// found. TODO: Does this handle discontiguous extents? -func ScanConsecutive(haystack ConsecutiveExtentIter, needle Extent, callback Callback) bool { - i := 0 - // Extents have been found in the haystack, and we're waiting for the needle to end. This is - // kind of for backwards compatibility for some tests that expect to have zero-length extents. - startedNeedle := false - for needle.Length != 0 { - l, ok := haystack() - if !ok { - return false - } +// TODO: Does this handle discontiguous extents? +func scanConsecutive(haystack ConsecutiveExtentIter, needle Extent) iter.Seq[Extent] { + return func(yield func(Extent) bool) { + // Extents have been found in the haystack, and we're waiting for the needle to end. This is + // kind of for backwards compatibility for some tests that expect to have zero-length extents. + startedNeedle := false + next, stop := iter.Pull(haystack) + defer stop() + for needle.Length != 0 { + l, ok := next() + if !ok { + return + } - e1 := Extent{ - Start: max(needle.Start-l.Start, 0), - } - e1.Length = max(min(l.Length, needle.End()-l.Start)-e1.Start, 0) - needle.Start = max(0, needle.Start-l.End()) - needle.Length -= e1.Length + l.Start - if e1.Length > 0 || (startedNeedle && needle.Length != 0) { - if !callback(i, e1) { - return true + e1 := Extent{ + Start: max(needle.Start-l.Start, 0), + } + e1.Length = max(min(l.Length, needle.End()-l.Start)-e1.Start, 0) + needle.Start = max(0, needle.Start-l.End()) + needle.Length -= e1.Length + l.Start + if e1.Length > 0 || (startedNeedle && needle.Length != 0) { + if !yield(e1) { + return + } + startedNeedle = true } - startedNeedle = true } - i++ - } - return true -} - -func LocaterFromLengthIter(li LengthIter) Locater { - return func(e Extent, c Callback) bool { - return Scan(li, e, c) } } diff --git a/segments/segments_test.go b/segments/segments_test.go index 73359649..4723926d 100644 --- a/segments/segments_test.go +++ b/segments/segments_test.go @@ -1,22 +1,14 @@ package segments import ( + "slices" "testing" "github.com/go-quicktest/qt" ) func LengthIterFromSlice(ls []Length) LengthIter { - return func() (Length, bool) { - switch len(ls) { - case 0: - return -1, false - default: - l := ls[0] - ls = ls[1:] - return l, true - } - } + return slices.Values(ls) } type ScanCallbackValue struct { @@ -104,16 +96,33 @@ func testLocater(t *testing.T, newLocater newLocater) { {0, 0}, {0, 2}, }) -} - -func TestScan(t *testing.T) { - testLocater(t, LocaterFromLengthIter) -} - -func TestIndex(t *testing.T) { - testLocater(t, func(li LengthIter) Locater { - return NewIndex(li).Locate - }) + checkContiguous(t, newLocater, + []Length{2, 0, 1, 0, 0, 1}, + Extent{3, 2}, + 5, + []Extent{ + {0, 1}, + }) + checkContiguous(t, newLocater, + []Length{2, 0, 1, 0, 0, 1}, + Extent{2, 2}, + 2, + []Extent{ + {0, 1}, + {0, 0}, + {0, 0}, + {0, 1}, + }) + checkContiguous(t, newLocater, + []Length{}, + Extent{1, 1}, + 0, + []Extent{}) + checkContiguous(t, newLocater, + []Length{0}, + Extent{1, 1}, + 0, + []Extent{}) } func TestIndexLocateIter(t *testing.T) {