From: Matt Joiner <anacrolix@gmail.com>
Date: Mon, 25 Mar 2024 02:35:35 +0000 (+1100)
Subject: Support v2 hashes on files that don't have piece layers
X-Git-Tag: v1.56.0~29
X-Git-Url: http://www.git.stargrave.org/?a=commitdiff_plain;h=60d79eb5a1fc85f574f38e28015fffcf0860ee32;p=btrtrc.git

Support v2 hashes on files that don't have piece layers
---

diff --git a/merkle/hash.go b/merkle/hash.go
index 5984547a..ad3b55a3 100644
--- a/merkle/hash.go
+++ b/merkle/hash.go
@@ -16,11 +16,12 @@ func NewHash() *Hash {
 type Hash struct {
 	blocks    [][32]byte
 	nextBlock hash.Hash
-	written   int
+	// How many bytes have been written to nextBlock so far.
+	nextBlockWritten int
 }
 
 func (h *Hash) remaining() int {
-	return BlockSize - h.written
+	return BlockSize - h.nextBlockWritten
 }
 
 func (h *Hash) Write(p []byte) (n int, err error) {
@@ -28,12 +29,12 @@ func (h *Hash) Write(p []byte) (n int, err error) {
 		var n1 int
 		n1, err = h.nextBlock.Write(p[:min(len(p), h.remaining())])
 		n += n1
-		h.written += n1
+		h.nextBlockWritten += n1
 		p = p[n1:]
 		if h.remaining() == 0 {
 			h.blocks = append(h.blocks, h.nextBlockSum())
 			h.nextBlock.Reset()
-			h.written = 0
+			h.nextBlockWritten = 0
 		}
 		if err != nil {
 			break
@@ -49,11 +50,25 @@ func (h *Hash) nextBlockSum() (sum [32]byte) {
 	return
 }
 
-func (h *Hash) Sum(b []byte) []byte {
+func (h *Hash) curBlocks() [][32]byte {
 	blocks := h.blocks
-	if h.written != 0 {
+	if h.nextBlockWritten != 0 {
 		blocks = append(blocks, h.nextBlockSum())
 	}
+	return blocks
+}
+
+func (h *Hash) Sum(b []byte) []byte {
+	sum := RootWithPadHash(h.curBlocks(), [32]byte{})
+	return append(b, sum[:]...)
+}
+
+// SumMinLength sums like Sum, but first pads with zero hashes up to the block count for length
+// bytes, as piece layer hashes need for short file tails. If more was written, nothing is padded.
+func (h *Hash) SumMinLength(b []byte, length int) []byte {
+	blocks := h.curBlocks()
+	minBlocks := (length + BlockSize - 1) / BlockSize
+	blocks = append(blocks, make([][32]byte, max(0, minBlocks-len(blocks)))...)
 	sum := RootWithPadHash(blocks, [32]byte{})
 	return append(b, sum[:]...)
 }
@@ -61,7 +76,7 @@ func (h *Hash) Sum(b []byte) []byte {
 func (h *Hash) Reset() {
 	h.blocks = h.blocks[:0]
 	h.nextBlock.Reset()
-	h.written = 0
+	h.nextBlockWritten = 0
 }
 
 func (h *Hash) Size() int {
diff --git a/peerconn.go b/peerconn.go
index df71abc9..a8912f4a 100644
--- a/peerconn.go
+++ b/peerconn.go
@@ -1269,6 +1269,11 @@ func (pc *PeerConn) requestMissingHashes() {
 file:
 	for _, file := range info.UpvertedFiles() {
 		fileNumPieces := int((file.Length + info.PieceLength - 1) / info.PieceLength)
 		curFileBeginPiece := nextFileBeginPiece
 		nextFileBeginPiece += fileNumPieces
+		// Single-piece files have no piece layer to request: the pieces root is the piece hash.
+		// Skip only after advancing nextFileBeginPiece so later files keep correct piece offsets.
+		if fileNumPieces <= 1 {
+			continue
+		}
 		haveAllHashes := true
@@ -1284,11 +1289,6 @@ file:
 		if haveAllHashes {
 			continue
 		}
-		// We would be requesting the leaves, the file must be short enough that we can just do with
-		// the pieces root as the piece hash.
-		if fileNumPieces <= 1 {
-			continue
-		}
 		piecesRoot := file.PiecesRoot.Unwrap()
 		proofLayers := pp.Integer(0)
 		for index := 0; index < fileNumPieces; index += 512 {
diff --git a/piece.go b/piece.go
index 73d1e0c1..604da298 100644
--- a/piece.go
+++ b/piece.go
@@ -58,6 +58,8 @@ func (p *Piece) Storage() storage.Piece {
 	var pieceHash g.Option[[]byte]
 	if p.hash != nil {
 		pieceHash.Set(p.hash.Bytes())
+	} else if !p.hasPieceLayer() {
+		pieceHash.Set(p.mustGetOnlyFile().piecesRoot.UnwrapPtr()[:])
 	} else if p.hashV2.Ok {
 		pieceHash.Set(p.hashV2.Value[:])
 	}
@@ -292,9 +294,18 @@ func (p *Piece) setV2Hash(v2h [32]byte) {
 
 // Can't do certain things if we don't know the piece hash.
 func (p *Piece) haveHash() bool {
-	return p.hash != nil || p.hashV2.Ok
+	if p.hash != nil {
+		return true
+	}
+	if !p.hasPieceLayer() {
+		return true
+	}
+	return p.hashV2.Ok
 }
 
-func pieceStateAllowsMessageWrites(p *Piece, pc *PeerConn) bool {
-	return (pc.shouldRequestHashes() && !p.haveHash()) || !p.t.ignorePieceForRequests(p.index)
+// hasPieceLayer reports whether the piece's v2 hash must come from a piece layer, which is only
+// so when its single file is longer than the piece length. The file length is compared because
+// the piece's own length presumably never exceeds the piece length.
+func (p *Piece) hasPieceLayer() bool {
+	return len(p.files) == 1 && p.files[0].length > p.t.info.PieceLength
 }
diff --git a/torrent.go b/torrent.go
index e0ce8184..2ced6bcb 100644
--- a/torrent.go
+++ b/torrent.go
@@ -410,18 +410,6 @@ func (t *Torrent) makePieces() {
 		beginFile := pieceFirstFileIndex(piece.torrentBeginOffset(), files)
 		endFile := pieceEndFileIndex(piece.torrentEndOffset(), files)
 		piece.files = files[beginFile:endFile]
-		if t.info.FilesArePieceAligned() {
-			numFiles := len(piece.files)
-			if numFiles != 1 {
-				panic(fmt.Sprintf("%v:%v", beginFile, endFile))
-			}
-			if t.info.HasV2() {
-				file := piece.mustGetOnlyFile()
-				if file.numPieces() == 1 {
-					piece.hashV2.Set(file.piecesRoot.Unwrap())
-				}
-			}
-		}
 	}
 }
 
@@ -431,6 +419,9 @@ func (t *Torrent) addPieceLayersLocked(layers map[string]string) (errs []error)
 	}
 files:
 	for _, f := range *t.files {
+		if f.numPieces() <= 1 {
+			continue
+		}
 		if !f.piecesRoot.Ok {
 			err := fmt.Errorf("no piece root set for file %v", f)
 			errs = append(errs, err)
@@ -446,14 +437,14 @@ files:
 				errs = append(errs, err)
 				continue files
 			}
-		} else if f.length > t.info.PieceLength {
-			// BEP 52 is pretty strongly worded about this, even though we should be able to
-			// recover: If a v2 torrent is added by magnet link or infohash, we need to fetch piece
-			// layers ourselves anyway, and that's how we can recover from this.
-			t.logger.Levelf(log.Warning, "no piece layers for file %q", f)
-			continue
 		} else {
-			hashes = [][32]byte{f.piecesRoot.Value}
+			if f.length > t.info.PieceLength {
+				// BEP 52 is pretty strongly worded about this, even though we should be able to
+				// recover: If a v2 torrent is added by magnet link or infohash, we need to fetch
+				// piece layers ourselves anyway, and that's how we can recover from this.
+				t.logger.Levelf(log.Warning, "no piece layers for file %q", f)
+			}
+			continue files
 		}
 		if len(hashes) != f.numPieces() {
 			errs = append(
@@ -700,7 +691,7 @@ func (t *Torrent) pieceState(index pieceIndex) (ret PieceState) {
 	if !ret.Complete && t.piecePartiallyDownloaded(index) {
 		ret.Partial = true
 	}
-	if t.info.HasV2() && !p.hashV2.Ok {
+	if t.info.HasV2() && !p.hashV2.Ok && p.hasPieceLayer() {
 		ret.MissingPieceLayerHash = true
 	}
 	return
@@ -1165,45 +1156,65 @@ func (t *Torrent) hashPiece(piece pieceIndex) (
 			return
 		}
 		h := pieceHash.New()
-		differingPeers, err = t.hashPieceWithSpecificHash(piece, h, t.info.FilesArePieceAligned())
-		var sum [20]byte
-		n := len(h.Sum(sum[:0]))
-		if n != 20 {
-			panic(n)
+		differingPeers, err = t.hashPieceWithSpecificHash(piece, h)
+		// For a hybrid torrent, we work with the v2 files, but if we use a v1 hash, we can assume that
+		// the pieces are padded with zeroes.
+		if t.info.FilesArePieceAligned() {
+			paddingLen := p.Info().V1Length() - p.Info().Length()
+			written, err := io.CopyN(h, zeroReader, paddingLen)
+			if written != paddingLen {
+				panic(fmt.Sprintf(
+					"piece %v: wrote %v bytes of padding, expected %v, error: %v",
+					piece,
+					written,
+					paddingLen,
+					err,
+				))
+			}
 		}
+		var sum [20]byte
+		sumExactly(sum[:], h.Sum)
 		correct = sum == *p.hash
 	} else if p.hashV2.Ok {
 		h := merkle.NewHash()
-		differingPeers, err = t.hashPieceWithSpecificHash(piece, h, false)
+		differingPeers, err = t.hashPieceWithSpecificHash(piece, h)
 		var sum [32]byte
-		n := len(h.Sum(sum[:0]))
-		if n != 32 {
-			panic(n)
-		}
+		// What about the final piece in a torrent? From BEP 52: "The layer is chosen so that one
+		// hash covers piece length bytes." Note that a piece only lacks a hash in the piece
+		// layers when its file is not larger than the piece length.
+		sumExactly(sum[:], func(b []byte) []byte {
+			return h.SumMinLength(b, int(t.info.PieceLength))
+		})
 		correct = sum == p.hashV2.Value
 	} else {
-		panic("no hash")
+		expected := p.mustGetOnlyFile().piecesRoot.Unwrap()
+		h := merkle.NewHash()
+		differingPeers, err = t.hashPieceWithSpecificHash(piece, h)
+		var sum [32]byte
+		// This is *not* padded to piece length.
+		sumExactly(sum[:], h.Sum)
+		correct = sum == expected
 	}
 	return
 }
 
-func (t *Torrent) hashPieceWithSpecificHash(piece pieceIndex, h hash.Hash, padV1 bool) (
+// sumExactly calls sum on dst[:0] and panics with the returned length unless it equals len(dst).
+func sumExactly(dst []byte, sum func(b []byte) []byte) {
+	if got := len(sum(dst[:0])); got != len(dst) {
+		panic(got)
+	}
+}
+
+func (t *Torrent) hashPieceWithSpecificHash(piece pieceIndex, h hash.Hash) (
 	// These are peers that sent us blocks that differ from what we hash here.
 	differingPeers map[bannableAddr]struct{},
 	err error,
 ) {
 	p := t.piece(piece)
-	p.waitNoPendingWrites()
 	storagePiece := p.Storage()
 
-	const logPieceContents = false
 	smartBanWriter := t.smartBanBlockCheckingWriter(piece)
-	writers := []io.Writer{h, smartBanWriter}
-	var examineBuf bytes.Buffer
-	if logPieceContents {
-		writers = append(writers, &examineBuf)
-	}
-	multiWriter := io.MultiWriter(writers...)
+	multiWriter := io.MultiWriter(h, smartBanWriter)
 	{
 		var written int64
 		written, err = storagePiece.WriteTo(multiWriter)
@@ -1217,24 +1228,6 @@ func (t *Torrent) hashPieceWithSpecificHash(piece pieceIndex, h hash.Hash, padV1
 	// Flush before writing padding, since we would not have recorded the padding blocks.
 	smartBanWriter.Flush()
 	differingPeers = smartBanWriter.badPeers
-	// For a hybrid torrent, we work with the v2 files, but if we use a v1 hash, we can assume that
-	// the pieces are padded with zeroes.
-	if padV1 {
-		paddingLen := p.Info().V1Length() - p.Info().Length()
-		written, err := io.CopyN(multiWriter, zeroReader, paddingLen)
-		if written != paddingLen {
-			panic(fmt.Sprintf(
-				"piece %v: wrote %v bytes of padding, expected %v, error: %v",
-				piece,
-				written,
-				paddingLen,
-				err,
-			))
-		}
-	}
-	if logPieceContents {
-		t.logger.WithNames("hashing").Levelf(log.Debug, "hashed %q with copy err %v", examineBuf.Bytes(), err)
-	}
 	return
 }
 
@@ -2623,7 +2616,7 @@ func (t *Torrent) queueInitialPieceCheck(i pieceIndex) {
 
 func (t *Torrent) queuePieceCheck(pieceIndex pieceIndex) {
 	piece := t.piece(pieceIndex)
-	if piece.hash == nil && !piece.hashV2.Ok {
+	if !piece.haveHash() {
 		return
 	}
 	if piece.queuedForHash() {
@@ -3233,7 +3226,8 @@ file:
 		for i := f.BeginPieceIndex(); i < f.EndPieceIndex(); i++ {
 			hashOpt := t.piece(i).hashV2
 			if !hashOpt.Ok {
-				// All hashes must be present. This implementation should handle missing files, so move on to the next file.
+				// All hashes must be present. This implementation should handle missing files, so
+				// move on to the next file.
 				continue file
 			}
 			value.Write(hashOpt.Value[:])