From 5c818c3fb3d481288493e203535b393c06431012 Mon Sep 17 00:00:00 2001
From: Matt Joiner <anacrolix@gmail.com>
Date: Sun, 24 Mar 2024 21:55:30 +1100
Subject: [PATCH] Pad v1 piece hashes for v2 files

---
 metainfo/piece.go       | 22 ++++++++--
 peerconn.go             | 10 ++++-
 piece.go                |  7 ++--
 storage/file_test.go    |  1 +
 storage/issue95_test.go | 17 ++++----
 storage/issue96_test.go |  1 +
 torrent.go              | 90 ++++++++++++++++++++++++++++-------------
 zero-reader.go          | 11 +++++
 8 files changed, 112 insertions(+), 47 deletions(-)
 create mode 100644 zero-reader.go

diff --git a/metainfo/piece.go b/metainfo/piece.go
index c1c71221..c4101a6d 100644
--- a/metainfo/piece.go
+++ b/metainfo/piece.go
@@ -35,10 +35,26 @@ func (p Piece) Length() int64 {
 		}
 		return ret
 	}
-	if p.i == p.Info.NumPieces()-1 {
-		return p.Info.TotalLength() - int64(p.i)*p.Info.PieceLength
+	return p.V1Length()
+}
+
+// V1Length is the piece's v1 length: PieceLength, or for the last piece the remainder up to the end of the last file.
+func (p Piece) V1Length() int64 {
+	i, lastPiece := p.i, p.Info.NumPieces()-1
+	switch {
+	case 0 <= i && i < lastPiece:
+		return p.Info.PieceLength
+	case lastPiece >= 0 && i == lastPiece:
+		files := p.Info.UpvertedFiles()
+		lastFile := files[len(files)-1]
+		length := lastFile.TorrentOffset + lastFile.Length - int64(i)*p.Info.PieceLength
+		if length <= 0 || length > p.Info.PieceLength {
+			panic(length)
+		}
+		return length
+	default:
+		panic(i)
 	}
-	return p.Info.PieceLength
 }
 
 func (p Piece) Offset() int64 {
diff --git a/peerconn.go b/peerconn.go
index 6aa8f210..df71abc9 100644
--- a/peerconn.go
+++ b/peerconn.go
@@ -1168,7 +1168,15 @@ func (c *PeerConn) pexEvent(t pexEventType) (_ pexEvent, err error) {
 }
 
 func (pc *PeerConn) String() string {
-	return fmt.Sprintf("%T %p [id=%+q, exts=%v, v=%q]", pc, pc, pc.PeerID, pc.PeerExtensionBytes, pc.PeerClientName.Load())
+	return fmt.Sprintf(
+		"%T %p [flags=%v id=%+q, exts=%v, v=%q]",
+		pc,
+		pc,
+		pc.connectionFlags(),
+		pc.PeerID,
+		pc.PeerExtensionBytes,
+		pc.PeerClientName.Load(),
+	)
 }
 
 // Returns the pieces the peer could have based on their claims. If we don't know how many pieces
diff --git a/piece.go b/piece.go
index c65ba9a0..73d1e0c1 100644
--- a/piece.go
+++ b/piece.go
@@ -11,14 +11,13 @@ import (
 	"github.com/anacrolix/torrent/metainfo"
 	pp "github.com/anacrolix/torrent/peer_protocol"
 	"github.com/anacrolix/torrent/storage"
-	infohash_v2 "github.com/anacrolix/torrent/types/infohash-v2"
 )
 
 type Piece struct {
 	// The completed piece SHA1 hash, from the metainfo "pieces" field. Nil if the info is not V1
 	// compatible.
 	hash   *metainfo.Hash
-	hashV2 g.Option[infohash_v2.T]
+	hashV2 g.Option[[32]byte]
 	t      *Torrent
 	index  pieceIndex
 	files  []*File
@@ -52,7 +51,7 @@ func (p *Piece) String() string {
 }
 
 func (p *Piece) Info() metainfo.Piece {
-	return p.t.info.Piece(int(p.index))
+	return p.t.info.Piece(p.index)
 }
 
 func (p *Piece) Storage() storage.Piece {
@@ -60,7 +59,7 @@ func (p *Piece) Storage() storage.Piece {
 	if p.hash != nil {
 		pieceHash.Set(p.hash.Bytes())
 	} else if p.hashV2.Ok {
-		pieceHash.Set(p.hashV2.Value.Bytes())
+		pieceHash.Set(p.hashV2.Value[:])
 	}
 	return p.t.storage.PieceWithHash(p.Info(), pieceHash)
 }
diff --git a/storage/file_test.go b/storage/file_test.go
index a6c69fa2..aada9193 100644
--- a/storage/file_test.go
+++ b/storage/file_test.go
@@ -22,6 +22,7 @@ func TestShortFile(t *testing.T) {
 		Name:        "a",
 		Length:      2,
 		PieceLength: missinggo.MiB,
+		Pieces:      make([]byte, 20),
 	}
 	ts, err := s.OpenTorrent(info, metainfo.Hash{})
 	assert.NoError(t, err)
diff --git a/storage/issue95_test.go b/storage/issue95_test.go
index 61acf17e..5b484b30 100644
--- a/storage/issue95_test.go
+++ b/storage/issue95_test.go
@@ -13,22 +13,19 @@ import (
 // Two different torrents opened from the same storage. Closing one should not
 // break the piece completion on the other.
 func testIssue95(t *testing.T, ci ClientImpl) {
-	i1 := &metainfo.Info{
-		Files:  []metainfo.FileInfo{{Path: []string{"a"}}},
-		Pieces: make([]byte, 20),
+	info := metainfo.Info{
+		Files:       []metainfo.FileInfo{{Path: []string{"a"}, Length: 1}},
+		Pieces:      make([]byte, 20),
+		PieceLength: 1,
 	}
 	c := NewClient(ci)
-	t1, err := c.OpenTorrent(i1, metainfo.HashBytes([]byte("a")))
+	t1, err := c.OpenTorrent(&info, metainfo.HashBytes([]byte("a")))
 	require.NoError(t, err)
 	defer t1.Close()
-	i2 := &metainfo.Info{
-		Files:  []metainfo.FileInfo{{Path: []string{"a"}}},
-		Pieces: make([]byte, 20),
-	}
-	t2, err := c.OpenTorrent(i2, metainfo.HashBytes([]byte("b")))
+	t2, err := c.OpenTorrent(&info, metainfo.HashBytes([]byte("b")))
 	require.NoError(t, err)
 	defer t2.Close()
-	t2p := t2.Piece(i2.Piece(0))
+	t2p := t2.Piece(info.Piece(0))
 	assert.NoError(t, t1.Close())
 	assert.NotPanics(t, func() { t2p.Completion() })
 }
diff --git a/storage/issue96_test.go b/storage/issue96_test.go
index d9857881..cac5d96b 100644
--- a/storage/issue96_test.go
+++ b/storage/issue96_test.go
@@ -17,6 +17,7 @@ func testMarkedCompleteMissingOnRead(t *testing.T, csf func(string) ClientImplCl
 	info := &metainfo.Info{
 		PieceLength: 1,
 		Files:       []metainfo.FileInfo{{Path: []string{"a"}, Length: 1}},
+		Pieces:      make([]byte, 20),
 	}
 	ts, err := cs.OpenTorrent(info, metainfo.Hash{})
 	require.NoError(t, err)
diff --git a/torrent.go b/torrent.go
index 39bffd77..e0ce8184 100644
--- a/torrent.go
+++ b/torrent.go
@@ -1153,10 +1153,7 @@ func (t *Torrent) hashPiece(piece pieceIndex) (
 	p.waitNoPendingWrites()
 	storagePiece := p.Storage()
 
-	var h hash.Hash
 	if p.hash != nil {
-		h = pieceHash.New()
-
 		// Does the backend want to do its own hashing?
 		if i, ok := storagePiece.PieceImpl.(storage.SelfHashing); ok {
 			var sum metainfo.Hash
@@ -1167,12 +1164,37 @@ func (t *Torrent) hashPiece(piece pieceIndex) (
 			// in pieceHasher regardless.
 			return
 		}
-
+		h := pieceHash.New()
+		differingPeers, err = t.hashPieceWithSpecificHash(piece, h, t.info.FilesArePieceAligned())
+		var sum [20]byte
+		n := len(h.Sum(sum[:0]))
+		if n != 20 {
+			panic(n)
+		}
+		correct = sum == *p.hash
 	} else if p.hashV2.Ok {
-		h = merkle.NewHash()
+		h := merkle.NewHash()
+		differingPeers, err = t.hashPieceWithSpecificHash(piece, h, false)
+		var sum [32]byte
+		n := len(h.Sum(sum[:0]))
+		if n != 32 {
+			panic(n)
+		}
+		correct = sum == p.hashV2.Value
 	} else {
 		panic("no hash")
 	}
+	return
+}
+
+func (t *Torrent) hashPieceWithSpecificHash(piece pieceIndex, h hash.Hash, padV1 bool) (
+	// These are peers that sent us blocks that differ from what we hash here.
+	differingPeers map[bannableAddr]struct{},
+	err error,
+) {
+	p := t.piece(piece)
+	p.waitNoPendingWrites()
+	storagePiece := p.Storage()
 
 	const logPieceContents = false
 	smartBanWriter := t.smartBanBlockCheckingWriter(piece)
@@ -1181,32 +1203,37 @@ func (t *Torrent) hashPiece(piece pieceIndex) (
 	if logPieceContents {
 		writers = append(writers, &examineBuf)
 	}
-	var written int64
-	written, err = storagePiece.WriteTo(io.MultiWriter(writers...))
-	if err == nil && written != int64(p.length()) {
-		err = fmt.Errorf("wrote %v bytes from storage, piece has length %v", written, p.length())
-	}
-	if logPieceContents {
-		t.logger.WithDefaultLevel(log.Debug).Printf("hashed %q with copy err %v", examineBuf.Bytes(), err)
+	multiWriter := io.MultiWriter(writers...)
+	{
+		var written int64
+		written, err = storagePiece.WriteTo(multiWriter)
+		if err == nil && written != int64(p.length()) {
+			err = fmt.Errorf("wrote %v bytes from storage, piece has length %v", written, p.length())
+			// Skip smart banning since we can't blame peers for storage issues. A short write would
+			// otherwise ban peers for all recorded blocks that weren't just written.
+			return
+		}
 	}
+	// Flush before writing padding, since we would not have recorded the padding blocks.
 	smartBanWriter.Flush()
 	differingPeers = smartBanWriter.badPeers
-	if p.hash != nil {
-		var sum [20]byte
-		n := len(h.Sum(sum[:0]))
-		if n != 20 {
-			panic(n)
-		}
-		correct = sum == *p.hash
-	} else if p.hashV2.Ok {
-		var sum [32]byte
-		n := len(h.Sum(sum[:0]))
-		if n != 32 {
-			panic(n)
+	// For a hybrid torrent, we work with the v2 files, but if we use a v1 hash, we can assume that
+	// the pieces are padded with zeroes.
+	if padV1 {
+		paddingLen := p.Info().V1Length() - p.Info().Length()
+		written, padErr := io.CopyN(multiWriter, zeroReader, paddingLen) // Don't shadow the named result err.
+		if written != paddingLen {
+			panic(fmt.Sprintf(
+				"piece %v: wrote %v bytes of padding, expected %v, error: %v",
+				piece,
+				written,
+				paddingLen,
+				padErr,
+			))
+		}
-		correct = sum == p.hashV2.Value
-	} else {
-		panic("no hash")
+	}
+	if logPieceContents {
+		t.logger.WithNames("hashing").Levelf(log.Debug, "hashed %q with copy err %v", examineBuf.Bytes(), err)
 	}
 	return
 }
@@ -2420,7 +2447,12 @@ func (t *Torrent) pieceHashed(piece pieceIndex, passed bool, hashIoErr error) {
 					// single peer for a piece, and we never progress that piece to completion, we
 					// will never smart-ban them. Discovered in
 					// https://github.com/anacrolix/torrent/issues/715.
-					t.logger.Levelf(log.Warning, "banning %v for being sole dirtier of piece %v after failed piece check", c, piece)
+					t.logger.Levelf(
+						log.Warning,
+						"banning %v for being sole dirtier of piece %v after failed piece check",
+						c,
+						piece,
+					)
 					c.ban()
 				}
 			}
@@ -2543,7 +2575,7 @@ func (t *Torrent) pieceHasher(index pieceIndex) {
 	switch copyErr {
 	case nil, io.EOF:
 	default:
-		t.logger.Levelf(
+		t.logger.WithNames("hashing").Levelf(
 			log.Warning,
 			"error hashing piece %v: %v", index, copyErr)
 	}
diff --git a/zero-reader.go b/zero-reader.go
new file mode 100644
index 00000000..1d0a899b
--- /dev/null
+++ b/zero-reader.go
@@ -0,0 +1,11 @@
+package torrent
+
+var zeroReader zeroReaderType
+
+type zeroReaderType struct{}
+
+// Read zeroes all of b and reports len(b) bytes read with a nil error, so the stream never ends.
+func (zeroReaderType) Read(b []byte) (int, error) {
+	clear(b)
+	return len(b), nil
+}
-- 
2.51.0