- https://www.bittorrent.org/beps/bep_0055.html
- https://github.com/anacrolix/torrent/issues/685
- https://stackoverflow.com/questions/38786438/libutp-%C2%B5tp-and-nat-traversal-udp-hole-punching
+
+### BitTorrent v2
+
+- https://www.bittorrent.org/beps/bep_0052.html
+
+The canonical infohash to use for a torrent will be the v1 infohash, or the short form of the v2 infohash if v1 is not supported. This will apply everywhere that both infohashes are present. If only one 20 byte hash is present, it is always the v1 hash (except in code that interfaces with things that only work with 20 byte hashes, like the DHT).
\ No newline at end of file
"github.com/anacrolix/torrent/metainfo"
"github.com/anacrolix/torrent/mse"
pp "github.com/anacrolix/torrent/peer_protocol"
+ infohash_v2 "github.com/anacrolix/torrent/types/infohash-v2"
request_strategy "github.com/anacrolix/torrent/request-strategy"
"github.com/anacrolix/torrent/storage"
"github.com/anacrolix/torrent/tracker"
}
t = &Torrent{
- cl: cl,
- infoHash: opts.InfoHash,
+ cl: cl,
+ infoHash: opts.InfoHash,
+ infoHashV2: opts.InfoHashV2,
peers: prioritizedPeers{
om: gbtree.New(32),
getPrio: func(p PeerInfo) peerPriority {
}
type AddTorrentOpts struct {
- InfoHash infohash.T
- Storage storage.ClientImpl
- ChunkSize pp.Integer
- InfoBytes []byte
+ InfoHash infohash.T
+ InfoHashV2 g.Option[infohash_v2.T]
+ Storage storage.ClientImpl
+ ChunkSize pp.Integer
+ InfoBytes []byte
}
// Add or merge a torrent spec. Returns new if the torrent wasn't already in the client. See also
// Torrent.MergeSpec.
func (cl *Client) AddTorrentSpec(spec *TorrentSpec) (t *Torrent, new bool, err error) {
t, new = cl.AddTorrentOpt(AddTorrentOpts{
- InfoHash: spec.InfoHash,
- Storage: spec.Storage,
- ChunkSize: spec.ChunkSize,
+ InfoHash: spec.InfoHash,
+ InfoHashV2: spec.InfoHashV2,
+ Storage: spec.Storage,
+ ChunkSize: spec.ChunkSize,
})
modSpec := *spec
if new {
t.maybeNewConns()
t.dataDownloadDisallowed.SetBool(spec.DisallowDataDownload)
t.dataUploadDisallowed = spec.DisallowDataUpload
- return nil
+ return t.AddPieceLayers(spec.PieceLayers)
}
func (cl *Client) dropTorrent(infoHash metainfo.Hash, wg *sync.WaitGroup) (err error) {
--- /dev/null
+// This is an alternate to cmd/torrent which has become bloated with awful argument parsing. Since
+// this is my most complicated binary, I will try to build something that satisfies only what I need
+// here.
+package main
+
+import (
+ "github.com/anacrolix/torrent/metainfo"
+ "os"
+)
+
+// argError wraps an error arising from command-line argument handling.
+// NOTE(review): currently unused in this file; presumably reserved for
+// richer argument error reporting — confirm before removing.
+type argError struct {
+	err error
+}
+
+// assertOk panics if err is non-nil. Fail-fast helper for this CLI, where any
+// error is fatal and a panic trace is an acceptable user interface.
+func assertOk(err error) {
+	if err != nil {
+		panic(err)
+	}
+}
+
+// bail aborts the program by panicking with the given message.
+func bail(str string) {
+	panic(str)
+}
+
+// main dispatches "torrent2 <command> <subcommand> [args...]". Commands are
+// looked up in nested maps; unknown or missing arguments bail with a message
+// instead of panicking on an out-of-range index or a nil map entry.
+func main() {
+	args := os.Args[1:]
+	if len(args) < 2 {
+		bail("usage: <command> <subcommand> [args...]")
+	}
+	commands := map[string]func(){
+		"metainfo": func() {
+			subcommands := map[string]func(){
+				"validate-v2": func() {
+					if len(args) < 3 {
+						bail("expected metainfo file argument")
+					}
+					mi, err := metainfo.LoadFromFile(args[2])
+					assertOk(err)
+					info, err := mi.UnmarshalInfo()
+					assertOk(err)
+					if !info.HasV2() {
+						bail("not a v2 torrent")
+					}
+					err = metainfo.ValidatePieceLayers(mi.PieceLayers, &info.FileTree, info.PieceLength)
+					assertOk(err)
+				},
+			}
+			sub, ok := subcommands[args[1]]
+			if !ok {
+				bail("unknown metainfo subcommand: " + args[1])
+			}
+			sub()
+		},
+	}
+	cmd, ok := commands[args[0]]
+	if !ok {
+		bail("unknown command: " + args[0])
+	}
+	cmd()
+}
package torrent
import (
+ "crypto/sha256"
"github.com/RoaringBitmap/roaring"
+ g "github.com/anacrolix/generics"
"github.com/anacrolix/missinggo/v2/bitmap"
"github.com/anacrolix/torrent/metainfo"
fi metainfo.FileInfo
displayPath string
prio piecePriority
+ piecesRoot g.Option[[sha256.Size]byte]
+}
+
+func (f *File) String() string {
+ return f.Path()
}
func (f *File) Torrent() *Torrent {
}
// The FileInfo from the metainfo.Info to which this file corresponds.
-func (f File) FileInfo() metainfo.FileInfo {
+func (f *File) FileInfo() metainfo.FileInfo {
return f.fi
}
// The file's path components joined by '/'.
-func (f File) Path() string {
+func (f *File) Path() string {
return f.path
}
}
return pieceIndex((f.offset + f.length + int64(f.t.usualPieceSize()) - 1) / int64(f.t.usualPieceSize()))
}
+
+func (f *File) numPieces() int {
+ return f.EndPieceIndex() - f.BeginPieceIndex()
+}
module github.com/anacrolix/torrent
-go 1.21.4
-
-toolchain go1.21.7
+go 1.22
require (
github.com/RoaringBitmap/roaring v1.2.3
--- /dev/null
+package merkle
+
+import (
+ "crypto/sha256"
+ "fmt"
+ g "github.com/anacrolix/generics"
+ "math/bits"
+)
+
+func Root(hashes [][sha256.Size]byte) [sha256.Size]byte {
+ if len(hashes) <= 1 {
+ return hashes[0]
+ }
+ numHashes := uint(len(hashes))
+ if numHashes != RoundUpToPowerOfTwo(uint(len(hashes))) {
+ panic(fmt.Sprintf("expected power of two number of hashes, got %d", numHashes))
+ }
+ var next [][sha256.Size]byte
+ for i := 0; i < len(hashes); i += 2 {
+ left := hashes[i]
+ right := hashes[i+1]
+ h := sha256.Sum256(append(left[:], right[:]...))
+ next = append(next, h)
+ }
+ return Root(next)
+}
+
+// CompactLayerToSliceHashes splits a BEP 52 compact piece-layer string
+// (concatenated 32-byte SHA-256 digests) into individual hashes. The length
+// must be an exact multiple of the hash size: the previous per-index check
+// could never fire (integer division guaranteed every copy was complete), so
+// trailing partial bytes were silently dropped.
+func CompactLayerToSliceHashes(compactLayer string) (hashes [][sha256.Size]byte, err error) {
+	if len(compactLayer)%sha256.Size != 0 {
+		err = fmt.Errorf("compact layer length %d is not a multiple of hash size %d", len(compactLayer), sha256.Size)
+		return
+	}
+	hashes = make([][sha256.Size]byte, len(compactLayer)/sha256.Size)
+	for i := range hashes {
+		copy(hashes[i][:], compactLayer[i*sha256.Size:(i+1)*sha256.Size])
+	}
+	return
+}
+
+// RoundUpToPowerOfTwo returns the smallest power of two >= n.
+// Edge cases: n == 0 underflows to the max uint, so bits.Len yields the full
+// word size and the shift wraps to 0; values above 1<<(bits.UintSize-1) also
+// wrap. NOTE(review): callers here pass hash counts far below those extremes —
+// confirm no caller can pass 0.
+func RoundUpToPowerOfTwo(n uint) (ret uint) {
+	return 1 << bits.Len(n-1)
+}
--- /dev/null
+package metainfo
+
+import (
+ "fmt"
+ "github.com/anacrolix/torrent/merkle"
+)
+
+// ValidatePieceLayers checks every file's BEP 52 piece-layer hashes against
+// its pieces root in the file tree. Files no larger than a single piece have
+// no layer entry; larger files without one are an error. Returns the first
+// validation error encountered during the walk, or nil.
+func ValidatePieceLayers(
+	pieceLayers map[string]string,
+	fileTree *FileTree,
+	pieceLength int64,
+) (err error) {
+	fileTree.Walk(nil, func(path []string, ft *FileTree) {
+		if err != nil {
+			// A previous file already failed; skip the rest of the walk.
+			return
+		}
+		if ft.IsDir() {
+			return
+		}
+		piecesRoot := ft.PiecesRootAsByteArray()
+		if !piecesRoot.Ok {
+			return
+		}
+		filePieceLayers, ok := pieceLayers[string(piecesRoot.Value[:])]
+		if !ok {
+			// BEP 52: "For each file in the file tree that is larger than the piece size it
+			// contains one string value.". The reference torrent creator in
+			// https://blog.libtorrent.org/2020/09/bittorrent-v2/ also has this. I'm not sure what
+			// harm it causes if it's present anyway, possibly it won't be useful to us.
+			if ft.File.Length > pieceLength {
+				err = fmt.Errorf("no piece layers for file %q", path)
+			}
+			return
+		}
+		var layerHashes [][32]byte
+		layerHashes, err = merkle.CompactLayerToSliceHashes(filePieceLayers)
+		if err != nil {
+			// Previously execution fell through here and padded/hashed the
+			// bad layer anyway; bail out before using it.
+			err = fmt.Errorf("file %q: parsing piece layers: %w", path, err)
+			return
+		}
+		// Pad the layer up to a power of two so it forms a balanced tree.
+		padHash := HashForPiecePad(pieceLength)
+		for uint(len(layerHashes)) < merkle.RoundUpToPowerOfTwo(uint(len(layerHashes))) {
+			layerHashes = append(layerHashes, padHash)
+		}
+		root := merkle.Root(layerHashes)
+		if root != piecesRoot.Value {
+			err = fmt.Errorf("file %q: expected hash %x got %x", path, piecesRoot.Value, root)
+			return
+		}
+	})
+	return
+}
+
+// Returns the padding hash for the hash layer corresponding to a piece. It can't be zero because
+// that's the bottom-most layer (the hashes for the smallest blocks).
+// Returns the padding hash for the hash layer corresponding to a piece: the
+// merkle root of one all-zero hash per 16 KiB block in a piece. For piece
+// lengths above 16 KiB this is nonzero; for pieceLength == 16 KiB it is the
+// zero hash itself (Root of a single zero hash). NOTE(review): panics inside
+// merkle.Root if pieceLength < 16 KiB (zero blocks) or if blocksPerPiece is
+// not a power of two — assumes these are validated elsewhere; confirm.
+func HashForPiecePad(pieceLength int64) (hash [32]byte) {
+	// This should be a power of two, and probably checked elsewhere.
+	blocksPerPiece := pieceLength / (1 << 14)
+	blockHashes := make([][32]byte, blocksPerPiece)
+	return merkle.Root(blockHashes)
+}
package metainfo
+import (
+ g "github.com/anacrolix/generics"
+ "github.com/anacrolix/torrent/bencode"
+ "golang.org/x/exp/maps"
+ "sort"
+)
+
+const FileTreePropertiesKey = ""
+
type FileTree struct {
File struct {
Length int64 `bencode:"length"`
}
Dir map[string]FileTree
}
+
+// UnmarshalBencode parses a BEP 52 file-tree dict. The empty key holds a leaf
+// node's file properties; every other key is a child path component, parsed
+// recursively.
+func (ft *FileTree) UnmarshalBencode(bytes []byte) (err error) {
+	var dir map[string]bencode.Bytes
+	err = bencode.Unmarshal(bytes, &dir)
+	if err != nil {
+		return
+	}
+	// Use the named constant instead of a bare "" so the special key stays
+	// consistent with NumEntries/UpvertedFiles/Walk.
+	if propBytes, ok := dir[FileTreePropertiesKey]; ok {
+		err = bencode.Unmarshal(propBytes, &ft.File)
+		if err != nil {
+			return
+		}
+	}
+	delete(dir, FileTreePropertiesKey)
+	g.MakeMapWithCap(&ft.Dir, len(dir))
+	// Loop variable renamed from "bytes", which shadowed the parameter.
+	for key, subBytes := range dir {
+		var sub FileTree
+		err = sub.UnmarshalBencode(subBytes)
+		if err != nil {
+			return
+		}
+		ft.Dir[key] = sub
+	}
+	return
+}
+
+var _ bencode.Unmarshaler = (*FileTree)(nil)
+
+// NumEntries counts the child entries of this node, excluding the special
+// properties key.
+func (ft *FileTree) NumEntries() (num int) {
+	if g.MapContains(ft.Dir, FileTreePropertiesKey) {
+		return len(ft.Dir) - 1
+	}
+	return len(ft.Dir)
+}
+
+// IsDir reports whether this node has any child entries, i.e. is a directory.
+func (ft *FileTree) IsDir() bool {
+	return ft.NumEntries() > 0
+}
+
+// orderedKeys returns this node's child keys in sorted order, for
+// deterministic traversal.
+func (ft *FileTree) orderedKeys() []string {
+	keys := make([]string, 0, len(ft.Dir))
+	for key := range ft.Dir {
+		keys = append(keys, key)
+	}
+	sort.Strings(keys)
+	return keys
+}
+
+// UpvertedFiles walks the tree depth-first in sorted key order, invoking out
+// with a v1-style FileInfo for each regular file. path holds the components
+// from the root down to (but excluding) this node.
+func (ft *FileTree) UpvertedFiles(path []string, out func(fi FileInfo)) {
+	if ft.IsDir() {
+		for _, key := range ft.orderedKeys() {
+			if key == FileTreePropertiesKey {
+				continue
+			}
+			sub := g.MapMustGet(ft.Dir, key)
+			// append may reuse path's backing array across siblings; this is
+			// safe because each recursive call completes (copying path at the
+			// leaf below) before the next iteration overwrites the slot.
+			sub.UpvertedFiles(append(path, key), out)
+		}
+	} else {
+		out(FileInfo{
+			Length: ft.File.Length,
+			// Copy path: the caller's backing array is reused during the walk.
+			Path: append([]string(nil), path...),
+			// BEP 52 requires paths be UTF-8 if possible.
+			PathUtf8: append([]string(nil), path...),
+			PiecesRoot: ft.PiecesRootAsByteArray(),
+		})
+	}
+}
+
+// Walk visits this node and then every descendant, calling f with the path
+// from the root. Children are visited in Go map iteration order, which is
+// nondeterministic (unlike UpvertedFiles, which sorts). f must not retain
+// path: append below may reuse its backing array across siblings.
+func (ft *FileTree) Walk(path []string, f func(path []string, ft *FileTree)) {
+	f(path, ft)
+	for key, sub := range ft.Dir {
+		if key == FileTreePropertiesKey {
+			continue
+		}
+		sub.Walk(append(path, key), f)
+	}
+}
+
+// PiecesRootAsByteArray returns the file's BEP 52 "pieces root" as a fixed
+// 32-byte array. The option is not set for zero-length files, which have no
+// pieces root.
+func (ft *FileTree) PiecesRootAsByteArray() (ret g.Option[[32]byte]) {
+	if ft.File.Length == 0 {
+		return
+	}
+	n := copy(ret.Value[:], ft.File.PiecesRoot)
+	if n != 32 {
+		// Must be 32 bytes for meta version 2 and non-empty files. See BEP 52.
+		panic(n)
+	}
+	ret.Ok = true
+	return
+}
package metainfo
-import "strings"
+import (
+ g "github.com/anacrolix/generics"
+ "strings"
+)
// Information specific to a single file inside the MetaInfo structure.
type FileInfo struct {
PathUtf8 []string `bencode:"path.utf-8,omitempty"`
ExtendedFileAttrs
+
+ // BEP 52. This isn't encoded in a v1 FileInfo, but is exposed here for APIs that expect to deal
+ // v1 files.
+ PiecesRoot g.Option[[32]byte] `bencode:"-"`
}
func (fi *FileInfo) DisplayPath(info *Info) string {
}
func (info *Info) TotalLength() (ret int64) {
- if info.IsDir() {
- for _, fi := range info.Files {
- ret += fi.Length
- }
- } else {
- ret = info.Length
+ for _, fi := range info.UpvertedFiles() {
+ ret += fi.Length
}
return
}
-func (info *Info) NumPieces() int {
+func (info *Info) NumPieces() (num int) {
+ if info.HasV2() {
+ info.FileTree.Walk(nil, func(path []string, ft *FileTree) {
+ num += int((ft.File.Length + info.PieceLength - 1) / info.PieceLength)
+ })
+ return
+ }
return len(info.Pieces) / 20
}
+// Whether all files share the same top-level directory name. If they don't, Info.Name is usually used.
func (info *Info) IsDir() bool {
+ if info.HasV2() {
+ return info.FileTree.IsDir()
+ }
+ // I wonder if we should check for the existence of Info.Length here instead.
return len(info.Files) != 0
}
// The files field, converted up from the old single-file in the parent info
// dict if necessary. This is a helper to avoid having to conditionally handle
// single and multi-file torrent infos.
-func (info *Info) UpvertedFiles() []FileInfo {
+func (info *Info) UpvertedFiles() (files []FileInfo) {
+ if info.HasV2() {
+ info.FileTree.UpvertedFiles(nil, func(fi FileInfo) {
+ files = append(files, fi)
+ })
+ return
+ }
if len(info.Files) == 0 {
return []FileInfo{{
Length: info.Length,
}
return info.Name
}
+
+// Whether the Info can be used as a v2 info dict, including having a V2
+// infohash. True exactly when the BEP 52 meta version field is 2.
+func (info *Info) HasV2() bool {
+	return info.MetaVersion == 2
+}
+
+// Whether the Info describes a v1-compatible torrent: meta version absent
+// (0) or 1, or any v1-only field present (files list, single-file length, or
+// SHA-1 pieces), per the "Upgrade Path" section of BEP 52.
+func (info *Info) HasV1() bool {
+	// See Upgrade Path in BEP 52.
+	return info.MetaVersion == 0 || info.MetaVersion == 1 || info.Files != nil || info.Length != 0 || len(info.Pieces) != 0
+}
+
+// Whether every file starts at a piece boundary. BEP 52 (v2) torrents always
+// align files to pieces.
+func (info *Info) FilesArePieceAligned() bool {
+	return info.HasV2()
+}
return
}
-func (mi MetaInfo) HashInfoBytes() (infoHash Hash) {
+func (mi *MetaInfo) HashInfoBytes() (infoHash Hash) {
return HashBytes(mi.InfoBytes)
}
package metainfo
import (
+ "github.com/davecgh/go-spew/spew"
"io"
"os"
"path"
err := bencode.Unmarshal([]byte("d5:nodes0:e"), &mi)
c.Assert(err, qt.IsNil)
}
+
+// TestUnmarshalV2Metainfo loads the reference BEP 52 test torrent and checks
+// that its info parses, reports pieces, and its piece layers validate.
+func TestUnmarshalV2Metainfo(t *testing.T) {
+	c := qt.New(t)
+	mi, err := LoadFromFile("../testdata/bittorrent-v2-test.torrent")
+	c.Assert(err, qt.IsNil)
+	info, err := mi.UnmarshalInfo()
+	c.Assert(err, qt.IsNil)
+	// Log instead of dumping to stdout so the output only appears with -v or
+	// on failure.
+	t.Logf("%s", spew.Sdump(info))
+	c.Check(info.NumPieces(), qt.Not(qt.Equals), 0)
+	err = ValidatePieceLayers(mi.PieceLayers, &info.FileTree, info.PieceLength)
+	c.Check(err, qt.IsNil)
+}
if info.TotalLength() != 0 {
return errors.New("zero piece length")
}
- } else {
+ } else if !info.HasV2() {
+ // TotalLength returns different values for V1 and V2 depending on whether v1 pad files are
+ // counted. Split the interface into several methods?
if int((info.TotalLength()+info.PieceLength-1)/info.PieceLength) != info.NumPieces() {
return errors.New("piece count and file lengths are at odds")
}
import (
"fmt"
+ g "github.com/anacrolix/generics"
+ infohash_v2 "github.com/anacrolix/torrent/types/infohash-v2"
"sync"
"github.com/anacrolix/chansync"
)
type Piece struct {
- // The completed piece SHA1 hash, from the metainfo "pieces" field.
- hash *metainfo.Hash
- t *Torrent
- index pieceIndex
- files []*File
+ // The completed piece SHA1 hash, from the metainfo "pieces" field. Nil if the info is not V1
+ // compatible.
+ hash *metainfo.Hash
+ hashV2 g.Option[infohash_v2.T]
+ t *Torrent
+ index pieceIndex
+ files []*File
readerCond chansync.BroadcastCond
}
func (p *Piece) torrentEndOffset() int64 {
- return p.torrentBeginOffset() + int64(p.length())
+ return p.torrentBeginOffset() + int64(p.t.usualPieceSize())
}
func (p *Piece) SetPriority(prio piecePriority) {
func (p *Piece) availability() int {
return len(p.t.connsWithAllPieces) + p.relativeAvailability
}
+
+// For v2 torrents, files are aligned to pieces so there should always only be a single file for a
+// given piece. Panics with the actual file count otherwise.
+func (p *Piece) mustGetOnlyFile() *File {
+	if n := len(p.files); n != 1 {
+		panic(n)
+	}
+	return p.files[0]
+}
import (
"fmt"
+ g "github.com/anacrolix/generics"
+ infohash_v2 "github.com/anacrolix/torrent/types/infohash-v2"
"github.com/anacrolix/torrent/metainfo"
pp "github.com/anacrolix/torrent/peer_protocol"
// The tiered tracker URIs.
Trackers [][]string
// TODO: Move into a "new" Torrent opt type.
- InfoHash metainfo.Hash
- InfoBytes []byte
+ InfoHash metainfo.Hash
+ InfoHashV2 g.Option[infohash_v2.T]
+ InfoBytes []byte
// The name to use if the Name field from the Info isn't available.
DisplayName string
// WebSeed URLs. For additional options add the URLs separately with Torrent.AddWebSeeds
PeerAddrs []string
// The combination of the "xs" and "as" fields in magnet links, for now.
Sources []string
+ // BEP 52 "piece layers" from metainfo
+ PieceLayers map[string]string
// The chunk size to use for outbound requests. Defaults to 16KiB if not set. Can only be set
// for new Torrents. TODO: Move into a "new" Torrent opt type.
if err != nil {
err = fmt.Errorf("unmarshalling info: %w", err)
}
+ var v2Infohash g.Option[infohash_v2.T]
+ if info.HasV2() {
+ v2Infohash.Set(infohash_v2.HashBytes(mi.InfoBytes))
+ }
return &TorrentSpec{
Trackers: mi.UpvertedAnnounceList(),
InfoHash: mi.HashInfoBytes(),
+ InfoHashV2: v2Infohash,
+ PieceLayers: mi.PieceLayers,
InfoBytes: mi.InfoBytes,
DisplayName: info.Name,
Webseeds: mi.UrlList,
}
func (t *Torrent) initFiles() {
+ info := t.info
var offset int64
t.files = new([]*File)
for _, fi := range t.info.UpvertedFiles() {
*t.files = append(*t.files, &File{
t,
- strings.Join(append([]string{t.info.BestName()}, fi.BestPath()...), "/"),
+ strings.Join(append([]string{info.BestName()}, fi.BestPath()...), "/"),
offset,
fi.Length,
fi,
- fi.DisplayPath(t.info),
+ fi.DisplayPath(info),
PiecePriorityNone,
+ fi.PiecesRoot,
})
offset += fi.Length
+ if info.FilesArePieceAligned() {
+ offset = (offset + info.PieceLength - 1) / info.PieceLength * info.PieceLength
+ }
}
}
"crypto/sha1"
"errors"
"fmt"
+ "github.com/anacrolix/torrent/merkle"
+ infohash_v2 "github.com/anacrolix/torrent/types/infohash-v2"
"io"
"math/rand"
"net/netip"
dataUploadDisallowed bool
userOnWriteChunkErr func(error)
- closed chansync.SetOnce
- onClose []func()
- infoHash metainfo.Hash
- pieces []Piece
+ closed chansync.SetOnce
+ onClose []func()
+
+ infoHash metainfo.Hash
+ infoHashV2 g.Option[infohash_v2.T]
+
+ pieces []Piece
// The order pieces are requested if there's no stronger reason like availability or priority.
pieceRequestOrder []int
return len(t.metadataBytes)
}
-func infoPieceHashes(info *metainfo.Info) (ret [][]byte) {
- for i := 0; i < len(info.Pieces); i += sha1.Size {
- ret = append(ret, info.Pieces[i:i+sha1.Size])
- }
- return
-}
-
func (t *Torrent) makePieces() {
- hashes := infoPieceHashes(t.info)
- t.pieces = make([]Piece, len(hashes))
- for i, hash := range hashes {
+ t.pieces = make([]Piece, t.info.NumPieces())
+ for i := range t.pieces {
piece := &t.pieces[i]
piece.t = t
- piece.index = pieceIndex(i)
+ piece.index = i
piece.noPendingWrites.L = &piece.pendingWritesMutex
- piece.hash = (*metainfo.Hash)(unsafe.Pointer(&hash[0]))
+ if t.info.HasV1() {
+ piece.hash = (*metainfo.Hash)(unsafe.Pointer(
+ unsafe.SliceData(t.info.Pieces[i*sha1.Size : (i+1)*sha1.Size])))
+ }
files := *t.files
beginFile := pieceFirstFileIndex(piece.torrentBeginOffset(), files)
endFile := pieceEndFileIndex(piece.torrentEndOffset(), files)
piece.files = files[beginFile:endFile]
+ if t.info.FilesArePieceAligned() {
+ numFiles := len(piece.files)
+ if numFiles != 1 {
+ panic(fmt.Sprintf("%v:%v", beginFile, endFile))
+ }
+ }
+ }
+}
+
+// AddPieceLayers installs BEP 52 piece-layer hashes onto this torrent's
+// pieces, keyed by each file's pieces root. Files without a layer entry
+// (those no larger than a piece) are skipped; hash-count mismatches and
+// malformed layers are errors.
+func (t *Torrent) AddPieceLayers(layers map[string]string) (err error) {
+	if layers == nil {
+		return
+	}
+	for _, f := range *t.files {
+		if f.length == 0 {
+			// Zero-length files never get a pieces root (see
+			// FileTree.PiecesRootAsByteArray), so previously torrents
+			// containing empty files erroneously failed here.
+			continue
+		}
+		if !f.piecesRoot.Ok {
+			err = fmt.Errorf("no piece root set for file %v", f)
+			return
+		}
+		compactLayer, ok := layers[string(f.piecesRoot.Value[:])]
+		if !ok {
+			continue
+		}
+		var hashes [][32]byte
+		hashes, err = merkle.CompactLayerToSliceHashes(compactLayer)
+		if err != nil {
+			err = fmt.Errorf("bad piece layers for file %q: %w", f, err)
+			return
+		}
+		if len(hashes) != f.numPieces() {
+			err = fmt.Errorf("file %q: got %v hashes expected %v", f, len(hashes), f.numPieces())
+			return
+		}
+		for i := range f.numPieces() {
+			p := t.piece(f.BeginPieceIndex() + i)
+			p.hashV2.Set(hashes[i])
+		}
	}
+	return nil
 }
// Returns the index of the first file containing the piece. files must be
// ordered by offset.
func pieceEndFileIndex(pieceEndOffset int64, files []*File) int {
for i, f := range files {
- if f.offset+f.length >= pieceEndOffset {
- return i + 1
+ if f.offset >= pieceEndOffset {
+ return i
}
}
- return 0
+ return len(files)
}
func (t *Torrent) cacheLength() {
// There will be no variance amongst pieces. Only pain.
return 0
}
+ if t.info.FilesArePieceAligned() {
+ p := t.piece(piece)
+ file := p.mustGetOnlyFile()
+ if piece == file.EndPieceIndex()-1 {
+ return pp.Integer(file.length - (p.torrentBeginOffset() - file.offset))
+ }
+ return pp.Integer(t.usualPieceSize())
+ }
if piece == t.numPieces()-1 {
ret := pp.Integer(t.length() % t.info.PieceLength)
if ret != 0 {
func (t *Torrent) queuePieceCheck(pieceIndex pieceIndex) {
piece := t.piece(pieceIndex)
+ if piece.hash == nil && !piece.hashV2.Ok {
+ return
+ }
if piece.queuedForHash() {
return
}