From: Matt Joiner Date: Tue, 23 Jul 2024 08:58:04 +0000 (+1000) Subject: Be more pedantic about trailing data in metainfo.Load and bencode.Unmarshal X-Git-Tag: v1.57.0~17 X-Git-Url: http://www.git.stargrave.org/?a=commitdiff_plain;h=32c406f9fec12c66bf62c1ad6e0c38be4f425b75;p=btrtrc.git Be more pedantic about trailing data in metainfo.Load and bencode.Unmarshal Fixes https://github.com/anacrolix/torrent/issues/963. --- diff --git a/bencode/api.go b/bencode/api.go index 3c379abb..ece87eb8 100644 --- a/bencode/api.go +++ b/bencode/api.go @@ -139,12 +139,15 @@ func MustMarshal(v interface{}) []byte { // behaviour (inspired by Rust's serde here). func Unmarshal(data []byte, v interface{}) (err error) { buf := bytes.NewReader(data) - e := Decoder{r: buf} - err = e.Decode(v) - if err == nil && buf.Len() != 0 { - err = ErrUnusedTrailingBytes{buf.Len()} + dec := Decoder{r: buf} + err = dec.Decode(v) + if err != nil { + return + } + if buf.Len() != 0 { + return ErrUnusedTrailingBytes{buf.Len()} } - return + return dec.ReadEOF() } type ErrUnusedTrailingBytes struct { diff --git a/bencode/decode.go b/bencode/decode.go index ce1f8b46..d6fffdc4 100644 --- a/bencode/decode.go +++ b/bencode/decode.go @@ -69,6 +69,23 @@ func (d *Decoder) Decode(v interface{}) (err error) { return } +// Check for EOF in the decoder input stream. Used to assert the input ends on a clean message +// boundary. +func (d *Decoder) ReadEOF() error { + _, err := d.r.ReadByte() + if err == nil { + err := d.r.UnreadByte() + if err != nil { + panic(err) + } + return errors.New("expected EOF") + } + if err == io.EOF { + return nil + } + return fmt.Errorf("expected EOF, got %w", err) +} + func checkForUnexpectedEOF(err error, offset int64) { if err == io.EOF { panic(&SyntaxError{ @@ -582,8 +599,8 @@ func (d *Decoder) parseUnmarshaler(v reflect.Value) bool { return true } -// Returns true if there was a value and it's now stored in 'v', otherwise -// there was an end symbol ("e") and no value was stored. +// Returns true if there was a value and it's now stored in 'v'. Otherwise, there was an end symbol +// ("e") and no value was stored. func (d *Decoder) parseValue(v reflect.Value) (bool, error) { // we support one level of indirection at the moment if v.Kind() == reflect.Ptr { diff --git a/bencode/decode_test.go b/bencode/decode_test.go index a350beea..16fa4487 100644 --- a/bencode/decode_test.go +++ b/bencode/decode_test.go @@ -2,10 +2,12 @@ package bencode import ( "bytes" + "encoding/json" "fmt" "io" "math/big" "reflect" + "strings" "testing" qt "github.com/frankban/quicktest" @@ -265,3 +267,26 @@ func TestDecodeStringIntoBoolPtr(t *testing.T) { check("d7:private1:Fe", false, false) check("d7:private11:bunnyfoofooe", false, true) } + +// To set expectations about how our Decoder should work. +func TestJsonDecoderBehaviour(t *testing.T) { + c := qt.New(t) + test := func(input string, items int, finalErr error) { + d := json.NewDecoder(strings.NewReader(input)) + actualItems := 0 + var firstErr error + for { + var discard any + firstErr = d.Decode(&discard) + if firstErr != nil { + break + } + actualItems++ + } + c.Check(firstErr, qt.Equals, finalErr) + c.Check(actualItems, qt.Equals, items) + } + test("", 0, io.EOF) + test("{}", 1, io.EOF) + test("{} {", 1, io.ErrUnexpectedEOF) +} diff --git a/bencode/scanner.go b/bencode/scanner.go index 967d5a5d..34b85505 100644 --- a/bencode/scanner.go +++ b/bencode/scanner.go @@ -22,11 +22,28 @@ func (me *scanner) ReadByte() (byte, error) { me.unread = false return me.b[0], nil } - n, err := me.r.Read(me.b[:]) - if n == 1 { - err = nil + for { + n, err := me.r.Read(me.b[:]) + switch n { + case 0: + // io.Reader.Read says to try again if there's no error and no bytes returned. We can't + // signal that the caller should do this method's interface. + if err != nil { + return 0, err + } + panic(err) + case 1: + // There's no way to signal that the byte is valid unless error is nil. + return me.b[0], nil + default: + if err != nil { + // I can't see why Read would return more bytes than expected, but the error should + // tell us why. + panic(err) + } + panic(n) + } } - return me.b[0], err } func (me *scanner) UnreadByte() error { diff --git a/metainfo/metainfo.go b/metainfo/metainfo.go index 383b5ca9..56d4ce59 100644 --- a/metainfo/metainfo.go +++ b/metainfo/metainfo.go @@ -2,6 +2,7 @@ package metainfo import ( "bufio" + "fmt" "io" "net/url" "os" @@ -38,7 +39,11 @@ func Load(r io.Reader) (*MetaInfo, error) { if err != nil { return nil, err } - return &mi, nil + err = d.ReadEOF() + if err != nil { + err = fmt.Errorf("error after decoding metainfo: %w", err) + } + return &mi, err } // Convenience function for loading a MetaInfo from a file. diff --git a/metainfo/metainfo_test.go b/metainfo/metainfo_test.go index 4d7c2b34..6a97f911 100644 --- a/metainfo/metainfo_test.go +++ b/metainfo/metainfo_test.go @@ -48,6 +48,9 @@ func TestFile(t *testing.T) { testFile(t, "testdata/continuum.torrent") testFile(t, "testdata/23516C72685E8DB0C8F15553382A927F185C4F01.torrent") testFile(t, "testdata/trackerless.torrent") + _, err := LoadFromFile("testdata/minimal-trailing-newline.torrent") + c := qt.New(t) + c.Check(err, qt.ErrorMatches, ".*expected EOF") } // Ensure that the correct number of pieces are generated when hashing files. diff --git a/metainfo/testdata/flat-url-list.torrent b/metainfo/testdata/flat-url-list.torrent index 216ab38c..47f5f8b6 100644 Binary files a/metainfo/testdata/flat-url-list.torrent and b/metainfo/testdata/flat-url-list.torrent differ diff --git a/metainfo/testdata/minimal-trailing-newline.torrent b/metainfo/testdata/minimal-trailing-newline.torrent new file mode 100644 index 00000000..7673daa9 --- /dev/null +++ b/metainfo/testdata/minimal-trailing-newline.torrent @@ -0,0 +1 @@ +de diff --git a/metainfo/testdata/trackerless.torrent b/metainfo/testdata/trackerless.torrent index 65372766..e8644faa 100644 --- a/metainfo/testdata/trackerless.torrent +++ b/metainfo/testdata/trackerless.torrent @@ -1 +1 @@ -d7:comment19:This is just a test10:created by12:Johnny Bravo13:creation datei1430648794e8:encoding5:UTF-84:infod6:lengthi1128e4:name12:testfile.bin12:piece lengthi32768e6:pieces20:Ոë =‘UŒäiÎ^æ °Eâ?ÇÒe5:nodesl35:udp://tracker.openbittorrent.com:8035:udp://tracker.openbittorrent.com:80ee +d7:comment19:This is just a test10:created by12:Johnny Bravo13:creation datei1430648794e8:encoding5:UTF-84:infod6:lengthi1128e4:name12:testfile.bin12:piece lengthi32768e6:pieces20:Ոë =‘UŒäiÎ^æ °Eâ?ÇÒe5:nodesl35:udp://tracker.openbittorrent.com:8035:udp://tracker.openbittorrent.com:80ee \ No newline at end of file