From 32c406f9fec12c66bf62c1ad6e0c38be4f425b75 Mon Sep 17 00:00:00 2001 From: Matt Joiner Date: Tue, 23 Jul 2024 18:58:04 +1000 Subject: [PATCH] Be more pedantic about trailing data in metainfo.Load and bencode.Unmarshal Fixes https://github.com/anacrolix/torrent/issues/963. --- bencode/api.go | 13 +++++---- bencode/decode.go | 21 +++++++++++++-- bencode/decode_test.go | 25 ++++++++++++++++++ bencode/scanner.go | 25 +++++++++++++++--- metainfo/metainfo.go | 7 ++++- metainfo/metainfo_test.go | 3 +++ metainfo/testdata/flat-url-list.torrent | Bin 2739 -> 2738 bytes .../testdata/minimal-trailing-newline.torrent | 1 + metainfo/testdata/trackerless.torrent | 2 +- 9 files changed, 84 insertions(+), 13 deletions(-) create mode 100644 metainfo/testdata/minimal-trailing-newline.torrent diff --git a/bencode/api.go b/bencode/api.go index 3c379abb..ece87eb8 100644 --- a/bencode/api.go +++ b/bencode/api.go @@ -139,12 +139,15 @@ func MustMarshal(v interface{}) []byte { // behaviour (inspired by Rust's serde here). func Unmarshal(data []byte, v interface{}) (err error) { buf := bytes.NewReader(data) - e := Decoder{r: buf} - err = e.Decode(v) - if err == nil && buf.Len() != 0 { - err = ErrUnusedTrailingBytes{buf.Len()} + dec := Decoder{r: buf} + err = dec.Decode(v) + if err != nil { + return + } + if buf.Len() != 0 { + return ErrUnusedTrailingBytes{buf.Len()} } - return + return dec.ReadEOF() } type ErrUnusedTrailingBytes struct { diff --git a/bencode/decode.go b/bencode/decode.go index ce1f8b46..d6fffdc4 100644 --- a/bencode/decode.go +++ b/bencode/decode.go @@ -69,6 +69,23 @@ func (d *Decoder) Decode(v interface{}) (err error) { return } +// Check for EOF in the decoder input stream. Used to assert the input ends on a clean message +// boundary. +func (d *Decoder) ReadEOF() error { + _, err := d.r.ReadByte() + if err == nil { + err := d.r.UnreadByte() + if err != nil { + panic(err) + } + return errors.New("expected EOF") + } + if err == io.EOF { + return nil + } + return fmt.Errorf("expected EOF, got %w", err) +} + func checkForUnexpectedEOF(err error, offset int64) { if err == io.EOF { panic(&SyntaxError{ @@ -582,8 +599,8 @@ func (d *Decoder) parseUnmarshaler(v reflect.Value) bool { return true } -// Returns true if there was a value and it's now stored in 'v', otherwise -// there was an end symbol ("e") and no value was stored. +// Returns true if there was a value and it's now stored in 'v'. Otherwise, there was an end symbol +// ("e") and no value was stored. func (d *Decoder) parseValue(v reflect.Value) (bool, error) { // we support one level of indirection at the moment if v.Kind() == reflect.Ptr { diff --git a/bencode/decode_test.go b/bencode/decode_test.go index a350beea..16fa4487 100644 --- a/bencode/decode_test.go +++ b/bencode/decode_test.go @@ -2,10 +2,12 @@ package bencode import ( "bytes" + "encoding/json" "fmt" "io" "math/big" "reflect" + "strings" "testing" qt "github.com/frankban/quicktest" @@ -265,3 +267,26 @@ func TestDecodeStringIntoBoolPtr(t *testing.T) { check("d7:private1:Fe", false, false) check("d7:private11:bunnyfoofooe", false, true) } + +// To set expectations about how our Decoder should work. +func TestJsonDecoderBehaviour(t *testing.T) { + c := qt.New(t) + test := func(input string, items int, finalErr error) { + d := json.NewDecoder(strings.NewReader(input)) + actualItems := 0 + var firstErr error + for { + var discard any + firstErr = d.Decode(&discard) + if firstErr != nil { + break + } + actualItems++ + } + c.Check(firstErr, qt.Equals, finalErr) + c.Check(actualItems, qt.Equals, items) + } + test("", 0, io.EOF) + test("{}", 1, io.EOF) + test("{} {", 1, io.ErrUnexpectedEOF) +} diff --git a/bencode/scanner.go b/bencode/scanner.go index 967d5a5d..34b85505 100644 --- a/bencode/scanner.go +++ b/bencode/scanner.go @@ -22,11 +22,28 @@ func (me *scanner) ReadByte() (byte, error) { me.unread = false return me.b[0], nil } - n, err := me.r.Read(me.b[:]) - if n == 1 { - err = nil + for { + n, err := me.r.Read(me.b[:]) + switch n { + case 0: + // io.Reader.Read says to try again if there's no error and no bytes returned. We can't + // signal that the caller should do this method's interface. + if err != nil { + return 0, err + } + panic(err) + case 1: + // There's no way to signal that the byte is valid unless error is nil. + return me.b[0], nil + default: + if err != nil { + // I can't see why Read would return more bytes than expected, but the error should + // tell us why. + panic(err) + } + panic(n) + } } - return me.b[0], err } func (me *scanner) UnreadByte() error { diff --git a/metainfo/metainfo.go b/metainfo/metainfo.go index 383b5ca9..56d4ce59 100644 --- a/metainfo/metainfo.go +++ b/metainfo/metainfo.go @@ -2,6 +2,7 @@ package metainfo import ( "bufio" + "fmt" "io" "net/url" "os" @@ -38,7 +39,11 @@ func Load(r io.Reader) (*MetaInfo, error) { if err != nil { return nil, err } - return &mi, nil + err = d.ReadEOF() + if err != nil { + err = fmt.Errorf("error after decoding metainfo: %w", err) + } + return &mi, err } // Convenience function for loading a MetaInfo from a file. diff --git a/metainfo/metainfo_test.go b/metainfo/metainfo_test.go index 4d7c2b34..6a97f911 100644 --- a/metainfo/metainfo_test.go +++ b/metainfo/metainfo_test.go @@ -48,6 +48,9 @@ func TestFile(t *testing.T) { testFile(t, "testdata/continuum.torrent") testFile(t, "testdata/23516C72685E8DB0C8F15553382A927F185C4F01.torrent") testFile(t, "testdata/trackerless.torrent") + _, err := LoadFromFile("testdata/minimal-trailing-newline.torrent") + c := qt.New(t) + c.Check(err, qt.ErrorMatches, ".*expected EOF") } // Ensure that the correct number of pieces are generated when hashing files. diff --git a/metainfo/testdata/flat-url-list.torrent b/metainfo/testdata/flat-url-list.torrent index 216ab38cacceff5f5bfd42e1958ce966dad53a1c..47f5f8b68ec6a624ac17178c67f26e5871ec86b2 100644 GIT binary patch delta 7 Ocmdlix=D1yCN2OBwgUVB delta 9 Qcmdlax>