]> Sergey Matveev's repositories - btrtrc.git/commitdiff
Be more pedantic about trailing data in metainfo.Load and bencode.Unmarshal
authorMatt Joiner <anacrolix@gmail.com>
Tue, 23 Jul 2024 08:58:04 +0000 (18:58 +1000)
committerMatt Joiner <anacrolix@gmail.com>
Tue, 23 Jul 2024 08:58:04 +0000 (18:58 +1000)
Fixes https://github.com/anacrolix/torrent/issues/963.

bencode/api.go
bencode/decode.go
bencode/decode_test.go
bencode/scanner.go
metainfo/metainfo.go
metainfo/metainfo_test.go
metainfo/testdata/flat-url-list.torrent
metainfo/testdata/minimal-trailing-newline.torrent [new file with mode: 0644]
metainfo/testdata/trackerless.torrent

index 3c379abbf837355da04bf1cd9cd7522f0b110619..ece87eb8394ec43211eac8a3a513338e056a772d 100644 (file)
@@ -139,12 +139,15 @@ func MustMarshal(v interface{}) []byte {
 // behaviour (inspired by Rust's serde here).
 func Unmarshal(data []byte, v interface{}) (err error) {
        buf := bytes.NewReader(data)
-       e := Decoder{r: buf}
-       err = e.Decode(v)
-       if err == nil && buf.Len() != 0 {
-               err = ErrUnusedTrailingBytes{buf.Len()}
+       dec := Decoder{r: buf}
+       err = dec.Decode(v)
+       if err != nil {
+               return
+       }
+       if buf.Len() != 0 {
+               return ErrUnusedTrailingBytes{buf.Len()}
        }
-       return
+       return dec.ReadEOF()
 }
 
 type ErrUnusedTrailingBytes struct {
index ce1f8b465f0ddb2404611bd7bac10e68d399fc91..d6fffdc4b03fc36b51f1e3eb0dbb3ed450747237 100644 (file)
@@ -69,6 +69,23 @@ func (d *Decoder) Decode(v interface{}) (err error) {
        return
 }
 
+// Check for EOF in the decoder input stream. Used to assert the input ends on a clean message
+// boundary.
+func (d *Decoder) ReadEOF() error {
+       _, err := d.r.ReadByte()
+       if err == nil {
+               err := d.r.UnreadByte()
+               if err != nil {
+                       panic(err)
+               }
+               return errors.New("expected EOF")
+       }
+       if err == io.EOF {
+               return nil
+       }
+       return fmt.Errorf("expected EOF, got %w", err)
+}
+
 func checkForUnexpectedEOF(err error, offset int64) {
        if err == io.EOF {
                panic(&SyntaxError{
@@ -582,8 +599,8 @@ func (d *Decoder) parseUnmarshaler(v reflect.Value) bool {
        return true
 }
 
-// Returns true if there was a value and it's now stored in 'v', otherwise
-// there was an end symbol ("e") and no value was stored.
+// Returns true if there was a value and it's now stored in 'v'. Otherwise, there was an end symbol
+// ("e") and no value was stored.
 func (d *Decoder) parseValue(v reflect.Value) (bool, error) {
        // we support one level of indirection at the moment
        if v.Kind() == reflect.Ptr {
index a350beea31d47393b700917464b995a94c061f05..16fa448773bb9b9e31a2bd0b8e93d69f68151e76 100644 (file)
@@ -2,10 +2,12 @@ package bencode
 
 import (
        "bytes"
+       "encoding/json"
        "fmt"
        "io"
        "math/big"
        "reflect"
+       "strings"
        "testing"
 
        qt "github.com/frankban/quicktest"
@@ -265,3 +267,26 @@ func TestDecodeStringIntoBoolPtr(t *testing.T) {
        check("d7:private1:Fe", false, false)
        check("d7:private11:bunnyfoofooe", false, true)
 }
+
+// To set expectations about how our Decoder should work.
+func TestJsonDecoderBehaviour(t *testing.T) {
+       c := qt.New(t)
+       test := func(input string, items int, finalErr error) {
+               d := json.NewDecoder(strings.NewReader(input))
+               actualItems := 0
+               var firstErr error
+               for {
+                       var discard any
+                       firstErr = d.Decode(&discard)
+                       if firstErr != nil {
+                               break
+                       }
+                       actualItems++
+               }
+               c.Check(firstErr, qt.Equals, finalErr)
+               c.Check(actualItems, qt.Equals, items)
+       }
+       test("", 0, io.EOF)
+       test("{}", 1, io.EOF)
+       test("{} {", 1, io.ErrUnexpectedEOF)
+}
index 967d5a5d36235d900bbbe2eb99d2e2e3d7e73955..34b85505752bf03f102c07f7165366e95c0d9292 100644 (file)
@@ -22,11 +22,28 @@ func (me *scanner) ReadByte() (byte, error) {
                me.unread = false
                return me.b[0], nil
        }
-       n, err := me.r.Read(me.b[:])
-       if n == 1 {
-               err = nil
+       for {
+               n, err := me.r.Read(me.b[:])
+               switch n {
+               case 0:
+                       // io.Reader.Read says to try again if there's no error and no bytes returned. We can't
+                       // signal that the caller should do this method's interface.
+                       if err != nil {
+                               return 0, err
+                       }
+                       panic(err)
+               case 1:
+                       // There's no way to signal that the byte is valid unless error is nil.
+                       return me.b[0], nil
+               default:
+                       if err != nil {
+                               // I can't see why Read would return more bytes than expected, but the error should
+                               // tell us why.
+                               panic(err)
+                       }
+                       panic(n)
+               }
        }
-       return me.b[0], err
 }
 
 func (me *scanner) UnreadByte() error {
index 383b5ca9b9e8b90f97ba99c36869ee473197610a..56d4ce59ce3039b07122702990dc5b0639629af3 100644 (file)
@@ -2,6 +2,7 @@ package metainfo
 
 import (
        "bufio"
+       "fmt"
        "io"
        "net/url"
        "os"
@@ -38,7 +39,11 @@ func Load(r io.Reader) (*MetaInfo, error) {
        if err != nil {
                return nil, err
        }
-       return &mi, nil
+       err = d.ReadEOF()
+       if err != nil {
+               err = fmt.Errorf("error after decoding metainfo: %w", err)
+       }
+       return &mi, err
 }
 
 // Convenience function for loading a MetaInfo from a file.
index 4d7c2b343b6812a5bda49a907788a23074a16b1c..6a97f9117291b77395bd8ca91d2ec48b327d82e9 100644 (file)
@@ -48,6 +48,9 @@ func TestFile(t *testing.T) {
        testFile(t, "testdata/continuum.torrent")
        testFile(t, "testdata/23516C72685E8DB0C8F15553382A927F185C4F01.torrent")
        testFile(t, "testdata/trackerless.torrent")
+       _, err := LoadFromFile("testdata/minimal-trailing-newline.torrent")
+       c := qt.New(t)
+       c.Check(err, qt.ErrorMatches, ".*expected EOF")
 }
 
 // Ensure that the correct number of pieces are generated when hashing files.
index 216ab38cacceff5f5bfd42e1958ce966dad53a1c..47f5f8b68ec6a624ac17178c67f26e5871ec86b2 100644 (file)
Binary files a/metainfo/testdata/flat-url-list.torrent and b/metainfo/testdata/flat-url-list.torrent differ
diff --git a/metainfo/testdata/minimal-trailing-newline.torrent b/metainfo/testdata/minimal-trailing-newline.torrent
new file mode 100644 (file)
index 0000000..7673daa
--- /dev/null
@@ -0,0 +1 @@
+de
index 65372766f53640de6bd92c554107aee2958a65e0..e8644faa5e41cb605f47d0e49d0c510ca6814fe0 100644 (file)
@@ -1 +1 @@
-d7:comment19:This is just a test10:created by12:Johnny Bravo13:creation datei1430648794e8:encoding5:UTF-84:infod6:lengthi1128e4:name12:testfile.bin12:piece lengthi32768e6:pieces20:Õ\88ë        =\91U\8cäiÎ^æ °Eâ?ÇÒe5:nodesl35:udp://tracker.openbittorrent.com:8035:udp://tracker.openbittorrent.com:80ee
+d7:comment19:This is just a test10:created by12:Johnny Bravo13:creation datei1430648794e8:encoding5:UTF-84:infod6:lengthi1128e4:name12:testfile.bin12:piece lengthi32768e6:pieces20:Õ\88ë        =\91U\8cäiÎ^æ °Eâ?ÇÒe5:nodesl35:udp://tracker.openbittorrent.com:8035:udp://tracker.openbittorrent.com:80ee
\ No newline at end of file