Sergey Matveev's repositories - btrtrc.git/blobdiff - bencode/decode.go
Attribute accepted connection to holepunching when connect message is late
[btrtrc.git] / bencode / decode.go
index 081a44a715ea7fc7e31d95790a1c8f39a863b9ae..3839b849c21155cffb15132d12b310d3c3936900 100644 (file)
@@ -12,7 +12,16 @@ import (
        "sync"
 )
 
+// The default bencode string length limit. This is a poor attempt to prevent excessive memory
+// allocation when parsing, but also leaves the window open to implement a better solution.
+const DefaultDecodeMaxStrLen = 1<<27 - 1 // ~128MiB
+
+type MaxStrLen = int64
+
 type Decoder struct {
+       // Maximum parsed bencode string length. Defaults to DefaultDecodeMaxStrLen if zero.
+       MaxStrLen MaxStrLen
+
        r interface {
                io.ByteScanner
                io.Reader
@@ -28,14 +37,21 @@ func (d *Decoder) Decode(v interface{}) (err error) {
                        return
                }
                r := recover()
+               if r == nil {
+                       return
+               }
                _, ok := r.(runtime.Error)
                if ok {
                        panic(r)
                }
-               err, ok = r.(error)
-               if !ok && r != nil {
+               if err, ok = r.(error); !ok {
                        panic(r)
                }
+               // An io.EOF recovered here was thrown from deeper in parsing, i.e. mid-value, so it is unexpected.
+               // At value boundaries, errors are returned directly (at least until all the panics are removed).
+               if err == io.EOF {
+                       err = io.ErrUnexpectedEOF
+               }
        }()
 
        pv := reflect.ValueOf(v)
@@ -101,17 +117,29 @@ func (d *Decoder) throwSyntaxError(offset int64, err error) {
        })
 }
 
-// called when 'i' was consumed
-func (d *Decoder) parseInt(v reflect.Value) {
-       start := d.Offset - 1
+// Assume the 'i' is already consumed. Read and validate the rest of an int into the buffer.
+func (d *Decoder) readInt() error {
+       // start := d.Offset - 1
        d.readUntil('e')
-       if d.buf.Len() == 0 {
-               panic(&SyntaxError{
-                       Offset: start,
-                       What:   errors.New("empty integer value"),
-               })
-       }
+       if err := d.checkBufferedInt(); err != nil {
+               return err
+       }
+       // if d.buf.Len() == 0 {
+       //      panic(&SyntaxError{
+       //              Offset: start,
+       //              What:   errors.New("empty integer value"),
+       //      })
+       // }
+       return nil
+}
 
+// called when 'i' was consumed, for the integer type in v.
+func (d *Decoder) parseInt(v reflect.Value) error {
+       start := d.Offset - 1
+
+       if err := d.readInt(); err != nil {
+               return err
+       }
        s := bytesAsString(d.buf.Bytes())
 
        switch v.Kind() {
@@ -120,10 +148,10 @@ func (d *Decoder) parseInt(v reflect.Value) {
                checkForIntParseError(err, start)
 
                if v.OverflowInt(n) {
-                       panic(&UnmarshalTypeError{
-                               Value: "integer " + s,
-                               Type:  v.Type(),
-                       })
+                       return &UnmarshalTypeError{
+                               BencodeTypeName:     "int",
+                               UnmarshalTargetType: v.Type(),
+                       }
                }
                v.SetInt(n)
        case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
@@ -131,33 +159,63 @@ func (d *Decoder) parseInt(v reflect.Value) {
                checkForIntParseError(err, start)
 
                if v.OverflowUint(n) {
-                       panic(&UnmarshalTypeError{
-                               Value: "integer " + s,
-                               Type:  v.Type(),
-                       })
+                       return &UnmarshalTypeError{
+                               BencodeTypeName:     "int",
+                               UnmarshalTargetType: v.Type(),
+                       }
                }
                v.SetUint(n)
        case reflect.Bool:
                v.SetBool(s != "0")
        default:
-               panic(&UnmarshalTypeError{
-                       Value: "integer " + s,
-                       Type:  v.Type(),
-               })
+               return &UnmarshalTypeError{
+                       BencodeTypeName:     "int",
+                       UnmarshalTargetType: v.Type(),
+               }
        }
        d.buf.Reset()
+       return nil
 }
 
-func (d *Decoder) parseString(v reflect.Value) error {
-       start := d.Offset - 1
+// checkBufferedInt rejects a buffered multi-byte integer whose leading digit (after '-') isn't 1-9.
+func (d *Decoder) checkBufferedInt() error {
+       digits := d.buf.Bytes()
+       if len(digits) > 1 {
+               if digits[0] == '-' {
+                       digits = digits[1:]
+               }
+               if digits[0] < '1' || digits[0] > '9' {
+                       return errors.New("invalid leading digit")
+               }
+       }
+       return nil
+}
 
-       // read the string length first
+// parseStringLength parses the length prefix of a bencode string, enforcing the Decoder's limit.
+func (d *Decoder) parseStringLength() (int, error) {
+       // We should have already consumed the first byte of the length into the Decoder buf.
+       start := d.Offset - 1
        d.readUntil(':')
+       if err := d.checkBufferedInt(); err != nil {
+               return 0, err
+       }
+       // Ideally the limit would be the platform uint size, but Go offers no allocator hook or
+       // total-memory cap, so the best available is bounding the size of a single decoded value.
        length, err := strconv.ParseInt(bytesAsString(d.buf.Bytes()), 10, 0)
        checkForIntParseError(err, start)
+       if int64(length) > d.getMaxStrLen() {
+               err = fmt.Errorf("parsed string length %v exceeds limit (%v)", length, d.getMaxStrLen())
+       }
+       d.buf.Reset()
+       return int(length), err
+}
 
+func (d *Decoder) parseString(v reflect.Value) error {
+       length, err := d.parseStringLength()
+       if err != nil {
+               return err
+       }
        defer d.buf.Reset()
-
        read := func(b []byte) {
                n, err := io.ReadFull(d.r, b)
                d.Offset += int64(n)
@@ -188,85 +246,99 @@ func (d *Decoder) parseString(v reflect.Value) error {
                if v.Type().Elem().Kind() != reflect.Uint8 {
                        break
                }
-               d.buf.Grow(int(length))
+               d.buf.Grow(length)
                b := d.buf.Bytes()[:length]
                read(b)
                reflect.Copy(v, reflect.ValueOf(b))
                return nil
+       case reflect.Bool:
+               d.buf.Grow(length)
+               b := d.buf.Bytes()[:length]
+               read(b)
+               x, err := strconv.ParseBool(bytesAsString(b))
+               if err != nil {
+                       x = length != 0
+               }
+               v.SetBool(x)
+               return nil
        }
-       d.buf.Grow(int(length))
+       // Can't move this into default clause because some cases above fall through to here after
+       // additional checks.
+       d.buf.Grow(length)
        read(d.buf.Bytes()[:length])
        // I believe we return here to support "ignore_unmarshal_type_error".
        return &UnmarshalTypeError{
-               Value: "string",
-               Type:  v.Type(),
+               BencodeTypeName:     "string",
+               UnmarshalTargetType: v.Type(),
        }
 }
 
 // Info for parsing a dict value.
 type dictField struct {
-       Value reflect.Value // Storage for the parsed value.
-       // True if field value should be parsed into Value. If false, the value
-       // should be parsed and discarded.
-       Ok                       bool
-       Set                      func() // Call this after parsing into Value.
-       IgnoreUnmarshalTypeError bool
+       Type reflect.Type
+       Get  func(value reflect.Value) func(reflect.Value)
+       Tags tag
 }
 
 // Returns specifics for parsing a dict field value.
-func getDictField(dict reflect.Value, key string) dictField {
+func getDictField(dict reflect.Type, key string) (_ dictField, err error) {
        // get valuev as a map value or as a struct field
-       switch dict.Kind() {
+       switch k := dict.Kind(); k {
        case reflect.Map:
-               value := reflect.New(dict.Type().Elem()).Elem()
                return dictField{
-                       Value: value,
-                       Ok:    true,
-                       Set: func() {
-                               if dict.IsNil() {
-                                       dict.Set(reflect.MakeMap(dict.Type()))
+                       Type: dict.Elem(),
+                       Get: func(mapValue reflect.Value) func(reflect.Value) {
+                               return func(value reflect.Value) {
+                                       if mapValue.IsNil() {
+                                               mapValue.Set(reflect.MakeMap(dict))
+                                       }
+                                       // Assigns the value into the map.
+                                       // log.Printf("map type: %v", mapValue.Type())
+                                       mapValue.SetMapIndex(reflect.ValueOf(key).Convert(dict.Key()), value)
                                }
-                               // Assigns the value into the map.
-                               dict.SetMapIndex(reflect.ValueOf(key).Convert(dict.Type().Key()), value)
                        },
-               }
+               }, nil
        case reflect.Struct:
-               sf, ok := getStructFieldForKey(dict.Type(), key)
-               if !ok {
-                       return dictField{}
-               }
-               if sf.r.PkgPath != "" {
-                       panic(&UnmarshalFieldError{
-                               Key:   key,
-                               Type:  dict.Type(),
-                               Field: sf.r,
-                       })
-               }
-               return dictField{
-                       Value:                    dict.FieldByIndex(sf.r.Index),
-                       Ok:                       true,
-                       Set:                      func() {},
-                       IgnoreUnmarshalTypeError: sf.tag.IgnoreUnmarshalTypeError(),
-               }
+               return getStructFieldForKey(dict, key), nil
+               // if sf.r.PkgPath != "" {
+               //      panic(&UnmarshalFieldError{
+               //              Key:   key,
+               //              Type:  dict.Type(),
+               //              Field: sf.r,
+               //      })
+               // }
        default:
-               return dictField{}
+               err = fmt.Errorf("can't assign bencode dict items into a %v", k)
+               return
        }
 }
 
-type structField struct {
-       r   reflect.StructField
-       tag tag
-}
-
 var (
        structFieldsMu sync.Mutex
-       structFields   = map[reflect.Type]map[string]structField{}
+       structFields   = map[reflect.Type]map[string]dictField{}
 )
 
-func parseStructFields(struct_ reflect.Type, each func(string, structField)) {
-       for i, n := 0, struct_.NumField(); i < n; i++ {
+func parseStructFields(struct_ reflect.Type, each func(key string, df dictField)) {
+       for _i, n := 0, struct_.NumField(); _i < n; _i++ {
+               i := _i
                f := struct_.Field(i)
                if f.Anonymous {
+                       t := f.Type
+                       if t.Kind() == reflect.Ptr {
+                               t = t.Elem()
+                       }
+                       parseStructFields(t, func(key string, df dictField) {
+                               innerGet := df.Get
+                               df.Get = func(value reflect.Value) func(reflect.Value) {
+                                       anonPtr := value.Field(i)
+                                       if anonPtr.Kind() == reflect.Ptr && anonPtr.IsNil() {
+                                               anonPtr.Set(reflect.New(f.Type.Elem()))
+                                               anonPtr = anonPtr.Elem()
+                                       }
+                                       return innerGet(anonPtr)
+                               }
+                               each(key, df)
+                       })
                        continue
                }
                tagStr := f.Tag.Get("bencode")
@@ -278,78 +350,104 @@ func parseStructFields(struct_ reflect.Type, each func(string, structField)) {
                if key == "" {
                        key = f.Name
                }
-               each(key, structField{f, tag})
+               each(key, dictField{f.Type, func(value reflect.Value) func(reflect.Value) {
+                       return value.Field(i).Set
+               }, tag})
        }
 }
 
 func saveStructFields(struct_ reflect.Type) {
-       m := make(map[string]structField)
-       parseStructFields(struct_, func(key string, sf structField) {
+       m := make(map[string]dictField)
+       parseStructFields(struct_, func(key string, sf dictField) {
                m[key] = sf
        })
        structFields[struct_] = m
 }
 
-func getStructFieldForKey(struct_ reflect.Type, key string) (f structField, ok bool) {
+func getStructFieldForKey(struct_ reflect.Type, key string) (f dictField) {
        structFieldsMu.Lock()
        if _, ok := structFields[struct_]; !ok {
                saveStructFields(struct_)
        }
-       f, ok = structFields[struct_][key]
+       f, ok := structFields[struct_][key]
        structFieldsMu.Unlock()
+       if !ok {
+               var discard interface{}
+               return dictField{
+                       Type: reflect.TypeOf(discard),
+                       Get:  func(reflect.Value) func(reflect.Value) { return func(reflect.Value) {} },
+                       Tags: nil,
+               }
+       }
        return
 }
 
 func (d *Decoder) parseDict(v reflect.Value) error {
-       // so, at this point 'd' byte was consumed, let's just read key/value
-       // pairs one by one
+       // At this point 'd' byte was consumed, now read key/value pairs
        for {
                var keyStr string
                keyValue := reflect.ValueOf(&keyStr).Elem()
                ok, err := d.parseValue(keyValue)
                if err != nil {
-                       return fmt.Errorf("error parsing dict key: %s", err)
+                       return fmt.Errorf("error parsing dict key: %w", err)
                }
                if !ok {
                        return nil
                }
 
-               df := getDictField(v, keyStr)
+               df, err := getDictField(v.Type(), keyStr)
+               if err != nil {
+                       return fmt.Errorf("parsing bencode dict into %v: %w", v.Type(), err)
+               }
 
                // now we need to actually parse it
-               if df.Ok {
-                       // log.Printf("parsing ok struct field for key %q", keyStr)
-                       ok, err = d.parseValue(df.Value)
-               } else {
+               if df.Type == nil {
                        // Discard the value, there's nowhere to put it.
                        var if_ interface{}
                        if_, ok = d.parseValueInterface()
                        if if_ == nil {
-                               err = fmt.Errorf("error parsing value for key %q", keyStr)
+                               return fmt.Errorf("error parsing value for key %q", keyStr)
                        }
+                       if !ok {
+                               return fmt.Errorf("missing value for key %q", keyStr)
+                       }
+                       continue
                }
+               setValue := reflect.New(df.Type).Elem()
+               // log.Printf("parsing into %v", setValue.Type())
+               ok, err = d.parseValue(setValue)
                if err != nil {
-                       if _, ok := err.(*UnmarshalTypeError); !ok || !df.IgnoreUnmarshalTypeError {
-                               return fmt.Errorf("parsing value for key %q: %s", keyStr, err)
+                       var target *UnmarshalTypeError
+                       if !(errors.As(err, &target) && df.Tags.IgnoreUnmarshalTypeError()) {
+                               return fmt.Errorf("parsing value for key %q: %w", keyStr, err)
                        }
                }
                if !ok {
                        return fmt.Errorf("missing value for key %q", keyStr)
                }
-               if df.Ok {
-                       df.Set()
-               }
+               df.Get(v)(setValue)
        }
 }
 
 func (d *Decoder) parseList(v reflect.Value) error {
        switch v.Kind() {
-       case reflect.Array, reflect.Slice:
        default:
-               return &UnmarshalTypeError{
-                       Value: "list",
-                       Type:  v.Type(),
+               // If the list is a singleton of the expected type, use that value. See
+               // https://github.com/anacrolix/torrent/issues/297.
+               l := reflect.New(reflect.SliceOf(v.Type()))
+               if err := d.parseList(l.Elem()); err != nil {
+                       return err
+               }
+               if l.Elem().Len() != 1 {
+                       return &UnmarshalTypeError{
+                               BencodeTypeName:     "list",
+                               UnmarshalTargetType: v.Type(),
+                       }
                }
+               v.Set(l.Elem().Index(0))
+               return nil
+       case reflect.Array, reflect.Slice:
+               // We can work with this. Normal case, fallthrough.
        }
 
        i := 0
@@ -439,7 +537,6 @@ func (d *Decoder) readOneValue() bool {
        }
 
        return true
-
 }
 
 func (d *Decoder) parseUnmarshaler(v reflect.Value) bool {
@@ -488,7 +585,7 @@ func (d *Decoder) parseValue(v reflect.Value) (bool, error) {
 
        b, err := d.r.ReadByte()
        if err != nil {
-               panic(err)
+               return false, err
        }
        d.Offset++
 
@@ -500,8 +597,7 @@ func (d *Decoder) parseValue(v reflect.Value) (bool, error) {
        case 'l':
                return true, d.parseList(v)
        case 'i':
-               d.parseInt(v)
-               return true, nil
+               return true, d.parseInt(v)
        default:
                if b >= '0' && b <= '9' {
                        // It's a string.
@@ -553,16 +649,13 @@ func (d *Decoder) parseValueInterface() (interface{}, bool) {
        }
 }
 
+// Called after 'i', for an arbitrary integer size.
 func (d *Decoder) parseIntInterface() (ret interface{}) {
        start := d.Offset - 1
-       d.readUntil('e')
-       if d.buf.Len() == 0 {
-               panic(&SyntaxError{
-                       Offset: start,
-                       What:   errors.New("empty integer value"),
-               })
-       }
 
+       if err := d.readInt(); err != nil {
+               panic(err)
+       }
        n, err := strconv.ParseInt(d.buf.String(), 10, 64)
        if ne, ok := err.(*strconv.NumError); ok && ne.Err == strconv.ErrRange {
                i := new(big.Int)
@@ -583,33 +676,36 @@ func (d *Decoder) parseIntInterface() (ret interface{}) {
        return
 }
 
-func (d *Decoder) parseStringInterface() interface{} {
-       start := d.Offset - 1
-
-       // read the string length first
-       d.readUntil(':')
-       length, err := strconv.ParseInt(d.buf.String(), 10, 64)
-       checkForIntParseError(err, start)
-
-       d.buf.Reset()
-       n, err := io.CopyN(&d.buf, d.r, length)
-       d.Offset += n
+func (d *Decoder) readBytes(length int) []byte {
+       b, err := io.ReadAll(io.LimitReader(d.r, int64(length)))
        if err != nil {
-               checkForUnexpectedEOF(err, d.Offset)
-               panic(&SyntaxError{
-                       Offset: d.Offset,
-                       What:   errors.New("unexpected I/O error: " + err.Error()),
-               })
+               panic(err)
        }
+       if len(b) != length {
+               panic(fmt.Errorf("read %v bytes expected %v", len(b), length))
+       }
+       return b
+}
 
-       s := d.buf.String()
-       d.buf.Reset()
-       return s
+// parseStringInterface parses a bencode string whose first length digit is already buffered.
+// It panics on a bad or over-limit length, or if the payload cannot be read in full.
+func (d *Decoder) parseStringInterface() string {
+       length, err := d.parseStringLength()
+       if err != nil {
+               panic(err)
+       }
+       // readBytes panics on a short read, so b holds exactly length bytes here.
+       b := d.readBytes(length)
+       d.Offset += int64(len(b))
+       return bytesAsString(b)
+}
 
 func (d *Decoder) parseDictInterface() interface{} {
        dict := make(map[string]interface{})
+       var lastKey string
+       lastKeyOk := false
        for {
+               start := d.Offset
                keyi, ok := d.parseValueInterface()
                if !ok {
                        break
@@ -622,29 +718,35 @@ func (d *Decoder) parseDictInterface() interface{} {
                                What:   errors.New("non-string key in a dict"),
                        })
                }
-
+               if lastKeyOk && key <= lastKey {
+                       d.throwSyntaxError(start, fmt.Errorf("dict keys unsorted: %q <= %q", key, lastKey))
+               }
+               start = d.Offset
                valuei, ok := d.parseValueInterface()
                if !ok {
-                       break
+                       d.throwSyntaxError(start, fmt.Errorf("dict elem missing value [key=%v]", key))
                }
 
+               lastKey = key
+               lastKeyOk = true
                dict[key] = valuei
        }
        return dict
 }
 
-func (d *Decoder) parseListInterface() interface{} {
-       var list []interface{}
-       for {
-               valuei, ok := d.parseValueInterface()
-               if !ok {
-                       break
-               }
-
+func (d *Decoder) parseListInterface() (list []interface{}) {
+       list = []interface{}{}
+       valuei, ok := d.parseValueInterface()
+       for ok {
                list = append(list, valuei)
+               valuei, ok = d.parseValueInterface()
        }
-       if list == nil {
-               list = make([]interface{}, 0, 0)
+       return
+}
+
+func (d *Decoder) getMaxStrLen() int64 {
+       if d.MaxStrLen == 0 {
+               return DefaultDecodeMaxStrLen
        }
-       return list
+       return d.MaxStrLen
 }