Sergey Matveev's repositories - btrtrc.git/blobdiff - bencode/decode.go
Attribute accepted connection to holepunching when connect message is late
[btrtrc.git] / bencode / decode.go
index 82cad32bbe4b44f27b6f8978ff70aa30123abd5b..3839b849c21155cffb15132d12b310d3c3936900 100644 (file)
@@ -1,27 +1,56 @@
 package bencode
 
-import "reflect"
-import "runtime"
-import "bufio"
-import "bytes"
-import "strconv"
-import "strings"
-import "io"
-
-type decoder struct {
-       *bufio.Reader
-       offset int64
+import (
+       "bytes"
+       "errors"
+       "fmt"
+       "io"
+       "math/big"
+       "reflect"
+       "runtime"
+       "strconv"
+       "sync"
+)
+
+// The default bencode string length limit. This is a poor attempt to prevent excessive memory
+// allocation when parsing, but also leaves the window open to implement a better solution.
+const DefaultDecodeMaxStrLen = 1<<27 - 1 // ~128MiB
+
+type MaxStrLen = int64
+
+type Decoder struct {
+       // Maximum parsed bencode string length. Defaults to DefaultDecodeMaxStrLen if zero.
+       MaxStrLen MaxStrLen
+
+       r interface {
+               io.ByteScanner
+               io.Reader
+       }
+       // Sum of bytes used to Decode values.
+       Offset int64
        buf    bytes.Buffer
-       key    string
 }
 
-func (d *decoder) decode(v interface{}) (err error) {
+func (d *Decoder) Decode(v interface{}) (err error) {
        defer func() {
-               if e := recover(); e != nil {
-                       if _, ok := e.(runtime.Error); ok {
-                               panic(e)
-                       }
-                       err = e.(error)
+               if err != nil {
+                       return
+               }
+               r := recover()
+               if r == nil {
+                       return
+               }
+               _, ok := r.(runtime.Error)
+               if ok {
+                       panic(r)
+               }
+               if err, ok = r.(error); !ok {
+                       panic(r)
+               }
+               // Errors thrown from deeper in parsing are unexpected. At value boundaries, errors should
+               // be returned directly (at least until all the panic nonsense is removed entirely).
+               if err == io.EOF {
+                       err = io.ErrUnexpectedEOF
                }
        }()
 
@@ -30,35 +59,41 @@ func (d *decoder) decode(v interface{}) (err error) {
                return &UnmarshalInvalidArgError{reflect.TypeOf(v)}
        }
 
-       d.parse_value(pv.Elem())
-       return nil
+       ok, err := d.parseValue(pv.Elem())
+       if err != nil {
+               return
+       }
+       if !ok {
+               d.throwSyntaxError(d.Offset-1, errors.New("unexpected 'e'"))
+       }
+       return
 }
 
-func check_for_unexpected_eof(err error, offset int64) {
+func checkForUnexpectedEOF(err error, offset int64) {
        if err == io.EOF {
                panic(&SyntaxError{
                        Offset: offset,
-                       what:   "unexpected EOF",
+                       What:   io.ErrUnexpectedEOF,
                })
        }
 }
 
-func (d *decoder) read_byte() byte {
-       b, err := d.ReadByte()
+func (d *Decoder) readByte() byte {
+       b, err := d.r.ReadByte()
        if err != nil {
-               check_for_unexpected_eof(err, d.offset)
+               checkForUnexpectedEOF(err, d.Offset)
                panic(err)
        }
 
-       d.offset++
+       d.Offset++
        return b
 }
 
 // reads data writing it to 'd.buf' until 'sep' byte is encountered, 'sep' byte
 // is consumed, but not included into the 'd.buf'
-func (d *decoder) read_until(sep byte) {
+func (d *Decoder) readUntil(sep byte) {
        for {
-               b := d.read_byte()
+               b := d.readByte()
                if b == sep {
                        return
                }
@@ -66,243 +101,375 @@ func (d *decoder) read_until(sep byte) {
        }
 }
 
-func check_for_int_parse_error(err error, offset int64) {
+func checkForIntParseError(err error, offset int64) {
        if err != nil {
                panic(&SyntaxError{
                        Offset: offset,
-                       what:   err.Error(),
+                       What:   err,
                })
        }
 }
 
-// called when 'i' was consumed
-func (d *decoder) parse_int(v reflect.Value) {
-       start := d.offset - 1
-       d.read_until('e')
-       if d.buf.Len() == 0 {
-               panic(&SyntaxError{
-                       Offset: start,
-                       what:   "empty integer value",
-               })
+func (d *Decoder) throwSyntaxError(offset int64, err error) {
+       panic(&SyntaxError{
+               Offset: offset,
+               What:   err,
+       })
+}
+
+// Assume the 'i' is already consumed. Read and validate the rest of an int into the buffer.
+func (d *Decoder) readInt() error {
+       // start := d.Offset - 1
+       d.readUntil('e')
+       if err := d.checkBufferedInt(); err != nil {
+               return err
+       }
+       // if d.buf.Len() == 0 {
+       //      panic(&SyntaxError{
+       //              Offset: start,
+       //              What:   errors.New("empty integer value"),
+       //      })
+       // }
+       return nil
+}
+
+// called when 'i' was consumed, for the integer type in v.
+func (d *Decoder) parseInt(v reflect.Value) error {
+       start := d.Offset - 1
+
+       if err := d.readInt(); err != nil {
+               return err
        }
+       s := bytesAsString(d.buf.Bytes())
 
        switch v.Kind() {
        case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
-               n, err := strconv.ParseInt(d.buf.String(), 10, 64)
-               check_for_int_parse_error(err, start)
+               n, err := strconv.ParseInt(s, 10, 64)
+               checkForIntParseError(err, start)
 
                if v.OverflowInt(n) {
-                       panic(&UnmarshalTypeError{
-                               Value: "integer " + d.buf.String(),
-                               Type:  v.Type(),
-                       })
+                       return &UnmarshalTypeError{
+                               BencodeTypeName:     "int",
+                               UnmarshalTargetType: v.Type(),
+                       }
                }
                v.SetInt(n)
        case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
-               n, err := strconv.ParseUint(d.buf.String(), 10, 64)
-               check_for_int_parse_error(err, start)
+               n, err := strconv.ParseUint(s, 10, 64)
+               checkForIntParseError(err, start)
 
                if v.OverflowUint(n) {
-                       panic(&UnmarshalTypeError{
-                               Value: "integer " + d.buf.String(),
-                               Type:  v.Type(),
-                       })
+                       return &UnmarshalTypeError{
+                               BencodeTypeName:     "int",
+                               UnmarshalTargetType: v.Type(),
+                       }
                }
                v.SetUint(n)
        case reflect.Bool:
-               if d.buf.Len() == 1 && d.buf.Bytes()[0] == '0' {
-                       v.SetBool(false)
-               }
-               v.SetBool(true)
+               v.SetBool(s != "0")
        default:
-               panic(&UnmarshalTypeError{
-                       Value: "integer " + d.buf.String(),
-                       Type:  v.Type(),
-               })
+               return &UnmarshalTypeError{
+                       BencodeTypeName:     "int",
+                       UnmarshalTargetType: v.Type(),
+               }
        }
        d.buf.Reset()
+       return nil
 }
 
-func (d *decoder) parse_string(v reflect.Value) {
-       start := d.offset - 1
-
-       // read the string length first
-       d.read_until(':')
-       length, err := strconv.ParseInt(d.buf.String(), 10, 64)
-       check_for_int_parse_error(err, start)
+func (d *Decoder) checkBufferedInt() error {
+       b := d.buf.Bytes()
+       if len(b) <= 1 {
+               return nil
+       }
+       if b[0] == '-' {
+               b = b[1:]
+       }
+       if b[0] < '1' || b[0] > '9' {
+               return errors.New("invalid leading digit")
+       }
+       return nil
+}
 
+func (d *Decoder) parseStringLength() (int, error) {
+       // We should have already consumed the first byte of the length into the Decoder buf.
+       start := d.Offset - 1
+       d.readUntil(':')
+       if err := d.checkBufferedInt(); err != nil {
+               return 0, err
+       }
+       // Really the limit should be the uint size for the platform. But we can't pass in an allocator,
+       // or limit total memory use in Go, the best we might hope to do is limit the size of a single
+       // decoded value (by reading it in in-place and then operating on a view).
+       length, err := strconv.ParseInt(bytesAsString(d.buf.Bytes()), 10, 0)
+       checkForIntParseError(err, start)
+       if int64(length) > d.getMaxStrLen() {
+               err = fmt.Errorf("parsed string length %v exceeds limit (%v)", length, DefaultDecodeMaxStrLen)
+       }
        d.buf.Reset()
-       n, err := io.CopyN(&d.buf, d, length)
-       d.offset += n
+       return int(length), err
+}
+
+func (d *Decoder) parseString(v reflect.Value) error {
+       length, err := d.parseStringLength()
        if err != nil {
-               check_for_unexpected_eof(err, d.offset)
-               panic(&SyntaxError{
-                       Offset: d.offset,
-                       what:   "unexpected I/O error: " + err.Error(),
-               })
+               return err
+       }
+       defer d.buf.Reset()
+       read := func(b []byte) {
+               n, err := io.ReadFull(d.r, b)
+               d.Offset += int64(n)
+               if err != nil {
+                       checkForUnexpectedEOF(err, d.Offset)
+                       panic(&SyntaxError{
+                               Offset: d.Offset,
+                               What:   errors.New("unexpected I/O error: " + err.Error()),
+                       })
+               }
        }
 
        switch v.Kind() {
        case reflect.String:
-               v.SetString(d.buf.String())
+               b := make([]byte, length)
+               read(b)
+               v.SetString(bytesAsString(b))
+               return nil
        case reflect.Slice:
                if v.Type().Elem().Kind() != reflect.Uint8 {
-                       panic(&UnmarshalTypeError{
-                               Value: "string",
-                               Type:  v.Type(),
-                       })
+                       break
                }
-               sl := make([]byte, len(d.buf.Bytes()))
-               copy(sl, d.buf.Bytes())
-               v.Set(reflect.ValueOf(sl))
-       default:
-               panic(&UnmarshalTypeError{
-                       Value: "string",
-                       Type:  v.Type(),
-               })
+               b := make([]byte, length)
+               read(b)
+               v.SetBytes(b)
+               return nil
+       case reflect.Array:
+               if v.Type().Elem().Kind() != reflect.Uint8 {
+                       break
+               }
+               d.buf.Grow(length)
+               b := d.buf.Bytes()[:length]
+               read(b)
+               reflect.Copy(v, reflect.ValueOf(b))
+               return nil
+       case reflect.Bool:
+               d.buf.Grow(length)
+               b := d.buf.Bytes()[:length]
+               read(b)
+               x, err := strconv.ParseBool(bytesAsString(b))
+               if err != nil {
+                       x = length != 0
+               }
+               v.SetBool(x)
+               return nil
+       }
+       // Can't move this into default clause because some cases above fall through to here after
+       // additional checks.
+       d.buf.Grow(length)
+       read(d.buf.Bytes()[:length])
+       // I believe we return here to support "ignore_unmarshal_type_error".
+       return &UnmarshalTypeError{
+               BencodeTypeName:     "string",
+               UnmarshalTargetType: v.Type(),
        }
+}
 
-       d.buf.Reset()
+// Info for parsing a dict value.
+type dictField struct {
+       Type reflect.Type
+       Get  func(value reflect.Value) func(reflect.Value)
+       Tags tag
 }
 
-func (d *decoder) parse_dict(v reflect.Value) {
-       switch v.Kind() {
+// Returns specifics for parsing a dict field value.
+func getDictField(dict reflect.Type, key string) (_ dictField, err error) {
+       // Resolve the destination as a map value or as a struct field.
+       switch k := dict.Kind(); k {
        case reflect.Map:
-               t := v.Type()
-               if t.Key().Kind() != reflect.String {
-                       panic(&UnmarshalTypeError{
-                               Value: "object",
-                               Type:  t,
-                       })
-               }
-               if v.IsNil() {
-                       v.Set(reflect.MakeMap(t))
-               }
+               return dictField{
+                       Type: dict.Elem(),
+                       Get: func(mapValue reflect.Value) func(reflect.Value) {
+                               return func(value reflect.Value) {
+                                       if mapValue.IsNil() {
+                                               mapValue.Set(reflect.MakeMap(dict))
+                                       }
+                                       // Assigns the value into the map.
+                                       // log.Printf("map type: %v", mapValue.Type())
+                                       mapValue.SetMapIndex(reflect.ValueOf(key).Convert(dict.Key()), value)
+                               }
+                       },
+               }, nil
        case reflect.Struct:
+               return getStructFieldForKey(dict, key), nil
+               // if sf.r.PkgPath != "" {
+               //      panic(&UnmarshalFieldError{
+               //              Key:   key,
+               //              Type:  dict.Type(),
+               //              Field: sf.r,
+               //      })
+               // }
        default:
-               panic(&UnmarshalTypeError{
-                       Value: "object",
-                       Type:  v.Type(),
-               })
+               err = fmt.Errorf("can't assign bencode dict items into a %v", k)
+               return
        }
+}
 
-       var map_elem reflect.Value
-
-       // so, at this point 'd' byte was consumed, let's just read key/value
-       // pairs one by one
-       for {
-               var valuev reflect.Value
-               keyv := reflect.ValueOf(&d.key).Elem()
-               if !d.parse_value(keyv) {
-                       return
-               }
-
-               // get valuev as a map value or as a struct field
-               switch v.Kind() {
-               case reflect.Map:
-                       elem_type := v.Type().Elem()
-                       if !map_elem.IsValid() {
-                               map_elem = reflect.New(elem_type).Elem()
-                       } else {
-                               map_elem.Set(reflect.Zero(elem_type))
+var (
+       structFieldsMu sync.Mutex
+       structFields   = map[reflect.Type]map[string]dictField{}
+)
+
+func parseStructFields(struct_ reflect.Type, each func(key string, df dictField)) {
+       for _i, n := 0, struct_.NumField(); _i < n; _i++ {
+               i := _i
+               f := struct_.Field(i)
+               if f.Anonymous {
+                       t := f.Type
+                       if t.Kind() == reflect.Ptr {
+                               t = t.Elem()
                        }
-                       valuev = map_elem
-               case reflect.Struct:
-                       var f reflect.StructField
-                       var ok bool
-
-                       t := v.Type()
-                       for i, n := 0, t.NumField(); i < n; i++ {
-                               f = t.Field(i)
-                               tag := f.Tag.Get("bencode")
-                               if tag == "-" {
-                                       continue
-                               }
-                               if f.Anonymous {
-                                       continue
+                       parseStructFields(t, func(key string, df dictField) {
+                               innerGet := df.Get
+                               df.Get = func(value reflect.Value) func(reflect.Value) {
+                                       anonPtr := value.Field(i)
+                                       if anonPtr.Kind() == reflect.Ptr && anonPtr.IsNil() {
+                                               anonPtr.Set(reflect.New(f.Type.Elem()))
+                                               anonPtr = anonPtr.Elem()
+                                       }
+                                       return innerGet(anonPtr)
                                }
+                               each(key, df)
+                       })
+                       continue
+               }
+               tagStr := f.Tag.Get("bencode")
+               if tagStr == "-" {
+                       continue
+               }
+               tag := parseTag(tagStr)
+               key := tag.Key()
+               if key == "" {
+                       key = f.Name
+               }
+               each(key, dictField{f.Type, func(value reflect.Value) func(reflect.Value) {
+                       return value.Field(i).Set
+               }, tag})
+       }
+}
 
-                               tag_name, _ := parse_tag(tag)
-                               if tag_name == d.key {
-                                       ok = true
-                                       break
-                               }
+func saveStructFields(struct_ reflect.Type) {
+       m := make(map[string]dictField)
+       parseStructFields(struct_, func(key string, sf dictField) {
+               m[key] = sf
+       })
+       structFields[struct_] = m
+}
 
-                               if f.Name == d.key {
-                                       ok = true
-                                       break
-                               }
+func getStructFieldForKey(struct_ reflect.Type, key string) (f dictField) {
+       structFieldsMu.Lock()
+       if _, ok := structFields[struct_]; !ok {
+               saveStructFields(struct_)
+       }
+       f, ok := structFields[struct_][key]
+       structFieldsMu.Unlock()
+       if !ok {
+               var discard interface{}
+               return dictField{
+                       Type: reflect.TypeOf(discard),
+                       Get:  func(reflect.Value) func(reflect.Value) { return func(reflect.Value) {} },
+                       Tags: nil,
+               }
+       }
+       return
+}
 
-                               if strings.EqualFold(f.Name, d.key) {
-                                       ok = true
-                                       break
-                               }
-                       }
+func (d *Decoder) parseDict(v reflect.Value) error {
+       // At this point 'd' byte was consumed, now read key/value pairs
+       for {
+               var keyStr string
+               keyValue := reflect.ValueOf(&keyStr).Elem()
+               ok, err := d.parseValue(keyValue)
+               if err != nil {
+                       return fmt.Errorf("error parsing dict key: %w", err)
+               }
+               if !ok {
+                       return nil
+               }
 
-                       if ok {
-                               if f.PkgPath != "" {
-                                       panic(&UnmarshalFieldError{
-                                               Key:   d.key,
-                                               Type:  v.Type(),
-                                               Field: f,
-                                       })
-                               } else {
-                                       valuev = v.FieldByIndex(f.Index)
-                               }
-                       } else {
-                               _, ok := d.parse_value_interface()
-                               if !ok {
-                                       panic(&SyntaxError{
-                                               Offset: d.offset,
-                                               what:   "unexpected end of dict, no matching value for a given key",
-                                       })
-                               }
-                               continue
-                       }
+               df, err := getDictField(v.Type(), keyStr)
+               if err != nil {
+                       return fmt.Errorf("parsing bencode dict into %v: %w", v.Type(), err)
                }
 
                // now we need to actually parse it
-               if !d.parse_value(valuev) {
-                       panic(&SyntaxError{
-                               Offset: d.offset,
-                               what:   "unexpected end of dict, no matching value for a given key",
-                       })
+               if df.Type == nil {
+                       // Discard the value, there's nowhere to put it.
+                       var if_ interface{}
+                       if_, ok = d.parseValueInterface()
+                       if if_ == nil {
+                               return fmt.Errorf("error parsing value for key %q", keyStr)
+                       }
+                       if !ok {
+                               return fmt.Errorf("missing value for key %q", keyStr)
+                       }
+                       continue
                }
-
-               if v.Kind() == reflect.Map {
-                       v.SetMapIndex(keyv, valuev)
+               setValue := reflect.New(df.Type).Elem()
+               // log.Printf("parsing into %v", setValue.Type())
+               ok, err = d.parseValue(setValue)
+               if err != nil {
+                       var target *UnmarshalTypeError
+                       if !(errors.As(err, &target) && df.Tags.IgnoreUnmarshalTypeError()) {
+                               return fmt.Errorf("parsing value for key %q: %w", keyStr, err)
+                       }
+               }
+               if !ok {
+                       return fmt.Errorf("missing value for key %q", keyStr)
                }
+               df.Get(v)(setValue)
        }
 }
 
-func (d *decoder) parse_list(v reflect.Value) {
+func (d *Decoder) parseList(v reflect.Value) error {
        switch v.Kind() {
-       case reflect.Array, reflect.Slice:
        default:
-               panic(&UnmarshalTypeError{
-                       Value: "array",
-                       Type:  v.Type(),
-               })
+               // If the list is a singleton of the expected type, use that value. See
+               // https://github.com/anacrolix/torrent/issues/297.
+               l := reflect.New(reflect.SliceOf(v.Type()))
+               if err := d.parseList(l.Elem()); err != nil {
+                       return err
+               }
+               if l.Elem().Len() != 1 {
+                       return &UnmarshalTypeError{
+                               BencodeTypeName:     "list",
+                               UnmarshalTargetType: v.Type(),
+                       }
+               }
+               v.Set(l.Elem().Index(0))
+               return nil
+       case reflect.Array, reflect.Slice:
+               // We can work with this. Normal case, fallthrough.
        }
 
        i := 0
-       for {
+       for ; ; i++ {
                if v.Kind() == reflect.Slice && i >= v.Len() {
                        v.Set(reflect.Append(v, reflect.Zero(v.Type().Elem())))
                }
 
-               ok := false
                if i < v.Len() {
-                       ok = d.parse_value(v.Index(i))
+                       ok, err := d.parseValue(v.Index(i))
+                       if err != nil {
+                               return err
+                       }
+                       if !ok {
+                               break
+                       }
                } else {
-                       _, ok = d.parse_value_interface()
-               }
-
-               if !ok {
-                       break
+                       _, ok := d.parseValueInterface()
+                       if !ok {
+                               break
+                       }
                }
-
-               i++
        }
 
        if i < v.Len() {
@@ -319,92 +486,82 @@ func (d *decoder) parse_list(v reflect.Value) {
        if i == 0 && v.Kind() == reflect.Slice {
                v.Set(reflect.MakeSlice(v.Type(), 0, 0))
        }
+       return nil
 }
 
-func (d *decoder) read_one_value() bool {
-       b, err := d.ReadByte()
+func (d *Decoder) readOneValue() bool {
+       b, err := d.r.ReadByte()
        if err != nil {
                panic(err)
        }
        if b == 'e' {
-               d.UnreadByte()
+               d.r.UnreadByte()
                return false
        } else {
-               d.offset++
+               d.Offset++
                d.buf.WriteByte(b)
        }
 
        switch b {
        case 'd', 'l':
                // read until there is nothing to read
-               for d.read_one_value() {
+               for d.readOneValue() {
                }
                // consume 'e' as well
-               b = d.read_byte()
+               b = d.readByte()
                d.buf.WriteByte(b)
        case 'i':
-               d.read_until('e')
+               d.readUntil('e')
                d.buf.WriteString("e")
        default:
                if b >= '0' && b <= '9' {
                        start := d.buf.Len() - 1
-                       d.read_until(':')
-                       length, err := strconv.ParseInt(d.buf.String()[start:], 10, 64)
-                       check_for_int_parse_error(err, d.offset-1)
+                       d.readUntil(':')
+                       length, err := strconv.ParseInt(bytesAsString(d.buf.Bytes()[start:]), 10, 64)
+                       checkForIntParseError(err, d.Offset-1)
 
                        d.buf.WriteString(":")
-                       n, err := io.CopyN(&d.buf, d, length)
-                       d.offset += n
+                       n, err := io.CopyN(&d.buf, d.r, length)
+                       d.Offset += n
                        if err != nil {
-                               check_for_unexpected_eof(err, d.offset)
+                               checkForUnexpectedEOF(err, d.Offset)
                                panic(&SyntaxError{
-                                       Offset: d.offset,
-                                       what:   "unexpected I/O error: " + err.Error(),
+                                       Offset: d.Offset,
+                                       What:   errors.New("unexpected I/O error: " + err.Error()),
                                })
                        }
                        break
                }
 
-               // unknown value
-               panic(&SyntaxError{
-                       Offset: d.offset - 1,
-                       what:   "unknown value type (invalid bencode?)",
-               })
+               d.raiseUnknownValueType(b, d.Offset-1)
        }
 
        return true
-
 }
 
-func (d *decoder) parse_unmarshaler(v reflect.Value) bool {
-       m, ok := v.Interface().(Unmarshaler)
-       if !ok {
-               // T doesn't work, try *T
-               if v.Kind() != reflect.Ptr && v.CanAddr() {
-                       m, ok = v.Addr().Interface().(Unmarshaler)
-                       if ok {
-                               v = v.Addr()
-                       }
+func (d *Decoder) parseUnmarshaler(v reflect.Value) bool {
+       if !v.Type().Implements(unmarshalerType) {
+               if v.Addr().Type().Implements(unmarshalerType) {
+                       v = v.Addr()
+               } else {
+                       return false
                }
        }
-       if ok && (v.Kind() != reflect.Ptr || !v.IsNil()) {
-               if d.read_one_value() {
-                       err := m.UnmarshalBencode(d.buf.Bytes())
-                       d.buf.Reset()
-                       if err != nil {
-                               panic(&UnmarshalerError{v.Type(), err})
-                       }
-                       return true
-               }
-               d.buf.Reset()
+       d.buf.Reset()
+       if !d.readOneValue() {
+               return false
        }
-
-       return false
+       m := v.Interface().(Unmarshaler)
+       err := m.UnmarshalBencode(d.buf.Bytes())
+       if err != nil {
+               panic(&UnmarshalerError{v.Type(), err})
+       }
+       return true
 }
 
-// returns true if there was a value and it's now stored in 'v', otherwise there
-// was an end symbol ("e") and no value was stored
-func (d *decoder) parse_value(v reflect.Value) bool {
+// Returns true if there was a value and it's now stored in 'v', otherwise
+// there was an end symbol ("e") and no value was stored.
+func (d *Decoder) parseValue(v reflect.Value) (bool, error) {
        // we support one level of indirection at the moment
        if v.Kind() == reflect.Ptr {
                // if the pointer is nil, allocate a new element of the type it
@@ -415,128 +572,141 @@ func (d *decoder) parse_value(v reflect.Value) bool {
                v = v.Elem()
        }
 
-       if d.parse_unmarshaler(v) {
-               return true
+       if d.parseUnmarshaler(v) {
+               return true, nil
        }
 
        // common case: interface{}
        if v.Kind() == reflect.Interface && v.NumMethod() == 0 {
-               iface, _ := d.parse_value_interface()
+               iface, _ := d.parseValueInterface()
                v.Set(reflect.ValueOf(iface))
-               return true
+               return true, nil
        }
 
-       b, err := d.ReadByte()
+       b, err := d.r.ReadByte()
        if err != nil {
-               panic(err)
+               return false, err
        }
-       d.offset++
+       d.Offset++
 
        switch b {
        case 'e':
-               return false
+               return false, nil
        case 'd':
-               d.parse_dict(v)
+               return true, d.parseDict(v)
        case 'l':
-               d.parse_list(v)
+               return true, d.parseList(v)
        case 'i':
-               d.parse_int(v)
+               return true, d.parseInt(v)
        default:
                if b >= '0' && b <= '9' {
-                       // string
-                       // append first digit of the length to the buffer
+                       // It's a string.
+                       d.buf.Reset()
+                       // Write the first digit of the length to the buffer.
                        d.buf.WriteByte(b)
-                       d.parse_string(v)
-                       break
+                       return true, d.parseString(v)
                }
 
-               // unknown value
-               panic(&SyntaxError{
-                       Offset: d.offset - 1,
-                       what:   "unknown value type (invalid bencode?)",
-               })
+               d.raiseUnknownValueType(b, d.Offset-1)
        }
+       panic("unreachable")
+}
 
-       return true
+// raiseUnknownValueType reports that b, found at the given offset, is not a
+// known bencode type character. It always panics with a *SyntaxError and
+// never returns, which is why callers follow it with panic("unreachable").
+func (d *Decoder) raiseUnknownValueType(b byte, offset int64) {
+       panic(&SyntaxError{
+               Offset: offset,
+               What:   fmt.Errorf("unknown value type %+q", b),
+       })
 }
 
-func (d *decoder) parse_value_interface() (interface{}, bool) {
-       b, err := d.ReadByte()
+// parseValueInterface reads one bencode value and returns it as an untyped
+// Go value. The second result is false when the byte read was the end symbol
+// ("e"), in which case no value was read. A read error or an unknown type
+// character is raised as a panic.
+func (d *Decoder) parseValueInterface() (interface{}, bool) {
+       b, err := d.r.ReadByte()
        if err != nil {
                panic(err)
        }
-       d.offset++
+       d.Offset++
 
        switch b {
        case 'e':
                return nil, false
        case 'd':
-               return d.parse_dict_interface(), true
+               return d.parseDictInterface(), true
        case 'l':
-               return d.parse_list_interface(), true
+               return d.parseListInterface(), true
        case 'i':
-               return d.parse_int_interface(), true
+               return d.parseIntInterface(), true
        default:
                if b >= '0' && b <= '9' {
                        // string
+                       // Start from an empty buffer, as parseValue does, so that
+                       // bytes left over from an earlier value cannot end up in
+                       // front of the length digits.
+                       d.buf.Reset()
                        // append first digit of the length to the buffer
                        d.buf.WriteByte(b)
-                       return d.parse_string_interface(), true
+                       return d.parseStringInterface(), true
                }
 
-               // unknown value
-               panic(&SyntaxError{
-                       Offset: d.offset - 1,
-                       what:   "unknown value type (invalid bencode?)",
-               })
+               d.raiseUnknownValueType(b, d.Offset-1)
+               panic("unreachable")
        }
-       panic("unreachable")
 }
 
-func (d *decoder) parse_int_interface() interface{} {
-       start := d.offset - 1
-       d.read_until('e')
-       if d.buf.Len() == 0 {
-               panic(&SyntaxError{
-                       Offset: start,
-                       what:   "empty integer value",
-               })
-       }
+// Called after 'i', for an arbitrary integer size. Returns an int64 when the
+// digits parse as one, and a *big.Int when strconv reports them as out of
+// range. Failures are raised as panics.
+func (d *Decoder) parseIntInterface() (ret interface{}) {
+       // Offset of the 'i' that the caller has already consumed.
+       start := d.Offset - 1
 
+       // NOTE(review): readInt presumably leaves the integer's text in d.buf,
+       // since d.buf is what gets parsed below; confirm against readInt.
+       if err := d.readInt(); err != nil {
+               panic(err)
+       }
        n, err := strconv.ParseInt(d.buf.String(), 10, 64)
-       check_for_int_parse_error(err, start)
+       if ne, ok := err.(*strconv.NumError); ok && ne.Err == strconv.ErrRange {
+               // Out of int64 range: parse the same text again as a big.Int.
+               i := new(big.Int)
+               _, ok := i.SetString(d.buf.String(), 10)
+               if !ok {
+                       panic(&SyntaxError{
+                               Offset: start,
+                               What:   errors.New("failed to parse integer"),
+                       })
+               }
+               ret = i
+       } else {
+               // Any other parse error (or none) goes through the shared check.
+               checkForIntParseError(err, start)
+               ret = n
+       }
+
        d.buf.Reset()
-       return n
+       return
 }
 
-func (d *decoder) parse_string_interface() interface{} {
-       start := d.offset - 1
-
-       // read the string length first
-       d.read_until(':')
-       length, err := strconv.ParseInt(d.buf.String(), 10, 64)
-       check_for_int_parse_error(err, start)
-
-       d.buf.Reset()
-       n, err := io.CopyN(&d.buf, d, length)
-       d.offset += n
+// readBytes reads exactly length bytes from d.r and returns them. It panics
+// on a read error, or when fewer than length bytes could be read. It does not
+// update d.Offset; that is left to the caller.
+func (d *Decoder) readBytes(length int) []byte {
+       // The LimitReader caps the read at length bytes.
+       b, err := io.ReadAll(io.LimitReader(d.r, int64(length)))
        if err != nil {
-               check_for_unexpected_eof(err, d.offset)
-               panic(&SyntaxError{
-                       Offset: d.offset,
-                       what:   "unexpected I/O error: " + err.Error(),
-               })
+               panic(err)
+       }
+       // ReadAll does not treat EOF as an error, so a short read shows up only
+       // as a length mismatch.
+       if len(b) != length {
+               panic(fmt.Errorf("read %v bytes expected %v", len(b), length))
        }
+       return b
+}
 
-       s := d.buf.String()
-       d.buf.Reset()
-       return s
+// parseStringInterface reads a bencode string whose first length digit the
+// caller has already written to d.buf, and returns it as a Go string via
+// bytesAsString. It panics if parseStringLength returns an error or if
+// readBytes cannot read the full string.
+func (d *Decoder) parseStringInterface() string {
+       length, err := d.parseStringLength()
+       if err != nil {
+               panic(err)
+       }
+       // readBytes panics unless it read exactly the requested number of bytes,
+       // so there is no error left to check after it returns.
+       b := d.readBytes(int(length))
+       d.Offset += int64(len(b))
+       return bytesAsString(b)
 }
 
-func (d *decoder) parse_dict_interface() interface{} {
+func (d *Decoder) parseDictInterface() interface{} {
        dict := make(map[string]interface{})
+       var lastKey string
+       lastKeyOk := false
        for {
-               keyi, ok := d.parse_value_interface()
+               start := d.Offset
+               keyi, ok := d.parseValueInterface()
                if !ok {
                        break
                }
@@ -544,36 +714,39 @@ func (d *decoder) parse_dict_interface() interface{} {
                key, ok := keyi.(string)
                if !ok {
                        panic(&SyntaxError{
-                               Offset: d.offset,
-                               what:   "non-string key in a dict",
+                               Offset: d.Offset,
+                               What:   errors.New("non-string key in a dict"),
                        })
                }
-
-               valuei, ok := d.parse_value_interface()
+               if lastKeyOk && key <= lastKey {
+                       d.throwSyntaxError(start, fmt.Errorf("dict keys unsorted: %q <= %q", key, lastKey))
+               }
+               start = d.Offset
+               valuei, ok := d.parseValueInterface()
                if !ok {
-                       panic(&SyntaxError{
-                               Offset: d.offset,
-                               what:   "unexpected end of dict, no matching value for a given key",
-                       })
+                       d.throwSyntaxError(start, fmt.Errorf("dict elem missing value [key=%v]", key))
                }
 
+               lastKey = key
+               lastKeyOk = true
                dict[key] = valuei
        }
        return dict
 }
 
-func (d *decoder) parse_list_interface() interface{} {
-       var list []interface{}
-       for {
-               valuei, ok := d.parse_value_interface()
-               if !ok {
-                       break
-               }
-
+// parseListInterface collects values from parseValueInterface, in order,
+// until it reports that no value was read, and returns them. The result is
+// never nil: an empty list comes back as a zero-length slice.
+func (d *Decoder) parseListInterface() (list []interface{}) {
+       list = []interface{}{}
+       valuei, ok := d.parseValueInterface()
+       for ok {
                list = append(list, valuei)
+               valuei, ok = d.parseValueInterface()
        }
-       if list == nil {
-               list = make([]interface{}, 0, 0)
+       return
+}
+
+// getMaxStrLen returns the string length limit in effect for this Decoder:
+// d.MaxStrLen, or DefaultDecodeMaxStrLen when d.MaxStrLen is zero.
+func (d *Decoder) getMaxStrLen() int64 {
+       if d.MaxStrLen == 0 {
+               return DefaultDecodeMaxStrLen
        }
-       return list
+       return d.MaxStrLen
 }