minio
diff --git a/‎README.md‎
Lines changed: 22 additions & 0 deletions b/‎README.md‎
Lines changed: 22 additions & 0 deletions
diff --git a/‎parse_json_amd64.go‎
Lines changed: 6 additions & 6 deletions b/‎parse_json_amd64.go‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎parse_json_amd64_test.go‎
Lines changed: 26 additions & 17 deletions b/‎parse_json_amd64_test.go‎
Lines changed: 26 additions & 17 deletions
diff --git a/‎parse_number_amd64.go‎
Lines changed: 21 additions & 11 deletions b/‎parse_number_amd64.go‎
Lines changed: 21 additions & 11 deletions
diff --git a/‎parse_number_test.go‎
Lines changed: 1 addition & 1 deletion b/‎parse_number_test.go‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎parsed_json.go‎
Lines changed: 65 additions & 7 deletions b/‎parsed_json.go‎
Lines changed: 65 additions & 7 deletions
@@ -133,6 +133,28 @@ method to get an iterator.
 There are methods that allow you to retrieve all elements as a single type, 
 []int64, []uint64, float64 and strings.  
 
+## Number parsing
+
+Numbers in JSON are untyped and are returned by the following rules in order:
+
+* If there is any floating point notation, such as an exponent or a decimal point, it is always returned as float.
+* If number is a pure integer and it fits within an int64 it is returned as such.
+* If number is a pure positive integer and fits within a uint64 it is returned as such.
+* If the number is a valid number it is returned as float64.
+
+If the number was converted from integer notation to a float due to not fitting inside int64/uint64
+the `FloatOverflowedInteger` flag is set, which can be retrieved using `(Iter).FloatFlags()` method.  
+
+JSON numbers follow JavaScript’s double-precision floating-point format.
+
+* Represented in base 10 with no superfluous leading zeros (e.g. 67, 1, 100).
+* Include digits between 0 and 9.
+* Can be a negative number (e.g. -10).
+* Can be a fraction (e.g. 0.5); a digit is required on both sides of the decimal point.
+* Can also have an exponent of 10, prefixed by e or E, with an optional plus or minus sign to indicate positive or negative exponentiation.
+* Octal and hexadecimal formats are not supported.
+* Can not have a value of NaN (Not A Number) or Infinity.
+
 ## Parsing NDJSON stream
 
 Newline delimited json is sent as packets with each line being a root element.
 
@@ -42,10 +42,10 @@ func (pj *internalParsedJson) initialize(size int) {
 		pj.Strings = make([]byte, 0, stringsSize)
 	}
 	pj.Strings = pj.Strings[:0]
-	if cap(pj.containing_scope_offset) < maxdepth {
-		pj.containing_scope_offset = make([]uint64, 0, maxdepth)
+	if cap(pj.containingScopeOffset) < maxdepth {
+		pj.containingScopeOffset = make([]uint64, 0, maxdepth)
 	}
-	pj.containing_scope_offset = pj.containing_scope_offset[:0]
+	pj.containingScopeOffset = pj.containingScopeOffset[:0]
 }
 
 func (pj *internalParsedJson) parseMessage(msg []byte) error {
@@ -75,8 +75,8 @@ func (pj *internalParsedJson) parseMessageInternal(msg []byte, ndjson bool) (err
 	// Make the capacity of the channel smaller than the number of slots.
 	// This way the sender will automatically block until the consumer
 	// has finished the slot it is working on.
-	pj.index_chan = make(chan indexChan, indexSlots-2)
-	pj.buffers_offset = ^uint64(0)
+	pj.indexChans = make(chan indexChan, indexSlots-2)
+	pj.buffersOffset = ^uint64(0)
 
 	var errStage1 error
 	go func() {
@@ -89,7 +89,7 @@ func (pj *internalParsedJson) parseMessageInternal(msg []byte, ndjson bool) (err
 		if !unifiedMachine(pj.Message, pj) {
 			err = errors.New("Bad parsing while executing stage 2")
 			// drain the channel until empty
-			for range pj.index_chan {
+			for range pj.indexChans {
 			}
 		}
 		wg.Done()
 
@@ -96,7 +96,7 @@ func BenchmarkNdjsonStage1(b *testing.B) {
 
 	for i := 0; i < b.N; i++ {
 		// Create new channel (large enough so we won't block)
-		pj.index_chan = make(chan indexChan, 128*10240)
+		pj.indexChans = make(chan indexChan, 128*10240)
 		findStructuralIndices([]byte(ndjson), &pj)
 	}
 }
@@ -210,24 +210,30 @@ func TestParseNumber(t *testing.T) {
 		expectedD float64
 		expectedI int64
 		expectedU uint64
+		flags     FloatFlags
 	}{
-		{"1", TagInteger, 0.0, 1, 0},
-		{"-1", TagInteger, 0.0, -1, 0},
-		{"10000000000000000000", TagUint, 0.0, 0, 10000000000000000000},
-		{"10000000000000000001", TagUint, 0.0, 0, 10000000000000000001},
-		{"-10000000000000000000", TagFloat, -10000000000000000000, 0, 0},
-		{"1.0", TagFloat, 1.0, 0, 0},
-		{"1234567890", TagInteger, 0.0, 1234567890, 0},
-		{"9876.543210", TagFloat, 9876.543210, 0, 0},
-		{"0.123456789e-12", TagFloat, 1.23456789e-13, 0, 0},
-		{"1.234567890E+34", TagFloat, 1.234567890e+34, 0, 0},
-		{"23456789012E66", TagFloat, 23456789012e66, 0, 0},
-		{"-9876.543210", TagFloat, -9876.543210, 0, 0},
-		{"-65.619720000000029", TagFloat, -65.61972000000003, 0, 0},
+		{input: "1", wantTag: TagInteger, expectedI: 1},
+		{input: "-1", wantTag: TagInteger, expectedI: -1},
+		{input: "10000000000000000000", wantTag: TagUint, expectedU: 10000000000000000000},
+		{input: "10000000000000000001", wantTag: TagUint, expectedU: 10000000000000000001},
+		// math.MinInt64 - 1
+		{input: "-9223372036854775809", wantTag: TagFloat, expectedD: -9.223372036854776e+18, flags: FloatOverflowedInteger.Flags()},
+		{input: "-10000000000000000000", wantTag: TagFloat, expectedD: -10000000000000000000, flags: FloatOverflowedInteger.Flags()},
+		{input: "100000000000000000000", wantTag: TagFloat, expectedD: 100000000000000000000, flags: FloatOverflowedInteger.Flags()},
+		// math.MaxUint64 +1
+		{input: "18446744073709551616", wantTag: TagFloat, expectedD: 1.8446744073709552e+19, flags: FloatOverflowedInteger.Flags()},
+		{input: "1.0", wantTag: TagFloat, expectedD: 1.0},
+		{input: "1234567890", wantTag: TagInteger, expectedI: 1234567890},
+		{input: "9876.543210", wantTag: TagFloat, expectedD: 9876.543210},
+		{input: "0.123456789e-12", wantTag: TagFloat, expectedD: 1.23456789e-13},
+		{input: "1.234567890E+34", wantTag: TagFloat, expectedD: 1.234567890e+34},
+		{input: "23456789012E66", wantTag: TagFloat, expectedD: 23456789012e66},
+		{input: "-9876.543210", wantTag: TagFloat, expectedD: -9876.543210},
+		{input: "-65.619720000000029", wantTag: TagFloat, expectedD: -65.61972000000003},
 	}
 
 	for _, tc := range testCases {
-		tag, val := parseNumber([]byte(fmt.Sprintf(`%s:`, tc.input)))
+		tag, val, flags := parseNumber([]byte(fmt.Sprintf(`%s:`, tc.input)))
 		if tag != tc.wantTag {
 			t.Errorf("TestParseNumber: got: %v want: %v", tag, tc.wantTag)
 		}
@@ -246,6 +252,9 @@ func TestParseNumber(t *testing.T) {
 				t.Errorf("TestParseNumber: got: %d want: %d", val, tc.expectedU)
 			}
 		}
+		if flags != uint64(tc.flags) {
+			t.Errorf("TestParseNumber flags; got: %d want: %d", flags, tc.flags)
+		}
 	}
 }
 
@@ -295,7 +304,7 @@ func TestParseInt64(t *testing.T) {
 		test := &parseInt64Tests[i]
 		t.Run(test.in, func(t *testing.T) {
 
-			tag, val := parseNumber([]byte(fmt.Sprintf(`%s:`, test.in)))
+			tag, val, _ := parseNumber([]byte(fmt.Sprintf(`%s:`, test.in)))
 			if tag != test.tag {
 				// Ignore intentionally bad syntactical errors
 				t.Errorf("TestParseInt64: got: %v want: %v", tag, test.tag)
@@ -478,7 +487,7 @@ func TestParseFloat64(t *testing.T) {
 	for i := 0; i < len(atoftests); i++ {
 		test := &atoftests[i]
 		t.Run(test.in, func(t *testing.T) {
-			tag, val := parseNumber([]byte(fmt.Sprintf(`%s:`, test.in)))
+			tag, val, _ := parseNumber([]byte(fmt.Sprintf(`%s:`, test.in)))
 			switch tag {
 			case TagEnd:
 				if test.err == nil {
 
@@ -21,6 +21,7 @@
 package simdjson
 
 import (
+	"errors"
 	"math"
 	"strconv"
 )
@@ -63,29 +64,29 @@ var isNumberRune = [256]uint8{
 // parseNumber will parse the number starting in the buffer.
 // Any non-number characters at the end will be ignored.
 // Returns TagEnd if no valid value could be found.
-func parseNumber(buf []byte) (tag Tag, val uint64) {
+func parseNumber(buf []byte) (tag Tag, val, flags uint64) {
 	pos := 0
 	found := uint8(0)
 	for i, v := range buf {
 		t := isNumberRune[v]
 		if t == 0 {
 			//fmt.Println("aborting on", string(v), "in", string(buf[:i]))
-			return TagEnd, 0
+			return TagEnd, 0, 0
 		}
 		if t == isEOVFlag {
 			break
 		}
 		if t&isMustHaveDigitNext > 0 {
 			// A period and minus must be followed by a digit
 			if len(buf) < i+2 || isNumberRune[buf[i+1]]&isDigitFlag == 0 {
-				return TagEnd, 0
+				return TagEnd, 0, 0
 			}
 		}
 		found |= t
 		pos = i + 1
 	}
 	if pos == 0 {
-		return TagEnd, 0
+		return TagEnd, 0, 0
 	}
 	const maxIntLen = 20
 
@@ -94,33 +95,42 @@ func parseNumber(buf []byte) (tag Tag, val uint64) {
 		if found&isMinusFlag == 0 {
 			if pos > 1 && buf[0] == '0' {
 				// Integers cannot have a leading zero.
-				return TagEnd, 0
+				return TagEnd, 0, 0
 			}
 		} else {
 			if pos > 2 && buf[1] == '0' {
 				// Integers cannot have a leading zero after minus.
-				return TagEnd, 0
+				return TagEnd, 0, 0
 			}
 		}
 		i64, err := strconv.ParseInt(string(buf[:pos]), 10, 64)
 		if err == nil {
-			return TagInteger, uint64(i64)
+			return TagInteger, uint64(i64), 0
 		}
+		if errors.Is(err, strconv.ErrRange) {
+			flags |= uint64(FloatOverflowedInteger)
+		}
+
 		if found&isMinusFlag == 0 {
 			u64, err := strconv.ParseUint(string(buf[:pos]), 10, 64)
 			if err == nil {
-				return TagUint, u64
+				return TagUint, u64, 0
+			}
+			if errors.Is(err, strconv.ErrRange) {
+				flags |= uint64(FloatOverflowedInteger)
 			}
 		}
+	} else if found&isFloatOnlyFlag == 0 {
+		flags |= uint64(FloatOverflowedInteger)
 	}
 
 	if pos > 1 && buf[0] == '0' && isNumberRune[buf[1]]&isFloatOnlyFlag == 0 {
 		// Float can only have a leading 0 when followed by a period.
-		return TagEnd, 0
+		return TagEnd, 0, 0
 	}
 	f64, err := strconv.ParseFloat(string(buf[:pos]), 64)
 	if err == nil {
-		return TagFloat, math.Float64bits(f64)
+		return TagFloat, math.Float64bits(f64), flags
 	}
-	return TagEnd, 0
+	return TagEnd, 0, 0
 }
@@ -31,7 +31,7 @@ func TestNumberIsValid(t *testing.T) {
 	// From: https://stackoverflow.com/a/13340826
 	var jsonNumberRegexp = regexp.MustCompile(`^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$`)
 	isValidNumber := func(s string) bool {
-		tag, _ := parseNumber([]byte(s))
+		tag, _, _ := parseNumber([]byte(s))
 		return tag != TagEnd
 	}
 	validTests := []string{
 
@@ -42,6 +42,32 @@ const STRINGBUFMASK = 0x7fffffffffffff
 
 const maxdepth = 128
 
+// FloatFlags is a set of FloatFlag values, combined with bitwise OR,
+// recorded when a number is parsed as a float.
+type FloatFlags uint64
+
+// FloatFlag is a single flag recorded when parsing floats.
+type FloatFlag uint64
+
+const (
+	// FloatOverflowedInteger is set when the number in the JSON was in integer
+	// notation, but did not fit in int64 or uint64 and was therefore parsed as float.
+	FloatOverflowedInteger FloatFlag = 1 << iota
+)
+
+// Contains reports whether every bit set in flag is also set in f.
+func (f FloatFlags) Contains(flag FloatFlag) bool {
+	return FloatFlag(f)&flag == flag
+}
+
+// Flags returns f as a FloatFlags set with every flag in more OR-ed in.
+// With no arguments the result contains only f.
+func (f FloatFlag) Flags(more ...FloatFlag) FloatFlags {
+	// f is a value receiver, so OR-ing into it only changes this local copy.
+	for _, v := range more {
+		f |= v
+	}
+	return FloatFlags(f)
+}
+
 type ParsedJson struct {
 	Message []byte
 	Tape    []uint64
@@ -63,13 +89,13 @@ type indexChan struct {
 
 type internalParsedJson struct {
 	ParsedJson
-	containing_scope_offset []uint64
-	isvalid                 bool
-	index_chan              chan indexChan
-	indexesChan             indexChan
-	buffers                 [indexSlots][indexSize]uint32
-	buffers_offset          uint64
-	ndjson                  uint64
+	containingScopeOffset []uint64
+	isvalid               bool
+	indexChans            chan indexChan
+	indexesChan           indexChan
+	buffers               [indexSlots][indexSize]uint32
+	buffersOffset         uint64
+	ndjson                uint64
 }
 
 // Iter returns a new Iter.
@@ -479,6 +505,34 @@ func (i *Iter) Float() (float64, error) {
 	}
 }
 
+// FloatFlags returns the float value of the next element together with
+// the flags recorded for it when the number was parsed.
+// Integers (TagInteger and TagUint) are automatically converted to float.
+// An error is returned if the tape has no value at the current offset,
+// or if the element is not a number.
+func (i *Iter) FloatFlags() (float64, FloatFlags, error) {
+	switch i.t {
+	case TagFloat:
+		if i.off >= len(i.tape.Tape) {
+			return 0, 0, errors.New("corrupt input: expected float, but no more values on tape")
+		}
+		v := math.Float64frombits(i.tape.Tape[i.off])
+		// parseNumber only produces flags together with TagFloat, so this is
+		// the case that must report them; returning 0 here would make
+		// FloatOverflowedInteger unobservable.
+		// NOTE(review): assumes i.cur holds the non-tag bits of the tape
+		// entry, as the TagUint case below already relies on - confirm.
+		return v, FloatFlags(i.cur), nil
+	case TagInteger:
+		if i.off >= len(i.tape.Tape) {
+			return 0, 0, errors.New("corrupt input: expected integer, but no more values on tape")
+		}
+		v := int64(i.tape.Tape[i.off])
+		return float64(v), 0, nil
+	case TagUint:
+		if i.off >= len(i.tape.Tape) {
+			return 0, 0, errors.New("corrupt input: expected integer, but no more values on tape")
+		}
+		v := i.tape.Tape[i.off]
+		return float64(v), FloatFlags(i.cur), nil
+	default:
+		return 0, 0, fmt.Errorf("unable to convert type %v to float", i.t)
+	}
+}
+
 // Int returns the integer value of the next element.
 // Integers and floats within range are automatically converted.
 func (i *Iter) Int() (int64, error) {
@@ -771,6 +825,10 @@ func (pj *ParsedJson) writeTapeTagVal(tag Tag, val uint64) {
 	pj.Tape = append(pj.Tape, uint64(tag)<<56, val)
 }
 
+// writeTapeTagValFlags appends two entries to the tape: the tag shifted
+// left by 56 bits and OR-ed with flags, followed by val.
+// NOTE(review): flags is OR-ed in unmasked, so it presumably must stay
+// within the low 56 bits to avoid corrupting the tag - confirm against callers.
+func (pj *ParsedJson) writeTapeTagValFlags(tag Tag, val, flags uint64) {
+	pj.Tape = append(pj.Tape, uint64(tag)<<56|flags, val)
+}
+
 func (pj *ParsedJson) write_tape_s64(val int64) {
 	pj.writeTapeTagVal(TagInteger, uint64(val))
 }
Original file line number	Diff line number	Diff line change
`@@ -21,6 +21,7 @@`
`21`	`21`	`package simdjson`
`22`	`22`
`23`	`23`	`import (`
	`24`	`+ "errors"`
`24`	`25`	`"math"`
`25`	`26`	`"strconv"`
`26`	`27`	`)`
`@@ -63,29 +64,29 @@ var isNumberRune = [256]uint8{`
`63`	`64`	`// parseNumber will parse the number starting in the buffer.`
`64`	`65`	`// Any non-number characters at the end will be ignored.`
`65`	`66`	`// Returns TagEnd if no valid value found be found.`
`66`		`-func parseNumber(buf []byte) (tag Tag, val uint64) {`
	`67`	`+func parseNumber(buf []byte) (tag Tag, val, flags uint64) {`
`67`	`68`	`pos := 0`
`68`	`69`	`found := uint8(0)`
`69`	`70`	`for i, v := range buf {`
`70`	`71`	`t := isNumberRune[v]`
`71`	`72`	`if t == 0 {`
`72`	`73`	`//fmt.Println("aborting on", string(v), "in", string(buf[:i]))`
`73`		`- return TagEnd, 0`
	`74`	`+ return TagEnd, 0, 0`
`74`	`75`	`}`
`75`	`76`	`if t == isEOVFlag {`
`76`	`77`	`break`
`77`	`78`	`}`
`78`	`79`	`if t&isMustHaveDigitNext > 0 {`
`79`	`80`	`// A period and minus must be followed by a digit`
`80`	`81`	`if len(buf) < i+2 \|\| isNumberRune[buf[i+1]]&isDigitFlag == 0 {`
`81`		`- return TagEnd, 0`
	`82`	`+ return TagEnd, 0, 0`
`82`	`83`	`}`
`83`	`84`	`}`
`84`	`85`	`found \|= t`
`85`	`86`	`pos = i + 1`
`86`	`87`	`}`
`87`	`88`	`if pos == 0 {`
`88`		`- return TagEnd, 0`
	`89`	`+ return TagEnd, 0, 0`
`89`	`90`	`}`
`90`	`91`	`const maxIntLen = 20`
`91`	`92`
`@@ -94,33 +95,42 @@ func parseNumber(buf []byte) (tag Tag, val uint64) {`
`94`	`95`	`if found&isMinusFlag == 0 {`
`95`	`96`	`if pos > 1 && buf[0] == '0' {`
`96`	`97`	`// Integers cannot have a leading zero.`
`97`		`- return TagEnd, 0`
	`98`	`+ return TagEnd, 0, 0`
`98`	`99`	`}`
`99`	`100`	`} else {`
`100`	`101`	`if pos > 2 && buf[1] == '0' {`
`101`	`102`	`// Integers cannot have a leading zero after minus.`
`102`		`- return TagEnd, 0`
	`103`	`+ return TagEnd, 0, 0`
`103`	`104`	`}`
`104`	`105`	`}`
`105`	`106`	`i64, err := strconv.ParseInt(string(buf[:pos]), 10, 64)`
`106`	`107`	`if err == nil {`
`107`		`- return TagInteger, uint64(i64)`
	`108`	`+ return TagInteger, uint64(i64), 0`
`108`	`109`	`}`
	`110`	`+ if errors.Is(err, strconv.ErrRange) {`
	`111`	`+ flags \|= uint64(FloatOverflowedInteger)`
	`112`	`+ }`
	`113`	`+`
`109`	`114`	`if found&isMinusFlag == 0 {`
`110`	`115`	`u64, err := strconv.ParseUint(string(buf[:pos]), 10, 64)`
`111`	`116`	`if err == nil {`
`112`		`- return TagUint, u64`
	`117`	`+ return TagUint, u64, 0`
	`118`	`+ }`
	`119`	`+ if errors.Is(err, strconv.ErrRange) {`
	`120`	`+ flags \|= uint64(FloatOverflowedInteger)`
`113`	`121`	`}`
`114`	`122`	`}`
	`123`	`+ } else if found&isFloatOnlyFlag == 0 {`
	`124`	`+ flags \|= uint64(FloatOverflowedInteger)`
`115`	`125`	`}`
`116`	`126`
`117`	`127`	`if pos > 1 && buf[0] == '0' && isNumberRune[buf[1]]&isFloatOnlyFlag == 0 {`
`118`	`128`	`// Float can only have have a leading 0 when followed by a period.`
`119`		`- return TagEnd, 0`
	`129`	`+ return TagEnd, 0, 0`
`120`	`130`	`}`
`121`	`131`	`f64, err := strconv.ParseFloat(string(buf[:pos]), 64)`
`122`	`132`	`if err == nil {`
`123`		`- return TagFloat, math.Float64bits(f64)`
	`133`	`+ return TagFloat, math.Float64bits(f64), flags`
`124`	`134`	`}`
`125`		`- return TagEnd, 0`
	`135`	`+ return TagEnd, 0, 0`
`126`	`136`	`}`
Original file line number	Diff line number	Diff line change
`@@ -31,7 +31,7 @@ func TestNumberIsValid(t *testing.T) {`
`31`	`31`	`// From: https://stackoverflow.com/a/13340826`
`32`	`32`	var jsonNumberRegexp = regexp.MustCompile(`^-?(?:0\|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$`)
`33`	`33`	`isValidNumber := func(s string) bool {`
`34`		`- tag, _ := parseNumber([]byte(s))`
	`34`	`+ tag, _, _ := parseNumber([]byte(s))`
`35`	`35`	`return tag != TagEnd`
`36`	`36`	`}`
`37`	`37`	`validTests := []string{`