Merge pull request #88 from francoispqt/fix/parsing-big-floats-truncate - gojay - high performance JSON encoder/decoder with stream API for Golang

commit ba96878038b97df0e7631a0d305060eceacede52
parent 688c5d008625b62011496858a3e55f852dccd40f
Author: Francois Parquet <francois.parquet@gmail.com>
Date:   Sun, 28 Oct 2018 02:08:36 +0800

Merge pull request #88 from francoispqt/fix/parsing-big-floats-truncate

make parsing of big floats possible by truncating them at high precision
Diffstat:
A benchmarks/decoder/decoder_bench_float_test.go  | 28 ++++++++++++++++++++++++++++
M decode_number.go  | 3 ++-
M decode_number_float.go  | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
M decode_number_float_test.go  | 37 +++++++++++++++++++++----------------

4 files changed, 102 insertions(+), 36 deletions(-)
diff --git a/benchmarks/decoder/decoder_bench_float_test.go b/benchmarks/decoder/decoder_bench_float_test.go
@@ -0,0 +1,28 @@
+package benchmarks
+
+import (
+	"encoding/json"
+	"testing"
+
+	"github.com/francoispqt/gojay"
+)
+
+var bigf = []byte(`0.00058273999999999999`)
+
+// BenchmarkBigFloatEncodingJSON decodes a big float with the standard package
+func BenchmarkBigFloatEncodingJSON(b *testing.B) {
+	b.ReportAllocs()
+	for n := 0; n < b.N; n++ {
+		var f float64
+		var _ = json.Unmarshal(bigf, &f)
+	}
+}
+
+// BenchmarkBigFloatGojay decodes a big float with gojay
+func BenchmarkBigFloatGojay(b *testing.B) {
+	b.ReportAllocs()
+	for n := 0; n < b.N; n++ {
+		var f float64
+		var _ = gojay.Unmarshal(bigf, &f)
+	}
+}
diff --git a/decode_number.go b/decode_number.go
@@ -24,7 +24,7 @@ const maxInt16Length = 5
 const maxInt8Length = 3
 const invalidNumber = int8(-1)
 
-var pow10uint64 = [20]uint64{
+var pow10uint64 = [21]uint64{
 	0,
 	1,
 	10,
@@ -45,6 +45,7 @@ var pow10uint64 = [20]uint64{
 	10000000000000000,
 	100000000000000000,
 	1000000000000000000,
+	10000000000000000000,
 }
 
 var skipNumberEndCursorIncrement [256]int
diff --git a/decode_number_float.go b/decode_number_float.go
@@ -120,27 +120,37 @@ func (dec *Decoder) getFloat() (float64, error) {
 			// then we get part after decimal as integer
 			start = j + 1
 			// get number after the decimal point
-			// multiple the before decimal point portion by 10 using bitwise
 			for i := j + 1; i < dec.length || dec.read(); i++ {
 				c := dec.data[i]
 				if isDigit(c) {
 					end = i
-					beforeDecimal = (beforeDecimal << 3) + (beforeDecimal << 1)
+					// multiply the before-decimal-point portion by 10 using bit shifts,
+					// and make sure it doesn't overflow
+					if end-start < 18 {
+						beforeDecimal = (beforeDecimal << 3) + (beforeDecimal << 1)
+					}
 					continue
 				} else if (c == 'e' || c == 'E') && j < i-1 {
-					afterDecimal := dec.atoi64(start, end)
-					dec.cursor = i + 1
+					// we have an exponent, convert first the value we got before the exponent
+					var afterDecimal int64
 					expI := end - start + 2
+					// if exp is too long, it means number is too long, just truncate the number
 					if expI >= len(pow10uint64) || expI < 0 {
-						return 0, dec.raiseInvalidJSONErr(dec.cursor)
+						expI = len(pow10uint64) - 2
+						afterDecimal = dec.atoi64(start, start+expI-2)
+					} else {
+						// then we add both integers
+						// then we divide the number by the power found
+						afterDecimal = dec.atoi64(start, end)
 					}
+					dec.cursor = i + 1
 					pow := pow10uint64[expI]
 					floatVal := float64(beforeDecimal+afterDecimal) / float64(pow)
 					exp, err := dec.getExponent()
 					if err != nil {
 						return 0, err
 					}
-					pExp := (exp + (exp >> 31)) ^ (exp >> 31) + 1 // abs
+					pExp := (exp + (exp >> 31)) ^ (exp >> 31) + 1 // absolute exponent
 					if pExp >= int64(len(pow10uint64)) || pExp < 0 {
 						return 0, dec.raiseInvalidJSONErr(dec.cursor)
 					}
@@ -156,13 +166,18 @@ func (dec *Decoder) getFloat() (float64, error) {
 			if end >= dec.length || end < start {
 				return 0, dec.raiseInvalidJSONErr(dec.cursor)
 			}
-			// then we add both integers
-			// then we divide the number by the power found
-			afterDecimal := dec.atoi64(start, end)
+			var afterDecimal int64
 			expI := end - start + 2
+			// if exp is too long, it means number is too long, just truncate the number
 			if expI >= len(pow10uint64) || expI < 0 {
-				return 0, dec.raiseInvalidJSONErr(dec.cursor)
+				expI = 19
+				afterDecimal = dec.atoi64(start, start+expI-2)
+			} else {
+				// then we add both integers
+				// then we divide the number by the power found
+				afterDecimal = dec.atoi64(start, end)
 			}
+
 			pow := pow10uint64[expI]
 			return float64(beforeDecimal+afterDecimal) / float64(pow), nil
 		case 'e', 'E':
@@ -318,15 +333,27 @@ func (dec *Decoder) getFloat32() (float32, error) {
 				c := dec.data[i]
 				if isDigit(c) {
 					end = i
-					beforeDecimal = (beforeDecimal << 3) + (beforeDecimal << 1)
+					// multiply the before-decimal-point portion by 10 using bit shifts,
+					// and make sure it doesn't overflow
+					if end-start < 9 {
+						beforeDecimal = (beforeDecimal << 3) + (beforeDecimal << 1)
+					}
 					continue
 				} else if (c == 'e' || c == 'E') && j < i-1 {
-					afterDecimal := dec.atoi32(start, end)
-					dec.cursor = i + 1
+					// we have an exponent: first convert the value
+					// we got before the exponent
+					var afterDecimal int32
 					expI := end - start + 2
-					if expI >= len(pow10uint64) || expI < 0 {
-						return 0, dec.raiseInvalidJSONErr(dec.cursor)
+					// if exp is too long, it means number is too long, just truncate the number
+					if expI >= 12 || expI < 0 {
+						expI = 10
+						afterDecimal = dec.atoi32(start, start+expI-2)
+					} else {
+						// then we add both integers
+						// then we divide the number by the power found
+						afterDecimal = dec.atoi32(start, end)
 					}
+					dec.cursor = i + 1
 					pow := pow10uint64[expI]
 					floatVal := float32(beforeDecimal+afterDecimal) / float32(pow)
 					exp, err := dec.getExponent()
@@ -351,10 +378,16 @@ func (dec *Decoder) getFloat32() (float32, error) {
 			}
 			// then we add both integers
 			// then we divide the number by the power found
-			afterDecimal := dec.atoi32(start, end)
+			var afterDecimal int32
 			expI := end - start + 2
-			if expI >= len(pow10uint64) || expI < 0 {
-				return 0, dec.raiseInvalidJSONErr(dec.cursor)
+			// if exp is too long, it means number is too long, just truncate the number
+			if expI >= 12 || expI < 0 {
+				expI = 10
+				afterDecimal = dec.atoi32(start, start+expI-2)
+			} else {
+				// then we add both integers
+				// then we divide the number by the power found
+				afterDecimal = dec.atoi32(start, end)
 			}
 			pow := pow10uint64[expI]
 			return float32(beforeDecimal+afterDecimal) / float32(pow), nil
@@ -368,7 +401,6 @@ func (dec *Decoder) getFloat32() (float32, error) {
 				return 0, err
 			}
 			pExp := (exp + (exp >> 31)) ^ (exp >> 31) + 1
-			// log.Print(exp, " after")
 			if pExp >= int64(len(pow10uint64)) || pExp < 0 {
 				return 0, dec.raiseInvalidJSONErr(dec.cursor)
 			}
diff --git a/decode_number_float_test.go b/decode_number_float_test.go
@@ -218,10 +218,14 @@ func TestDecoderFloat64(t *testing.T) {
 			err:            true,
 		},
 		{
-			name:           "basic-exp-too-big",
+			name:           "big float",
 			json:           "1.00232492420002423545849009",
-			expectedResult: 0,
-			err:            true,
+			expectedResult: 1.002325,
+		},
+		{
+			name:           "big float",
+			json:           "5620.1400000000003",
+			expectedResult: 5620.14,
 		},
 		{
 			name:           "basic-exp-too-big",
@@ -262,6 +266,11 @@ func TestDecoderFloat64(t *testing.T) {
 			err:            true,
 			errType:        InvalidUnmarshalError(""),
 		},
+		{
+			name:           "big float",
+			json:           "5620.1400000000003",
+			expectedResult: 5620.1400000000003,
+		},
 	}
 	for _, testCase := range testCases {
 		t.Run(testCase.name, func(t *testing.T) {
@@ -282,7 +291,7 @@ func TestDecoderFloat64(t *testing.T) {
 				assert.Nil(t, err, "Err must be nil")
 			}
 			if !testCase.skipResult {
-				assert.Equal(t, testCase.expectedResult*1000000, math.Round(v*1000000), fmt.Sprintf("v must be equal to %f", testCase.expectedResult))
+				assert.Equal(t, math.Round(testCase.expectedResult*1000000), math.Round(v*1000000), fmt.Sprintf("v must be equal to %f", testCase.expectedResult))
 			}
 		})
 	}
@@ -540,16 +549,15 @@ func TestDecoderFloat64Null(t *testing.T) {
 		{
 			name:           "basic-exp-too-big",
 			json:           "0e9223372036000000000 ",
-			expectedResult: 0,
+			expectedResult: 1,
 			err:            true,
 			resultIsNil:    true,
 		},
 		{
 			name:           "basic-exp-too-big",
 			json:           "1.00232492420002423545849009",
-			expectedResult: 0,
-			err:            true,
-			resultIsNil:    true,
+			expectedResult: 1.002325,
+			resultIsNil:    false,
 		},
 		{
 			name:           "basic-exp-too-big",
@@ -618,7 +626,7 @@ func TestDecoderFloat64Null(t *testing.T) {
 			if testCase.resultIsNil {
 				assert.Nil(t, v)
 			} else {
-				assert.Equal(t, testCase.expectedResult*1000000, math.Round(*v*1000000), fmt.Sprintf("v must be equal to %f", testCase.expectedResult))
+				assert.Equal(t, math.Round(testCase.expectedResult*1000000), math.Round(*v*1000000), fmt.Sprintf("v must be equal to %f", testCase.expectedResult))
 			}
 		})
 	}
@@ -822,8 +830,7 @@ func TestDecoderFloat32(t *testing.T) {
 		{
 			name:           "basic-exp-too-big",
 			json:           "1.00232492420002423545849009",
-			expectedResult: 0,
-			err:            true,
+			expectedResult: 1.0023249,
 		},
 		{
 			name:           "basic-exp-too-big",
@@ -915,7 +922,7 @@ func TestDecoderFloat32(t *testing.T) {
 			if !testCase.skipResult {
 				assert.Equal(
 					t,
-					float64(testCase.expectedResult*1000000), math.Round(float64(v*1000000)),
+					math.Round(float64(testCase.expectedResult*1000000)), math.Round(float64(v*1000000)),
 					fmt.Sprintf("v must be equal to %f", testCase.expectedResult),
 				)
 			}
@@ -1153,9 +1160,7 @@ func TestDecoderFloat32Null(t *testing.T) {
 		{
 			name:           "basic-exp-too-big",
 			json:           "1.00232492420002423545849009",
-			expectedResult: 0,
-			err:            true,
-			resultIsNil:    true,
+			expectedResult: 1.0023249,
 		},
 		{
 			name:           "basic-exp-too-big",
@@ -1254,7 +1259,7 @@ func TestDecoderFloat32Null(t *testing.T) {
 			} else {
 				assert.Equal(
 					t,
-					float64(testCase.expectedResult*1000000), math.Round(float64(*v*1000000)),
+					math.Round(float64(testCase.expectedResult*1000000)), math.Round(float64(*v*1000000)),
 					fmt.Sprintf("v must be equal to %f", testCase.expectedResult),
 				)
 			}

	gojay high performance JSON encoder/decoder with stream API for Golang
	git clone git://git.lair.cx/gojay
	Log | Files | Refs | README | LICENSE

A	benchmarks/decoder/decoder_bench_float_test.go	|	28	++++++++++++++++++++++++++++
M	decode_number.go	|	3	++-
M	decode_number_float.go	|	70	+++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
M	decode_number_float_test.go	|	37	+++++++++++++++++++++----------------