optimise decoding big floats - gojay - high performance JSON encoder/decoder with stream API for Golang

commit d4326cd93065f8c428796971d42f39d3e7986aa4
parent 90d7e8221793880a06e933749224ae2810271e35
Author: francoispqt <francois@parquet.ninja>
Date:   Sun, 28 Oct 2018 02:06:48 +0800

optimise decoding big floats

Diffstat:
A benchmarks/decoder/decoder_bench_float_test.go  | 28 ++++++++++++++++++++++++++++
M decode_number_float.go  | 61 +++++++++++++++++++++++++++++++++++++++++++------------------
M decode_number_float_test.go  | 11 ++++-------

3 files changed, 75 insertions(+), 25 deletions(-)
diff --git a/benchmarks/decoder/decoder_bench_float_test.go b/benchmarks/decoder/decoder_bench_float_test.go
@@ -0,0 +1,28 @@
+package benchmarks
+
+import (
+	"encoding/json"
+	"testing"
+
+	"github.com/francoispqt/gojay"
+)
+
+var bigf = []byte(`0.00058273999999999999`)
+
+// BenchmarkBigFloatEncodingJSON benchmarks decoding bigf (a float with a long fractional part) into a float64 using the standard library's encoding/json package; the Unmarshal error is deliberately discarded.
+func BenchmarkBigFloatEncodingJSON(b *testing.B) {
+	b.ReportAllocs()
+	for n := 0; n < b.N; n++ {
+		var f float64
+		var _ = json.Unmarshal(bigf, &f)
+	}
+}
+
+// BenchmarkBigFloatGojay benchmarks decoding bigf (a float with a long fractional part) into a float64 using gojay.Unmarshal; the Unmarshal error is deliberately discarded.
+func BenchmarkBigFloatGojay(b *testing.B) {
+	b.ReportAllocs()
+	for n := 0; n < b.N; n++ {
+		var f float64
+		var _ = gojay.Unmarshal(bigf, &f)
+	}
+}
diff --git a/decode_number_float.go b/decode_number_float.go
@@ -120,29 +120,37 @@ func (dec *Decoder) getFloat() (float64, error) {
 			// then we get part after decimal as integer
 			start = j + 1
 			// get number after the decimal point
-			// multiple the before decimal point portion by 10 using bitwise
 			for i := j + 1; i < dec.length || dec.read(); i++ {
 				c := dec.data[i]
 				if isDigit(c) {
 					end = i
-					if v := (beforeDecimal << 3) + (beforeDecimal << 1); v >= beforeDecimal {
-						beforeDecimal = v
+					// multiply the portion before the decimal point by 10 using bit shifts (x<<3 + x<<1);
+					// only do so for the first 18 fractional digits so it doesn't overflow
+					if end-start < 18 {
+						beforeDecimal = (beforeDecimal << 3) + (beforeDecimal << 1)
 					}
 					continue
 				} else if (c == 'e' || c == 'E') && j < i-1 {
-					afterDecimal := dec.atoi64(start, end)
-					dec.cursor = i + 1
+					// we have an exponent, convert first the value we got before the exponent
+					var afterDecimal int64
 					expI := end - start + 2
+					// if expI is too large, the number has too many fractional digits: just truncate it
 					if expI >= len(pow10uint64) || expI < 0 {
-						return 0, dec.raiseInvalidJSONErr(dec.cursor)
+						expI = len(pow10uint64) - 2
+						afterDecimal = dec.atoi64(start, start+expI-2)
+					} else {
+						// then we add both integers
+						// then we divide the number by the power found
+						afterDecimal = dec.atoi64(start, end)
 					}
+					dec.cursor = i + 1
 					pow := pow10uint64[expI]
 					floatVal := float64(beforeDecimal+afterDecimal) / float64(pow)
 					exp, err := dec.getExponent()
 					if err != nil {
 						return 0, err
 					}
-					pExp := (exp + (exp >> 31)) ^ (exp >> 31) + 1 // abs
+					pExp := (exp + (exp >> 31)) ^ (exp >> 31) + 1 // absolute exponent
 					if pExp >= int64(len(pow10uint64)) || pExp < 0 {
 						return 0, dec.raiseInvalidJSONErr(dec.cursor)
 					}
@@ -160,9 +168,9 @@ func (dec *Decoder) getFloat() (float64, error) {
 			}
 			var afterDecimal int64
 			expI := end - start + 2
-			// if exp is too long, just cut the number
+			// if expI is too large, the number has too many fractional digits: just truncate it
 			if expI >= len(pow10uint64) || expI < 0 {
-				expI = len(pow10uint64) - 2
+				expI = 19
 				afterDecimal = dec.atoi64(start, start+expI-2)
 			} else {
 				// then we add both integers
@@ -325,15 +333,27 @@ func (dec *Decoder) getFloat32() (float32, error) {
 				c := dec.data[i]
 				if isDigit(c) {
 					end = i
-					beforeDecimal = (beforeDecimal << 3) + (beforeDecimal << 1)
+					// multiply the portion before the decimal point by 10 using bit shifts (x<<3 + x<<1);
+					// only do so for the first 9 fractional digits so it doesn't overflow
+					if end-start < 9 {
+						beforeDecimal = (beforeDecimal << 3) + (beforeDecimal << 1)
+					}
 					continue
 				} else if (c == 'e' || c == 'E') && j < i-1 {
-					afterDecimal := dec.atoi32(start, end)
-					dec.cursor = i + 1
+					// we have an exponent: first convert the digits
+					// found after the decimal point, before the exponent
+					var afterDecimal int32
 					expI := end - start + 2
-					if expI >= len(pow10uint64) || expI < 0 {
-						return 0, dec.raiseInvalidJSONErr(dec.cursor)
+					// if expI is too large, the number has too many fractional digits: just truncate it
+					if expI >= 12 || expI < 0 {
+						expI = 10
+						afterDecimal = dec.atoi32(start, start+expI-2)
+					} else {
+						// then we add both integers
+						// then we divide the number by the power found
+						afterDecimal = dec.atoi32(start, end)
 					}
+					dec.cursor = i + 1
 					pow := pow10uint64[expI]
 					floatVal := float32(beforeDecimal+afterDecimal) / float32(pow)
 					exp, err := dec.getExponent()
@@ -358,10 +378,16 @@ func (dec *Decoder) getFloat32() (float32, error) {
 			}
 			// then we add both integers
 			// then we divide the number by the power found
-			afterDecimal := dec.atoi32(start, end)
+			var afterDecimal int32
 			expI := end - start + 2
-			if expI >= len(pow10uint64) || expI < 0 {
-				return 0, dec.raiseInvalidJSONErr(dec.cursor)
+			// if expI is too large, the number has too many fractional digits: just truncate it
+			if expI >= 12 || expI < 0 {
+				expI = 10
+				afterDecimal = dec.atoi32(start, start+expI-2)
+			} else {
+				// then we add both integers
+				// then we divide the number by the power found
+				afterDecimal = dec.atoi32(start, end)
 			}
 			pow := pow10uint64[expI]
 			return float32(beforeDecimal+afterDecimal) / float32(pow), nil
@@ -375,7 +401,6 @@ func (dec *Decoder) getFloat32() (float32, error) {
 				return 0, err
 			}
 			pExp := (exp + (exp >> 31)) ^ (exp >> 31) + 1
-			// log.Print(exp, " after")
 			if pExp >= int64(len(pow10uint64)) || pExp < 0 {
 				return 0, dec.raiseInvalidJSONErr(dec.cursor)
 			}
diff --git a/decode_number_float_test.go b/decode_number_float_test.go
@@ -830,8 +830,7 @@ func TestDecoderFloat32(t *testing.T) {
 		{
 			name:           "basic-exp-too-big",
 			json:           "1.00232492420002423545849009",
-			expectedResult: 0,
-			err:            true,
+			expectedResult: 1.0023249,
 		},
 		{
 			name:           "basic-exp-too-big",
@@ -923,7 +922,7 @@ func TestDecoderFloat32(t *testing.T) {
 			if !testCase.skipResult {
 				assert.Equal(
 					t,
-					float64(testCase.expectedResult*1000000), math.Round(float64(v*1000000)),
+					math.Round(float64(testCase.expectedResult*1000000)), math.Round(float64(v*1000000)),
 					fmt.Sprintf("v must be equal to %f", testCase.expectedResult),
 				)
 			}
@@ -1161,9 +1160,7 @@ func TestDecoderFloat32Null(t *testing.T) {
 		{
 			name:           "basic-exp-too-big",
 			json:           "1.00232492420002423545849009",
-			expectedResult: 0,
-			err:            true,
-			resultIsNil:    true,
+			expectedResult: 1.0023249,
 		},
 		{
 			name:           "basic-exp-too-big",
@@ -1262,7 +1259,7 @@ func TestDecoderFloat32Null(t *testing.T) {
 			} else {
 				assert.Equal(
 					t,
-					float64(testCase.expectedResult*1000000), math.Round(float64(*v*1000000)),
+					math.Round(float64(testCase.expectedResult*1000000)), math.Round(float64(*v*1000000)),
 					fmt.Sprintf("v must be equal to %f", testCase.expectedResult),
 				)
 			}

	gojay high performance JSON encoder/decoder with stream API for Golang
	git clone git://git.lair.cx/gojay
	Log \| Files \| Refs \| README \| LICENSE

A	benchmarks/decoder/decoder_bench_float_test.go	\|	28	++++++++++++++++++++++++++++
M	decode_number_float.go	\|	61	+++++++++++++++++++++++++++++++++++++++++++------------------
M	decode_number_float_test.go	\|	11	++++-------