gojay

High-performance JSON encoder/decoder with a stream API for Golang
git clone git://git.lair.cx/gojay
Log | Files | Refs | README | LICENSE

commit ba96878038b97df0e7631a0d305060eceacede52
parent 688c5d008625b62011496858a3e55f852dccd40f
Author: Francois Parquet <francois.parquet@gmail.com>
Date:   Sun, 28 Oct 2018 02:08:36 +0800

Merge pull request #88 from francoispqt/fix/parsing-big-floats-truncate

make parsing of big floats possible by truncating them at high precision
Diffstat:
A benchmarks/decoder/decoder_bench_float_test.go | 28 ++++++++++++++++++++++++++++
M decode_number.go | 3 ++-
M decode_number_float.go | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
M decode_number_float_test.go | 37 +++++++++++++++++++++----------------
4 files changed, 102 insertions(+), 36 deletions(-)

diff --git a/benchmarks/decoder/decoder_bench_float_test.go b/benchmarks/decoder/decoder_bench_float_test.go @@ -0,0 +1,28 @@ +package benchmarks + +import ( + "encoding/json" + "testing" + + "github.com/francoispqt/gojay" +) + +var bigf = []byte(`0.00058273999999999999`) + +// BenchmarkBigFloatEncodingJSON decodes a big float with the standard package +func BenchmarkBigFloatEncodingJSON(b *testing.B) { + b.ReportAllocs() + for n := 0; n < b.N; n++ { + var f float64 + var _ = json.Unmarshal(bigf, &f) + } +} + +// BenchmarkBigFloatGojay decodes a big float with gojay +func BenchmarkBigFloatGojay(b *testing.B) { + b.ReportAllocs() + for n := 0; n < b.N; n++ { + var f float64 + var _ = gojay.Unmarshal(bigf, &f) + } +} diff --git a/decode_number.go b/decode_number.go @@ -24,7 +24,7 @@ const maxInt16Length = 5 const maxInt8Length = 3 const invalidNumber = int8(-1) -var pow10uint64 = [20]uint64{ +var pow10uint64 = [21]uint64{ 0, 1, 10, @@ -45,6 +45,7 @@ var pow10uint64 = [20]uint64{ 10000000000000000, 100000000000000000, 1000000000000000000, + 10000000000000000000, } var skipNumberEndCursorIncrement [256]int diff --git a/decode_number_float.go b/decode_number_float.go @@ -120,27 +120,37 @@ func (dec *Decoder) getFloat() (float64, error) { // then we get part after decimal as integer start = j + 1 // get number after the decimal point - // multiple the before decimal point portion by 10 using bitwise for i := j + 1; i < dec.length || dec.read(); i++ { c := dec.data[i] if isDigit(c) { end = i - beforeDecimal = (beforeDecimal << 3) + (beforeDecimal << 1) + // multiply the before decimal point portion by 10 using bitwise + // make sure it doesn't overflow + if end-start < 18 { + beforeDecimal = (beforeDecimal << 3) + (beforeDecimal << 1) + } continue } else if (c == 'e' || c == 'E') && j < i-1 { - afterDecimal := dec.atoi64(start, end) - dec.cursor = i + 1 + // we have an exponent, convert first the value we got before the exponent + var afterDecimal int64 expI := end - 
start + 2 + // if exp is too long, it means number is too long, just truncate the number if expI >= len(pow10uint64) || expI < 0 { - return 0, dec.raiseInvalidJSONErr(dec.cursor) + expI = len(pow10uint64) - 2 + afterDecimal = dec.atoi64(start, start+expI-2) + } else { + // then we add both integers + // then we divide the number by the power found + afterDecimal = dec.atoi64(start, end) } + dec.cursor = i + 1 pow := pow10uint64[expI] floatVal := float64(beforeDecimal+afterDecimal) / float64(pow) exp, err := dec.getExponent() if err != nil { return 0, err } - pExp := (exp + (exp >> 31)) ^ (exp >> 31) + 1 // abs + pExp := (exp + (exp >> 31)) ^ (exp >> 31) + 1 // absolute exponent if pExp >= int64(len(pow10uint64)) || pExp < 0 { return 0, dec.raiseInvalidJSONErr(dec.cursor) } @@ -156,13 +166,18 @@ func (dec *Decoder) getFloat() (float64, error) { if end >= dec.length || end < start { return 0, dec.raiseInvalidJSONErr(dec.cursor) } - // then we add both integers - // then we divide the number by the power found - afterDecimal := dec.atoi64(start, end) + var afterDecimal int64 expI := end - start + 2 + // if exp is too long, it means number is too long, just truncate the number if expI >= len(pow10uint64) || expI < 0 { - return 0, dec.raiseInvalidJSONErr(dec.cursor) + expI = 19 + afterDecimal = dec.atoi64(start, start+expI-2) + } else { + // then we add both integers + // then we divide the number by the power found + afterDecimal = dec.atoi64(start, end) } + pow := pow10uint64[expI] return float64(beforeDecimal+afterDecimal) / float64(pow), nil case 'e', 'E': @@ -318,15 +333,27 @@ func (dec *Decoder) getFloat32() (float32, error) { c := dec.data[i] if isDigit(c) { end = i - beforeDecimal = (beforeDecimal << 3) + (beforeDecimal << 1) + // multiply the before decimal point portion by 10 using bitwise + // make sure it desn't overflow + if end-start < 9 { + beforeDecimal = (beforeDecimal << 3) + (beforeDecimal << 1) + } continue } else if (c == 'e' || c == 'E') && j < i-1 
{ - afterDecimal := dec.atoi32(start, end) - dec.cursor = i + 1 + // then we add both integers + // then we divide the number by the power found + var afterDecimal int32 expI := end - start + 2 - if expI >= len(pow10uint64) || expI < 0 { - return 0, dec.raiseInvalidJSONErr(dec.cursor) + // if exp is too long, it means number is too long, just truncate the number + if expI >= 12 || expI < 0 { + expI = 10 + afterDecimal = dec.atoi32(start, start+expI-2) + } else { + // then we add both integers + // then we divide the number by the power found + afterDecimal = dec.atoi32(start, end) } + dec.cursor = i + 1 pow := pow10uint64[expI] floatVal := float32(beforeDecimal+afterDecimal) / float32(pow) exp, err := dec.getExponent() @@ -351,10 +378,16 @@ func (dec *Decoder) getFloat32() (float32, error) { } // then we add both integers // then we divide the number by the power found - afterDecimal := dec.atoi32(start, end) + var afterDecimal int32 expI := end - start + 2 - if expI >= len(pow10uint64) || expI < 0 { - return 0, dec.raiseInvalidJSONErr(dec.cursor) + // if exp is too long, it means number is too long, just truncate the number + if expI >= 12 || expI < 0 { + expI = 10 + afterDecimal = dec.atoi32(start, start+expI-2) + } else { + // then we add both integers + // then we divide the number by the power found + afterDecimal = dec.atoi32(start, end) } pow := pow10uint64[expI] return float32(beforeDecimal+afterDecimal) / float32(pow), nil @@ -368,7 +401,6 @@ func (dec *Decoder) getFloat32() (float32, error) { return 0, err } pExp := (exp + (exp >> 31)) ^ (exp >> 31) + 1 - // log.Print(exp, " after") if pExp >= int64(len(pow10uint64)) || pExp < 0 { return 0, dec.raiseInvalidJSONErr(dec.cursor) } diff --git a/decode_number_float_test.go b/decode_number_float_test.go @@ -218,10 +218,14 @@ func TestDecoderFloat64(t *testing.T) { err: true, }, { - name: "basic-exp-too-big", + name: "big float", json: "1.00232492420002423545849009", - expectedResult: 0, - err: true, + 
expectedResult: 1.002325, + }, + { + name: "big float", + json: "5620.1400000000003", + expectedResult: 5620.14, }, { name: "basic-exp-too-big", @@ -262,6 +266,11 @@ func TestDecoderFloat64(t *testing.T) { err: true, errType: InvalidUnmarshalError(""), }, + { + name: "big float", + json: "5620.1400000000003", + expectedResult: 5620.1400000000003, + }, } for _, testCase := range testCases { t.Run(testCase.name, func(t *testing.T) { @@ -282,7 +291,7 @@ func TestDecoderFloat64(t *testing.T) { assert.Nil(t, err, "Err must be nil") } if !testCase.skipResult { - assert.Equal(t, testCase.expectedResult*1000000, math.Round(v*1000000), fmt.Sprintf("v must be equal to %f", testCase.expectedResult)) + assert.Equal(t, math.Round(testCase.expectedResult*1000000), math.Round(v*1000000), fmt.Sprintf("v must be equal to %f", testCase.expectedResult)) } }) } @@ -540,16 +549,15 @@ func TestDecoderFloat64Null(t *testing.T) { { name: "basic-exp-too-big", json: "0e9223372036000000000 ", - expectedResult: 0, + expectedResult: 1, err: true, resultIsNil: true, }, { name: "basic-exp-too-big", json: "1.00232492420002423545849009", - expectedResult: 0, - err: true, - resultIsNil: true, + expectedResult: 1.002325, + resultIsNil: false, }, { name: "basic-exp-too-big", @@ -618,7 +626,7 @@ func TestDecoderFloat64Null(t *testing.T) { if testCase.resultIsNil { assert.Nil(t, v) } else { - assert.Equal(t, testCase.expectedResult*1000000, math.Round(*v*1000000), fmt.Sprintf("v must be equal to %f", testCase.expectedResult)) + assert.Equal(t, math.Round(testCase.expectedResult*1000000), math.Round(*v*1000000), fmt.Sprintf("v must be equal to %f", testCase.expectedResult)) } }) } @@ -822,8 +830,7 @@ func TestDecoderFloat32(t *testing.T) { { name: "basic-exp-too-big", json: "1.00232492420002423545849009", - expectedResult: 0, - err: true, + expectedResult: 1.0023249, }, { name: "basic-exp-too-big", @@ -915,7 +922,7 @@ func TestDecoderFloat32(t *testing.T) { if !testCase.skipResult { assert.Equal( t, - 
float64(testCase.expectedResult*1000000), math.Round(float64(v*1000000)), + math.Round(float64(testCase.expectedResult*1000000)), math.Round(float64(v*1000000)), fmt.Sprintf("v must be equal to %f", testCase.expectedResult), ) } @@ -1153,9 +1160,7 @@ func TestDecoderFloat32Null(t *testing.T) { { name: "basic-exp-too-big", json: "1.00232492420002423545849009", - expectedResult: 0, - err: true, - resultIsNil: true, + expectedResult: 1.0023249, }, { name: "basic-exp-too-big", @@ -1254,7 +1259,7 @@ func TestDecoderFloat32Null(t *testing.T) { } else { assert.Equal( t, - float64(testCase.expectedResult*1000000), math.Round(float64(*v*1000000)), + math.Round(float64(testCase.expectedResult*1000000)), math.Round(float64(*v*1000000)), fmt.Sprintf("v must be equal to %f", testCase.expectedResult), ) }