gojay

high performance JSON encoder/decoder with stream API for Golang
git clone git://git.lair.cx/gojay
Log | Files | Refs | README | LICENSE

commit aa9e309f42c7a65c4cc06c13c67f42e8f9fabe2a
parent f4c5a97b2ce16ce14e1d30875b7cecf1f1f4f7a8
Author: Lorenzo Stoakes <lstoakes@gmail.com>
Date:   Sun,  5 Aug 2018 13:16:01 +0100

Handle string escaping correctly

The previous implementation failed a number of basic tests,
e.g. "\nx", seeming to treat "\\n" as "\n". It additionally seemed
rather over-complicated. This patch tries for a simpler
implementation.

This patch additionally adds the `\/` escape sequence, which is supported
by RFC 4627 (and by "encoding/json" as well).

Diffstat:
Mdecode_array_test.go | 4++--
Mdecode_object_test.go | 18+++++++++---------
Mdecode_string.go | 165++++++++++++++++++++++---------------------------------------------------------
Mdecode_string_test.go | 42+++++++++++++++++++++---------------------
4 files changed, 78 insertions(+), 151 deletions(-)

diff --git a/decode_array_test.go b/decode_array_test.go @@ -112,12 +112,12 @@ func TestSliceStrings(t *testing.T) { }, { name: "basic-test", - json: `["hello world", "hey" , "foo","bar \\n escape"]`, + json: `["hello world", "hey" , "foo","bar \n escape"]`, expectedResult: testSliceStrings{"hello world", "hey", "foo", "bar \n escape"}, }, { name: "basic-test", - json: `["hello world", "hey" , null,"bar \\n escape"]`, + json: `["hello world", "hey" , null,"bar \n escape"]`, expectedResult: testSliceStrings{"hello world", "hey", "", "bar \n escape"}, }, { diff --git a/decode_object_test.go b/decode_object_test.go @@ -877,14 +877,14 @@ func TestDecodeObjectComplex(t *testing.T) { json: `{ "testSubObject": { "testStr": "some string", - "testInt":124465, - "testUint16":120, - "testUint8":15, - "testInt16":-135, + "testInt":124465, + "testUint16":120, + "testUint8":15, + "testInt16":-135, "testInt8":-23 }, "testSubSliceInts": [1,2,3,4,5], - "testStr": "some \\n string" + "testStr": "some \n string" }`, expectedResult: testObjectComplex{ testSubObject: &testObject{ @@ -902,7 +902,7 @@ func TestDecodeObjectComplex(t *testing.T) { }, { name: "complex-json-err", - json: `{"testSubObject":{"testStr":"some string,"testInt":124465,"testUint16":120, "testUint8":15,"testInt16":-135,"testInt8":-23},"testSubSliceInts":[1,2],"testStr":"some \\n string"}`, + json: `{"testSubObject":{"testStr":"some string,"testInt":124465,"testUint16":120, "testUint8":15,"testInt16":-135,"testInt8":-23},"testSubSliceInts":[1,2],"testStr":"some \n string"}`, expectedResult: testObjectComplex{ testSubObject: &testObject{}, }, @@ -1012,9 +1012,9 @@ func TestDecodeObjectNull(t *testing.T) { var jsonComplex = []byte(`{ "test": "{\"test\":\"1\",\"test1\":2}", - "test2\\n": "\\\\\\\\\\n", + "test2\n": "\\\\\\\\\n", "testArrSkip": ["testString with escaped \\\" quotes"], - "testSkipString": "skip \\ string with \\n escaped char \" ", + "testSkipString": "skip \\ string with \n escaped char \" ", 
"testSkipObject": { "testSkipSubObj": { "test": "test" @@ -1028,7 +1028,7 @@ var jsonComplex = []byte(`{ "testSkipBoolNull": null, "testSub": { "test": "{\"test\":\"1\",\"test1\":2}", - "test2\\n": "[1,2,3]", + "test2\n": "[1,2,3]", "test3": 1, "testObjSkip": { "test": "test string with escaped \" quotes" diff --git a/decode_string.go b/decode_string.go @@ -52,127 +52,54 @@ func (dec *Decoder) decodeString(v *string) error { } func (dec *Decoder) parseEscapedString() error { - // know where to stop slash - start := dec.cursor - for ; dec.cursor < dec.length || dec.read(); dec.cursor++ { - if dec.data[dec.cursor] != '\\' { - d := dec.data[dec.cursor] - dec.cursor = dec.cursor + 1 - nSlash := dec.cursor - start - switch d { - case '"': - // nSlash must be odd - if nSlash&1 != 1 { - return dec.raiseInvalidJSONErr(dec.cursor) - } - diff := (nSlash - 1) >> 1 - dec.data = append(dec.data[:start+diff-1], dec.data[dec.cursor-1:]...) - dec.length = len(dec.data) - dec.cursor -= nSlash - diff - return nil - case 'u': - if nSlash&1 == 0 { - diff := nSlash >> 1 - dec.data = append(dec.data[:start+diff-1], dec.data[dec.cursor-1:]...) - dec.length = len(dec.data) - dec.cursor -= nSlash - diff - return nil - } - start := dec.cursor - 2 - ((nSlash - 1) >> 1) - str, err := dec.parseUnicode() - if err != nil { - dec.err = err - return err - } - diff := dec.cursor - start - dec.data = append(append(dec.data[:start], str...), dec.data[dec.cursor:]...) - dec.length = len(dec.data) - dec.cursor = dec.cursor - diff + len(str) - return nil - case 'b': - // number of slash must be even - // if is odd number of slashes - // divide nSlash - 1 by 2 and leave last one - // else divide nSlash by 2 and leave the letter - if nSlash&1 != 0 { - return dec.raiseInvalidJSONErr(dec.cursor) - } - var diff int - diff = nSlash >> 1 - dec.data = append(append(dec.data[:start+diff-2], '\b'), dec.data[dec.cursor:]...) 
- dec.length = len(dec.data) - dec.cursor -= nSlash - diff + 1 - return nil - case 'f': - // number of slash must be even - // if is odd number of slashes - // divide nSlash - 1 by 2 and leave last one - // else divide nSlash by 2 and leave the letter - if nSlash&1 != 0 { - return dec.raiseInvalidJSONErr(dec.cursor) - } - var diff int - diff = nSlash >> 1 - dec.data = append(append(dec.data[:start+diff-2], '\f'), dec.data[dec.cursor:]...) - dec.length = len(dec.data) - dec.cursor -= nSlash - diff + 1 - return nil - case 'n': - // number of slash must be even - // if is odd number of slashes - // divide nSlash - 1 by 2 and leave last one - // else divide nSlash by 2 and leave the letter - if nSlash&1 != 0 { - return dec.raiseInvalidJSONErr(dec.cursor) - } - var diff int - diff = nSlash >> 1 - dec.data = append(append(dec.data[:start+diff-2], '\n'), dec.data[dec.cursor:]...) - dec.length = len(dec.data) - dec.cursor -= nSlash - diff + 1 - return nil - case 'r': - // number of slash must be even - // if is odd number of slashes - // divide nSlash - 1 by 2 and leave last one - // else divide nSlash by 2 and leave the letter - if nSlash&1 != 0 { - return dec.raiseInvalidJSONErr(dec.cursor) - } - var diff int - diff = nSlash >> 1 - dec.data = append(append(dec.data[:start+diff-2], '\r'), dec.data[dec.cursor:]...) - dec.length = len(dec.data) - dec.cursor -= nSlash - diff + 1 - return nil - case 't': - // number of slash must be even - // if is odd number of slashes - // divide nSlash - 1 by 2 and leave last one - // else divide nSlash by 2 and leave the letter - if nSlash&1 != 0 { - return dec.raiseInvalidJSONErr(dec.cursor) - } - var diff int - diff = nSlash >> 1 - dec.data = append(append(dec.data[:start+diff-2], '\t'), dec.data[dec.cursor:]...) 
- dec.length = len(dec.data) - dec.cursor -= nSlash - diff + 1 - return nil - default: - // nSlash must be even - if nSlash&1 == 1 { - return dec.raiseInvalidJSONErr(dec.cursor) - } - diff := nSlash >> 1 - dec.data = append(dec.data[:start+diff-1], dec.data[dec.cursor-1:]...) - dec.length = len(dec.data) - dec.cursor -= (nSlash - diff) - return nil - } + if dec.cursor >= dec.length && !dec.read() { + return dec.raiseInvalidJSONErr(dec.cursor) + } + + switch dec.data[dec.cursor] { + case '"': + dec.data[dec.cursor] = '"' + case '\\': + dec.data[dec.cursor] = '\\' + case '/': + dec.data[dec.cursor] = '/' + case 'b': + dec.data[dec.cursor] = '\b' + case 'f': + dec.data[dec.cursor] = '\f' + case 'n': + dec.data[dec.cursor] = '\n' + case 'r': + dec.data[dec.cursor] = '\r' + case 't': + dec.data[dec.cursor] = '\t' + case 'u': + start := dec.cursor + dec.cursor++ + str, err := dec.parseUnicode() + if err != nil { + return err } + + diff := dec.cursor - start + dec.data = append(append(dec.data[:start-1], str...), dec.data[dec.cursor:]...) + dec.length = len(dec.data) + dec.cursor += len(str) - diff - 1 + + return nil + default: + return dec.raiseInvalidJSONErr(dec.cursor) } - return dec.raiseInvalidJSONErr(dec.cursor) + + // Truncate the previous backslash character, and the + dec.data = append(dec.data[:dec.cursor-1], dec.data[dec.cursor:]...) + dec.length-- + + // Since we've lost a character, our dec.cursor offset is now + // 1 past the escaped character which is precisely where we + // want it. 
+ + return nil } func (dec *Decoder) getString() (int, int, error) { diff --git a/decode_string_test.go b/decode_string_test.go @@ -44,56 +44,56 @@ func TestDecoderString(t *testing.T) { { name: "escape-control-char", json: `"\n"`, - expectedResult: "", - err: true, + expectedResult: "\n", + err: false, }, { name: "escape-control-char", json: `"\\n"`, - expectedResult: "\n", + expectedResult: `\n`, err: false, }, { name: "escape-control-char", json: `"\t"`, - expectedResult: "", - err: true, + expectedResult: "\t", + err: false, }, { name: "escape-control-char", json: `"\\t"`, - expectedResult: "\t", + expectedResult: `\t`, err: false, }, { name: "escape-control-char", json: `"\b"`, - expectedResult: "", - err: true, + expectedResult: "\b", + err: false, }, { name: "escape-control-char", json: `"\\b"`, - expectedResult: "\b", + expectedResult: `\b`, err: false, }, { name: "escape-control-char", json: `"\f"`, - expectedResult: "", - err: true, + expectedResult: "\f", + err: false, }, { name: "escape-control-char", json: `"\\f"`, - expectedResult: "\f", + expectedResult: `\f`, err: false, }, { name: "escape-control-char", json: `"\r"`, - expectedResult: "", - err: true, + expectedResult: "\r", + err: false, }, { name: "escape-control-char", @@ -104,7 +104,7 @@ func TestDecoderString(t *testing.T) { { name: "escape-control-char", json: `"\\r"`, - expectedResult: "\r", + expectedResult: `\r`, err: false, }, { @@ -228,31 +228,31 @@ func TestDecoderString(t *testing.T) { }, { name: "escape quote err2", - json: `"test string \\t escaped"`, + json: `"test string \t escaped"`, expectedResult: "test string \t escaped", err: false, }, { name: "escape quote err2", - json: `"test string \\r escaped"`, + json: `"test string \r escaped"`, expectedResult: "test string \r escaped", err: false, }, { name: "escape quote err2", - json: `"test string \\b escaped"`, + json: `"test string \b escaped"`, expectedResult: "test string \b escaped", err: false, }, { name: "escape quote err", - 
json: `"test string \\n escaped"`, + json: `"test string \n escaped"`, expectedResult: "test string \n escaped", err: false, }, { name: "escape quote err", - json: `"test string \\" escaped"`, + json: `"test string \\\" escaped`, expectedResult: ``, err: true, errType: InvalidJSONError(""), @@ -273,7 +273,7 @@ func TestDecoderString(t *testing.T) { }, { name: "string-complex", - json: ` "string with spaces and \"escape\"d \"quotes\" and escaped line returns \\n and escaped \\\\ escaped char"`, + json: ` "string with spaces and \"escape\"d \"quotes\" and escaped line returns \n and escaped \\\\ escaped char"`, expectedResult: "string with spaces and \"escape\"d \"quotes\" and escaped line returns \n and escaped \\\\ escaped char", }, }