diff --git a/.gitattributes b/.gitattributes
index 6515bfa04b..6168d3d095 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -8,4 +8,5 @@ enginetest/testdata/test6.csv binary
 enginetest/testdata/test7.txt binary
 enginetest/testdata/test8.txt binary
 enginetest/testdata/test9.txt binary
-enginetest/testdata/test10.txt binary
\ No newline at end of file
+enginetest/testdata/test10.txt binary
+enginetest/testdata/simple_json.txt binary
\ No newline at end of file
diff --git a/enginetest/queries/load_queries.go b/enginetest/queries/load_queries.go
index 00f6cef701..7006f25b8c 100644
--- a/enginetest/queries/load_queries.go
+++ b/enginetest/queries/load_queries.go
@@ -18,6 +18,8 @@ import (
 	"fmt"
 	"time"
 
+	"github.com/dolthub/go-mysql-server/sql/types"
+
 	"github.com/dolthub/go-mysql-server/sql"
 )
 
@@ -126,6 +128,21 @@ var LoadDataScripts = []ScriptTest{
 			},
 		},
 	},
+	{
+		Name: "Load JSON data. EnclosedBy and EscapedBy are the same.",
+		SetUpScript: []string{
+			"create table loadtable(pk int primary key, j json)",
+			"LOAD DATA INFILE './testdata/simple_json.txt' INTO TABLE loadtable FIELDS TERMINATED BY ',' ENCLOSED BY '\"' ESCAPED BY '\"';",
+		},
+		Assertions: []ScriptTestAssertion{
+			{
+				Query: "select * from loadtable",
+				Expected: []sql.Row{
+					{1, types.MustJSON(`{"foo": "bar"}`)},
+				},
+			},
+		},
+	},
 	{
 		Name: "LOAD DATA handles Windows line-endings and a subset of columns that are not in order",
 		SetUpScript: []string{
diff --git a/enginetest/testdata/simple_json.txt b/enginetest/testdata/simple_json.txt
new file mode 100644
index 0000000000..e0290f3598
--- /dev/null
+++ b/enginetest/testdata/simple_json.txt
@@ -0,0 +1 @@
+"1","{""foo"":""bar""}"
\ No newline at end of file
diff --git a/sql/rowexec/ddl_iters.go b/sql/rowexec/ddl_iters.go
index 8d638f79da..814c50530d 100644
--- a/sql/rowexec/ddl_iters.go
+++ b/sql/rowexec/ddl_iters.go
@@ -144,7 +144,7 @@ func (l *loadDataIter) parseFields(ctx *sql.Context, line string) ([]sql.Express
 	// TODO: Support the OPTIONALLY parameter.
 	if l.fieldsEnclosedBy != "" {
 		for i, field := range fields {
-			if string(field[0]) == l.fieldsEnclosedBy && string(field[len(field)-1]) == l.fieldsEnclosedBy {
+			if len(field) >= 2 && field[0] == l.fieldsEnclosedBy[0] && field[len(field)-1] == l.fieldsEnclosedBy[0] {
 				fields[i] = field[1 : len(field)-1]
 			} else {
 				return nil, fmt.Errorf("error: field not properly enclosed")
@@ -162,7 +162,22 @@ func (l *loadDataIter) parseFields(ctx *sql.Context, line string) ([]sql.Express
 			} else if field == "\\0" {
 				fields[i] = fmt.Sprintf("%c", 0) // ASCII 0
 			} else {
-				fields[i] = strings.ReplaceAll(field, l.fieldsEscapedBy, "")
+				// Strip each escape character and keep the character that follows it verbatim, even when
+				// that character is itself the escape character (e.g. a doubled quote).
+				newField := make([]byte, 0, len(field))
+				for cIdx := 0; cIdx < len(field); cIdx++ {
+					c := field[cIdx]
+					// drop the escape character, but always keep the character it escapes
+					if c == l.fieldsEscapedBy[0] {
+						cIdx++
+						if cIdx < len(field) {
+							newField = append(newField, field[cIdx])
+						}
+						continue
+					}
+					newField = append(newField, c)
+				}
+				fields[i] = string(newField)
 			}
 		}
 	}