Skip to content

Commit c1e36e7

Browse files
authored
fix load data when escaped and enclosed are the same (#3238)
1 parent 30064af commit c1e36e7

File tree

4 files changed

+37
-3
lines changed

4 files changed

+37
-3
lines changed

.gitattributes

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,5 @@ enginetest/testdata/test6.csv binary
88
enginetest/testdata/test7.txt binary
99
enginetest/testdata/test8.txt binary
1010
enginetest/testdata/test9.txt binary
11-
enginetest/testdata/test10.txt binary
11+
enginetest/testdata/test10.txt binary
12+
enginetest/testdata/simple_json.txt binary

enginetest/queries/load_queries.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ import (
1818
"fmt"
1919
"time"
2020

21+
"github.com/dolthub/go-mysql-server/sql/types"
22+
2123
"github.com/dolthub/go-mysql-server/sql"
2224
)
2325

@@ -126,6 +128,21 @@ var LoadDataScripts = []ScriptTest{
126128
},
127129
},
128130
},
131+
{
132+
Name: "Load JSON data. EnclosedBy and EscapedBy are the same.",
133+
SetUpScript: []string{
134+
"create table loadtable(pk int primary key, j json)",
135+
"LOAD DATA INFILE './testdata/simple_json.txt' INTO TABLE loadtable FIELDS TERMINATED BY ',' ENCLOSED BY '\"' ESCAPED BY '\"';",
136+
},
137+
Assertions: []ScriptTestAssertion{
138+
{
139+
Query: "select * from loadtable",
140+
Expected: []sql.Row{
141+
{1, types.MustJSON(`{"foo": "bar"}`)},
142+
},
143+
},
144+
},
145+
},
129146
{
130147
Name: "LOAD DATA handles Windows line-endings and a subset of columns that are not in order",
131148
SetUpScript: []string{
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"1","{""foo"":""bar""}"

sql/rowexec/ddl_iters.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ func (l *loadDataIter) parseFields(ctx *sql.Context, line string) ([]sql.Express
144144
// TODO: Support the OPTIONALLY parameter.
145145
if l.fieldsEnclosedBy != "" {
146146
for i, field := range fields {
147-
if string(field[0]) == l.fieldsEnclosedBy && string(field[len(field)-1]) == l.fieldsEnclosedBy {
147+
if field[0] == l.fieldsEnclosedBy[0] && field[len(field)-1] == l.fieldsEnclosedBy[0] {
148148
fields[i] = field[1 : len(field)-1]
149149
} else {
150150
return nil, fmt.Errorf("error: field not properly enclosed")
@@ -162,7 +162,22 @@ func (l *loadDataIter) parseFields(ctx *sql.Context, line string) ([]sql.Express
162162
} else if field == "\\0" {
163163
fields[i] = fmt.Sprintf("%c", 0) // ASCII 0
164164
} else {
165-
fields[i] = strings.ReplaceAll(field, l.fieldsEscapedBy, "")
165+
// The character immediately following the escape character remains untouched, even if it is the same
166+
// as the escape character
167+
newField := make([]byte, 0, len(field))
168+
for cIdx := 0; cIdx < len(field); cIdx++ {
169+
c := field[cIdx]
170+
// skip over the escape character, but always add the following character
171+
if c == l.fieldsEscapedBy[0] {
172+
cIdx += 1
173+
if cIdx < len(field) {
174+
newField = append(newField, c)
175+
}
176+
continue
177+
}
178+
newField = append(newField, c)
179+
}
180+
fields[i] = string(newField)
166181
}
167182
}
168183
}

0 commit comments

Comments
 (0)