Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 18 additions & 12 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
enginetest/testdata/test1.txt binary
enginetest/testdata/test2.csv binary
enginetest/testdata/test3.csv binary
enginetest/testdata/test3backwards.csv binary
enginetest/testdata/test4.txt binary
enginetest/testdata/test5.txt binary
enginetest/testdata/test6.csv binary
enginetest/testdata/test7.txt binary
enginetest/testdata/test8.txt binary
enginetest/testdata/test9.txt binary
enginetest/testdata/test10.txt binary
enginetest/testdata/simple_json.txt binary
enginetest/testdata/test1.txt binary
enginetest/testdata/test2.csv binary
enginetest/testdata/test3.csv binary
enginetest/testdata/test3backwards.csv binary
enginetest/testdata/test4.txt binary
enginetest/testdata/test5.txt binary
enginetest/testdata/test6.csv binary
enginetest/testdata/test7.txt binary
enginetest/testdata/test8.txt binary
enginetest/testdata/test9.txt binary
enginetest/testdata/test10.txt binary
enginetest/testdata/simple_json.txt binary
enginetest/testdata/loaddata_9969.dat binary
enginetest/testdata/loaddata_escape.dat binary
enginetest/testdata/loaddata_enclosed.dat binary
enginetest/testdata/loaddata_single_quotes.dat binary
enginetest/testdata/loaddata_nulls.dat binary
enginetest/testdata/loaddata_mixed_escapes.dat binary
62 changes: 62 additions & 0 deletions enginetest/queries/load_queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,68 @@ import (
)

var LoadDataScripts = []ScriptTest{
{
// https://github.com/dolthub/dolt/issues/9969
//
// Regression script for LOAD DATA field parsing. Each table below is loaded
// from its own fixture file with a different combination of
// FIELDS TERMINATED BY / ENCLOSED BY / ESCAPED BY, and the assertions check
// the rows that end up in the table.
Name: "LOAD DATA with ENCLOSED BY and ESCAPED BY parsing",
SetUpScript: []string{
// t1: enclosure and escape character are both '"'. The fixture row is
// "1","foo,bar", so the field terminator appears inside an enclosed field.
"create table t1(pk int primary key, c1 longtext)",
"LOAD DATA INFILE './testdata/loaddata_9969.dat' INTO TABLE t1 FIELDS TERMINATED BY ',' ENCLOSED BY '\"' ESCAPED BY '\"'",
// t2: enclosure is '"', escape is a backslash (the Go literal "\\\\" puts
// '\\' into the SQL text). The fixture row is "1","foo\,bar".
"create table t2(pk int primary key, c1 longtext)",
"LOAD DATA INFILE './testdata/loaddata_escape.dat' INTO TABLE t2 FIELDS TERMINATED BY ',' ENCLOSED BY '\"' ESCAPED BY '\\\\'",
// t3: enclosure and escape are both '"'; the fixture contains doubled
// quotes ("a""b") as well as terminators inside enclosed fields.
"create table t3(a varchar(20), b varchar(20))",
"LOAD DATA INFILE './testdata/loaddata_enclosed.dat' INTO TABLE t3 FIELDS TERMINATED BY ',' ENCLOSED BY '\"' ESCAPED BY '\"'",
// t4: backslash escape with '"' enclosure; the fixture uses the sequences
// \n, \t, \, (escaped terminator), \N and \Z inside enclosed fields.
"create table t4(a varchar(20), b varchar(20))",
"LOAD DATA INFILE './testdata/loaddata_mixed_escapes.dat' INTO TABLE t4 FIELDS TERMINATED BY ',' ENCLOSED BY '\"' ESCAPED BY '\\\\'",
// t5: single-quote enclosure (written as '''' in the SQL text) and no
// ESCAPED BY clause. The fixture mixes enclosed and unenclosed fields on
// the same line.
"create table t5(a text, b text)",
"LOAD DATA INFILE './testdata/loaddata_single_quotes.dat' INTO TABLE t5 FIELDS TERMINATED BY ',' ENCLOSED BY ''''",
// t6: no ENCLOSED BY or ESCAPED BY clause; the fixture contains \N fields
// and one empty field.
"create table t6(pk int, a varchar(20), b varchar(20))",
"LOAD DATA INFILE './testdata/loaddata_nulls.dat' INTO TABLE t6 FIELDS TERMINATED BY ','",
},
Assertions: []ScriptTestAssertion{
{
// The comma inside the enclosed field must not split it.
Query: "select * from t1",
Expected: []sql.Row{{1, "foo,bar"}},
},
{
// The backslash-escaped comma is kept as a literal comma.
Query: "select * from t2",
Expected: []sql.Row{{1, "foo,bar"}},
},
{
// A doubled enclosure character inside an enclosed field yields one
// literal quote.
Query: "select * from t3 ORDER BY a",
Expected: []sql.Row{
{"a\"b", "cd\"ef"},
{"field1", "field2"},
{"foo,bar", "baz,qux"},
},
},
{
// \N is expected to load as NULL and \Z as byte 0x1A; \n and \t become
// newline and tab.
// NOTE(review): there is no ORDER BY here and the expected order differs
// from the fixture's line order — confirm the ordering is deterministic.
Query: "select * from t4",
Expected: []sql.Row{
{nil, "\x1A"},
{"a,b", "c,d"},
{"hello\nworld", "foo\tbar"},
},
},
{
// Enclosing single quotes are stripped where present; unenclosed fields
// are kept as-is.
// NOTE(review): the original author notes that adding ORDER BY a breaks
// this query; the reason is not visible here — TODO confirm.
Query: "select * from t5", // order by a breaks
Expected: []sql.Row{
{"Field A", "Field B"},
{"Field 1", "Field 2"},
{"Field 3", "Field 4"},
{"Field 5", "Field 6"},
},
},
{
// \N loads as NULL, while an empty field loads as the empty string.
Query: "select * from t6 ORDER BY pk",
Expected: []sql.Row{
{1, "hello", "world"},
{2, nil, "test"},
{3, "", "empty"},
{4, nil, nil},
},
},
},
},
{
Name: "LOAD DATA applies column defaults when \\N provided",
SetUpScript: []string{
Expand Down
1 change: 1 addition & 0 deletions enginetest/testdata/loaddata_9969.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"1","foo,bar"
3 changes: 3 additions & 0 deletions enginetest/testdata/loaddata_enclosed.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"field1","field2"
"a""b","cd""ef"
"foo,bar","baz,qux"
1 change: 1 addition & 0 deletions enginetest/testdata/loaddata_escape.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"1","foo\,bar"
3 changes: 3 additions & 0 deletions enginetest/testdata/loaddata_mixed_escapes.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"hello\nworld","foo\tbar"
"a\,b","c\,d"
"\N","\Z"
4 changes: 4 additions & 0 deletions enginetest/testdata/loaddata_nulls.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
1,hello,world
2,\N,test
3,,empty
4,\N,\N
4 changes: 4 additions & 0 deletions enginetest/testdata/loaddata_single_quotes.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Field A,'Field B'
Field 1,'Field 2'
Field 3,'Field 4'
'Field 5','Field 6'
102 changes: 64 additions & 38 deletions sql/rowexec/ddl_iters.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,55 +131,81 @@ func (l *loadDataIter) parseLinePrefix(line string) string {
}

func (l *loadDataIter) parseFields(ctx *sql.Context, line string) ([]sql.Expression, error) {
// Step 1. Start by Searching for prefix if there is one
// Start by searching for prefix if there is one
line = l.parseLinePrefix(line)
if line == "" {
return nil, nil
}

// Step 2: Split the lines into fields given the delim
fields := strings.Split(line, l.fieldsTerminatedBy)

// Step 3: Go through each field and see if it was enclosed by something
// TODO: Support the OPTIONALLY parameter.
// line is parsed character-by-character to respect enclosed fields that may contain the field terminator
var fields []string
var currentField strings.Builder
var encChar, escChar byte
if l.fieldsEnclosedBy != "" {
for i, field := range fields {
if field[0] == l.fieldsEnclosedBy[0] && field[len(field)-1] == l.fieldsEnclosedBy[0] {
fields[i] = field[1 : len(field)-1]
} else {
return nil, fmt.Errorf("error: field not properly enclosed")
}
}
encChar = l.fieldsEnclosedBy[0]
}

// Step 4: Handle the ESCAPED BY parameter.
if l.fieldsEscapedBy != "" {
for i, field := range fields {
if field == "\\N" {
fields[i] = "NULL"
} else if field == "\\Z" {
fields[i] = fmt.Sprintf("%c", 26) // ASCII 26
} else if field == "\\0" {
fields[i] = fmt.Sprintf("%c", 0) // ASCII 0
} else {
// The character immediately following the escaped character remains untouched, even if it is the same
// as the escape character
newField := make([]byte, 0, len(field))
for cIdx := 0; cIdx < len(field); cIdx++ {
c := field[cIdx]
// skip over escaped character, but always add the following character
if c == l.fieldsEscapedBy[0] {
cIdx += 1
if cIdx < len(field) {
newField = append(newField, c)
}
continue
}
newField = append(newField, c)
escChar = l.fieldsEscapedBy[0]
}
termLen := len(l.fieldsTerminatedBy)
inEnclosure := false

for i := 0; i < len(line); i++ {
ch := line[i]
if ch == encChar {
if inEnclosure {
// consume escaped char when encChar = escChar
if ch == escChar && i+1 < len(line) && line[i+1] == encChar {
currentField.WriteByte(encChar)
i++
continue
}
fields[i] = string(newField)
inEnclosure = false
continue
}
if currentField.Len() == 0 {
inEnclosure = true
continue
}
}

// we consumed the char above so we don't process when encChar = escChar
if escChar != encChar && ch == escChar && i+1 < len(line) {
i++
switch line[i] {
case 'N':
currentField.WriteString("NULL")
case 'Z':
currentField.WriteByte(26)
case '0':
currentField.WriteByte(0)
case 'n':
currentField.WriteByte('\n')
case 't':
currentField.WriteByte('\t')
case 'r':
currentField.WriteByte('\r')
case 'b':
currentField.WriteByte('\b')
default:
currentField.WriteByte(line[i])
}
continue
}

if !inEnclosure && i+termLen <= len(line) && line[i:i+termLen] == l.fieldsTerminatedBy {
fields = append(fields, currentField.String())
currentField.Reset()
i += termLen - 1
continue
}

currentField.WriteByte(ch)
}

fields = append(fields, currentField.String())
if !l.fieldsEnclosedByOpt && inEnclosure {
return nil, fmt.Errorf("error: unterminated enclosed field")
}

fieldRow := make(sql.Row, len(fields))
Expand Down