Skip to content

Commit b9b32d3

Browse files
committed
amend to use ch-by-ch only
1 parent 44b3c00 commit b9b32d3

File tree

2 files changed

+56
-89
lines changed

2 files changed

+56
-89
lines changed

enginetest/queries/load_queries.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,6 @@ var LoadDataScripts = []ScriptTest{
190190
},
191191
},
192192
},
193-
// https://github.com/dolthub/dolt/issues/9969
194193
{
195194
Name: "Load JSON data. EnclosedBy and EscapedBy are the same.",
196195
SetUpScript: []string{

sql/rowexec/ddl_iters.go

Lines changed: 56 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -137,107 +137,75 @@ func (l *loadDataIter) parseFields(ctx *sql.Context, line string) ([]sql.Express
137137
return nil, nil
138138
}
139139

140-
// Split the line into fields. When ENCLOSED BY is specified, fields must be parsed
141-
// character-by-character to respect quoted fields that may contain the field terminator.
140+
// line is parsed character-by-character to respect enclosed fields that may contain the field terminator
142141
var fields []string
143-
if l.fieldsEnclosedBy == "" {
144-
fields = strings.Split(line, l.fieldsTerminatedBy)
145-
} else {
146-
var currentField strings.Builder
147-
inEnclosure := false
148-
encChar := l.fieldsEnclosedBy[0]
149-
escChar := byte(0)
150-
if l.fieldsEscapedBy != "" {
151-
escChar = l.fieldsEscapedBy[0]
152-
}
153-
termLen := len(l.fieldsTerminatedBy)
154-
155-
for i := 0; i < len(line); i++ {
156-
c := line[i]
157-
158-
// Handle enclosure character
159-
if c == encChar {
160-
if inEnclosure {
161-
// Check for doubled enclosure (escape mechanism when encChar == escChar)
162-
if i+1 < len(line) && line[i+1] == encChar {
163-
currentField.WriteByte(encChar)
164-
i++
165-
continue
166-
}
167-
inEnclosure = false
168-
continue
169-
}
170-
if currentField.Len() == 0 {
171-
inEnclosure = true
142+
var currentField strings.Builder
143+
var encChar, escChar byte
144+
if l.fieldsEnclosedBy != "" {
145+
encChar = l.fieldsEnclosedBy[0]
146+
}
147+
if l.fieldsEscapedBy != "" {
148+
escChar = l.fieldsEscapedBy[0]
149+
}
150+
termLen := len(l.fieldsTerminatedBy)
151+
inEnclosure := false
152+
153+
for i := 0; i < len(line); i++ {
154+
ch := line[i]
155+
if ch == encChar {
156+
if inEnclosure {
157+
// consume escaped char when encChar = escChar
158+
if ch == escChar && i+1 < len(line) && line[i+1] == encChar {
159+
currentField.WriteByte(encChar)
160+
i++
172161
continue
173162
}
174-
}
175-
176-
// Handle escape character (only when different from enclosure character)
177-
if escChar != 0 && escChar != encChar && c == escChar && i+1 < len(line) {
178-
currentField.WriteByte(c)
179-
i++
180-
currentField.WriteByte(line[i])
163+
inEnclosure = false
181164
continue
182165
}
183-
184-
// Handle field terminator (only outside enclosures)
185-
if !inEnclosure && i+termLen <= len(line) && line[i:i+termLen] == l.fieldsTerminatedBy {
186-
fields = append(fields, currentField.String())
187-
currentField.Reset()
188-
i += termLen - 1
166+
if currentField.Len() == 0 {
167+
inEnclosure = true
189168
continue
190169
}
191-
192-
currentField.WriteByte(c)
193170
}
194171

195-
fields = append(fields, currentField.String())
196-
if !l.fieldsEnclosedByOpt && inEnclosure {
197-
return nil, fmt.Errorf("error: unterminated enclosed field")
172+
// when encChar == escChar, escaping was already handled by the enclosure branch above, so escape processing is skipped here
173+
if escChar != encChar && ch == escChar && i+1 < len(line) {
174+
i++
175+
switch line[i] {
176+
case 'N':
177+
currentField.WriteString("NULL")
178+
case 'Z':
179+
currentField.WriteByte(26)
180+
case '0':
181+
currentField.WriteByte(0)
182+
case 'n':
183+
currentField.WriteByte('\n')
184+
case 't':
185+
currentField.WriteByte('\t')
186+
case 'r':
187+
currentField.WriteByte('\r')
188+
case 'b':
189+
currentField.WriteByte('\b')
190+
default:
191+
currentField.WriteByte(line[i])
192+
}
193+
continue
198194
}
199-
}
200195

201-
// Handle ESCAPED BY parameter for special sequences like \N, \Z, \0, \n, \t, etc.
202-
// When ESCAPED BY equals ENCLOSED BY, escaping was already handled via doubling.
203-
if l.fieldsEscapedBy != "" && l.fieldsEscapedBy != l.fieldsEnclosedBy {
204-
escByte := l.fieldsEscapedBy[0]
205-
for i, field := range fields {
206-
if !strings.ContainsRune(field, rune(escByte)) {
207-
continue
208-
}
196+
if !inEnclosure && i+termLen <= len(line) && line[i:i+termLen] == l.fieldsTerminatedBy {
197+
fields = append(fields, currentField.String())
198+
currentField.Reset()
199+
i += termLen - 1
200+
continue
201+
}
209202

210-
newField := make([]byte, 0, len(field))
211-
for j := 0; j < len(field); j++ {
212-
if field[j] != escByte || j+1 >= len(field) {
213-
newField = append(newField, field[j])
214-
continue
215-
}
203+
currentField.WriteByte(ch)
204+
}
216205

217-
j++
218-
switch field[j] {
219-
case 'N':
220-
fields[i] = "NULL"
221-
goto nextField
222-
case 'Z':
223-
newField = append(newField, 26)
224-
case '0':
225-
newField = append(newField, 0)
226-
case 'n':
227-
newField = append(newField, '\n')
228-
case 't':
229-
newField = append(newField, '\t')
230-
case 'r':
231-
newField = append(newField, '\r')
232-
case 'b':
233-
newField = append(newField, '\b')
234-
default:
235-
newField = append(newField, field[j])
236-
}
237-
}
238-
fields[i] = string(newField)
239-
nextField:
240-
}
206+
fields = append(fields, currentField.String())
207+
if !l.fieldsEnclosedByOpt && inEnclosure {
208+
return nil, fmt.Errorf("error: unterminated enclosed field")
241209
}
242210

243211
fieldRow := make(sql.Row, len(fields))

0 commit comments

Comments
 (0)