@@ -137,107 +137,75 @@ func (l *loadDataIter) parseFields(ctx *sql.Context, line string) ([]sql.Express
137137 return nil , nil
138138 }
139139
140- // Split the line into fields. When ENCLOSED BY is specified, fields must be parsed
141- // character-by-character to respect quoted fields that may contain the field terminator.
140+ // line is parsed character-by-character to respect enclosed fields that may contain the field terminator
142141 var fields []string
143- if l .fieldsEnclosedBy == "" {
144- fields = strings .Split (line , l .fieldsTerminatedBy )
145- } else {
146- var currentField strings.Builder
147- inEnclosure := false
148- encChar := l .fieldsEnclosedBy [0 ]
149- escChar := byte (0 )
150- if l .fieldsEscapedBy != "" {
151- escChar = l .fieldsEscapedBy [0 ]
152- }
153- termLen := len (l .fieldsTerminatedBy )
154-
155- for i := 0 ; i < len (line ); i ++ {
156- c := line [i ]
157-
158- // Handle enclosure character
159- if c == encChar {
160- if inEnclosure {
161- // Check for doubled enclosure (escape mechanism when encChar == escChar)
162- if i + 1 < len (line ) && line [i + 1 ] == encChar {
163- currentField .WriteByte (encChar )
164- i ++
165- continue
166- }
167- inEnclosure = false
168- continue
169- }
170- if currentField .Len () == 0 {
171- inEnclosure = true
142+ var currentField strings.Builder
143+ var encChar , escChar byte
144+ if l .fieldsEnclosedBy != "" {
145+ encChar = l .fieldsEnclosedBy [0 ]
146+ }
147+ if l .fieldsEscapedBy != "" {
148+ escChar = l .fieldsEscapedBy [0 ]
149+ }
150+ termLen := len (l .fieldsTerminatedBy )
151+ inEnclosure := false
152+
153+ for i := 0 ; i < len (line ); i ++ {
154+ ch := line [i ]
155+ if ch == encChar {
156+ if inEnclosure {
157+ // consume escaped char when encChar = escChar
158+ if ch == escChar && i + 1 < len (line ) && line [i + 1 ] == encChar {
159+ currentField .WriteByte (encChar )
160+ i ++
172161 continue
173162 }
174- }
175-
176- // Handle escape character (only when different from enclosure character)
177- if escChar != 0 && escChar != encChar && c == escChar && i + 1 < len (line ) {
178- currentField .WriteByte (c )
179- i ++
180- currentField .WriteByte (line [i ])
163+ inEnclosure = false
181164 continue
182165 }
183-
184- // Handle field terminator (only outside enclosures)
185- if ! inEnclosure && i + termLen <= len (line ) && line [i :i + termLen ] == l .fieldsTerminatedBy {
186- fields = append (fields , currentField .String ())
187- currentField .Reset ()
188- i += termLen - 1
166+ if currentField .Len () == 0 {
167+ inEnclosure = true
189168 continue
190169 }
191-
192- currentField .WriteByte (c )
193170 }
194171
195- fields = append (fields , currentField .String ())
196- if ! l .fieldsEnclosedByOpt && inEnclosure {
197- return nil , fmt .Errorf ("error: unterminated enclosed field" )
172+ // we consumed the char above so we don't process when encChar = escChar
173+ if escChar != encChar && ch == escChar && i + 1 < len (line ) {
174+ i ++
175+ switch line [i ] {
176+ case 'N' :
177+ currentField .WriteString ("NULL" )
178+ case 'Z' :
179+ currentField .WriteByte (26 )
180+ case '0' :
181+ currentField .WriteByte (0 )
182+ case 'n' :
183+ currentField .WriteByte ('\n' )
184+ case 't' :
185+ currentField .WriteByte ('\t' )
186+ case 'r' :
187+ currentField .WriteByte ('\r' )
188+ case 'b' :
189+ currentField .WriteByte ('\b' )
190+ default :
191+ currentField .WriteByte (line [i ])
192+ }
193+ continue
198194 }
199- }
200195
201- // Handle ESCAPED BY parameter for special sequences like \N, \Z, \0, \n, \t, etc.
202- // When ESCAPED BY equals ENCLOSED BY, escaping was already handled via doubling.
203- if l .fieldsEscapedBy != "" && l .fieldsEscapedBy != l .fieldsEnclosedBy {
204- escByte := l .fieldsEscapedBy [0 ]
205- for i , field := range fields {
206- if ! strings .ContainsRune (field , rune (escByte )) {
207- continue
208- }
196+ if ! inEnclosure && i + termLen <= len (line ) && line [i :i + termLen ] == l .fieldsTerminatedBy {
197+ fields = append (fields , currentField .String ())
198+ currentField .Reset ()
199+ i += termLen - 1
200+ continue
201+ }
209202
210- newField := make ([]byte , 0 , len (field ))
211- for j := 0 ; j < len (field ); j ++ {
212- if field [j ] != escByte || j + 1 >= len (field ) {
213- newField = append (newField , field [j ])
214- continue
215- }
203+ currentField .WriteByte (ch )
204+ }
216205
217- j ++
218- switch field [j ] {
219- case 'N' :
220- fields [i ] = "NULL"
221- goto nextField
222- case 'Z' :
223- newField = append (newField , 26 )
224- case '0' :
225- newField = append (newField , 0 )
226- case 'n' :
227- newField = append (newField , '\n' )
228- case 't' :
229- newField = append (newField , '\t' )
230- case 'r' :
231- newField = append (newField , '\r' )
232- case 'b' :
233- newField = append (newField , '\b' )
234- default :
235- newField = append (newField , field [j ])
236- }
237- }
238- fields [i ] = string (newField )
239- nextField:
240- }
206+ fields = append (fields , currentField .String ())
207+ if ! l .fieldsEnclosedByOpt && inEnclosure {
208+ return nil , fmt .Errorf ("error: unterminated enclosed field" )
241209 }
242210
243211 fieldRow := make (sql.Row , len (fields ))
0 commit comments