@@ -131,54 +131,112 @@ func (l *loadDataIter) parseLinePrefix(line string) string {
131131}
132132
133133func (l * loadDataIter ) parseFields (ctx * sql.Context , line string ) ([]sql.Expression , error ) {
134- // Step 1. Start by Searching for prefix if there is one
134+ // Start by searching for prefix if there is one
135135 line = l .parseLinePrefix (line )
136136 if line == "" {
137137 return nil , nil
138138 }
139139
140- // Step 2: Split the lines into fields given the delim
141- fields := strings .Split (line , l .fieldsTerminatedBy )
140+ // Split the line into fields. When ENCLOSED BY is specified, fields must be parsed
141+ // character-by-character to respect quoted fields that may contain the field terminator.
142+ var fields []string
143+ if l .fieldsEnclosedBy == "" {
144+ fields = strings .Split (line , l .fieldsTerminatedBy )
145+ } else {
146+ var currentField strings.Builder
147+ inEnclosure := false
148+ encChar := l .fieldsEnclosedBy [0 ]
149+ escChar := byte (0 )
150+ if l .fieldsEscapedBy != "" {
151+ escChar = l .fieldsEscapedBy [0 ]
152+ }
153+ termLen := len (l .fieldsTerminatedBy )
154+
155+ for i := 0 ; i < len (line ); i ++ {
156+ c := line [i ]
157+
158+ // Handle enclosure character
159+ if c == encChar {
160+ if inEnclosure {
161+ // Check for doubled enclosure (escape mechanism when encChar == escChar)
162+ if i + 1 < len (line ) && line [i + 1 ] == encChar {
163+ currentField .WriteByte (encChar )
164+ i ++
165+ continue
166+ }
167+ inEnclosure = false
168+ continue
169+ }
170+ if currentField .Len () == 0 {
171+ inEnclosure = true
172+ continue
173+ }
174+ }
142175
143- // Step 3: Go through each field and see if it was enclosed by something
144- // TODO: Support the OPTIONALLY parameter.
145- if l .fieldsEnclosedBy != "" {
146- for i , field := range fields {
147- if field [0 ] == l .fieldsEnclosedBy [0 ] && field [len (field )- 1 ] == l .fieldsEnclosedBy [0 ] {
148- fields [i ] = field [1 : len (field )- 1 ]
149- } else {
150- return nil , fmt .Errorf ("error: field not properly enclosed" )
176+ // Handle escape character (only when different from enclosure character)
177+ if escChar != 0 && escChar != encChar && c == escChar && i + 1 < len (line ) {
178+ currentField .WriteByte (c )
179+ i ++
180+ currentField .WriteByte (line [i ])
181+ continue
182+ }
183+
184+ // Handle field terminator (only outside enclosures)
185+ if ! inEnclosure && i + termLen <= len (line ) && line [i :i + termLen ] == l .fieldsTerminatedBy {
186+ fields = append (fields , currentField .String ())
187+ currentField .Reset ()
188+ i += termLen - 1
189+ continue
151190 }
191+
192+ currentField .WriteByte (c )
193+ }
194+
195+ fields = append (fields , currentField .String ())
196+ if ! l .fieldsEnclosedByOpt && inEnclosure {
197+ return nil , fmt .Errorf ("error: unterminated enclosed field" )
152198 }
153199 }
154200
155- // Step 4: Handle the ESCAPED BY parameter.
156- if l .fieldsEscapedBy != "" {
201+ // Handle ESCAPED BY parameter for special sequences like \N, \Z, \0, \n, \t, etc.
202+ // When ESCAPED BY equals ENCLOSED BY, escaping was already handled via doubling.
203+ if l .fieldsEscapedBy != "" && l .fieldsEscapedBy != l .fieldsEnclosedBy {
204+ escByte := l .fieldsEscapedBy [0 ]
157205 for i , field := range fields {
158- if field == "\\ N" {
159- fields [i ] = "NULL"
160- } else if field == "\\ Z" {
161- fields [i ] = fmt .Sprintf ("%c" , 26 ) // ASCII 26
162- } else if field == "\\ 0" {
163- fields [i ] = fmt .Sprintf ("%c" , 0 ) // ASCII 0
164- } else {
165- // The character immediately following the escaped character remains untouched, even if it is the same
166- // as the escape character
167- newField := make ([]byte , 0 , len (field ))
168- for cIdx := 0 ; cIdx < len (field ); cIdx ++ {
169- c := field [cIdx ]
170- // skip over escaped character, but always add the following character
171- if c == l .fieldsEscapedBy [0 ] {
172- cIdx += 1
173- if cIdx < len (field ) {
174- newField = append (newField , c )
175- }
176- continue
177- }
178- newField = append (newField , c )
206+ if ! strings .ContainsRune (field , rune (escByte )) {
207+ continue
208+ }
209+
210+ newField := make ([]byte , 0 , len (field ))
211+ for j := 0 ; j < len (field ); j ++ {
212+ if field [j ] != escByte || j + 1 >= len (field ) {
213+ newField = append (newField , field [j ])
214+ continue
215+ }
216+
217+ j ++
218+ switch field [j ] {
219+ case 'N' :
220+ fields [i ] = "NULL"
221+ goto nextField
222+ case 'Z' :
223+ newField = append (newField , 26 )
224+ case '0' :
225+ newField = append (newField , 0 )
226+ case 'n' :
227+ newField = append (newField , '\n' )
228+ case 't' :
229+ newField = append (newField , '\t' )
230+ case 'r' :
231+ newField = append (newField , '\r' )
232+ case 'b' :
233+ newField = append (newField , '\b' )
234+ default :
235+ newField = append (newField , field [j ])
179236 }
180- fields [i ] = string (newField )
181237 }
238+ fields [i ] = string (newField )
239+ nextField:
182240 }
183241 }
184242
0 commit comments