@@ -72,8 +72,15 @@ impl ImportFormat {
7272 {
7373 match self {
7474 ImportFormat :: CSV | ImportFormat :: CSVNoHeader | ImportFormat :: CSVOptions { .. } => {
75+ let ( delimiter, disable_quoting) = match self {
76+ ImportFormat :: CSV | ImportFormat :: CSVNoHeader => ( ',' , false ) ,
77+ ImportFormat :: CSVOptions {
78+ delimiter, quote, ..
79+ } => ( delimiter. unwrap_or ( ',' ) , quote. is_none ( ) ) ,
80+ } ;
81+
7582 let lines_stream: Pin < Box < dyn Stream < Item = Result < String , CubeError > > + Send > > =
76- Box :: pin ( CsvLineStream :: new ( reader) ) ;
83+ Box :: pin ( CsvLineStream :: new ( reader, disable_quoting ) ) ;
7784
7885 let mut header_mapping = match self {
7986 ImportFormat :: CSVNoHeader
@@ -89,11 +96,6 @@ impl ImportFormat {
8996 _ => None ,
9097 } ;
9198
92- let delimiter = match self {
93- ImportFormat :: CSV | ImportFormat :: CSVNoHeader => ',' ,
94- ImportFormat :: CSVOptions { delimiter, .. } => delimiter. unwrap_or ( ',' ) ,
95- } ;
96-
9799 if delimiter as u16 > 255 {
98100 return Err ( CubeError :: user ( format ! (
99101 "Non ASCII delimiters are unsupported: '{}'" ,
@@ -104,7 +106,8 @@ impl ImportFormat {
104106 let rows = lines_stream. map ( move |line| -> Result < Option < Row > , CubeError > {
105107 let str = line?;
106108
107- let mut parser = CsvLineParser :: new ( delimiter as u8 , str. as_str ( ) ) ;
109+ let mut parser =
110+ CsvLineParser :: new ( delimiter as u8 , disable_quoting, str. as_str ( ) ) ;
108111
109112 if header_mapping. is_none ( ) {
110113 let mut mapping = Vec :: new ( ) ;
@@ -310,21 +313,23 @@ fn parse_binary_data(value: &str) -> Result<Vec<u8>, CubeError> {
310313
311314struct CsvLineParser < ' a > {
312315 delimiter : u8 ,
316+ disable_quoting : bool ,
313317 line : & ' a str ,
314318 remaining : & ' a str ,
315319}
316320
317321impl < ' a > CsvLineParser < ' a > {
318- fn new ( delimiter : u8 , line : & ' a str ) -> Self {
322+ fn new ( delimiter : u8 , disable_quoting : bool , line : & ' a str ) -> Self {
319323 Self {
320324 delimiter,
325+ disable_quoting,
321326 line,
322327 remaining : line,
323328 }
324329 }
325330
326331 fn next_value ( & mut self ) -> Result < MaybeOwnedStr < ' _ > , CubeError > {
327- Ok (
332+ if ! self . disable_quoting {
328333 if let Some ( b'"' ) = self . remaining . as_bytes ( ) . iter ( ) . nth ( 0 ) {
329334 let mut closing_index = None ;
330335 let mut seen_escapes = false ;
@@ -356,19 +361,18 @@ impl<'a> CsvLineParser<'a> {
356361 res = MaybeOwnedStr :: Borrowed ( & self . remaining [ 0 ..closing_index] )
357362 }
358363 self . remaining = self . remaining [ ( closing_index + 1 ) ..] . as_ref ( ) ;
359- res
360- } else {
361- let next_comma = self
362- . remaining
363- . as_bytes ( )
364- . iter ( )
365- . position ( |c| * c == self . delimiter )
366- . unwrap_or ( self . remaining . len ( ) ) ;
367- let res = & self . remaining [ 0 ..next_comma] ;
368- self . remaining = self . remaining [ next_comma..] . as_ref ( ) ;
369- MaybeOwnedStr :: Borrowed ( res)
370- } ,
371- )
364+ return Ok ( res) ;
365+ }
366+ }
367+ let next_comma = self
368+ . remaining
369+ . as_bytes ( )
370+ . iter ( )
371+ . position ( |c| * c == self . delimiter )
372+ . unwrap_or ( self . remaining . len ( ) ) ;
373+ let res = & self . remaining [ 0 ..next_comma] ;
374+ self . remaining = self . remaining [ next_comma..] . as_ref ( ) ;
375+ Ok ( MaybeOwnedStr :: Borrowed ( res) )
372376 }
373377
374378 fn advance ( & mut self ) -> Result < ( ) , CubeError > {
@@ -385,15 +389,17 @@ pin_project! {
385389 struct CsvLineStream <R : AsyncBufRead > {
386390 #[ pin]
387391 reader: R ,
392+ disable_quoting: bool ,
388393 buf: Vec <u8 >,
389394 in_quotes: bool ,
390395 }
391396}
392397
393398impl < R : AsyncBufRead > CsvLineStream < R > {
394- pub fn new ( reader : R ) -> Self {
399+ pub fn new ( reader : R , disable_quoting : bool ) -> Self {
395400 Self {
396401 reader,
402+ disable_quoting,
397403 buf : Vec :: new ( ) ,
398404 in_quotes : false ,
399405 }
@@ -417,38 +423,49 @@ impl<R: AsyncBufRead> Stream for CsvLineStream<R> {
417423 return Poll :: Ready ( Some ( Err ( CubeError :: from_error ( err) ) ) ) ;
418424 }
419425 Ok ( available) => {
420- if * projected. in_quotes {
421- let quote_pos = memchr:: memchr ( b'"' , available) ;
422- if let Some ( i) = quote_pos {
423- // It consumes every pair of quotes.
424- // Matching for escapes is unnecessary as it's double "" sequence
425- * projected. in_quotes = false ;
426+ if * projected. disable_quoting {
427+ let new_line_pos = memchr:: memchr ( b'\n' , available) ;
428+ if let Some ( i) = new_line_pos {
426429 projected. buf . extend_from_slice ( & available[ ..=i] ) ;
427- ( false , i + 1 )
430+ ( true , i + 1 )
428431 } else {
429432 projected. buf . extend_from_slice ( available) ;
430433 ( false , available. len ( ) )
431434 }
432435 } else {
433- let new_line_pos = memchr:: memchr ( b'\n' , available) ;
434- let quote_pos = memchr:: memchr ( b'"' , available) ;
435- let in_quotes = quote_pos. is_some ( )
436- && ( new_line_pos. is_some ( ) && quote_pos < new_line_pos
437- || new_line_pos. is_none ( ) ) ;
438- if in_quotes {
436+ if * projected. in_quotes {
437+ let quote_pos = memchr:: memchr ( b'"' , available) ;
439438 if let Some ( i) = quote_pos {
439+ // It consumes every pair of quotes.
440+ // Matching for escapes is unnecessary as it's double "" sequence
441+ * projected. in_quotes = false ;
440442 projected. buf . extend_from_slice ( & available[ ..=i] ) ;
441- * projected. in_quotes = in_quotes;
442443 ( false , i + 1 )
443444 } else {
444- unreachable ! ( )
445+ projected. buf . extend_from_slice ( available) ;
446+ ( false , available. len ( ) )
445447 }
446- } else if let Some ( i) = new_line_pos {
447- projected. buf . extend_from_slice ( & available[ ..=i] ) ;
448- ( true , i + 1 )
449448 } else {
450- projected. buf . extend_from_slice ( available) ;
451- ( false , available. len ( ) )
449+ let new_line_pos = memchr:: memchr ( b'\n' , available) ;
450+ let quote_pos = memchr:: memchr ( b'"' , available) ;
451+ let in_quotes = quote_pos. is_some ( )
452+ && ( new_line_pos. is_some ( ) && quote_pos < new_line_pos
453+ || new_line_pos. is_none ( ) ) ;
454+ if in_quotes {
455+ if let Some ( i) = quote_pos {
456+ projected. buf . extend_from_slice ( & available[ ..=i] ) ;
457+ * projected. in_quotes = in_quotes;
458+ ( false , i + 1 )
459+ } else {
460+ unreachable ! ( )
461+ }
462+ } else if let Some ( i) = new_line_pos {
463+ projected. buf . extend_from_slice ( & available[ ..=i] ) ;
464+ ( true , i + 1 )
465+ } else {
466+ projected. buf . extend_from_slice ( available) ;
467+ ( false , available. len ( ) )
468+ }
452469 }
453470 }
454471 }
0 commit comments