@@ -265,7 +265,7 @@ class dialect(Dialect):
265265 # _csv.reader won't accept a quotechar of ''
266266 dialect .quotechar = quotechar or '"'
267267 dialect .skipinitialspace = skipinitialspace
268-
268+ dialect . quoting = self . _guess_quoting ( sample , dialect )
269269 return dialect
270270
271271
@@ -448,6 +448,66 @@ def _guess_delimiter(self, data, delimiters):
448448 data [0 ].count ("%c " % delim ))
449449 return (delim , skipinitialspace )
450450
451+ def _guess_quoting (self , data , dialect ):
452+ """
453+ Looks for the quoting rules that are used in the data setting the most conservative quoting rule.
454+ """
455+ lines = data .split (dialect .lineterminator )
456+ if len (lines ) == 1 :
457+ lines = data .split ("\n " )
458+ policies = []
459+ for line in lines [:5 ]:
460+ elems = line .split (dialect .delimiter )
461+ num_quoted_elems = 0
462+ num_float_elems = 0
463+ num_none_elems = 0
464+ for elem in elems :
465+ if not elem :
466+ continue
467+ if elem [0 ] == dialect .quotechar :
468+ num_quoted_elems += 1
469+ elem = elem .strip (dialect .quotechar )
470+ if dialect .doublequote :
471+ if elem [0 ] == dialect .quotechar :
472+ elem = elem .strip (dialect .quotechar )
473+ elif elem [0 ] == '"' :
474+ elem = elem .strip ('"' )
475+ else :
476+ elem = elem .strip ("'" )
477+ try :
478+ float (elem )
479+ num_float_elems += 1
480+ is_float = True
481+ except ValueError :
482+ is_float = False
483+ if elem == "None" :
484+ num_none_elems += 1
485+ max_quote_policy = QUOTE_NONE
486+ if num_quoted_elems == len (elems ):
487+ max_quote_policy = QUOTE_ALL
488+ elif num_quoted_elems == len (elems ) - num_none_elems :
489+ max_quote_policy = QUOTE_NOTNULL
490+ elif num_quoted_elems == len (elems ) - num_float_elems :
491+ max_quote_policy = QUOTE_NONNUMERIC
492+ elif num_quoted_elems == len (elems ) - num_none_elems - num_float_elems :
493+ max_quote_policy = QUOTE_STRINGS
494+ elif num_quoted_elems > 0 :
495+ max_quote_policy = QUOTE_MINIMAL
496+ policies .append (max_quote_policy )
497+ return self ._determine_quote_priority (policies )
498+
499+ def _determine_quote_priority (self , quote_policies ):
500+ priority_order = {
501+ QUOTE_ALL : 50 ,
502+ QUOTE_NOTNULL : 40 ,
503+ QUOTE_NONNUMERIC : 30 ,
504+ QUOTE_STRINGS : 20 ,
505+ QUOTE_MINIMAL : 10 ,
506+ QUOTE_NONE : 0
507+ }
508+ reverse_priority_order = {v : k for k , v in priority_order .items ()}
509+ max_priority = map (lambda e :priority_order .get (e , 0 ), quote_policies )
510+ return reverse_priority_order [max (max_priority )]
451511
452512 def has_header (self , sample ):
453513 # Creates a dictionary of types of data in each column. If any
0 commit comments