@@ -40,6 +40,7 @@ def __init__(self, cell_value, cell_location):
40
40
except ImportError :
41
41
from UserDict import UserDict # pylint: disable=F0401
42
42
43
+
43
44
def convert_type (type_string , value , timezone = pytz .timezone ('UTC' )):
44
45
if value == '' or value is None :
45
46
return None
@@ -448,18 +449,29 @@ class CSVInput(SpreadsheetInput):
448
449
encoding = 'utf-8'
449
450
450
451
def get_sheet_headings(self, sheet_name):
    """Return the heading row of the CSV file backing ``sheet_name``.

    The heading row is the row at index ``skipRows`` (default 0), plus one
    when the sheet has its own non-empty configuration entry.

    Returns a list of heading cells. An empty list is returned when the
    active configuration sets ``ignore`` or when the file has no row at
    the heading position, so callers always get an iterable.
    """
    sheet_configuration = self.sheet_configuration[self.sheet_names_map[sheet_name]]
    # Computed before the fallbacks below: a sheet with its own
    # configuration has one extra leading row to step over, even when
    # use_configuration is off and the settings themselves are discarded.
    configuration_line = 1 if sheet_configuration else 0
    if not sheet_configuration:
        sheet_configuration = self.base_configuration
    if not self.use_configuration:
        sheet_configuration = {}

    if sheet_configuration.get("ignore"):
        # returning empty headers is a proxy for no data in the sheet.
        return []
    heading_index = sheet_configuration.get("skipRows", 0) + configuration_line

    csv_path = os.path.join(self.input_name, sheet_name + '.csv')
    if sys.version > '3':  # If Python 3 or greater
        # The encoding is handled by open()
        open_kwargs, reader_kwargs = {'encoding': self.encoding}, {}
    else:  # If Python 2
        # The encoding is handled by the csv reader
        open_kwargs, reader_kwargs = {}, {'encoding': self.encoding}

    with open(csv_path, **open_kwargs) as main_sheet_file:
        for num, row in enumerate(csvreader(main_sheet_file, **reader_kwargs)):
            if num == heading_index:
                return row
    # The file ended before the heading row was reached.
    return []
463
475
464
476
def read_sheets (self ):
465
477
sheet_file_names = os .listdir (self .input_name )
@@ -474,21 +486,66 @@ def read_sheets(self):
474
486
except ValueError :
475
487
pass
476
488
self .sub_sheet_names = sheet_names
489
+ self .sheet_names_map = OrderedDict ((sheet_name , sheet_name ) for sheet_name in sheet_names )
477
490
self .configure_sheets ()
478
491
492
def generate_rows(self, dictreader, sheet_name):
    """Yield each data row of a sheet as an OrderedDict keyed by heading.

    ``dictreader`` is a DictReader positioned at the start of the sheet's
    CSV file; ``sheet_name`` selects the configuration to apply. The
    configuration line (present when the sheet has its own non-empty
    configuration) and ``skipRows`` rows are discarded before the heading
    row, and ``headerRows - 1`` rows are discarded after it.

    If the file ends before the first data row is reached, the generator
    simply finishes without yielding anything.
    """
    sheet_configuration = self.sheet_configuration[self.sheet_names_map[sheet_name]]
    # Computed before the fallbacks below so the extra leading row is still
    # skipped when the sheet's own settings end up being discarded.
    configuration_line = 1 if sheet_configuration else 0
    if not sheet_configuration:
        sheet_configuration = self.base_configuration
    if not self.use_configuration:
        sheet_configuration = {}

    skip_rows = sheet_configuration.get("skipRows", 0)
    header_rows = sheet_configuration.get("headerRows", 1)
    rows_before_heading = configuration_line + skip_rows

    # next() on an exhausted reader raises StopIteration; inside a
    # generator that becomes a RuntimeError on Python 3.7+ (PEP 479), so
    # a too-short file is handled explicitly here.
    try:
        previous_row = None
        for _ in range(rows_before_heading):
            previous_row = next(dictreader.reader)
        if sys.version > '3':  # If Python 3 or greater
            # Accessing fieldnames reads the heading row from the reader.
            fieldnames = dictreader.fieldnames
        else:
            # unicodecsv dictreader always reads the headingline first
            # so in the case of there being any rows to skip look at
            # previous row and use that for fieldnames.
            if rows_before_heading:
                fieldnames = previous_row
                dictreader.fieldnames = fieldnames
                dictreader.unicode_fieldnames = fieldnames
            else:
                fieldnames = dictreader.unicode_fieldnames
        # Extra heading rows beyond the first carry no data.
        for _ in range(header_rows - 1):
            next(dictreader.reader)
    except StopIteration:
        return

    for line in dictreader:
        yield OrderedDict((fieldname, line[fieldname]) for fieldname in fieldnames)
520
+
521
def get_sheet_configuration(self, sheet_name):
    """Return the raw configuration cells from the sheet's first row.

    The first row of ``<input_name>/<sheet_name>.csv`` is treated as a
    configuration line when its first cell is exactly ``#``; the cells
    after that marker are returned. In every other case, including an
    empty file or a blank first line, an empty list is returned.
    """
    csv_path = os.path.join(self.input_name, sheet_name + '.csv')
    # next(..., []) keeps an empty file from raising StopIteration.
    if sys.version > '3':  # If Python 3 or greater
        with open(csv_path, encoding=self.encoding) as main_sheet_file:
            heading_row = next(csvreader(main_sheet_file), [])
    else:  # If Python 2
        with open(csv_path) as main_sheet_file:
            heading_row = next(csvreader(main_sheet_file, encoding=self.encoding), [])
    # A blank first line parses to an empty row, so check before indexing.
    if heading_row and heading_row[0] == '#':
        return heading_row[1:]
    return []
533
+
534
+
535
+
479
536
def get_sheet_lines(self, sheet_name):
    """Yield the rows of ``<input_name>/<sheet_name>.csv``.

    The file is wrapped in a DictReader and handed to
    ``self.generate_rows``; every row that produces is yielded unchanged.
    The file stays open for as long as the generator is being consumed.
    """
    csv_path = os.path.join(self.input_name, sheet_name + '.csv')
    if sys.version > '3':  # If Python 3 or greater
        # Pass the encoding to the open function
        open_options = {'encoding': self.encoding}
        reader_options = {}
    else:  # If Python 2
        # Pass the encoding to DictReader
        open_options = {}
        reader_options = {'encoding': self.encoding}

    with open(csv_path, **open_options) as sheet_file:
        sheet_reader = DictReader(sheet_file, **reader_options)
        for sheet_row in self.generate_rows(sheet_reader, sheet_name):
            yield sheet_row
492
549
493
550
494
551
class XLSXInput (SpreadsheetInput ):
0 commit comments