@@ -40,6 +40,7 @@ def __init__(self, cell_value, cell_location):
40
40
except ImportError :
41
41
from UserDict import UserDict # pylint: disable=F0401
42
42
43
+
43
44
def convert_type (type_string , value , timezone = pytz .timezone ('UTC' )):
44
45
if value == '' or value is None :
45
46
return None
@@ -93,6 +94,8 @@ def convert_type(type_string, value, timezone = pytz.timezone('UTC')):
93
94
elif type_string == '' :
94
95
if type (value ) == datetime .datetime :
95
96
return timezone .localize (value ).isoformat ()
97
+ if type (value ) == float and int (value ) == value :
98
+ return int (value )
96
99
return value if type (value ) in [int ] else text_type (value )
97
100
else :
98
101
raise ValueError ('Unrecognised type: "{}"' .format (type_string ))
@@ -446,18 +449,29 @@ class CSVInput(SpreadsheetInput):
446
449
encoding = 'utf-8'
447
450
448
451
def get_sheet_headings(self, sheet_name):
    """Return the heading row of the CSV file backing ``sheet_name``.

    The heading row is the row at index ``skipRows + configuration_line``,
    where ``configuration_line`` is 1 when the sheet has a non-empty
    configuration of its own and 0 otherwise.

    Returns ``[]`` when the effective configuration sets ``ignore``
    (empty headings are a proxy for "no data in the sheet"), and ``None``
    when the file has no row at the computed index.
    """
    sheet_configuration = self.sheet_configuration[self.sheet_names_map[sheet_name]]
    # A non-empty per-sheet configuration counts as one extra leading line.
    configuration_line = 1 if sheet_configuration else 0
    if not sheet_configuration:
        sheet_configuration = self.base_configuration
    if not self.use_configuration:
        sheet_configuration = {}
    skip_rows = sheet_configuration.get("skipRows", 0)
    if sheet_configuration.get("ignore"):
        # returning empty headers is a proxy for no data in the sheet.
        return []
    heading_index = skip_rows + configuration_line

    path = os.path.join(self.input_name, sheet_name + '.csv')
    # Compare the version tuple, not the version string: string comparison
    # is lexicographic and only works by accident.
    if sys.version_info[0] >= 3:
        # Python 3: the file object does the decoding.
        sheet_file = open(path, encoding=self.encoding)
        reader_kwargs = {}
    else:
        # Python 2: the reader is handed the encoding instead.
        sheet_file = open(path)
        reader_kwargs = {'encoding': self.encoding}

    with sheet_file:
        for num, row in enumerate(csvreader(sheet_file, **reader_kwargs)):
            if num == heading_index:
                return row
    # The file ended before the heading row was reached.
    return None
461
475
462
476
def read_sheets (self ):
463
477
sheet_file_names = os .listdir (self .input_name )
@@ -472,21 +486,66 @@ def read_sheets(self):
472
486
except ValueError :
473
487
pass
474
488
self .sub_sheet_names = sheet_names
489
+ self .sheet_names_map = OrderedDict ((sheet_name , sheet_name ) for sheet_name in sheet_names )
475
490
self .configure_sheets ()
476
491
492
def generate_rows(self, dictreader, sheet_name):
    """Yield each data row of ``sheet_name`` as an ``OrderedDict``.

    ``dictreader`` is a DictReader over the sheet's CSV file. Before any
    data is yielded, the configuration line (present when the sheet has a
    non-empty configuration of its own) and ``skipRows`` further rows are
    discarded; the next row supplies the field names, and ``headerRows - 1``
    additional heading rows are discarded after it.

    Each yielded mapping is keyed by field name, in field-name order.
    If the file ends while rows are still being skipped, the generator
    simply finishes without yielding anything.
    """
    sheet_configuration = self.sheet_configuration[self.sheet_names_map[sheet_name]]
    # A non-empty per-sheet configuration counts as one extra leading line.
    configuration_line = 1 if sheet_configuration else 0
    if not sheet_configuration:
        sheet_configuration = self.base_configuration
    if not self.use_configuration:
        sheet_configuration = {}

    skip_rows = sheet_configuration.get("skipRows", 0)
    header_rows = sheet_configuration.get("headerRows", 1)
    leading_rows = configuration_line + skip_rows

    previous_row = None
    for _ in range(leading_rows):
        try:
            previous_row = next(dictreader.reader)
        except StopIteration:
            # A StopIteration escaping a generator body becomes a
            # RuntimeError on Python 3.7+ (PEP 479); end cleanly instead.
            return

    if sys.version_info[0] >= 3:  # If Python 3 or greater
        fieldnames = dictreader.fieldnames
    else:
        # unicodecsv dictreader always reads the headingline first
        # so in the case of there being any rows to skip look at
        # previous row and use that for fieldnames.
        if leading_rows:
            fieldnames = previous_row
            dictreader.fieldnames = fieldnames
            dictreader.unicode_fieldnames = fieldnames
        else:
            fieldnames = dictreader.unicode_fieldnames

    # Discard the extra heading rows that follow the field-name row.
    for _ in range(header_rows - 1):
        try:
            next(dictreader.reader)
        except StopIteration:
            return

    for line in dictreader:
        yield OrderedDict((fieldname, line[fieldname]) for fieldname in fieldnames)
520
+
521
def get_sheet_configuration(self, sheet_name):
    """Return the raw configuration cells from the first row of a sheet.

    The first row of ``<input_name>/<sheet_name>.csv`` is treated as a
    configuration line when its first cell is exactly ``'#'``; the
    remaining cells of that row are returned as a list. In every other
    case, including an empty file or a blank first line, ``[]`` is
    returned.
    """
    path = os.path.join(self.input_name, sheet_name + '.csv')
    # Compare the version tuple, not the version string: string comparison
    # is lexicographic and only works by accident.
    if sys.version_info[0] >= 3:
        # Python 3: the file object does the decoding.
        sheet_file = open(path, encoding=self.encoding)
        reader_kwargs = {}
    else:
        # Python 2: the reader is handed the encoding instead.
        sheet_file = open(path)
        reader_kwargs = {'encoding': self.encoding}

    with sheet_file:
        # Default to None so an empty file does not raise StopIteration.
        heading_row = next(csvreader(sheet_file, **reader_kwargs), None)

    # A blank first line parses to an empty row; guard before indexing.
    if heading_row and heading_row[0] == '#':
        return heading_row[1:]
    return []
533
+
534
+
535
+
477
536
def get_sheet_lines(self, sheet_name):
    """Yield the data rows of ``<input_name>/<sheet_name>.csv``.

    Opens the sheet's CSV file, wraps it in a ``DictReader`` and yields
    whatever ``self.generate_rows`` produces for it. The file stays open
    until the generator is exhausted or closed.
    """
    path = os.path.join(self.input_name, sheet_name + '.csv')
    # Compare the version tuple, not the version string: string comparison
    # is lexicographic and only works by accident.
    if sys.version_info[0] >= 3:
        # Pass the encoding to the open function
        sheet_file = open(path, encoding=self.encoding)
        reader_kwargs = {}
    else:
        # Pass the encoding to DictReader
        sheet_file = open(path)
        reader_kwargs = {'encoding': self.encoding}

    with sheet_file:
        dictreader = DictReader(sheet_file, **reader_kwargs)
        # Plain loop rather than ``yield from`` to stay Python 2 compatible.
        for row in self.generate_rows(dictreader, sheet_name):
            yield row
490
549
491
550
492
551
class XLSXInput (SpreadsheetInput ):
0 commit comments