Skip to content

Commit 8cd2f74

Browse files
authored
Merge branch 'master' into cove-838-float-int
2 parents 087736a + f712220 commit 8cd2f74

File tree

4 files changed

+87
-11
lines changed

4 files changed

+87
-11
lines changed

flattentool/input.py

Lines changed: 67 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ def __init__(self, cell_value, cell_location):
4040
except ImportError:
4141
from UserDict import UserDict # pylint: disable=F0401
4242

43+
4344
def convert_type(type_string, value, timezone = pytz.timezone('UTC')):
4445
if value == '' or value is None:
4546
return None
@@ -448,18 +449,29 @@ class CSVInput(SpreadsheetInput):
448449
encoding = 'utf-8'
449450

450451
def get_sheet_headings(self, sheet_name):
452+
sheet_configuration = self.sheet_configuration[self.sheet_names_map[sheet_name]]
453+
configuration_line = 1 if sheet_configuration else 0
454+
if not sheet_configuration:
455+
sheet_configuration = self.base_configuration
456+
if not self.use_configuration:
457+
sheet_configuration = {}
458+
skip_rows = sheet_configuration.get("skipRows", 0)
459+
if sheet_configuration.get("ignore"):
460+
# Returning an empty list of headings is a proxy for there being no data in the sheet.
461+
return []
462+
451463
if sys.version > '3': # If Python 3 or greater
452464
with open(os.path.join(self.input_name, sheet_name+'.csv'), encoding=self.encoding) as main_sheet_file:
453465
r = csvreader(main_sheet_file)
454-
for row in enumerate(r):
455-
# Just return the first row
456-
return row[1]
466+
for num, row in enumerate(r):
467+
if num == (skip_rows + configuration_line):
468+
return row
457469
else: # If Python 2
458470
with open(os.path.join(self.input_name, sheet_name+'.csv')) as main_sheet_file:
459471
r = csvreader(main_sheet_file, encoding=self.encoding)
460-
for row in enumerate(r):
461-
# Just return the first row
462-
return row[1]
472+
for num, row in enumerate(r):
473+
if num == (skip_rows + configuration_line):
474+
return row
463475

464476
def read_sheets(self):
465477
sheet_file_names = os.listdir(self.input_name)
@@ -474,21 +486,66 @@ def read_sheets(self):
474486
except ValueError:
475487
pass
476488
self.sub_sheet_names = sheet_names
489+
self.sheet_names_map = OrderedDict((sheet_name, sheet_name) for sheet_name in sheet_names)
477490
self.configure_sheets()
478491

492+
def generate_rows(self, dictreader, sheet_name):
493+
sheet_configuration = self.sheet_configuration[self.sheet_names_map[sheet_name]]
494+
configuration_line = 1 if sheet_configuration else 0
495+
if not sheet_configuration:
496+
sheet_configuration = self.base_configuration
497+
if not self.use_configuration:
498+
sheet_configuration = {}
499+
500+
skip_rows = sheet_configuration.get("skipRows", 0)
501+
header_rows = sheet_configuration.get("headerRows", 1)
502+
for i in range(0, configuration_line + skip_rows):
503+
previous_row = next(dictreader.reader)
504+
if sys.version > '3': # If Python 3 or greater
505+
fieldnames = dictreader.fieldnames
506+
else:
507+
# unicodecsv's DictReader always reads the heading line first,
508+
# so if there are any rows to skip, look at the
509+
# previous row and use that for the fieldnames.
510+
if (configuration_line + skip_rows):
511+
fieldnames = previous_row
512+
dictreader.fieldnames = fieldnames
513+
dictreader.unicode_fieldnames = fieldnames
514+
else:
515+
fieldnames = dictreader.unicode_fieldnames
516+
for i in range(0, header_rows - 1):
517+
next(dictreader.reader)
518+
for line in dictreader:
519+
yield OrderedDict((fieldname, line[fieldname]) for fieldname in fieldnames)
520+
521+
def get_sheet_configuration(self, sheet_name):
522+
if sys.version > '3': # If Python 3 or greater
523+
with open(os.path.join(self.input_name, sheet_name+'.csv'), encoding=self.encoding) as main_sheet_file:
524+
r = csvreader(main_sheet_file)
525+
heading_row = next(r)
526+
else: # If Python 2
527+
with open(os.path.join(self.input_name, sheet_name+'.csv')) as main_sheet_file:
528+
r = csvreader(main_sheet_file, encoding=self.encoding)
529+
heading_row = next(r)
530+
if heading_row[0] == '#':
531+
return heading_row[1:]
532+
return []
533+
534+
535+
479536
def get_sheet_lines(self, sheet_name):
480537
if sys.version > '3': # If Python 3 or greater
481538
# Pass the encoding to the open function
482539
with open(os.path.join(self.input_name, sheet_name+'.csv'), encoding=self.encoding) as main_sheet_file:
483540
dictreader = DictReader(main_sheet_file)
484-
for line in dictreader:
485-
yield OrderedDict((fieldname, line[fieldname]) for fieldname in dictreader.fieldnames)
541+
for row in self.generate_rows(dictreader, sheet_name):
542+
yield row
486543
else: # If Python 2
487544
# Pass the encoding to DictReader
488545
with open(os.path.join(self.input_name, sheet_name+'.csv')) as main_sheet_file:
489546
dictreader = DictReader(main_sheet_file, encoding=self.encoding)
490-
for line in dictreader:
491-
yield OrderedDict((fieldname, line[fieldname]) for fieldname in dictreader.fieldnames)
547+
for row in self.generate_rows(dictreader, sheet_name):
548+
yield row
492549

493550

494551
class XLSXInput(SpreadsheetInput):
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#,ignore
2+
bla,bla,bla
3+
bla,bla,bla
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#,skipRows 1,HeaderRows 2
2+
,,
3+
some,actual,headings
4+
some,other,headings
5+
some,actual,data

flattentool/tests/test_init.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1156,7 +1156,7 @@ def test_bad_format(tmpdir):
11561156
output_name=tmpdir.join('meta_unflattened.json').strpath,
11571157
)
11581158

1159-
def test_commands_single_sheet(tmpdir):
1159+
def test_commands_single_sheet_xlsx(tmpdir):
11601160

11611161
unflatten(
11621162
'flattentool/tests/fixtures/xlsx/commands_in_file.xlsx',
@@ -1170,6 +1170,17 @@ def test_commands_single_sheet(tmpdir):
11701170

11711171
assert unflattened == {'main': [{'actual': 'actual', 'headings': 'data', 'some': 'some'}]}
11721172

1173+
def test_commands_single_sheet_csv(tmpdir):
1174+
unflatten(
1175+
'flattentool/tests/fixtures/csv/commands_in_file',
1176+
input_format='csv',
1177+
output_name=tmpdir.join('command_single_unflattened.json').strpath,
1178+
cell_source_map=tmpdir.join('command_single_source_map.json').strpath,
1179+
heading_source_map=tmpdir.join('command_single_heading_source_map.json').strpath,
1180+
)
1181+
unflattened = json.load(tmpdir.join('command_single_unflattened.json'))
1182+
assert unflattened == {'main': [{'actual': 'actual', 'headings': 'data', 'some': 'some'}]}
1183+
11731184
def test_commands_metatab(tmpdir):
11741185

11751186
unflatten(

0 commit comments

Comments
 (0)