Skip to content

Commit f70c414

Browse files
committed
[#171] Add basic commands for csv files.
1 parent 2e51a26 commit f70c414

File tree

4 files changed

+76
-11
lines changed

4 files changed

+76
-11
lines changed

flattentool/input.py

Lines changed: 56 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ def __init__(self, cell_value, cell_location):
4040
except ImportError:
4141
from UserDict import UserDict # pylint: disable=F0401
4242

43+
4344
def convert_type(type_string, value, timezone = pytz.timezone('UTC')):
4445
if value == '' or value is None:
4546
return None
@@ -446,18 +447,29 @@ class CSVInput(SpreadsheetInput):
446447
encoding = 'utf-8'
447448

448449
def get_sheet_headings(self, sheet_name):
    """Return the heading row of the CSV file backing ``sheet_name``.

    The heading row is located at index ``skipRows + configuration_line``
    (0-based), where ``configuration_line`` is 1 when the sheet has its own
    configuration and 0 otherwise.

    Returns an empty list when the sheet is configured with ``ignore`` or
    when the file has too few rows to contain a heading row.
    """
    sheet_configuration = self.sheet_configuration[self.sheet_names_map[sheet_name]]
    # NOTE(review): a sheet-specific configuration presumably occupies the
    # first line of the file (see get_sheet_configuration) -- confirm.
    configuration_line = 1 if sheet_configuration else 0
    if not sheet_configuration:
        sheet_configuration = self.base_configuration
    if not self.use_configuration:
        sheet_configuration = {}
    if sheet_configuration.get("ignore"):
        # returning empty headers is a proxy for no data in the sheet.
        return []
    heading_index = sheet_configuration.get("skipRows", 0) + configuration_line

    path = os.path.join(self.input_name, sheet_name + '.csv')
    # Python 3 decodes in open(); on Python 2 the reader takes the encoding.
    if sys.version_info >= (3,):
        open_kwargs, reader_kwargs = {'encoding': self.encoding}, {}
    else:
        open_kwargs, reader_kwargs = {}, {'encoding': self.encoding}

    with open(path, **open_kwargs) as main_sheet_file:
        for num, row in enumerate(csvreader(main_sheet_file, **reader_kwargs)):
            if num == heading_index:
                return row
    # The file ended before the heading row: treat as a sheet with no data
    # instead of implicitly returning None.
    return []
461473

462474
def read_sheets(self):
463475
sheet_file_names = os.listdir(self.input_name)
@@ -472,21 +484,55 @@ def read_sheets(self):
472484
except ValueError:
473485
pass
474486
self.sub_sheet_names = sheet_names
487+
self.sheet_names_map = OrderedDict((sheet_name, sheet_name) for sheet_name in sheet_names)
475488
self.configure_sheets()
476489

490+
def generate_rows(self, dictreader, sheet_name):
    """Yield the data rows of a sheet as ``OrderedDict`` objects.

    ``dictreader`` is a DictReader positioned at the start of the file.
    Rows are consumed from its underlying ``reader`` in this order:

    1. the configuration line (only if the sheet has its own configuration)
       plus ``skipRows`` rows are discarded;
    2. the next row becomes the field names (read via ``fieldnames``);
    3. ``headerRows - 1`` further heading rows are discarded;
    4. every remaining row is yielded, keyed by the field names in order.

    If the file ends before the data rows are reached, nothing is yielded.
    """
    sheet_configuration = self.sheet_configuration[self.sheet_names_map[sheet_name]]
    configuration_line = 1 if sheet_configuration else 0
    if not sheet_configuration:
        sheet_configuration = self.base_configuration
    if not self.use_configuration:
        sheet_configuration = {}

    skip_rows = sheet_configuration.get("skipRows", 0)
    header_rows = sheet_configuration.get("headerRows", 1)

    # A bare next() would leak StopIteration out of this generator, which
    # Python 3.7+ converts into RuntimeError; use a sentinel instead.
    exhausted = object()
    raw_reader = dictreader.reader

    for _ in range(configuration_line + skip_rows):
        if next(raw_reader, exhausted) is exhausted:
            return
    # Accessing fieldnames makes the DictReader consume the heading row.
    fieldnames = dictreader.fieldnames
    if fieldnames is None:
        return
    for _ in range(header_rows - 1):
        if next(raw_reader, exhausted) is exhausted:
            return
    for line in dictreader:
        yield OrderedDict((fieldname, line[fieldname]) for fieldname in fieldnames)
507+
508+
def get_sheet_configuration(self, sheet_name):
    """Return the configuration cells from the first row of a sheet's CSV file.

    The first row is treated as a configuration line when its first cell is
    exactly ``'#'``; the remaining cells of that row are returned. An empty
    list is returned when the first row is not a configuration line, when
    it is blank, or when the file is empty.
    """
    path = os.path.join(self.input_name, sheet_name + '.csv')
    # Python 3 decodes in open(); on Python 2 the reader takes the encoding.
    if sys.version_info >= (3,):
        open_kwargs, reader_kwargs = {'encoding': self.encoding}, {}
    else:
        open_kwargs, reader_kwargs = {}, {'encoding': self.encoding}

    with open(path, **open_kwargs) as main_sheet_file:
        # Default of None covers an empty file (no first row at all).
        heading_row = next(csvreader(main_sheet_file, **reader_kwargs), None)

    # The truthiness check also guards a blank first line, which the csv
    # reader reports as an empty list.
    if heading_row and heading_row[0] == '#':
        return heading_row[1:]
    return []
520+
521+
522+
477523
def get_sheet_lines(self, sheet_name):
    """Yield the rows of the CSV file backing ``sheet_name``.

    Opens ``<input_name>/<sheet_name>.csv``, wraps it in a DictReader and
    yields whatever ``self.generate_rows`` produces for it. The file stays
    open for as long as the generator is being consumed.
    """
    path = os.path.join(self.input_name, sheet_name + '.csv')
    # Python 3: pass the encoding to open(). Python 2: pass it to DictReader.
    # version_info is compared as a tuple, unlike the string sys.version.
    if sys.version_info >= (3,):
        open_kwargs, reader_kwargs = {'encoding': self.encoding}, {}
    else:
        open_kwargs, reader_kwargs = {}, {'encoding': self.encoding}

    with open(path, **open_kwargs) as main_sheet_file:
        dictreader = DictReader(main_sheet_file, **reader_kwargs)
        for row in self.generate_rows(dictreader, sheet_name):
            yield row
490536

491537

492538
class XLSXInput(SpreadsheetInput):
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#,ignore
2+
bla,bla,bla
3+
bla,bla,bla
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#,skipRows 1,HeaderRows 2
2+
,,
3+
some,actual,headings
4+
some,other,headings
5+
some,actual,data

flattentool/tests/test_init.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1156,7 +1156,7 @@ def test_bad_format(tmpdir):
11561156
output_name=tmpdir.join('meta_unflattened.json').strpath,
11571157
)
11581158

1159-
def test_commands_single_sheet(tmpdir):
1159+
def test_commands_single_sheet_xlsx(tmpdir):
11601160

11611161
unflatten(
11621162
'flattentool/tests/fixtures/xlsx/commands_in_file.xlsx',
@@ -1170,6 +1170,17 @@ def test_commands_single_sheet(tmpdir):
11701170

11711171
assert unflattened == {'main': [{'actual': 'actual', 'headings': 'data', 'some': 'some'}]}
11721172

1173+
def test_commands_single_sheet_csv(tmpdir):
    """Unflatten the ``commands_in_file`` CSV fixture and check the JSON output."""
    output_file = tmpdir.join('command_single_unflattened.json')
    cell_map_file = tmpdir.join('command_single_source_map.json')
    heading_map_file = tmpdir.join('command_single_heading_source_map.json')

    unflatten(
        'flattentool/tests/fixtures/csv/commands_in_file',
        input_format='csv',
        output_name=output_file.strpath,
        cell_source_map=cell_map_file.strpath,
        heading_source_map=heading_map_file.strpath,
    )

    # Only the final data row of the fixture is expected in the output.
    expected = {'main': [{'actual': 'actual', 'headings': 'data', 'some': 'some'}]}
    unflattened = json.load(output_file)
    assert unflattened == expected
1183+
11731184
def test_commands_metatab(tmpdir):
11741185

11751186
unflatten(

0 commit comments

Comments
 (0)