Skip to content

Commit f712220

Browse files
authored
Merge pull request #172 from OpenDataServices/171-commands-csv
[#171] Add basic commands for csv files.
2 parents 2e51a26 + 55b28be commit f712220

File tree

4 files changed

+87
-11
lines changed

4 files changed

+87
-11
lines changed

flattentool/input.py

Lines changed: 67 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ def __init__(self, cell_value, cell_location):
4040
except ImportError:
4141
from UserDict import UserDict # pylint: disable=F0401
4242

43+
4344
def convert_type(type_string, value, timezone = pytz.timezone('UTC')):
4445
if value == '' or value is None:
4546
return None
@@ -446,18 +447,29 @@ class CSVInput(SpreadsheetInput):
446447
encoding = 'utf-8'
447448

448449
def get_sheet_headings(self, sheet_name):
    """Return the heading row of ``<sheet_name>.csv`` as a list of cells.

    The heading row is found by skipping ``skipRows`` rows, plus one
    extra row when the sheet carries its own (non-empty) configuration.
    An empty list is returned when the sheet is configured with
    ``ignore`` or when the file ends before the heading row is reached.
    """
    sheet_configuration = self.sheet_configuration[self.sheet_names_map[sheet_name]]
    # A sheet with its own configuration has one extra row to step over
    # before the headings (presumably the configuration row itself).
    configuration_line = 1 if sheet_configuration else 0
    if not sheet_configuration:
        sheet_configuration = self.base_configuration
    if not self.use_configuration:
        sheet_configuration = {}
    if sheet_configuration.get("ignore"):
        # returning empty headers is a proxy for no data in the sheet.
        return []
    heading_index = sheet_configuration.get("skipRows", 0) + configuration_line

    path = os.path.join(self.input_name, sheet_name + '.csv')
    # Python 3 decodes in open(); on Python 2 the csv reader is handed
    # the encoding instead.
    if sys.version_info[0] >= 3:
        open_kwargs, reader_kwargs = {'encoding': self.encoding}, {}
    else:
        open_kwargs, reader_kwargs = {}, {'encoding': self.encoding}

    with open(path, **open_kwargs) as main_sheet_file:
        for num, row in enumerate(csvreader(main_sheet_file, **reader_kwargs)):
            if num == heading_index:
                return row
    # The file has fewer rows than expected: report "no headings"
    # explicitly rather than falling off the end and returning None.
    return []
461473

462474
def read_sheets(self):
463475
sheet_file_names = os.listdir(self.input_name)
@@ -472,21 +484,66 @@ def read_sheets(self):
472484
except ValueError:
473485
pass
474486
self.sub_sheet_names = sheet_names
487+
self.sheet_names_map = OrderedDict((sheet_name, sheet_name) for sheet_name in sheet_names)
475488
self.configure_sheets()
476489

490+
def generate_rows(self, dictreader, sheet_name):
    """Yield each data row of a sheet as an ``OrderedDict`` keyed by heading.

    ``dictreader`` is a ``DictReader`` positioned at the start of the
    sheet's file. Rows before the headings (``skipRows``, plus one when
    the sheet has its own non-empty configuration) are discarded, the
    next row supplies the field names, and a further ``headerRows - 1``
    rows are discarded before data rows are yielded.

    If the file ends while rows are still being discarded, the generator
    finishes without yielding anything.
    """
    sheet_configuration = self.sheet_configuration[self.sheet_names_map[sheet_name]]
    configuration_line = 1 if sheet_configuration else 0
    if not sheet_configuration:
        sheet_configuration = self.base_configuration
    if not self.use_configuration:
        sheet_configuration = {}

    skip_rows = sheet_configuration.get("skipRows", 0)
    header_rows = sheet_configuration.get("headerRows", 1)
    rows_before_heading = configuration_line + skip_rows

    # Sentinel for next(): a bare StopIteration escaping a generator is
    # turned into RuntimeError on Python 3.7+ (PEP 479).
    exhausted = object()

    previous_row = None
    for _ in range(rows_before_heading):
        previous_row = next(dictreader.reader, exhausted)
        if previous_row is exhausted:
            return

    if sys.version_info[0] >= 3:
        fieldnames = dictreader.fieldnames
    elif rows_before_heading:
        # unicodecsv dictreader always reads the heading line first,
        # so in the case of there being any rows to skip look at the
        # previous row and use that for fieldnames.
        fieldnames = previous_row
        dictreader.fieldnames = fieldnames
        dictreader.unicode_fieldnames = fieldnames
    else:
        fieldnames = dictreader.unicode_fieldnames

    # Any heading rows beyond the first are discarded.
    for _ in range(header_rows - 1):
        if next(dictreader.reader, exhausted) is exhausted:
            return

    for line in dictreader:
        yield OrderedDict((fieldname, line[fieldname]) for fieldname in fieldnames)
518+
519+
def get_sheet_configuration(self, sheet_name):
    """Return the configuration cells from the first row of a CSV sheet.

    Reads the first row of ``<sheet_name>.csv`` in the input directory.
    When that row's first cell is exactly ``#``, the remaining cells are
    returned; otherwise, including for an empty file or a blank first
    line, an empty list is returned.
    """
    path = os.path.join(self.input_name, sheet_name + '.csv')
    # Python 3 decodes in open(); on Python 2 the csv reader is handed
    # the encoding instead.
    if sys.version_info[0] >= 3:
        open_kwargs, reader_kwargs = {'encoding': self.encoding}, {}
    else:
        open_kwargs, reader_kwargs = {}, {'encoding': self.encoding}

    with open(path, **open_kwargs) as main_sheet_file:
        # Default to an empty row so an empty file does not raise
        # StopIteration.
        heading_row = next(csvreader(main_sheet_file, **reader_kwargs), [])

    # A blank first line parses as an empty row; guard before indexing.
    if heading_row and heading_row[0] == '#':
        return heading_row[1:]
    return []
531+
532+
533+
477534
def get_sheet_lines(self, sheet_name):
    """Yield the data rows of ``<sheet_name>.csv`` from the input directory.

    The file is opened, wrapped in a ``DictReader`` and handed to
    ``self.generate_rows``, whose rows are yielded unchanged. The file
    stays open for as long as the generator is being consumed.
    """
    path = os.path.join(self.input_name, sheet_name + '.csv')
    if sys.version_info[0] >= 3:
        # Pass the encoding to the open function
        open_kwargs, reader_kwargs = {'encoding': self.encoding}, {}
    else:
        # Pass the encoding to DictReader
        open_kwargs, reader_kwargs = {}, {'encoding': self.encoding}

    with open(path, **open_kwargs) as main_sheet_file:
        dictreader = DictReader(main_sheet_file, **reader_kwargs)
        for row in self.generate_rows(dictreader, sheet_name):
            yield row
490547

491548

492549
class XLSXInput(SpreadsheetInput):
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#,ignore
2+
bla,bla,bla
3+
bla,bla,bla
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#,skipRows 1,HeaderRows 2
2+
,,
3+
some,actual,headings
4+
some,other,headings
5+
some,actual,data

flattentool/tests/test_init.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1156,7 +1156,7 @@ def test_bad_format(tmpdir):
11561156
output_name=tmpdir.join('meta_unflattened.json').strpath,
11571157
)
11581158

1159-
def test_commands_single_sheet(tmpdir):
1159+
def test_commands_single_sheet_xlsx(tmpdir):
11601160

11611161
unflatten(
11621162
'flattentool/tests/fixtures/xlsx/commands_in_file.xlsx',
@@ -1170,6 +1170,17 @@ def test_commands_single_sheet(tmpdir):
11701170

11711171
assert unflattened == {'main': [{'actual': 'actual', 'headings': 'data', 'some': 'some'}]}
11721172

1173+
def test_commands_single_sheet_csv(tmpdir):
    """Unflatten the CSV fixture that carries in-file commands and check the output."""
    output_path = tmpdir.join('command_single_unflattened.json')
    unflatten(
        'flattentool/tests/fixtures/csv/commands_in_file',
        input_format='csv',
        output_name=output_path.strpath,
        cell_source_map=tmpdir.join('command_single_source_map.json').strpath,
        heading_source_map=tmpdir.join('command_single_heading_source_map.json').strpath,
    )
    expected = {'main': [{'actual': 'actual', 'headings': 'data', 'some': 'some'}]}
    assert json.load(output_path) == expected
1183+
11731184
def test_commands_metatab(tmpdir):
11741185

11751186
unflatten(

0 commit comments

Comments
 (0)