Skip to content

Commit 55ed6a8

Browse files
committed
#428 Update transform code
* Redesign the column-renaming code.
* Update the test code and sample files.

ToDo:
* Update the documentation.
* Bump aboutcode's major version, as the configuration format is not backward compatible.
* Add more tests.
1 parent 1ce4046 commit 55ed6a8

File tree

5 files changed

+55
-26
lines changed

5 files changed

+55
-26
lines changed

src/attributecode/transform.py

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from attributecode.util import csv
3030
from attributecode.util import python2
3131
from attributecode.util import replace_tab_with_spaces
32+
from __builtin__ import True
3233

3334

3435
if python2: # pragma: nocover
@@ -76,24 +77,28 @@ def transform_data(rows, transformer):
7677
dupes = check_duplicate_columns(column_names)
7778

7879
if dupes:
79-
msg = 'Duplicated column name: {name}'
80-
errors.extend(Error(CRITICAL, msg.format(name)) for name in dupes)
80+
msg = u'Duplicated column name: %(name)s'
81+
for name in dupes:
82+
errors.append(Error(CRITICAL, msg % locals()))
8183
return column_names, [], errors
8284

83-
column_names = transformer.apply_renamings(column_names)
84-
85-
# convert to dicts using the renamed columns
85+
# Convert to dicts
8686
data = [OrderedDict(zip_longest(column_names, row)) for row in rows]
87+
88+
#column_names = transformer.apply_renamings(column_names)
89+
renamed_column_data = transformer.apply_renamings(data)
90+
91+
column_names = renamed_column_data[0].keys()
8792

8893
if transformer.column_filters:
89-
data = list(transformer.filter_columns(data))
94+
renamed_column_data = list(transformer.filter_columns(renamed_column_data))
9095
column_names = [c for c in column_names if c in transformer.column_filters]
9196

92-
errors = transformer.check_required_columns(data)
97+
errors = transformer.check_required_columns(renamed_column_data)
9398
if errors:
9499
return column_names, data, errors
95100

96-
return column_names, data, errors
101+
return column_names, renamed_column_data, errors
97102

98103

99104
tranformer_config_help = '''
@@ -210,22 +215,29 @@ def check_required_columns(self, data):
210215
errors.append(Error(CRITICAL, msg.format(**locals())))
211216
return errors
212217

213-
def apply_renamings(self, data):
    """
    Return a new list of OrderedDict rows built from the `data` list of
    row mappings, where columns are renamed based on this Transformer
    configuration.

    `self.column_renamings` maps each new column name to the existing
    column name that provides its value. Names are matched
    case-insensitively and new names are lowercased. When one existing
    column is the source of several new names, its value is copied under
    each of them. A column that is not the source of any renaming is kept
    unchanged, at its original position in the row.

    Return `data` itself, untouched, when no renamings are configured.
    """
    renamings = self.column_renamings
    if not renamings:
        return data
    renamings = {n.lower(): rn.lower() for n, rn in renamings.items()}

    # Invert the configuration once (source name -> list of new names) so
    # that each cell costs a single lookup instead of a scan of every
    # configured renaming.
    new_names_by_source = {}
    for new_name, source_name in renamings.items():
        new_names_by_source.setdefault(source_name, []).append(new_name)

    renamed_list = []
    for row in data:
        renamed = OrderedDict()
        for key, value in row.items():
            # Configured names were lowercased above, so the row key has to
            # be lowercased too or a mixed-case header would never match.
            # Keys may not be strings (a row longer than the header yields a
            # None key when zipped with zip_longest): compare those as-is.
            lookup = key.lower() if hasattr(key, 'lower') else key
            for new_name in new_names_by_source.get(lookup, [key]):
                renamed[new_name] = value
        renamed_list.append(renamed)
    return renamed_list
229241

230242
def clean_columns(self, column_names):
231243
"""

tests/test_transform.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# -*- coding: utf8 -*-
33

44
# ============================================================================
5-
# Copyright (c) 2014-2019 nexB Inc. http://www.nexb.com/ - All rights reserved.
5+
# Copyright (c) 2014-2020 nexB Inc. http://www.nexb.com/ - All rights reserved.
66
# Licensed under the Apache License, Version 2.0 (the "License");
77
# you may not use this file except in compliance with the License.
88
# You may obtain a copy of the License at
@@ -35,11 +35,24 @@
3535

3636

3737
class TransformTest(unittest.TestCase):
    """
    Check transform_data() against the CSV and configuration fixtures
    stored under test_transform/.
    """

    def test_transform_data1(self):
        # Each input column is renamed to exactly one new column.
        csv_location = get_test_loc('test_transform/input.csv')
        config_location = get_test_loc('test_transform/configuration')
        csv_rows = read_csv_rows(csv_location)
        transformer = Transformer.from_file(config_location)

        col_name, data, err = transform_data(csv_rows, transformer)

        assert col_name == [u'about_resource', u'name']
        expected_row = OrderedDict([
            (u'about_resource', u'/tmp/test.c'),
            (u'name', u'test.c'),
        ])
        assert data == [expected_row]

    def test_transform_data_new_col(self):
        # One input column feeds two new columns: both must be present.
        csv_location = get_test_loc('test_transform/input.csv')
        config_location = get_test_loc('test_transform/configuration_new_cols')
        csv_rows = read_csv_rows(csv_location)
        transformer = Transformer.from_file(config_location)

        col_name, data, err = transform_data(csv_rows, transformer)

        assert col_name == [u'path', u'about_resource', u'name']
        expected_row = OrderedDict([
            (u'path', u'/tmp/test.c'),
            (u'about_resource', u'/tmp/test.c'),
            (u'name', u'test.c'),
        ])
        assert data == [expected_row]
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
column_renamings:
2-
'Directory/Filename' : about_resource
3-
Component: name
2+
about_resource: 'Directory/Filename'
3+
name: Component
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
column_renamings:
2+
about_resource: 'Directory/Filename'
3+
name: Component
4+
path: 'Directory/Filename'
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
Directory/Filename,Component
2-
/tmp/test.c, test,c
2+
/tmp/test.c,test.c

0 commit comments

Comments
 (0)