#428 Update transform code

chinyeungli · chinyeungli · commit 55ed6a8e8d14 · 2020-08-03T17:22:20.000+08:00
 * Redesign the column renaming code
 * Update the test code and sample data

ToDo:
 * Need to update the documentation
 * Need to bump aboutcode's major version, as the configuration format is
   not backward compatible
 * Need more tests
diff --git a/src/attributecode/transform.py b/src/attributecode/transform.py
@@ -29,6 +29,7 @@
 from attributecode.util import csv
 from attributecode.util import python2
 from attributecode.util import replace_tab_with_spaces
+from __builtin__ import True
 
 
 if python2:  # pragma: nocover
@@ -76,24 +77,28 @@ def transform_data(rows, transformer):
     dupes = check_duplicate_columns(column_names)
 
     if dupes:
-        msg = 'Duplicated column name: {name}'
-        errors.extend(Error(CRITICAL, msg.format(name)) for name in dupes)
+        msg = u'Duplicated column name: %(name)s'
+        for name in dupes:
+            errors.append(Error(CRITICAL, msg % locals()))
         return column_names, [], errors
 
-    column_names = transformer.apply_renamings(column_names)
-
-    # convert to dicts using the renamed columns
+    # Convert to dicts
     data = [OrderedDict(zip_longest(column_names, row)) for row in rows]
+    
+    #column_names = transformer.apply_renamings(column_names)
+    renamed_column_data = transformer.apply_renamings(data)
+
+    column_names = renamed_column_data[0].keys()
 
     if transformer.column_filters:
-        data = list(transformer.filter_columns(data))
+        renamed_column_data = list(transformer.filter_columns(renamed_column_data))
         column_names = [c for c in column_names if c in transformer.column_filters]
 
-    errors = transformer.check_required_columns(data)
+    errors = transformer.check_required_columns(renamed_column_data)
     if errors:
         return column_names, data, errors
 
-    return column_names, data, errors
+    return column_names, renamed_column_data, errors
 
 
 tranformer_config_help = '''
@@ -210,22 +215,29 @@ def check_required_columns(self, data):
             errors.append(Error(CRITICAL, msg.format(**locals())))
         return errors
 
-    def apply_renamings(self, column_names):
+    def apply_renamings(self, data):
         """
-        Return a tranformed list of `column_names` where columns are renamed
+        Return a transformed dictionary list where columns are renamed
         based on this Transformer configuration.
         """
         renamings = self.column_renamings
         if not renamings:
-            return column_names
-        renamings = {n.lower(): rn.lower() for n, rn in renamings.items()}
-
-        renamed = []
-        for name in column_names:
-            name = name.lower()
-            new_name = renamings.get(name, name)
-            renamed.append(new_name)
-        return renamed
+            return data
+        renamings = {n.lower(): rn.lower() for n,rn in renamings.items()}
+
+        renamed_list = []
+        for row in data:
+            renamed = OrderedDict()
+            for key in row:
+                matched = False
+                for renamed_key in renamings:
+                    if key == renamings[renamed_key]:
+                        renamed[renamed_key] = row[key]
+                        matched = True
+                if not matched:
+                    renamed[key] = row[key]
+            renamed_list.append(renamed)
+        return renamed_list
 
     def clean_columns(self, column_names):
         """
diff --git a/tests/test_transform.py b/tests/test_transform.py
@@ -2,7 +2,7 @@
 # -*- coding: utf8 -*-
 
 # ============================================================================
-#  Copyright (c) 2014-2019 nexB Inc. http://www.nexb.com/ - All rights reserved.
+#  Copyright (c) 2014-2020 nexB Inc. http://www.nexb.com/ - All rights reserved.
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 #  You may obtain a copy of the License at
@@ -35,11 +35,24 @@
 
 
 class TransformTest(unittest.TestCase):
-    def test_transform_data(self):
+    def test_transform_data1(self):
         test_file = get_test_loc('test_transform/input.csv')
         configuration = get_test_loc('test_transform/configuration')
         rows = read_csv_rows(test_file)
         transformer = Transformer.from_file(configuration)
         col_name, data, err = transform_data(rows, transformer)
-        expect = [u'about_resource', u'name']
-        assert col_name == expect
+        expect_col = [u'about_resource', u'name']
+        expected_data = [OrderedDict([(u'about_resource', u'/tmp/test.c'), (u'name', u'test.c')])]
+        assert col_name == expect_col
+        assert data == expected_data
+
+    def test_transform_data_new_col(self):
+        test_file = get_test_loc('test_transform/input.csv')
+        configuration = get_test_loc('test_transform/configuration_new_cols')
+        rows = read_csv_rows(test_file)
+        transformer = Transformer.from_file(configuration)
+        col_name, data, err = transform_data(rows, transformer)
+        expect_col = [u'path', u'about_resource', u'name']
+        expected_data = [OrderedDict([(u'path', u'/tmp/test.c'), (u'about_resource', u'/tmp/test.c'), (u'name', u'test.c')])]
+        assert col_name == expect_col
+        assert data == expected_data
diff --git a/tests/testdata/test_transform/configuration b/tests/testdata/test_transform/configuration
@@ -1,3 +1,3 @@
 column_renamings:
-    'Directory/Filename' : about_resource
-    Component: name
+    about_resource: 'Directory/Filename'
+    name: Component
diff --git a/tests/testdata/test_transform/configuration_new_cols b/tests/testdata/test_transform/configuration_new_cols
@@ -0,0 +1,4 @@
+column_renamings:
+    about_resource: 'Directory/Filename'
+    name: Component
+    path: 'Directory/Filename'
diff --git a/tests/testdata/test_transform/input.csv b/tests/testdata/test_transform/input.csv
@@ -1,2 +1,2 @@
 Directory/Filename,Component
-/tmp/test.c, test,c
+/tmp/test.c,test.c

Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,2 @@`
`1`	`1`	`Directory/Filename,Component`
`2`		`-/tmp/test.c, test,c`
	`2`	`+/tmp/test.c,test.c`