Fixed #417 - Make transform work with JSON files

chinyeungli · chinyeungli · commit a4781f4c7ed1 · 2019-12-27T15:11:44.000+08:00
* Update Changelog
 * Create functions to work on `transform` from JSON to JSON
 * Add code to validate that the input and output extensions are the
same
 * Update test `configuration` to increase coverage
 * Added/Updated test code
diff --git a/docs/CHANGELOG.rst b/docs/CHANGELOG.rst
@@ -2,6 +2,7 @@
     Release 4.0.2
 
     * Upgrade license-expression library to v1.2
+    * Enhance the `transform` command to also work with JSON files
 
 
 2019-10-17
diff --git a/src/attributecode/cmd.py b/src/attributecode/cmd.py
@@ -401,17 +401,17 @@ def print_config_help(ctx, param, value):
 
 
 @about.command(cls=AboutCommand,
-    short_help='Transform a CSV by applying renamings, filters and checks.')
+    short_help='Transform a CSV/JSON by applying renamings, filters and checks.')
 
 @click.argument('location',
     required=True,
-    callback=partial(validate_extensions, extensions=('.csv',)),
+    callback=partial(validate_extensions, extensions=('.csv', '.json',)),
     metavar='LOCATION',
     type=click.Path(exists=True, dir_okay=False, readable=True, resolve_path=True))
 
 @click.argument('output',
     required=True,
-    callback=partial(validate_extensions, extensions=('.csv',)),
+    callback=partial(validate_extensions, extensions=('.csv', '.json',)),
     metavar='OUTPUT',
     type=click.Path(exists=False, dir_okay=False, writable=True, resolve_path=True))
 
@@ -438,30 +438,39 @@ def print_config_help(ctx, param, value):
 
 def transform(location, output, configuration, quiet, verbose):  # NOQA
     """
-Transform the CSV file at LOCATION by applying renamings, filters and checks
-and write a new CSV to OUTPUT.
+Transform the CSV/JSON file at LOCATION by applying renamings, filters and checks
+and write a new CSV/JSON to OUTPUT.
 
-LOCATION: Path to a CSV file.
+LOCATION: Path to a CSV/JSON file.
 
-OUTPUT: Path to CSV inventory file to create.
+OUTPUT: Path to CSV/JSON inventory file to create.
     """
     from attributecode.transform import transform_csv_to_csv
+    from attributecode.transform import transform_json_to_json
     from attributecode.transform import Transformer
 
-    if not quiet:
-        print_version()
-        click.echo('Transforming CSV...')
 
     if not configuration:
         transformer = Transformer.default()
     else:
         transformer = Transformer.from_file(configuration)
 
-    errors = transform_csv_to_csv(location, output, transformer)
+    if location.endswith('.csv') and output.endswith('.csv'):
+        errors = transform_csv_to_csv(location, output, transformer)
+    elif location.endswith('.json') and output.endswith('.json'):
+        errors = transform_json_to_json(location, output, transformer)
+    else:
+        msg = 'Extension for the input and output need to be the same.'
+        click.echo(msg)
+        sys.exit()
+
+    if not quiet:
+        print_version()
+        click.echo('Transforming...')
 
     errors_count = report_errors(errors, quiet, verbose, log_file_loc=output + '-error.log')
     if not quiet and not errors:
-        msg = 'Transformed CSV written to {output}.'.format(**locals())
+        msg = 'Transformed file written to {output}.'.format(**locals())
         click.echo(msg)
     sys.exit(errors_count)
 
diff --git a/src/attributecode/transform.py b/src/attributecode/transform.py
@@ -20,6 +20,7 @@
 from collections import Counter
 from collections import OrderedDict
 import io
+import json
 
 import attr
 
@@ -40,27 +41,46 @@
 def transform_csv_to_csv(location, output, transformer):
     """
     Read a CSV file at `location` and write a new CSV file at `output`. Apply
-    transformations using the `transformer` Tranformer.
+    transformations using the `transformer` Transformer.
     Return a list of Error objects.
     """
     if not transformer:
         raise ValueError('Cannot transform without Transformer')
 
     rows = read_csv_rows(location)
 
-    column_names, data, errors = transform_data(rows, transformer)
+    column_names, data, errors = transform_csv(rows, transformer)
 
     if errors:
         return errors
     else:
         write_csv(output, data, column_names)
         return []
 
+def transform_json_to_json(location, output, transformer):
+    """
+    Read a JSON file at `location` and write a new JSON file at `output`. Apply
+    transformations using the `transformer` Transformer.
+    Return a list of Error objects.
+    """
+    if not transformer:
+        raise ValueError('Cannot transform without Transformer')
 
-def transform_data(rows, transformer):
+    data = read_json(location)
+
+    new_data, errors = transform_json(data, transformer)
+
+    if errors:
+        return errors
+    else:
+        write_json(output, new_data)
+        return []
+
+
+def transform_csv(rows, transformer):
     """
     Read a list of list of CSV-like data `rows` and apply transformations using the
-    `transformer` Tranformer.
+    `transformer` Transformer.
     Return a tuple of:
        ([column names...], [transformed ordered dict...], [Error objects..])
     """
@@ -90,12 +110,54 @@ def transform_data(rows, transformer):
         column_names = [c for c in column_names if c in transformer.column_filters]
 
     errors = transformer.check_required_columns(data)
-    if errors:
-        return column_names, data, errors
 
     return column_names, data, errors
 
 
+def transform_json(data, transformer):
+    """
+    Apply the `transformer` Transformer to `data`, a dictionary or a list of
+    dictionaries. Return a tuple of ([ordered dicts...], [Error objects...]),
+    or `data` unchanged when no `transformer` is given.
+    """
+
+    if not transformer:
+        return data
+
+    errors = []
+    new_data = []
+    renamings = transformer.column_renamings
+    if isinstance(data, list):
+        for item in data:
+            element, err = process_json_keys(item, renamings, transformer)
+            for e in element:
+                new_data.append(e)
+            for e in err:
+                errors.append(e)
+    else: 
+        new_data, errors = process_json_keys(data, renamings, transformer)
+
+    return new_data, errors
+
+
+def process_json_keys(data, renamings, transformer):
+    o_dict = OrderedDict()
+    for k in data.keys():
+        if k in renamings.keys():
+            for r_key in renamings.keys():
+                if k == r_key:
+                    o_dict[renamings[r_key]] = data[k]
+        else:
+            o_dict[k] = data[k]
+        new_data = [o_dict]
+
+    if transformer.column_filters:
+        new_data = list(transformer.filter_columns(new_data))
+
+    errors = transformer.check_required_columns(new_data)
+    return new_data, errors
+
+
 tranformer_config_help = '''
 A transform configuration file is used to describe which transformations and
 validations to apply to a source CSV file. This is a simple text file using YAML
@@ -266,6 +328,15 @@ def read_csv_rows(location):
             yield row
 
 
+def read_json(location):
+    """
+    Return the data loaded from the JSON file at `location`.
+    """
+    with io.open(location, encoding='utf-8', errors='replace') as jsonfile:
+        data = json.load(jsonfile, object_pairs_hook=OrderedDict)
+        return data
+
+
 def write_csv(location, data, column_names):  # NOQA
     """
     Write a CSV file at `location` the `data` list of ordered dicts using the
@@ -275,3 +346,11 @@ def write_csv(location, data, column_names):  # NOQA
         writer = csv.DictWriter(csvfile, fieldnames=column_names)
         writer.writeheader()
         writer.writerows(data)
+
+
+def write_json(location, data):
+    """
+    Write a JSON file at `location` the `data` list of ordered dicts.
+    """
+    with open(location, 'w') as jsonfile:
+        json.dump(data, jsonfile, indent=3)
diff --git a/tests/test_transform.py b/tests/test_transform.py
@@ -30,7 +30,9 @@
 from attributecode import Error
 from attributecode import gen
 from attributecode.transform import read_csv_rows
-from attributecode.transform import transform_data
+from attributecode.transform import read_json
+from attributecode.transform import transform_csv
+from attributecode.transform import transform_json
 from attributecode.transform import Transformer
 
 
@@ -40,6 +42,30 @@ def test_transform_data(self):
         configuration = get_test_loc('test_transform/configuration')
         rows = read_csv_rows(test_file)
         transformer = Transformer.from_file(configuration)
-        col_name, data, err = transform_data(rows, transformer)
-        expect = [u'about_resource', u'name']
+        col_name, data, err = transform_csv(rows, transformer)
+        expect = [u'about_resource', u'name', u'version']
         assert col_name == expect
+
+    def test_transform_data_json(self):
+        test_file = get_test_loc('test_transform/input.json')
+        configuration = get_test_loc('test_transform/configuration')
+        json_data = read_json(test_file)
+        transformer = Transformer.from_file(configuration)
+        data, err = transform_json(json_data, transformer)
+        keys = []
+        for item in data:
+            keys = item.keys()
+        expect = [u'about_resource', u'name', u'version']
+        assert keys == expect
+
+    def test_transform_data_json_as_array(self):
+        test_file = get_test_loc('test_transform/input_as_array.json')
+        configuration = get_test_loc('test_transform/configuration')
+        json_data = read_json(test_file)
+        transformer = Transformer.from_file(configuration)
+        data, err = transform_json(json_data, transformer)
+        keys = []
+        for item in data:
+            keys = item.keys()
+        expect = [u'about_resource', u'name', u'version']
+        assert keys == expect
diff --git a/tests/testdata/test_cmd/help/about_help.txt b/tests/testdata/test_cmd/help/about_help.txt
@@ -18,4 +18,4 @@ Commands:
              errors and warnings.
   gen        Generate .ABOUT files from an inventory as CSV or JSON.
   inventory  Collect the inventory of .ABOUT files to a CSV or JSON file.
-  transform  Transform a CSV by applying renamings, filters and checks.
+  transform  Transform a CSV/JSON by applying renamings, filters and checks.
diff --git a/tests/testdata/test_cmd/help/about_transform_help.txt b/tests/testdata/test_cmd/help/about_transform_help.txt
@@ -1,11 +1,11 @@
 Usage: about transform [OPTIONS] LOCATION OUTPUT
 
-  Transform the CSV file at LOCATION by applying renamings, filters and checks
-  and write a new CSV to OUTPUT.
+  Transform the CSV/JSON file at LOCATION by applying renamings, filters and
+  checks and write a new CSV/JSON to OUTPUT.
 
-  LOCATION: Path to a CSV file.
+  LOCATION: Path to a CSV/JSON file.
 
-  OUTPUT: Path to CSV inventory file to create.
+  OUTPUT: Path to CSV/JSON inventory file to create.
 
 Options:
   -c, --configuration FILE  Path to an optional YAML configuration file. See
diff --git a/tests/testdata/test_transform/configuration b/tests/testdata/test_transform/configuration
@@ -1,3 +1,10 @@
 column_renamings:
     'Directory/Filename' : about_resource
-    Component: name
+    Component: name
+column_filters:
+    - about_resource
+    - name
+    - version
+required_columns:
+	- name
+	- version
diff --git a/tests/testdata/test_transform/input.csv b/tests/testdata/test_transform/input.csv
@@ -1,2 +1,2 @@
-Directory/Filename,Component
-/tmp/test.c, test,c
+Directory/Filename,Component,version,notes
+/tmp/test.c, test.c,1,test
diff --git a/tests/testdata/test_transform/input.json b/tests/testdata/test_transform/input.json
@@ -0,0 +1,6 @@
+{
+    "Directory/Filename": "/aboutcode-toolkit/",
+	"Component": "AboutCode-toolkit",
+	"version": "1.2.3",
+	"note": "test"
+}
diff --git a/tests/testdata/test_transform/input_as_array.json b/tests/testdata/test_transform/input_as_array.json
@@ -0,0 +1,12 @@
+[
+  {
+    "Directory/Filename": "/aboutcode-toolkit/", 
+    "Component": "AboutCode-toolkit",
+	"version": "1.0"
+  },
+  {
+    "Directory/Filename": "/aboutcode-toolkit1/", 
+    "Component": "AboutCode-toolkit1",
+	"version": "1.1"
+  }  
+]