Skip to content

Commit db03ed1

Browse files
committed
modified transform to work with json output from scancode
Signed-off-by: Srthkdb <[email protected]>
1 parent b6c6c61 commit db03ed1

File tree

5 files changed

+3057
-72
lines changed

5 files changed

+3057
-72
lines changed

.vscode/settings.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"python.pythonPath": "/home/sarthak/gsoc/aboutcode-toolkit/local/bin/python2.7"
3+
}

src/attributecode/transform.py

Lines changed: 15 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -129,77 +129,24 @@ def transform_json(data, transformer):
129129
renamings = transformer.column_renamings
130130
# If the JSON is the output of scancode-toolkit
131131
if(data["headers"][0]["tool_name"] == "scancode-toolkit"):
132-
new_data, errors = process_json_keys_scancode(data, renamings, transformer)
133-
134-
elif isinstance(data, list):
135-
for item in data:
136-
element, err = process_json_keys(item, renamings, transformer)
137-
for e in element:
138-
new_data.append(e)
139-
for e in err:
140-
errors.append(e)
132+
# Only the entries under "files" are transformed
133+
data = data["files"]
134+
# Rename "path" to "about_resource" unless the configuration already renames "path"
135+
if("path" not in renamings.keys()):
136+
renamings["path"] = "about_resource"
137+
138+
if isinstance(data, list):
139+
for item in data:
140+
element, err = process_json_keys(item, renamings, transformer)
141+
for e in element:
142+
new_data.append(e)
143+
for e in err:
144+
errors.append(e)
141145
else:
142146
new_data, errors = process_json_keys(data, renamings, transformer)
143147

144148
return new_data, errors
145149

146-
def process_json_keys_scancode(data, renamings, transformer):
147-
o_dict = OrderedDict()
148-
o_dict_headers_list = []
149-
o_dict_files_list = []
150-
new_data = []
151-
152-
for item in data["headers"]:
153-
o_dict_headers = OrderedDict()
154-
for k in item.keys():
155-
if k in renamings.keys():
156-
for r_key in renamings.keys():
157-
if k == r_key:
158-
o_dict_headers[renamings[r_key]] = item[k]
159-
else:
160-
o_dict_headers[k] = item[k]
161-
o_dict_headers_list.append(o_dict_headers)
162-
163-
164-
for item in data["files"]:
165-
o_dict_files = OrderedDict()
166-
for k in item.keys():
167-
if k in renamings.keys():
168-
for r_key in renamings.keys():
169-
if k == r_key:
170-
o_dict_files[renamings[r_key]] = item[k]
171-
else:
172-
o_dict_files[k] = item[k]
173-
o_dict_files_list.append(o_dict_files)
174-
175-
176-
for k in data.keys():
177-
if k in renamings.keys():
178-
for r_key in renamings.keys():
179-
if k == r_key:
180-
o_dict[renamings[r_key]] = data[k]
181-
else:
182-
o_dict[k] = data[k]
183-
184-
if("files" in renamings.keys()):
185-
o_dict[renamings["files"]] = o_dict_files_list
186-
else:
187-
o_dict["files"] = o_dict_files_list
188-
if("headers" in renamings.keys()):
189-
o_dict[renamings["headers"]] = o_dict_headers_list
190-
else:
191-
o_dict["headers"] = o_dict_headers_list
192-
new_data = [o_dict]
193-
194-
if transformer.column_filters:
195-
new_data = list(transformer.filter_columns(new_data))
196-
else:
197-
new_data = list(new_data)
198-
199-
errors = transformer.check_required_columns(new_data, isFromScancode=True)
200-
201-
return new_data, errors
202-
203150

204151
def process_json_keys(data, renamings, transformer):
205152
o_dict = OrderedDict()
@@ -315,16 +262,13 @@ def from_file(cls, location):
315262
column_filters=data.get('column_filters', []),
316263
)
317264

318-
def check_required_columns(self, data, isFromScancode=False):
265+
def check_required_columns(self, data):
319266
"""
320267
Return a list of Error for a `data` list of ordered dict where a
321268
dict is missing a value for a required column name.
322269
"""
323270
errors = []
324-
if(isFromScancode):
325-
required = set(self.required_columns)
326-
else:
327-
required = set(self.essential_columns + self.required_columns)
271+
required = set(self.essential_columns + self.required_columns)
328272
if not required:
329273
return []
330274

tests/test_transform.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,4 +68,17 @@ def test_transform_data_json_as_array(self):
6868
for item in data:
6969
keys = list(item.keys())
7070
expect = [u'about_resource', u'name', u'version']
71-
assert keys == expect
71+
assert keys == expect
72+
73+
def test_transform_data_json_scancode(self):
74+
test_file = get_test_loc('test_transform/input_scancode.json')
75+
configuration = get_test_loc('test_transform/configuration_scancode')
76+
json_data = read_json(test_file)
77+
transformer = Transformer.from_file(configuration)
78+
data, err = transform_json(json_data, transformer)
79+
keys = []
80+
for item in data:
81+
keys = list(item.keys())
82+
expect = [u'about_resource', u'name', u'new_extension']
83+
assert keys == expect
84+
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
column_renamings:
2+
extension : new_extension
3+
column_filters:
4+
- name
5+
- new_extension
6+
- about_resource
7+
required_columns:
8+
- name
9+
- type
10+
11+

0 commit comments

Comments
 (0)