Skip to content

Commit 80fad49

Browse files
committed
Merge branch '781-jsonlines-plugin' into develop
2 parents 58b1880 + d54f8d7 commit 80fad49

File tree

6 files changed

+205
-2
lines changed

6 files changed

+205
-2
lines changed

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ def read(*names, **kwargs):
219219
'spdx-tv = formattedcode.format_spdx:write_spdx_tag_value',
220220
'spdx-rdf = formattedcode.format_spdx:write_spdx_rdf',
221221
'csv = formattedcode.format_csv:write_csv',
222+
'jsonlines = formattedcode.format_jsonlines:write_jsonlines',
222223
],
223224

224225
# scancode_post_scan is an entry point for post_scan_plugins.

src/formattedcode/format_json.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,5 +66,6 @@ def _write_json(files_count, version, notice, scanned_files, options, output_fil
6666
else:
6767
kwargs['separators'] = (',', ':',)
6868

69+
# FIXME: Why do we wrap the output in unicode? Test output when we do not wrap the output in unicode
6970
output_file.write(unicode(simplejson.dumps(scan, **kwargs)))
7071
output_file.write('\n')
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#
2+
# Copyright (c) 2017 nexB Inc. and others. All rights reserved.
3+
# http://nexb.com and https://github.com/nexB/scancode-toolkit/
4+
# The ScanCode software is licensed under the Apache License version 2.0.
5+
# Data generated with ScanCode require an acknowledgment.
6+
# ScanCode is a trademark of nexB Inc.
7+
#
8+
# You may not use this software except in compliance with the License.
9+
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10+
# Unless required by applicable law or agreed to in writing, software distributed
11+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
# specific language governing permissions and limitations under the License.
14+
#
15+
# When you publish or redistribute any data created with ScanCode or any ScanCode
16+
# derivative work, you must accompany this data with the following acknowledgment:
17+
#
18+
# Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
19+
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
20+
# ScanCode should be considered or used as legal advice. Consult an Attorney
21+
# for any legal advice.
22+
# ScanCode is a free software code scanning tool from nexB Inc. and others.
23+
# Visit https://github.com/nexB/scancode-toolkit/ for support and download.
24+
25+
from __future__ import absolute_import
26+
from __future__ import unicode_literals
27+
28+
from collections import OrderedDict
29+
30+
import simplejson
31+
32+
from plugincode.output import scan_output_writer
33+
34+
35+
"""
36+
Output plugin to write scan results as JSON Lines.
37+
"""
38+
39+
40+
@scan_output_writer
def write_jsonlines(files_count, version, notice, scanned_files, options, output_file, *args, **kwargs):
    """
    Write scan output formatted as JSON Lines.

    The first line written to `output_file` is a JSON object with a single
    "header" key holding the scan notice, version, options and files count.
    Each following line is a JSON object with a single "files" key holding a
    one-item list with one scanned file from `scanned_files`.
    """
    header = dict(header=OrderedDict([
        ('scancode_notice', notice),
        ('scancode_version', version),
        ('scancode_options', options),
        ('files_count', files_count)
    ]))

    # Compact separators keep each JSON document on one physical line.
    # This uses its own name so that the **kwargs received from the caller
    # are not shadowed.
    dump_kwargs = dict(iterable_as_array=True, encoding='utf-8', separators=(',', ':',))

    output_file.write(simplejson.dumps(header, **dump_kwargs))
    output_file.write('\n')

    for scanned_file in scanned_files:
        scanned_file_line = {'files': [scanned_file]}
        output_file.write(simplejson.dumps(scanned_file_line, **dump_kwargs))
        output_file.write('\n')

src/scancode/cli_test_utils.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,7 @@ def check_json_scan(expected_file, result_file, regen=False, strip_dates=False):
6969

7070
def _load_json_result(result_file):
7171
"""
72-
Load the result file as utf-8 JSON and strip test_dir prefix from
73-
locations.
72+
Load the result file as utf-8 JSON.
7473
Sort the results by location.
7574
"""
7675
with codecs.open(result_file, encoding='utf-8') as res:
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
[
2+
{
3+
"header": {
4+
"scancode_notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
5+
"scancode_version": "2.1.0.post69.536f354.dirty.20171004191716",
6+
"scancode_options": {
7+
"--info": true,
8+
"--license-score": 0,
9+
"--format": "jsonlines"
10+
},
11+
"files_count": 1
12+
}
13+
},
14+
{
15+
"files": [
16+
{
17+
"path": "simple/copyright_acme_c-c.c",
18+
"type": "file",
19+
"name": "copyright_acme_c-c.c",
20+
"base_name": "copyright_acme_c-c",
21+
"extension": ".c",
22+
"date": "2017-10-03",
23+
"size": 55,
24+
"sha1": "e2466d5b764d27fb301ceb439ffb5da22e43ab1d",
25+
"md5": "bdf7c572beb4094c2059508fa73c05a4",
26+
"files_count": null,
27+
"mime_type": "text/plain",
28+
"file_type": "UTF-8 Unicode text, with no line terminators",
29+
"programming_language": "C",
30+
"is_binary": false,
31+
"is_text": true,
32+
"is_archive": false,
33+
"is_media": false,
34+
"is_source": true,
35+
"is_script": false,
36+
"scan_errors": []
37+
}
38+
]
39+
}
40+
]
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
#
2+
# Copyright (c) 2017 nexB Inc. and others. All rights reserved.
3+
# http://nexb.com and https://github.com/nexB/scancode-toolkit/
4+
# The ScanCode software is licensed under the Apache License version 2.0.
5+
# Data generated with ScanCode require an acknowledgment.
6+
# ScanCode is a trademark of nexB Inc.
7+
#
8+
# You may not use this software except in compliance with the License.
9+
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10+
# Unless required by applicable law or agreed to in writing, software distributed
11+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
# specific language governing permissions and limitations under the License.
14+
#
15+
# When you publish or redistribute any data created with ScanCode or any ScanCode
16+
# derivative work, you must accompany this data with the following acknowledgment:
17+
#
18+
# Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
19+
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
20+
# ScanCode should be considered or used as legal advice. Consult an Attorney
21+
# for any legal advice.
22+
# ScanCode is a free software code scanning tool from nexB Inc. and others.
23+
# Visit https://github.com/nexB/scancode-toolkit/ for support and download.
24+
25+
from __future__ import absolute_import
26+
from __future__ import print_function
27+
from __future__ import division
28+
from __future__ import unicode_literals
29+
30+
import codecs
31+
from collections import OrderedDict
32+
import json
33+
import os
34+
35+
from commoncode.testcase import FileDrivenTesting
36+
from scancode.cli_test_utils import run_scan_click
37+
38+
39+
test_env = FileDrivenTesting()
40+
test_env.test_data_dir = os.path.join(os.path.dirname(__file__), 'data')
41+
42+
43+
def remove_variable_data(scan_result):
    """
    Remove variable fields from scan, such as date, version to ensure that the
    test data is stable.

    `scan_result` is a list of mappings, one per JSON Lines line. It is
    modified in place and nothing is returned. Fields that are already
    absent are ignored, so calling this again on stripped data is safe.
    """
    for line in scan_result:
        header = line.get('header')
        if header:
            # pop() with a default does not fail when the version is missing
            header.pop('scancode_version', None)
        for scanned_file in line.get('files', []):
            scanned_file.pop('date', None)
55+
56+
57+
def check_jsonlines_scan(expected_file, result_file, regen=False):
    """
    Check the scan result_file JSON Lines results against the expected_file
    expected JSON results, which is a list of mappings, one per line. If regen
    is True the expected_file WILL BE overwritten with the results. This is
    convenient for updating tests expectations. But use with caution.

    Variable fields are removed from both the results and the expected data
    before they are compared. Raise an AssertionError if they differ.
    """
    result = _load_jsonlines_result(result_file)
    remove_variable_data(result)

    if regen:
        # json.dump writes text, so use a UTF-8 text writer rather than a
        # file opened in 'wb' mode. The output is indented so that the
        # regenerated expectations stay readable and diff-friendly.
        with codecs.open(expected_file, 'w', encoding='utf-8') as reg:
            json.dump(result, reg, indent=2, separators=(',', ': '))

    expected = _load_json_result(expected_file)
    remove_variable_data(expected)

    assert expected == result
75+
76+
77+
def _load_jsonlines_result(result_file):
    """
    Load the result file as utf-8 JSON Lines.

    Return a list with one decoded JSON value per non-blank line, in file
    order. JSON objects are loaded as OrderedDict to keep their key order.
    Blank lines, such as a trailing empty line, are skipped.
    """
    with codecs.open(result_file, encoding='utf-8') as res:
        return [
            json.loads(line, object_pairs_hook=OrderedDict)
            for line in res
            if line.strip()
        ]
83+
84+
85+
def _load_json_result(result_file):
    """
    Read `result_file` as UTF-8 encoded JSON and return the decoded data.
    JSON objects are loaded as OrderedDict so that key order is kept.
    """
    with codecs.open(result_file, encoding='utf-8') as json_file:
        loaded = json.load(json_file, object_pairs_hook=OrderedDict)
    return loaded
91+
92+
93+
def test_jsonlines():
    """
    Run a scan of the 'json/simple' test directory with the 'jsonlines'
    format and check the output against the expected results file.
    """
    test_dir = test_env.get_test_loc('json/simple')
    result_file = test_env.get_temp_file('jsonline')

    result = run_scan_click(['-i', '--format', 'jsonlines', test_dir, result_file])
    assert result.exit_code == 0
    assert 'Scanning done' in result.output

    # get_test_loc() already returns the resolved test file location, so it
    # is passed as-is and not resolved a second time.
    expected_file = test_env.get_test_loc('json/simple-expected.jsonlines')
    check_jsonlines_scan(expected_file, result_file, regen=False)

0 commit comments

Comments
 (0)