Skip to content

Commit 2adb3e9

Browse files
committed
Code quality improvements on coverage report generator script
1 parent d0a46eb commit 2adb3e9

File tree

1 file changed

+73
-39
lines changed

1 file changed

+73
-39
lines changed

misc/scripts/generate-csv-coverage-report.py

Lines changed: 73 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66

77
"""
88
This script runs the CSV coverage report QL query, and transforms it to a more readable format.
9-
"""
9+
There are two main outputs: (i) a CSV file containing the coverage data, and (ii) an RST page containing the coverage
10+
data.
11+
"""
1012

1113

1214
def subprocess_run(cmd):
@@ -48,8 +50,20 @@ def append_csv_dict_item(list, dictionary, key):
4850
list.append(None)
4951

5052

51-
def collect_package_stats(packages, filter):
52-
"""Collects coverage statistics for packages matching the given filter."""
53+
def increment_dict_item(value, dictionary, key):
    """
    Increments the value of dictionary[key] by value.

    `value` is converted with `int()`, so numeric strings read from a CSV row are accepted. A missing `key` is treated
    as having a current count of 0.

    Raises whatever `int(value)` raises (for example `ValueError` for a non-numeric string). In that case `dictionary`
    is left untouched.
    """
    # Convert before touching the dictionary, so that a failed conversion does not leave behind a spurious zero entry.
    amount = int(value)
    dictionary[key] = dictionary.get(key, 0) + amount
58+
59+
60+
def collect_package_stats(packages, cwes, filter):
61+
"""
62+
Collects coverage statistics for packages matching the given filter. `filter` is a `lambda` that for example (i) matches
63+
packages to frameworks, or (ii) matches packages that were previously not processed.
64+
65+
The returned statistics are used to generate a single row in a CSV file.
66+
"""
5367
sources = 0
5468
steps = 0
5569
sinks = 0
@@ -75,7 +89,11 @@ def collect_package_stats(packages, filter):
7589

7690

7791
def add_package_stats_to_row(row, sorted_cwes, collect):
78-
""" Adds collected statistic to the row. """
92+
"""
93+
Adds collected statistics to the row. `collect` is a `lambda` that returns the statistics for example for (i) individual
94+
frameworks, (ii) left-out frameworks summarized in the 'Others' row, or (iii) all frameworks summarized in the 'Totals'
95+
row.
96+
"""
7997
sources, steps, sinks, framework_cwes, processed_packages = collect()
8098

8199
append_csv_number(row, sources)
@@ -112,11 +130,19 @@ def __init__(self, lang, capitalized_lang, ext, ql_path):
112130
"java", "Java", ".java", prefix + "java/ql/src/meta/frameworks/Coverage.ql")
113131
]
114132

115-
with open("flow-model-coverage.rst", 'w') as rst_file:
133+
# The names of input and output files. The placeholder {language} is replaced with the language name.
134+
output_rst = "flow-model-coverage.rst"
135+
output_rst_csv = "rst-csv-flow-model-coverage-{language}.csv"
136+
output_ql_csv = "output-{language}.csv"
137+
output_csv = "csv-flow-model-coverage-{language}.csv"
138+
input_framework_csv = prefix + "misc/scripts/frameworks-{language}.csv"
139+
input_cwe_sink_csv = prefix + "misc/scripts/cwe-sink-{language}.csv"
140+
141+
with open(output_rst, 'w') as rst_file:
116142
for config in configs:
117143
lang = config.lang
118144
db = "empty-" + lang
119-
ql_output = "output-" + lang + ".csv"
145+
ql_output = output_ql_csv.format(language=lang)
120146
create_empty_database(lang, config.ext, db)
121147
run_codeql_query(config.ql_path, db, ql_output)
122148

@@ -128,36 +154,37 @@ def __init__(self, lang, capitalized_lang, ext, ql_path):
128154
with open(ql_output) as csvfile:
129155
reader = csv.reader(csvfile)
130156
for row in reader:
157+
# row: "android.util",1,"remote","source",16
131158
package = row[0]
132159
if package not in packages:
133160
packages[package] = {
134161
"count": row[1],
162+
# part: "summary", "sink", or "source"
135163
"part": {},
164+
# kind: "source:remote", "sink:create-file", ...
136165
"kind": {}
137166
}
167+
138168
part = row[3]
139169
parts.add(part)
140-
if part not in packages[package]["part"]:
141-
packages[package]["part"][part] = 0
142-
packages[package]["part"][part] += int(row[4])
170+
increment_dict_item(row[4], packages[package]["part"], part)
171+
143172
kind = part + ":" + row[2]
144173
kinds.add(kind)
145-
if kind not in packages[package]["kind"]:
146-
packages[package]["kind"][kind] = 0
147-
packages[package]["kind"][kind] += int(row[4])
174+
increment_dict_item(row[4], packages[package]["kind"], kind)
175+
176+
parts = sorted(parts)
177+
kinds = sorted(kinds)
148178

149179
# Write the denormalized package statistics to a CSV file.
150-
with open("csv-flow-model-coverage-" + lang + ".csv", 'w', newline='') as csvfile:
180+
with open(output_csv.format(language=lang), 'w', newline='') as csvfile:
151181
csvwriter = csv.writer(csvfile)
152182

153-
parts = sorted(parts)
154-
kinds = sorted(kinds)
155-
156-
columns = ["package"]
157-
columns.extend(parts)
158-
columns.extend(kinds)
183+
headers = ["package"]
184+
headers.extend(parts)
185+
headers.extend(kinds)
159186

160-
csvwriter.writerow(columns)
187+
csvwriter.writerow(headers)
161188

162189
for package in sorted(packages):
163190
row = [package]
@@ -170,10 +197,11 @@ def __init__(self, lang, capitalized_lang, ext, ql_path):
170197
# Read the additional framework data, such as URL, friendly name
171198
frameworks = {}
172199

173-
with open(prefix + "misc/scripts/frameworks-" + lang + ".csv") as csvfile:
200+
with open(input_framework_csv.format(language=lang)) as csvfile:
174201
reader = csv.reader(csvfile)
175202
next(reader)
176203
for row in reader:
204+
# row: Hibernate,https://hibernate.org/,org.hibernate
177205
framwork = row[0]
178206
if framwork not in frameworks:
179207
frameworks[framwork] = {
@@ -184,18 +212,21 @@ def __init__(self, lang, capitalized_lang, ext, ql_path):
184212
# Read the additional CWE data
185213
cwes = {}
186214

187-
with open(prefix + "misc/scripts/cwe-sink-" + lang + ".csv") as csvfile:
215+
with open(input_cwe_sink_csv.format(language=lang)) as csvfile:
188216
reader = csv.reader(csvfile)
189217
next(reader)
190218
for row in reader:
219+
# row: CWE-89,sql,SQL injection
191220
cwe = row[0]
192221
if cwe not in cwes:
193222
cwes[cwe] = {
194223
"sink": row[1],
195224
"label": row[2]
196225
}
197226

198-
file_name = "rst-csv-flow-model-coverage-" + lang + ".csv"
227+
sorted_cwes = sorted(cwes)
228+
229+
file_name = output_rst_csv.format(language=lang)
199230

200231
rst_file.write(
201232
config.capitalized_lang + " framework & library support\n")
@@ -210,17 +241,23 @@ def __init__(self, lang, capitalized_lang, ext, ql_path):
210241
with open(file_name, 'w', newline='') as csvfile:
211242
csvwriter = csv.writer(csvfile)
212243

213-
columns = ["Framework / library", "package",
214-
"remote flow sources", "taint & value steps", "sinks (total)"]
215-
for cwe in sorted(cwes):
216-
columns.append("`" + cwe + "` :sub:`" +
217-
cwes[cwe]["label"] + "`")
218-
csvwriter.writerow(columns)
244+
# Write CSV header.
245+
headers = ["Framework / library",
246+
"Package",
247+
"Remote flow sources",
248+
"Taint & value steps",
249+
"Sinks (total)"]
250+
for cwe in sorted_cwes:
251+
headers.append(
252+
"`{0}` :sub:`{1}`".format(cwe, cwes[cwe]["label"]))
253+
csvwriter.writerow(headers)
219254

220255
processed_packages = set()
221256

257+
# Write a row for each framework.
222258
for framework in sorted(frameworks):
223259
row = []
260+
224261
# Add the framework name to the row
225262
if not frameworks[framework]["url"]:
226263
row.append(framework)
@@ -234,12 +271,12 @@ def __init__(self, lang, capitalized_lang, ext, ql_path):
234271
prefix = frameworks[framework]["package"]
235272

236273
# Collect statistics on the current framework
274+
# The package name is either a full name, such as "org.hibernate", or a prefix, such as "java.*"
237275
def collect_framework(): return collect_package_stats(
238-
packages,
239-
lambda p: (prefix.endswith("*") and p.startswith(prefix[:-1])) or (not prefix.endswith("*") and prefix == p))
276+
packages, cwes, lambda p: (prefix.endswith("*") and p.startswith(prefix[:-1])) or (not prefix.endswith("*") and prefix == p))
240277

241278
row, f_processed_packages = add_package_stats_to_row(
242-
row, sorted(cwes), collect_framework)
279+
row, sorted_cwes, collect_framework)
243280

244281
csvwriter.writerow(row)
245282
processed_packages.update(f_processed_packages)
@@ -248,11 +285,10 @@ def collect_framework(): return collect_package_stats(
248285
row = ["Others", None]
249286

250287
def collect_others(): return collect_package_stats(
251-
packages,
252-
lambda p: p not in processed_packages)
288+
packages, cwes, lambda p: p not in processed_packages)
253289

254290
row, other_packages = add_package_stats_to_row(
255-
row, sorted(cwes), collect_others)
291+
row, sorted_cwes, collect_others)
256292

257293
row[1] = ", ".join("``{0}``".format(p)
258294
for p in sorted(other_packages))
@@ -262,11 +298,9 @@ def collect_others(): return collect_package_stats(
262298
# Collect statistics on all packages
263299
row = ["Totals", None]
264300

265-
def collect_total(): return collect_package_stats(
266-
packages,
267-
lambda p: True)
301+
def collect_total(): return collect_package_stats(packages, cwes, lambda p: True)
268302

269303
row, _ = add_package_stats_to_row(
270-
row, sorted(cwes), collect_total)
304+
row, sorted_cwes, collect_total)
271305

272306
csvwriter.writerow(row)

0 commit comments

Comments
 (0)