Skip to content

Commit ba9c2e0

Browse files
committed
Rework CSV report generator and change timeseries report to use framework.csv
1 parent c6cb7c6 commit ba9c2e0

File tree

5 files changed

+319
-156
lines changed

5 files changed

+319
-156
lines changed
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import csv
2+
import sys
3+
import packages
4+
5+
6+
class Framework:
    """
    Frameworks are the aggregation units in the RST and timeseries report.

    Each instance corresponds to one row of the frameworks CSV file, for example
    ``Hibernate,https://hibernate.org/,org.hibernate``.
    """

    def __init__(self, name, url, package_pattern):
        # Display name of the framework (first CSV column).
        self.name = name
        # URL of the framework (second CSV column).
        self.url = url
        # Either a full package name, such as "org.hibernate", or a prefix
        # pattern ending in "*", such as "java.*" (third CSV column).
        self.package_pattern = package_pattern

    def __repr__(self):
        return (f"Framework(name={self.name!r}, url={self.url!r}, "
                f"package_pattern={self.package_pattern!r})")


class FrameworkCollection:
    """
    A list of frameworks, sorted by name, read from a CSV file.
    """

    def __init__(self, path):
        """
        Reads the frameworks from the CSV file at `path`.

        The first row is treated as a header and skipped. Blank rows are ignored.
        Every other row must have at least three columns: name, URL and package
        pattern (an `IndexError` is raised otherwise). An empty file results in
        an empty collection. Rows whose package pattern was already seen are
        reported on stderr and skipped.
        """
        self.frameworks: list[Framework] = []
        self.package_patterns = set()

        # The csv module expects files opened with newline='' so that it can
        # handle line endings (also inside quoted fields) itself.
        with open(path, newline='') as csvfile:
            reader = csv.reader(csvfile)
            # Skip the header. The default value keeps an empty file from
            # raising StopIteration.
            next(reader, None)
            for row in reader:
                # csv.reader returns an empty list for blank lines.
                if not row:
                    continue
                # row: Hibernate,https://hibernate.org/,org.hibernate
                self.__add(Framework(row[0], row[1], row[2]))
        self.__sort()

    def __add(self, framework: Framework):
        """Adds `framework` unless its package pattern is already registered."""
        if framework.package_pattern not in self.package_patterns:
            self.package_patterns.add(framework.package_pattern)
            self.frameworks.append(framework)
        else:
            print("Package pattern already exists: " +
                  framework.package_pattern, file=sys.stderr)

    def __sort(self):
        """Sorts the frameworks in place by name."""
        self.frameworks.sort(key=lambda f: f.name)

    def get(self, framework_name):
        """Returns the framework named `framework_name`, or `None` if there is none."""
        for framework in self.frameworks:
            if framework.name == framework_name:
                return framework
        return None

    def get_patterns(self):
        """Returns the set of all registered package patterns."""
        return self.package_patterns

    def get_frameworks(self):
        """Returns the list of frameworks, sorted by name."""
        return self.frameworks

    # The annotation is quoted so that it is not evaluated when the class is defined.
    def __package_match(self, package: "packages.Package", pattern):
        """
        Returns whether `package.name` matches `pattern`.

        A pattern ending in "*" matches every name starting with the text before
        the "*"; any other pattern only matches the identical name.
        """
        if pattern.endswith("*"):
            return package.name.startswith(pattern[:-1])
        return pattern == package.name

    def get_package_filter(self, framework: Framework):
        """
        Returns a lambda filter that holds for packages that match the given framework.

        The pattern is either full name, such as "org.hibernate", or a prefix, such as "java.*"
        Package patterns might overlap, in case of 'org.apache.commons.io' and 'org.apache.*', the statistics for
        the latter will not include the statistics for the former.

        The framework's pattern and the set of longer patterns are captured when
        the filter is created.
        """
        own_pattern = framework.package_pattern
        # Only a strictly longer (more specific) pattern can claim a package
        # that also matches this framework; collect those once per filter
        # instead of re-checking every pattern for every package.
        longer_patterns = [
            pattern for pattern in self.get_patterns()
            if len(pattern) > len(own_pattern)]

        return lambda p: \
            self.__package_match(p, own_pattern) and \
            not any(
                self.__package_match(p, pattern) for pattern in longer_patterns)

misc/scripts/library-coverage/generate-report.py

Lines changed: 31 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import shutil
55
import settings
66
import utils
7+
import packages as pack
8+
import frameworks as fr
79

810
"""
911
This script runs the CSV coverage report QL query, and transforms it to a more readable format.
@@ -28,14 +30,7 @@ def append_csv_dict_item(list, dictionary, key):
2830
list.append(None)
2931

3032

31-
def increment_dict_item(value, dictionary, key):
32-
"""Increments the value of the dictionary[key] by value."""
33-
if key not in dictionary:
34-
dictionary[key] = 0
35-
dictionary[key] += int(value)
36-
37-
38-
def collect_package_stats(packages, cwes, filter):
33+
def collect_package_stats(packages: pack.PackageCollection, cwes, filter):
3934
"""
4035
Collects coverage statistics for packages matching the given filter. `filter` is a `lambda` that for example (1) matches
4136
packages to frameworks, or (2) matches packages that were previously not processed.
@@ -48,20 +43,21 @@ def collect_package_stats(packages, cwes, filter):
4843
framework_cwes = {}
4944
processed_packages = set()
5045

51-
for package in packages:
46+
for package in packages.get_packages():
47+
package: pack.Package = package
5248
if filter(package):
5349
processed_packages.add(package)
54-
sources += int(packages[package]["kind"].get("source:remote", 0))
55-
steps += int(packages[package]["part"].get("summary", 0))
56-
sinks += int(packages[package]["part"].get("sink", 0))
50+
sources += package.get_kind_count("source:remote")
51+
steps += package.get_part_count("summary")
52+
sinks += package.get_part_count("sink")
5753

5854
for cwe in cwes:
5955
sink = "sink:" + cwes[cwe]["sink"]
60-
if sink in packages[package]["kind"]:
56+
count = package.get_kind_count(sink)
57+
if count > 0:
6158
if cwe not in framework_cwes:
6259
framework_cwes[cwe] = 0
63-
framework_cwes[cwe] += int(
64-
packages[package]["kind"][sink])
60+
framework_cwes[cwe] += count
6561

6662
return sources, steps, sinks, framework_cwes, processed_packages
6763

@@ -137,37 +133,12 @@ def add_package_stats_to_row(row, sorted_cwes, collect):
137133
utils.run_codeql_query(config.ql_path, db, ql_output)
138134
shutil.rmtree(db)
139135

140-
packages = {}
141-
parts = set()
142-
kinds = set()
143-
144-
# Read the generated CSV file, and collect package statistics.
145-
with open(ql_output) as csvfile:
146-
reader = csv.reader(csvfile)
147-
for row in reader:
148-
# row: "android.util",1,"remote","source",16
149-
package = row[0]
150-
if package not in packages:
151-
packages[package] = {
152-
"count": row[1],
153-
# part: "summary", "sink", or "source"
154-
"part": {},
155-
# kind: "source:remote", "sink:create-file", ...
156-
"kind": {}
157-
}
158-
159-
part = row[3]
160-
parts.add(part)
161-
increment_dict_item(row[4], packages[package]["part"], part)
162-
163-
kind = part + ":" + row[2]
164-
kinds.add(kind)
165-
increment_dict_item(row[4], packages[package]["kind"], kind)
136+
packages = pack.PackageCollection(ql_output)
166137

167138
os.remove(ql_output)
168139

169-
parts = sorted(parts)
170-
kinds = sorted(kinds)
140+
parts = packages.get_parts()
141+
kinds = packages.get_kinds()
171142

172143
# Write the denormalized package statistics to a CSV file.
173144
with open(output_csv.format(language=lang), 'w', newline='') as csvfile:
@@ -179,44 +150,21 @@ def add_package_stats_to_row(row, sorted_cwes, collect):
179150

180151
csvwriter.writerow(headers)
181152

182-
for package in sorted(packages):
183-
row = [package]
153+
for package in packages.get_packages():
154+
package: pack.Package = package
155+
row = [package.name]
184156
for part in parts:
185-
append_csv_dict_item(row, packages[package]["part"], part)
157+
append_csv_number(row, package.get_part_count(part))
186158
for kind in kinds:
187-
append_csv_dict_item(row, packages[package]["kind"], kind)
159+
append_csv_number(row, package.get_kind_count(kind))
188160
csvwriter.writerow(row)
189161

190162
# Read the additional framework data, such as URL, friendly name
191-
frameworks = {}
192-
193-
with open(input_framework_csv.format(language=lang)) as csvfile:
194-
reader = csv.reader(csvfile)
195-
next(reader)
196-
for row in reader:
197-
# row: Hibernate,https://hibernate.org/,org.hibernate
198-
framwork = row[0]
199-
if framwork not in frameworks:
200-
frameworks[framwork] = {
201-
"package": row[2],
202-
"url": row[1]
203-
}
163+
frameworks = fr.FrameworkCollection(
164+
input_framework_csv.format(language=lang))
204165

205166
# Read the additional CWE data
206-
cwes = {}
207-
208-
with open(input_cwe_sink_csv.format(language=lang)) as csvfile:
209-
reader = csv.reader(csvfile)
210-
next(reader)
211-
for row in reader:
212-
# row: CWE-89,sql,SQL injection
213-
cwe = row[0]
214-
if cwe not in cwes:
215-
cwes[cwe] = {
216-
"sink": row[1],
217-
"label": row[2]
218-
}
219-
167+
cwes = utils.read_cwes(input_cwe_sink_csv.format(language=lang))
220168
sorted_cwes = sorted(cwes)
221169

222170
with open(output_rst.format(language=lang), 'w', newline='') as rst_file:
@@ -246,34 +194,24 @@ def add_package_stats_to_row(row, sorted_cwes, collect):
246194

247195
processed_packages = set()
248196

249-
all_package_patterns = set(
250-
(frameworks[fr]["package"] for fr in frameworks))
251-
252197
# Write a row for each framework.
253-
for framework in sorted(frameworks):
198+
for framework in frameworks.get_frameworks():
199+
framework: fr.Framework = framework
254200
row = []
255201

256202
# Add the framework name to the row
257-
if not frameworks[framework]["url"]:
258-
row.append(row_prefix + framework)
203+
if not framework.url:
204+
row.append(row_prefix + framework.name)
259205
else:
260206
row.append(
261-
row_prefix + "`" + framework + " <" + frameworks[framework]["url"] + ">`_")
207+
row_prefix + "`" + framework.name + " <" + framework.url + ">`_")
262208

263209
# Add the package name to the row
264-
row.append("``" + frameworks[framework]["package"] + "``")
265-
266-
current_package_pattern = frameworks[framework]["package"]
210+
row.append("``" + framework.package_pattern + "``")
267211

268212
# Collect statistics on the current framework
269-
# current_package_pattern is either full name, such as "org.hibernate", or a prefix, such as "java.*"
270-
# Package patterns might overlap, in case of 'org.apache.commons.io' and 'org.apache.*', the statistics for
271-
# the latter will not include the statistics for the former.
272-
def package_match(package_name, pattern): return (pattern.endswith(
273-
"*") and package_name.startswith(pattern[:-1])) or (not pattern.endswith("*") and pattern == package_name)
274-
275213
def collect_framework(): return collect_package_stats(
276-
packages, cwes, lambda p: package_match(p, current_package_pattern) and all(len(current_package_pattern) >= len(pattern) or not package_match(p, pattern) for pattern in all_package_patterns))
214+
packages, cwes, frameworks.get_package_filter(framework))
277215

278216
row, f_processed_packages = add_package_stats_to_row(
279217
row, sorted_cwes, collect_framework)
@@ -290,8 +228,8 @@ def collect_others(): return collect_package_stats(
290228
row, other_packages = add_package_stats_to_row(
291229
row, sorted_cwes, collect_others)
292230

293-
row[1] = ", ".join("``{0}``".format(p)
294-
for p in sorted(other_packages))
231+
row[1] = ", ".join("``{0}``".format(p.name)
232+
for p in sorted(other_packages, key=lambda x: x.name))
295233

296234
csvwriter.writerow(row)
297235

0 commit comments

Comments
 (0)