Skip to content

Commit beea361

Browse files
committed
Add CSV file with framework and CWE info to be used in RST file
1 parent 979034a commit beea361

File tree

3 files changed

+159
-9
lines changed

3 files changed

+159
-9
lines changed

misc/scripts/cwe-sink-java.csv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
CWE,Sink identifier,Label
2+
CWE-89,sql,SQL injection
3+
CWE-22,create-file,Path injection

misc/scripts/frameworks-java.csv

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Framework name,URL,Package prefix
2+
Hibernate,https://hibernate.org/,org.hibernate
3+
Java Standard Library,,java.*
4+
Google,,com.google.common.*
5+
Apache,,org.apache.*

misc/scripts/generate-csv-coverage-report.py

Lines changed: 151 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,62 @@ def run_codeql_query(query, database, output):
3232
"--format=csv", "--no-titles", "--output", output])
3333

3434

35+
def append_csv_number(list, value):
    """Append `value` to the list when it is positive, otherwise append None.

    Exactly one element is appended on every call, so the caller's column
    positions stay aligned even when there is nothing to report.
    """
    # Zero and negative counts are replaced by a None placeholder.
    list.append(value if value > 0 else None)
41+
42+
43+
def append_csv_dict_item(list, dictionary, key):
    """Append `dictionary[key]` to the list, or None when the key is absent.

    Exactly one element is appended on every call, so the caller's column
    positions stay aligned for missing keys.
    """
    # dict.get returns None for a missing key, matching the explicit
    # membership test it replaces.
    list.append(dictionary.get(key))
49+
50+
51+
def collect_package_stats(packages, filter, cwes=None):
    """Collects coverage statistics for packages matching the given filter.

    Args:
        packages: Mapping of package name to a dict with "kind" and "part"
            sub-dicts holding per-kind / per-part counts. Counts are passed
            through int(), so numeric strings are accepted as well.
        filter: Predicate called with each package name; only packages for
            which it returns a truthy value are counted.
        cwes: Optional mapping of CWE identifier to a dict with a "sink" key
            (the sink kind without its "sink:" prefix). When omitted, the
            module-level `cwes` mapping is used, which is what this function
            previously always did.

    Returns:
        A tuple (sources, steps, sinks, framework_cwes, processed_packages):
        the summed "source:remote" kind count, the summed "summary" part
        count, the summed "sink" part count, a dict of per-CWE sink counts
        (containing only CWEs whose sink kind occurs in a matching package),
        and the set of package names accepted by the filter.
    """
    if cwes is None:
        # Existing callers rely on the script-level `cwes` table. Resolve it
        # at call time so they keep working without passing the argument.
        cwes = globals()["cwes"]

    sources = 0
    steps = 0
    sinks = 0
    framework_cwes = {}
    processed_packages = set()

    for package in packages:
        if not filter(package):
            continue

        processed_packages.add(package)
        kinds = packages[package]["kind"]
        parts = packages[package]["part"]

        sources += int(kinds.get("source:remote", 0))
        steps += int(parts.get("summary", 0))
        sinks += int(parts.get("sink", 0))

        for cwe in cwes:
            sink = "sink:" + cwes[cwe]["sink"]
            if sink in kinds:
                # Accumulate across all matching packages for this CWE.
                framework_cwes[cwe] = framework_cwes.get(cwe, 0) + int(kinds[sink])

    return sources, steps, sinks, framework_cwes, processed_packages
75+
76+
77+
def add_package_stats_to_row(row, sorted_cwes, collect):
    """Extend `row` with the statistics produced by calling `collect`.

    `collect` is called with no arguments and must return the tuple
    (sources, steps, sinks, framework_cwes, processed_packages). The three
    totals are appended first, followed by one cell per entry of
    `sorted_cwes`, in that order.

    Returns the (mutated) row together with the set of processed packages
    reported by `collect`.
    """
    source_count, step_count, sink_count, cwe_counts, matched = collect()

    # Totals come first, in the column order sources, steps, sinks.
    for total in (source_count, step_count, sink_count):
        append_csv_number(row, total)

    # Then one column per CWE; CWEs without a count get a None placeholder.
    for cwe_id in sorted_cwes:
        append_csv_dict_item(row, cwe_counts, cwe_id)

    return row, matched
89+
90+
3591
class LanguageConfig:
3692
def __init__(self, lang, ext, ql_path):
3793
self.lang = lang
@@ -61,13 +117,14 @@ def __init__(self, lang, ext, ql_path):
61117
query_path = config.ql_path
62118
db = "empty-" + lang
63119
ql_output = "output-" + lang + ".csv"
64-
create_empty_database(lang, ext, db)
120+
# create_empty_database(lang, ext, db)
65121
run_codeql_query(query_path, db, ql_output)
66122

67123
packages = {}
68124
parts = set()
69125
kinds = set()
70126

127+
# Read the generated CSV file, and collect package statistics.
71128
with open(ql_output) as csvfile:
72129
reader = csv.reader(csvfile)
73130
for row in reader:
@@ -89,6 +146,7 @@ def __init__(self, lang, ext, ql_path):
89146
packages[package]["kind"][kind] = 0
90147
packages[package]["kind"][kind] += int(row[4])
91148

149+
# Write the denormalized package statistics to a CSV file.
92150
with open("csv-flow-model-coverage-" + lang + ".csv", 'w', newline='') as csvfile:
93151
csvwriter = csv.writer(csvfile)
94152

@@ -104,13 +162,97 @@ def __init__(self, lang, ext, ql_path):
104162
for package in sorted(packages):
105163
row = [package]
106164
for part in parts:
107-
if part in packages[package]["part"]:
108-
row.append(packages[package]["part"][part])
109-
else:
110-
row.append(None)
165+
append_csv_dict_item(row, packages[package]["part"], part)
111166
for kind in kinds:
112-
if kind in packages[package]["kind"]:
113-
row.append(packages[package]["kind"][kind])
114-
else:
115-
row.append(None)
167+
append_csv_dict_item(row, packages[package]["kind"], kind)
116168
csvwriter.writerow(row)
169+
170+
# Read the additional framework data, such as URL, friendly name
171+
frameworks = {}
172+
173+
with open(prefix + "misc/scripts/frameworks-" + lang + ".csv") as csvfile:
174+
reader = csv.reader(csvfile)
175+
next(reader)
176+
for row in reader:
177+
framwork = row[0]
178+
if framwork not in frameworks:
179+
frameworks[framwork] = {
180+
"package": row[2],
181+
"url": row[1]
182+
}
183+
184+
# Read the additional CWE data
185+
cwes = {}
186+
187+
with open(prefix + "misc/scripts/cwe-sink-" + lang + ".csv") as csvfile:
188+
reader = csv.reader(csvfile)
189+
next(reader)
190+
for row in reader:
191+
cwe = row[0]
192+
if cwe not in cwes:
193+
cwes[cwe] = {
194+
"sink": row[1],
195+
"label": row[2]
196+
}
197+
198+
with open("rst-csv-flow-model-coverage-" + lang + ".csv", 'w', newline='') as csvfile:
199+
csvwriter = csv.writer(csvfile)
200+
201+
columns = ["Framework / library", "package",
202+
"remote flow sources", "taint & value steps", "sinks (total)"]
203+
for cwe in sorted(cwes):
204+
columns.append("`" + cwe + "` :sub:`" + cwes[cwe]["label"] + "`")
205+
csvwriter.writerow(columns)
206+
207+
processed_packages = set()
208+
209+
for framework in sorted(frameworks):
210+
row = []
211+
# Add the framework name to the row
212+
if not frameworks[framework]["url"]:
213+
row.append(framework)
214+
else:
215+
row.append(
216+
"`" + framework + " <" + frameworks[framework]["url"] + ">`_")
217+
218+
# Add the package name to the row
219+
row.append(frameworks[framework]["package"])
220+
221+
prefix = frameworks[framework]["package"]
222+
223+
# Collect statistics on the current framework
224+
def collect_framework(): return collect_package_stats(
225+
packages,
226+
lambda p: (prefix.endswith("*") and p.startswith(prefix[:-1])) or (not prefix.endswith("*") and prefix == p))
227+
228+
row, f_processed_packages = add_package_stats_to_row(
229+
row, sorted(cwes), collect_framework)
230+
231+
csvwriter.writerow(row)
232+
processed_packages.update(f_processed_packages)
233+
234+
# Collect statistics on all packages that are not part of a framework
235+
row = ["Others", None]
236+
237+
def collect_others(): return collect_package_stats(
238+
packages,
239+
lambda p: p not in processed_packages)
240+
241+
row, _ = add_package_stats_to_row(
242+
row, sorted(cwes), collect_others)
243+
244+
csvwriter.writerow(row)
245+
246+
# Collect statistics on all packages
247+
row = ["Total", None]
248+
249+
def collect_total(): return collect_package_stats(
250+
packages,
251+
lambda p: True)
252+
253+
row, _ = add_package_stats_to_row(
254+
row, sorted(cwes), collect_total)
255+
256+
csvwriter.writerow(row)
257+
258+
# todo: generate rst page referencing the csv files

0 commit comments

Comments
 (0)