6
6
7
7
"""
8
8
This script runs the CSV coverage report QL query, and transforms it to a more readable format.
9
- """
9
+ There are two main outputs: (i) a CSV file containing the coverage data, and (ii) an RST page containing the coverage
10
+ data.
11
+ """
10
12
11
13
12
14
def subprocess_run (cmd ):
@@ -48,8 +50,20 @@ def append_csv_dict_item(list, dictionary, key):
48
50
list .append (None )
49
51
50
52
51
- def collect_package_stats (packages , filter ):
52
- """Collects coverage statistics for packages matching the given filter."""
53
def increment_dict_item(value, dictionary, key):
    """Increment ``dictionary[key]`` by ``int(value)``.

    A key that is not yet present is treated as having a count of 0.

    Args:
        value: The amount to add. Anything accepted by ``int()``, for
            example a numeric string taken from a CSV row.
        dictionary: The mapping of counters; it is updated in place.
        key: The counter to increment.

    Raises:
        ValueError: If ``value`` is a string that ``int()`` cannot parse.
        TypeError: If ``value`` is of a type ``int()`` does not accept.
    """
    # Convert before touching the dictionary so that a value that fails to
    # parse cannot leave a spurious zero-valued entry behind.
    amount = int(value)
    dictionary[key] = dictionary.get(key, 0) + amount
58
+
59
+
60
+ def collect_package_stats (packages , cwes , filter ):
61
+ """
62
+ Collects coverage statistics for packages matching the given filter. `filter` is a `lambda` that for example (i) matches
63
+ packages to frameworks, or (ii) matches packages that were previously not processed.
64
+
65
+ The returned statistics are used to generate a single row in a CSV file.
66
+ """
53
67
sources = 0
54
68
steps = 0
55
69
sinks = 0
@@ -75,7 +89,11 @@ def collect_package_stats(packages, filter):
75
89
76
90
77
91
def add_package_stats_to_row (row , sorted_cwes , collect ):
78
- """ Adds collected statistic to the row. """
92
+ """
93
+ Adds the collected statistics to the row. `collect` is a `lambda` that returns the statistics, for example, for (i) individual
94
+ frameworks, (ii) leftout frameworks summarized in the 'Others' row, or (iii) all frameworks summarized in the 'Totals'
95
+ row.
96
+ """
79
97
sources , steps , sinks , framework_cwes , processed_packages = collect ()
80
98
81
99
append_csv_number (row , sources )
@@ -112,11 +130,19 @@ def __init__(self, lang, capitalized_lang, ext, ql_path):
112
130
"java" , "Java" , ".java" , prefix + "java/ql/src/meta/frameworks/Coverage.ql" )
113
131
]
114
132
115
- with open ("flow-model-coverage.rst" , 'w' ) as rst_file :
133
+ # The names of input and output files. The placeholder {language} is replaced with the language name.
134
+ output_rst = "flow-model-coverage.rst"
135
+ output_rst_csv = "rst-csv-flow-model-coverage-{language}.csv"
136
+ output_ql_csv = "output-{language}.csv"
137
+ output_csv = "csv-flow-model-coverage-{language}.csv"
138
+ input_framework_csv = prefix + "misc/scripts/frameworks-{language}.csv"
139
+ input_cwe_sink_csv = prefix + "misc/scripts/cwe-sink-{language}.csv"
140
+
141
+ with open (output_rst , 'w' ) as rst_file :
116
142
for config in configs :
117
143
lang = config .lang
118
144
db = "empty-" + lang
119
- ql_output = "output-" + lang + ".csv"
145
+ ql_output = output_ql_csv . format ( language = lang )
120
146
create_empty_database (lang , config .ext , db )
121
147
run_codeql_query (config .ql_path , db , ql_output )
122
148
@@ -128,36 +154,37 @@ def __init__(self, lang, capitalized_lang, ext, ql_path):
128
154
with open (ql_output ) as csvfile :
129
155
reader = csv .reader (csvfile )
130
156
for row in reader :
157
+ # row: "android.util",1,"remote","source",16
131
158
package = row [0 ]
132
159
if package not in packages :
133
160
packages [package ] = {
134
161
"count" : row [1 ],
162
+ # part: "summary", "sink", or "source"
135
163
"part" : {},
164
+ # kind: "source:remote", "sink:create-file", ...
136
165
"kind" : {}
137
166
}
167
+
138
168
part = row [3 ]
139
169
parts .add (part )
140
- if part not in packages [package ]["part" ]:
141
- packages [package ]["part" ][part ] = 0
142
- packages [package ]["part" ][part ] += int (row [4 ])
170
+ increment_dict_item (row [4 ], packages [package ]["part" ], part )
171
+
143
172
kind = part + ":" + row [2 ]
144
173
kinds .add (kind )
145
- if kind not in packages [package ]["kind" ]:
146
- packages [package ]["kind" ][kind ] = 0
147
- packages [package ]["kind" ][kind ] += int (row [4 ])
174
+ increment_dict_item (row [4 ], packages [package ]["kind" ], kind )
175
+
176
+ parts = sorted (parts )
177
+ kinds = sorted (kinds )
148
178
149
179
# Write the denormalized package statistics to a CSV file.
150
- with open ("csv-flow-model-coverage-" + lang + ".csv" , 'w' , newline = '' ) as csvfile :
180
+ with open (output_csv . format ( language = lang ) , 'w' , newline = '' ) as csvfile :
151
181
csvwriter = csv .writer (csvfile )
152
182
153
- parts = sorted (parts )
154
- kinds = sorted (kinds )
155
-
156
- columns = ["package" ]
157
- columns .extend (parts )
158
- columns .extend (kinds )
183
+ headers = ["package" ]
184
+ headers .extend (parts )
185
+ headers .extend (kinds )
159
186
160
- csvwriter .writerow (columns )
187
+ csvwriter .writerow (headers )
161
188
162
189
for package in sorted (packages ):
163
190
row = [package ]
@@ -170,10 +197,11 @@ def __init__(self, lang, capitalized_lang, ext, ql_path):
170
197
# Read the additional framework data, such as URL, friendly name
171
198
frameworks = {}
172
199
173
- with open (prefix + "misc/scripts/frameworks-" + lang + ".csv" ) as csvfile :
200
+ with open (input_framework_csv . format ( language = lang ) ) as csvfile :
174
201
reader = csv .reader (csvfile )
175
202
next (reader )
176
203
for row in reader :
204
+ # row: Hibernate,https://hibernate.org/,org.hibernate
177
205
framwork = row [0 ]
178
206
if framwork not in frameworks :
179
207
frameworks [framwork ] = {
@@ -184,18 +212,21 @@ def __init__(self, lang, capitalized_lang, ext, ql_path):
184
212
# Read the additional CWE data
185
213
cwes = {}
186
214
187
- with open (prefix + "misc/scripts/cwe-sink-" + lang + ".csv" ) as csvfile :
215
+ with open (input_cwe_sink_csv . format ( language = lang ) ) as csvfile :
188
216
reader = csv .reader (csvfile )
189
217
next (reader )
190
218
for row in reader :
219
+ # row: CWE-89,sql,SQL injection
191
220
cwe = row [0 ]
192
221
if cwe not in cwes :
193
222
cwes [cwe ] = {
194
223
"sink" : row [1 ],
195
224
"label" : row [2 ]
196
225
}
197
226
198
- file_name = "rst-csv-flow-model-coverage-" + lang + ".csv"
227
+ sorted_cwes = sorted (cwes )
228
+
229
+ file_name = output_rst_csv .format (language = lang )
199
230
200
231
rst_file .write (
201
232
config .capitalized_lang + " framework & library support\n " )
@@ -210,17 +241,23 @@ def __init__(self, lang, capitalized_lang, ext, ql_path):
210
241
with open (file_name , 'w' , newline = '' ) as csvfile :
211
242
csvwriter = csv .writer (csvfile )
212
243
213
- columns = ["Framework / library" , "package" ,
214
- "remote flow sources" , "taint & value steps" , "sinks (total)" ]
215
- for cwe in sorted (cwes ):
216
- columns .append ("`" + cwe + "` :sub:`" +
217
- cwes [cwe ]["label" ] + "`" )
218
- csvwriter .writerow (columns )
244
+ # Write CSV header.
245
+ headers = ["Framework / library" ,
246
+ "Package" ,
247
+ "Remote flow sources" ,
248
+ "Taint & value steps" ,
249
+ "Sinks (total)" ]
250
+ for cwe in sorted_cwes :
251
+ headers .append (
252
+ "`{0}` :sub:`{1}`" .format (cwe , cwes [cwe ]["label" ]))
253
+ csvwriter .writerow (headers )
219
254
220
255
processed_packages = set ()
221
256
257
+ # Write a row for each framework.
222
258
for framework in sorted (frameworks ):
223
259
row = []
260
+
224
261
# Add the framework name to the row
225
262
if not frameworks [framework ]["url" ]:
226
263
row .append (framework )
@@ -234,12 +271,12 @@ def __init__(self, lang, capitalized_lang, ext, ql_path):
234
271
prefix = frameworks [framework ]["package" ]
235
272
236
273
# Collect statistics on the current framework
274
+ # The package name is either a full name, such as "org.hibernate", or a prefix, such as "java.*"
237
275
def collect_framework (): return collect_package_stats (
238
- packages ,
239
- lambda p : (prefix .endswith ("*" ) and p .startswith (prefix [:- 1 ])) or (not prefix .endswith ("*" ) and prefix == p ))
276
+ packages , cwes , lambda p : (prefix .endswith ("*" ) and p .startswith (prefix [:- 1 ])) or (not prefix .endswith ("*" ) and prefix == p ))
240
277
241
278
row , f_processed_packages = add_package_stats_to_row (
242
- row , sorted ( cwes ) , collect_framework )
279
+ row , sorted_cwes , collect_framework )
243
280
244
281
csvwriter .writerow (row )
245
282
processed_packages .update (f_processed_packages )
@@ -248,11 +285,10 @@ def collect_framework(): return collect_package_stats(
248
285
row = ["Others" , None ]
249
286
250
287
def collect_others (): return collect_package_stats (
251
- packages ,
252
- lambda p : p not in processed_packages )
288
+ packages , cwes , lambda p : p not in processed_packages )
253
289
254
290
row , other_packages = add_package_stats_to_row (
255
- row , sorted ( cwes ) , collect_others )
291
+ row , sorted_cwes , collect_others )
256
292
257
293
row [1 ] = ", " .join ("``{0}``" .format (p )
258
294
for p in sorted (other_packages ))
@@ -262,11 +298,9 @@ def collect_others(): return collect_package_stats(
262
298
# Collect statistics on all packages
263
299
row = ["Totals" , None ]
264
300
265
- def collect_total (): return collect_package_stats (
266
- packages ,
267
- lambda p : True )
301
+ def collect_total (): return collect_package_stats (packages , cwes , lambda p : True )
268
302
269
303
row , _ = add_package_stats_to_row (
270
- row , sorted ( cwes ) , collect_total )
304
+ row , sorted_cwes , collect_total )
271
305
272
306
csvwriter .writerow (row )
0 commit comments