@@ -32,6 +32,62 @@ def run_codeql_query(query, database, output):
32
32
"--format=csv" , "--no-titles" , "--output" , output ])
33
33
34
34
35
def append_csv_number(list, value):
    """Append a count to a CSV row, leaving the cell empty when it is not positive.

    Args:
        list: The row (a mutable list) that is extended in place. The
            parameter name shadows the built-in ``list``; it is kept so
            existing callers are unaffected.
        value: The number to record.

    ``value`` is appended unchanged when it is greater than 0; otherwise
    ``None`` is appended, which ``csv.writer`` writes as an empty cell.
    """
    list.append(value if value > 0 else None)
41
+
42
+
43
def append_csv_dict_item(list, dictionary, key):
    """Append ``dictionary[key]`` to a CSV row, or ``None`` when the key is absent.

    Args:
        list: The row (a mutable list) that is extended in place. The
            parameter name shadows the built-in ``list``; it is kept so
            existing callers are unaffected.
        dictionary: Mapping to read the value from.
        key: Key to look up in ``dictionary``.

    Exactly one element is always appended, so rows stay aligned with
    their column headers even when a value is missing.
    """
    list.append(dictionary.get(key))
49
+
50
+
51
def collect_package_stats(packages, filter, cwe_sinks=None):
    """Collect coverage statistics for packages matching the given filter.

    Args:
        packages: Mapping of package name to a dict with a ``"kind"`` and
            a ``"part"`` sub-dict, each mapping a name to a count.
        filter: Callable taking a package name and returning a truthy
            value when the package should be included. The parameter name
            shadows the built-in ``filter``; it is kept so existing
            callers are unaffected.
        cwe_sinks: Mapping of CWE identifier to a dict with a ``"sink"``
            entry naming the sink kind. When omitted, the module-level
            ``cwes`` table is used.

    Returns:
        A tuple ``(sources, steps, sinks, framework_cwes,
        processed_packages)``: the summed ``"source:remote"`` kind count,
        the summed ``"summary"`` and ``"sink"`` part counts, a dict of
        per-CWE sink totals (only CWEs with at least one matching sink
        kind are present), and the set of package names that passed the
        filter.
    """
    if cwe_sinks is None:
        # Backward-compatible default: existing callers rely on the
        # module-level table being populated before this is called.
        cwe_sinks = cwes

    sources = 0
    steps = 0
    sinks = 0
    framework_cwes = {}
    processed_packages = set()

    for package, stats in packages.items():
        if not filter(package):
            continue

        processed_packages.add(package)
        kind_counts = stats["kind"]
        part_counts = stats["part"]

        sources += int(kind_counts.get("source:remote", 0))
        steps += int(part_counts.get("summary", 0))
        sinks += int(part_counts.get("sink", 0))

        for cwe, info in cwe_sinks.items():
            sink = "sink:" + info["sink"]
            if sink in kind_counts:
                framework_cwes[cwe] = (
                    framework_cwes.get(cwe, 0) + int(kind_counts[sink]))

    return sources, steps, sinks, framework_cwes, processed_packages
75
+
76
+
77
def add_package_stats_to_row(row, sorted_cwes, collect):
    """Append the statistics produced by ``collect`` to a CSV row.

    Args:
        row: The row (a mutable list) that is extended in place.
        sorted_cwes: CWE identifiers in the order their cells should be
            appended to the row.
        collect: Zero-argument callable returning the tuple
            ``(sources, steps, sinks, framework_cwes, processed_packages)``.

    Returns:
        A ``(row, processed_packages)`` tuple, where ``row`` is the same
        list object that was passed in and ``processed_packages`` is
        passed through unchanged from ``collect``.
    """
    sources, steps, sinks, framework_cwes, processed_packages = collect()

    # Totals that are not positive are written as empty cells.
    for count in (sources, steps, sinks):
        append_csv_number(row, count)

    # One cell per CWE; CWEs without a collected total stay empty.
    for cwe in sorted_cwes:
        append_csv_dict_item(row, framework_cwes, cwe)

    return row, processed_packages
89
+
90
+
35
91
class LanguageConfig :
36
92
def __init__ (self , lang , ext , ql_path ):
37
93
self .lang = lang
@@ -61,13 +117,14 @@ def __init__(self, lang, ext, ql_path):
61
117
query_path = config .ql_path
62
118
db = "empty-" + lang
63
119
ql_output = "output-" + lang + ".csv"
64
- create_empty_database (lang , ext , db )
120
+ # create_empty_database(lang, ext, db)
65
121
run_codeql_query (query_path , db , ql_output )
66
122
67
123
packages = {}
68
124
parts = set ()
69
125
kinds = set ()
70
126
127
+ # Read the generated CSV file, and collect package statistics.
71
128
with open (ql_output ) as csvfile :
72
129
reader = csv .reader (csvfile )
73
130
for row in reader :
@@ -89,6 +146,7 @@ def __init__(self, lang, ext, ql_path):
89
146
packages [package ]["kind" ][kind ] = 0
90
147
packages [package ]["kind" ][kind ] += int (row [4 ])
91
148
149
+ # Write the denormalized package statistics to a CSV file.
92
150
with open ("csv-flow-model-coverage-" + lang + ".csv" , 'w' , newline = '' ) as csvfile :
93
151
csvwriter = csv .writer (csvfile )
94
152
@@ -104,13 +162,97 @@ def __init__(self, lang, ext, ql_path):
104
162
for package in sorted (packages ):
105
163
row = [package ]
106
164
for part in parts :
107
- if part in packages [package ]["part" ]:
108
- row .append (packages [package ]["part" ][part ])
109
- else :
110
- row .append (None )
165
+ append_csv_dict_item (row , packages [package ]["part" ], part )
111
166
for kind in kinds :
112
- if kind in packages [package ]["kind" ]:
113
- row .append (packages [package ]["kind" ][kind ])
114
- else :
115
- row .append (None )
167
+ append_csv_dict_item (row , packages [package ]["kind" ], kind )
116
168
csvwriter .writerow (row )
169
+
170
+ # Read the additional framework data, such as URL, friendly name
171
+ frameworks = {}
172
+
173
+ with open (prefix + "misc/scripts/frameworks-" + lang + ".csv" ) as csvfile :
174
+ reader = csv .reader (csvfile )
175
+ next (reader )
176
+ for row in reader :
177
+ framwork = row [0 ]
178
+ if framwork not in frameworks :
179
+ frameworks [framwork ] = {
180
+ "package" : row [2 ],
181
+ "url" : row [1 ]
182
+ }
183
+
184
+ # Read the additional CWE data
185
+ cwes = {}
186
+
187
+ with open (prefix + "misc/scripts/cwe-sink-" + lang + ".csv" ) as csvfile :
188
+ reader = csv .reader (csvfile )
189
+ next (reader )
190
+ for row in reader :
191
+ cwe = row [0 ]
192
+ if cwe not in cwes :
193
+ cwes [cwe ] = {
194
+ "sink" : row [1 ],
195
+ "label" : row [2 ]
196
+ }
197
+
198
+ with open ("rst-csv-flow-model-coverage-" + lang + ".csv" , 'w' , newline = '' ) as csvfile :
199
+ csvwriter = csv .writer (csvfile )
200
+
201
+ columns = ["Framework / library" , "package" ,
202
+ "remote flow sources" , "taint & value steps" , "sinks (total)" ]
203
+ for cwe in sorted (cwes ):
204
+ columns .append ("`" + cwe + "` :sub:`" + cwes [cwe ]["label" ] + "`" )
205
+ csvwriter .writerow (columns )
206
+
207
+ processed_packages = set ()
208
+
209
+ for framework in sorted (frameworks ):
210
+ row = []
211
+ # Add the framework name to the row
212
+ if not frameworks [framework ]["url" ]:
213
+ row .append (framework )
214
+ else :
215
+ row .append (
216
+ "`" + framework + " <" + frameworks [framework ]["url" ] + ">`_" )
217
+
218
+ # Add the package name to the row
219
+ row .append (frameworks [framework ]["package" ])
220
+
221
# Use a dedicated name for the package pattern: `prefix` is the path
# prefix used when opening the framework and CWE CSV files, so it must
# not be overwritten here.
package_pattern = frameworks[framework]["package"]

# Collect statistics on the current framework
def collect_framework(pattern=package_pattern):
    """Collect statistics for the packages matching this framework.

    A pattern ending in "*" matches every package whose name starts
    with the text before the "*"; any other pattern matches only the
    package with exactly that name.
    """
    if pattern.endswith("*"):
        stem = pattern[:-1]
        return collect_package_stats(
            packages, lambda p: p.startswith(stem))
    return collect_package_stats(packages, lambda p: p == pattern)
227
+
228
+ row , f_processed_packages = add_package_stats_to_row (
229
+ row , sorted (cwes ), collect_framework )
230
+
231
+ csvwriter .writerow (row )
232
+ processed_packages .update (f_processed_packages )
233
+
234
+ # Collect statistics on all packages that are not part of a framework
235
+ row = ["Others" , None ]
236
+
237
+ def collect_others (): return collect_package_stats (
238
+ packages ,
239
+ lambda p : p not in processed_packages )
240
+
241
+ row , _ = add_package_stats_to_row (
242
+ row , sorted (cwes ), collect_others )
243
+
244
+ csvwriter .writerow (row )
245
+
246
+ # Collect statistics on all packages
247
+ row = ["Total" , None ]
248
+
249
+ def collect_total (): return collect_package_stats (
250
+ packages ,
251
+ lambda p : True )
252
+
253
+ row , _ = add_package_stats_to_row (
254
+ row , sorted (cwes ), collect_total )
255
+
256
+ csvwriter .writerow (row )
257
+
258
+ # todo: generate rst page referencing the csv files
0 commit comments