4
4
import shutil
5
5
import settings
6
6
import utils
7
+ import packages as pack
8
+ import frameworks as fr
7
9
8
10
"""
9
11
This script runs the CSV coverage report QL query, and transforms it to a more readable format.
@@ -28,14 +30,7 @@ def append_csv_dict_item(list, dictionary, key):
28
30
list .append (None )
29
31
30
32
31
- def increment_dict_item (value , dictionary , key ):
32
- """Increments the value of the dictionary[key] by value."""
33
- if key not in dictionary :
34
- dictionary [key ] = 0
35
- dictionary [key ] += int (value )
36
-
37
-
38
- def collect_package_stats (packages , cwes , filter ):
33
+ def collect_package_stats (packages : pack .PackageCollection , cwes , filter ):
39
34
"""
40
35
Collects coverage statistics for packages matching the given filter. `filter` is a `lambda` that for example (1) matches
41
36
packages to frameworks, or (2) matches packages that were previously not processed.
@@ -48,20 +43,21 @@ def collect_package_stats(packages, cwes, filter):
48
43
framework_cwes = {}
49
44
processed_packages = set ()
50
45
51
- for package in packages :
46
+ for package in packages .get_packages ():
47
+ package : pack .Package = package
52
48
if filter (package ):
53
49
processed_packages .add (package )
54
- sources += int ( packages [ package ][ "kind" ]. get ("source:remote" , 0 ) )
55
- steps += int ( packages [ package ][ "part" ]. get ("summary" , 0 ) )
56
- sinks += int ( packages [ package ][ "part" ]. get ("sink" , 0 ) )
50
+ sources += package . get_kind_count ("source:remote" )
51
+ steps += package . get_part_count ("summary" )
52
+ sinks += package . get_part_count ("sink" )
57
53
58
54
for cwe in cwes :
59
55
sink = "sink:" + cwes [cwe ]["sink" ]
60
- if sink in packages [package ]["kind" ]:
56
+ count = package .get_kind_count (sink )
57
+ if count > 0 :
61
58
if cwe not in framework_cwes :
62
59
framework_cwes [cwe ] = 0
63
- framework_cwes [cwe ] += int (
64
- packages [package ]["kind" ][sink ])
60
+ framework_cwes [cwe ] += count
65
61
66
62
return sources , steps , sinks , framework_cwes , processed_packages
67
63
@@ -137,37 +133,12 @@ def add_package_stats_to_row(row, sorted_cwes, collect):
137
133
utils .run_codeql_query (config .ql_path , db , ql_output )
138
134
shutil .rmtree (db )
139
135
140
- packages = {}
141
- parts = set ()
142
- kinds = set ()
143
-
144
- # Read the generated CSV file, and collect package statistics.
145
- with open (ql_output ) as csvfile :
146
- reader = csv .reader (csvfile )
147
- for row in reader :
148
- # row: "android.util",1,"remote","source",16
149
- package = row [0 ]
150
- if package not in packages :
151
- packages [package ] = {
152
- "count" : row [1 ],
153
- # part: "summary", "sink", or "source"
154
- "part" : {},
155
- # kind: "source:remote", "sink:create-file", ...
156
- "kind" : {}
157
- }
158
-
159
- part = row [3 ]
160
- parts .add (part )
161
- increment_dict_item (row [4 ], packages [package ]["part" ], part )
162
-
163
- kind = part + ":" + row [2 ]
164
- kinds .add (kind )
165
- increment_dict_item (row [4 ], packages [package ]["kind" ], kind )
136
+ packages = pack .PackageCollection (ql_output )
166
137
167
138
os .remove (ql_output )
168
139
169
- parts = sorted ( parts )
170
- kinds = sorted ( kinds )
140
+ parts = packages . get_parts ( )
141
+ kinds = packages . get_kinds ( )
171
142
172
143
# Write the denormalized package statistics to a CSV file.
173
144
with open (output_csv .format (language = lang ), 'w' , newline = '' ) as csvfile :
@@ -179,44 +150,21 @@ def add_package_stats_to_row(row, sorted_cwes, collect):
179
150
180
151
csvwriter .writerow (headers )
181
152
182
- for package in sorted (packages ):
183
- row = [package ]
153
+ for package in packages .get_packages ():
154
+ package : pack .Package = package
155
+ row = [package .name ]
184
156
for part in parts :
185
- append_csv_dict_item (row , packages [ package ][ " part" ], part )
157
+ append_csv_number (row , package . get_part_count ( part ) )
186
158
for kind in kinds :
187
- append_csv_dict_item (row , packages [ package ][ " kind" ], kind )
159
+ append_csv_number (row , package . get_kind_count ( kind ) )
188
160
csvwriter .writerow (row )
189
161
190
162
# Read the additional framework data, such as URL, friendly name
191
- frameworks = {}
192
-
193
- with open (input_framework_csv .format (language = lang )) as csvfile :
194
- reader = csv .reader (csvfile )
195
- next (reader )
196
- for row in reader :
197
- # row: Hibernate,https://hibernate.org/,org.hibernate
198
- framwork = row [0 ]
199
- if framwork not in frameworks :
200
- frameworks [framwork ] = {
201
- "package" : row [2 ],
202
- "url" : row [1 ]
203
- }
163
+ frameworks = fr .FrameworkCollection (
164
+ input_framework_csv .format (language = lang ))
204
165
205
166
# Read the additional CWE data
206
- cwes = {}
207
-
208
- with open (input_cwe_sink_csv .format (language = lang )) as csvfile :
209
- reader = csv .reader (csvfile )
210
- next (reader )
211
- for row in reader :
212
- # row: CWE-89,sql,SQL injection
213
- cwe = row [0 ]
214
- if cwe not in cwes :
215
- cwes [cwe ] = {
216
- "sink" : row [1 ],
217
- "label" : row [2 ]
218
- }
219
-
167
+ cwes = utils .read_cwes (input_cwe_sink_csv .format (language = lang ))
220
168
sorted_cwes = sorted (cwes )
221
169
222
170
with open (output_rst .format (language = lang ), 'w' , newline = '' ) as rst_file :
@@ -246,34 +194,24 @@ def add_package_stats_to_row(row, sorted_cwes, collect):
246
194
247
195
processed_packages = set ()
248
196
249
- all_package_patterns = set (
250
- (frameworks [fr ]["package" ] for fr in frameworks ))
251
-
252
197
# Write a row for each framework.
253
- for framework in sorted (frameworks ):
198
+ for framework in frameworks .get_frameworks ():
199
+ framework : fr .Framework = framework
254
200
row = []
255
201
256
202
# Add the framework name to the row
257
- if not frameworks [ framework ][ " url" ] :
258
- row .append (row_prefix + framework )
203
+ if not framework . url :
204
+ row .append (row_prefix + framework . name )
259
205
else :
260
206
row .append (
261
- row_prefix + "`" + framework + " <" + frameworks [ framework ][ " url" ] + ">`_" )
207
+ row_prefix + "`" + framework . name + " <" + framework . url + ">`_" )
262
208
263
209
# Add the package name to the row
264
- row .append ("``" + frameworks [framework ]["package" ] + "``" )
265
-
266
- current_package_pattern = frameworks [framework ]["package" ]
210
+ row .append ("``" + framework .package_pattern + "``" )
267
211
268
212
# Collect statistics on the current framework
269
- # current_package_pattern is either full name, such as "org.hibernate", or a prefix, such as "java.*"
270
- # Package patterns might overlap, in case of 'org.apache.commons.io' and 'org.apache.*', the statistics for
271
- # the latter will not include the statistics for the former.
272
- def package_match (package_name , pattern ): return (pattern .endswith (
273
- "*" ) and package_name .startswith (pattern [:- 1 ])) or (not pattern .endswith ("*" ) and pattern == package_name )
274
-
275
213
def collect_framework (): return collect_package_stats (
276
- packages , cwes , lambda p : package_match ( p , current_package_pattern ) and all ( len ( current_package_pattern ) >= len ( pattern ) or not package_match ( p , pattern ) for pattern in all_package_patterns ))
214
+ packages , cwes , frameworks . get_package_filter ( framework ))
277
215
278
216
row , f_processed_packages = add_package_stats_to_row (
279
217
row , sorted_cwes , collect_framework )
@@ -290,8 +228,8 @@ def collect_others(): return collect_package_stats(
290
228
row , other_packages = add_package_stats_to_row (
291
229
row , sorted_cwes , collect_others )
292
230
293
- row [1 ] = ", " .join ("``{0}``" .format (p )
294
- for p in sorted (other_packages ))
231
+ row [1 ] = ", " .join ("``{0}``" .format (p . name )
232
+ for p in sorted (other_packages , key = lambda x : x . name ))
295
233
296
234
csvwriter .writerow (row )
297
235
0 commit comments