1
1
import subprocess
2
- import json
3
2
import csv
4
3
import sys
5
4
import os
5
+ import shutil
6
6
7
7
"""
8
8
This script runs the CSV coverage report QL query, and transforms it to a more readable format.
@@ -32,6 +32,7 @@ def run_codeql_query(query, database, output):
32
32
"--database" , database , "--output" , output + ".bqrs" ])
33
33
subprocess_run (["codeql" , "bqrs" , "decode" , output + ".bqrs" ,
34
34
"--format=csv" , "--no-titles" , "--output" , output ])
35
+ os .remove (output + ".bqrs" )
35
36
36
37
37
38
def append_csv_number (list , value ):
@@ -120,13 +121,27 @@ def __init__(self, lang, capitalized_lang, ext, ql_path):
120
121
print ("Error: couldn't invoke CodeQL CLI 'codeql'. Is it on the path? Aborting." , file = sys .stderr )
121
122
raise e
122
123
123
- query_prefix = ""
124
- data_prefix = ""
124
+ # The script can be run in two modes:
125
+ # (i) dev: run on the local developer machine, and collect the coverage data. The output is generated into the expected
126
+ # folders: {language}/documentation/library-coverage/
127
+ # (ii) ci: run in a CI action. The output is generated to the root folder, and then in a subsequent step packaged as a
128
+ # build artifact.
129
+ mode = "dev"
125
130
if len (sys .argv ) > 1 :
126
- query_prefix = sys .argv [1 ] + "/"
131
+ mode = sys .argv [1 ]
132
+
133
+ if mode != "dev" and mode != "ci" :
134
+ print ("Unknown execution mode: " + mode +
135
+ ". Expected either 'dev' or 'ci'." , file = sys .stderr )
136
+ exit (1 )
127
137
138
+ query_prefix = ""
139
+ data_prefix = ""
128
140
if len (sys .argv ) > 2 :
129
- data_prefix = sys .argv [2 ] + "/"
141
+ query_prefix = sys .argv [2 ] + "/"
142
+
143
+ if len (sys .argv ) > 3 :
144
+ data_prefix = sys .argv [3 ] + "/"
130
145
131
146
# Languages for which we want to generate coverage reports.
132
147
configs = [
@@ -135,100 +150,109 @@ def __init__(self, lang, capitalized_lang, ext, ql_path):
135
150
]
136
151
137
152
# The names of input and output files. The placeholder {language} is replaced with the language name.
138
- output_rst = "flow-model-coverage.rst"
153
+ documentation_folder = "{language}/documentation/library-coverage/"
139
154
output_ql_csv = "output-{language}.csv"
140
- output_csv = "csv-flow-model-coverage-{language}.csv"
141
- input_framework_csv = data_prefix + "misc/scripts/frameworks-{language}.csv"
142
- input_cwe_sink_csv = data_prefix + "misc/scripts/cwe-sink-{language}.csv"
143
-
144
- with open (output_rst , 'w' , newline = '' ) as rst_file :
145
- for config in configs :
146
- lang = config .lang
147
- db = "empty-" + lang
148
- ql_output = output_ql_csv .format (language = lang )
149
- create_empty_database (lang , config .ext , db )
150
- run_codeql_query (config .ql_path , db , ql_output )
151
-
152
- packages = {}
153
- parts = set ()
154
- kinds = set ()
155
-
156
- # Read the generated CSV file, and collect package statistics.
157
- with open (ql_output ) as csvfile :
158
- reader = csv .reader (csvfile )
159
- for row in reader :
160
- # row: "android.util",1,"remote","source",16
161
- package = row [0 ]
162
- if package not in packages :
163
- packages [package ] = {
164
- "count" : row [1 ],
165
- # part: "summary", "sink", or "source"
166
- "part" : {},
167
- # kind: "source:remote", "sink:create-file", ...
168
- "kind" : {}
169
- }
170
-
171
- part = row [3 ]
172
- parts .add (part )
173
- increment_dict_item (row [4 ], packages [package ]["part" ], part )
174
-
175
- kind = part + ":" + row [2 ]
176
- kinds .add (kind )
177
- increment_dict_item (row [4 ], packages [package ]["kind" ], kind )
178
-
179
- parts = sorted (parts )
180
- kinds = sorted (kinds )
181
-
182
- # Write the denormalized package statistics to a CSV file.
183
- with open (output_csv .format (language = lang ), 'w' , newline = '' ) as csvfile :
184
- csvwriter = csv .writer (csvfile )
185
-
186
- headers = ["package" ]
187
- headers .extend (parts )
188
- headers .extend (kinds )
189
-
190
- csvwriter .writerow (headers )
191
-
192
- for package in sorted (packages ):
193
- row = [package ]
194
- for part in parts :
195
- append_csv_dict_item (row , packages [package ]["part" ], part )
196
- for kind in kinds :
197
- append_csv_dict_item (row , packages [package ]["kind" ], kind )
198
- csvwriter .writerow (row )
199
-
200
- # Read the additional framework data, such as URL, friendly name
201
- frameworks = {}
202
-
203
- with open (input_framework_csv .format (language = lang )) as csvfile :
204
- reader = csv .reader (csvfile )
205
- next (reader )
206
- for row in reader :
207
- # row: Hibernate,https://hibernate.org/,org.hibernate
208
- framwork = row [0 ]
209
- if framwork not in frameworks :
210
- frameworks [framwork ] = {
211
- "package" : row [2 ],
212
- "url" : row [1 ]
213
- }
214
-
215
- # Read the additional CWE data
216
- cwes = {}
217
-
218
- with open (input_cwe_sink_csv .format (language = lang )) as csvfile :
219
- reader = csv .reader (csvfile )
220
- next (reader )
221
- for row in reader :
222
- # row: CWE-89,sql,SQL injection
223
- cwe = row [0 ]
224
- if cwe not in cwes :
225
- cwes [cwe ] = {
226
- "sink" : row [1 ],
227
- "label" : row [2 ]
228
- }
229
-
230
- sorted_cwes = sorted (cwes )
155
+ input_framework_csv = data_prefix + documentation_folder + "frameworks.csv"
156
+ input_cwe_sink_csv = data_prefix + documentation_folder + "cwe-sink.csv"
157
+
158
+ if mode == "dev" :
159
+ output_rst = data_prefix + documentation_folder + "flow-model-coverage.rst"
160
+ output_csv = data_prefix + documentation_folder + "flow-model-coverage.csv"
161
+ else :
162
+ output_rst = "flow-model-coverage-{language}.rst"
163
+ output_csv = "flow-model-coverage-{language}.csv"
164
+
165
+ for config in configs :
166
+ lang = config .lang
167
+ db = "empty-" + lang
168
+ ql_output = output_ql_csv .format (language = lang )
169
+ create_empty_database (lang , config .ext , db )
170
+ run_codeql_query (config .ql_path , db , ql_output )
171
+ shutil .rmtree (db )
172
+
173
+ packages = {}
174
+ parts = set ()
175
+ kinds = set ()
176
+
177
+ # Read the generated CSV file, and collect package statistics.
178
+ with open (ql_output ) as csvfile :
179
+ reader = csv .reader (csvfile )
180
+ for row in reader :
181
+ # row: "android.util",1,"remote","source",16
182
+ package = row [0 ]
183
+ if package not in packages :
184
+ packages [package ] = {
185
+ "count" : row [1 ],
186
+ # part: "summary", "sink", or "source"
187
+ "part" : {},
188
+ # kind: "source:remote", "sink:create-file", ...
189
+ "kind" : {}
190
+ }
191
+
192
+ part = row [3 ]
193
+ parts .add (part )
194
+ increment_dict_item (row [4 ], packages [package ]["part" ], part )
195
+
196
+ kind = part + ":" + row [2 ]
197
+ kinds .add (kind )
198
+ increment_dict_item (row [4 ], packages [package ]["kind" ], kind )
199
+
200
+ os .remove (ql_output )
201
+
202
+ parts = sorted (parts )
203
+ kinds = sorted (kinds )
204
+
205
+ # Write the denormalized package statistics to a CSV file.
206
+ with open (output_csv .format (language = lang ), 'w' , newline = '' ) as csvfile :
207
+ csvwriter = csv .writer (csvfile )
208
+
209
+ headers = ["package" ]
210
+ headers .extend (parts )
211
+ headers .extend (kinds )
212
+
213
+ csvwriter .writerow (headers )
214
+
215
+ for package in sorted (packages ):
216
+ row = [package ]
217
+ for part in parts :
218
+ append_csv_dict_item (row , packages [package ]["part" ], part )
219
+ for kind in kinds :
220
+ append_csv_dict_item (row , packages [package ]["kind" ], kind )
221
+ csvwriter .writerow (row )
231
222
223
+ # Read the additional framework data, such as URL, friendly name
224
+ frameworks = {}
225
+
226
+ with open (input_framework_csv .format (language = lang )) as csvfile :
227
+ reader = csv .reader (csvfile )
228
+ next (reader )
229
+ for row in reader :
230
+ # row: Hibernate,https://hibernate.org/,org.hibernate
231
+ framwork = row [0 ]
232
+ if framwork not in frameworks :
233
+ frameworks [framwork ] = {
234
+ "package" : row [2 ],
235
+ "url" : row [1 ]
236
+ }
237
+
238
+ # Read the additional CWE data
239
+ cwes = {}
240
+
241
+ with open (input_cwe_sink_csv .format (language = lang )) as csvfile :
242
+ reader = csv .reader (csvfile )
243
+ next (reader )
244
+ for row in reader :
245
+ # row: CWE-89,sql,SQL injection
246
+ cwe = row [0 ]
247
+ if cwe not in cwes :
248
+ cwes [cwe ] = {
249
+ "sink" : row [1 ],
250
+ "label" : row [2 ]
251
+ }
252
+
253
+ sorted_cwes = sorted (cwes )
254
+
255
+ with open (output_rst .format (language = lang ), 'w' , newline = '' ) as rst_file :
232
256
rst_file .write (
233
257
config .capitalized_lang + " framework & library support\n" )
234
258
rst_file .write ("================================\n\n" )
@@ -314,4 +338,4 @@ def collect_total(): return collect_package_stats(packages, cwes, lambda p: True
314
338
315
339
csvwriter .writerow (row )
316
340
317
- rst_file .write ("\n\n" )
341
+ rst_file .write ("\n" )
0 commit comments