1
1
#!/usr/bin/python3
2
2
3
+ import helpers
3
4
import json
4
5
import os
5
6
import os .path
8
9
import sys
9
10
import tempfile
10
11
12
def quote_if_needed(row):
    """Return one model-row cell formatted for a YAML flow sequence.

    The literal strings "true" and "false" are returned unchanged; every
    other value is wrapped in double quotes. Backslashes and double quotes
    inside the value are escaped so the result stays a well-formed YAML
    double-quoted scalar.
    """
    # subtypes column
    if row in ("true", "false"):
        return row
    # Escape backslashes first so the escapes added for quotes are not doubled.
    escaped = row.replace("\\", "\\\\").replace('"', '\\"')
    return '"' + escaped + '"'
17
+
18
def parseData(data):
    """Render query result tuples as YAML flow-sequence rows.

    Each element of ``data`` is indexed with ``[0]`` to obtain a
    ';'-separated string. Its cells are passed through ``quote_if_needed``
    and joined into one ``- [a, b, ...]`` line terminated by a newline.

    Returns the concatenation of all rows ("" when ``data`` is empty).
    """
    # NOTE(review): the leading indentation of this prefix appears collapsed
    # in the reviewed copy; confirm it matches the nesting expected by
    # helpers.addsToTemplate.
    prefix = " - ["
    lines = []
    for row in data:
        cells = map(quote_if_needed, row[0].split(';'))
        # Every row ends with a bare newline so all rows share one indent.
        lines.append(prefix + ', '.join(cells) + ']\n')
    return "".join(lines)
26
+
11
27
class Generator :
12
28
def __init__ (self , language ):
13
29
self .language = language
@@ -17,55 +33,52 @@ def __init__ (self, language):
17
33
self .generateNegativeSummaries = False
18
34
self .generateTypeBasedSummaries = False
19
35
self .dryRun = False
36
+ self .dirname = "model-generator"
20
37
21
38
22
39
def printHelp(self):
    """Print command-line usage for the extensions generator to stdout.

    Reads ``self.language`` to show the output directory for the language.
    """
    print(f"""Usage:
python3 GenerateFlowModelExtensions.py <library-database> <outputYml> [<friendlyFrameworkName>] [--with-sinks] [--with-sources] [--with-summaries] [--with-negative-summaries] [--with-typebased-summaries] [--dry-run]

This generates summary, source and sink models for the code in the database.
The files will be placed in `{self.language}/ql/lib/ext/generated/<outputYml>.model.yml` where
outputYml is the name of the output YAML file (any directory part is ignored). Usually, models
are grouped by their respective frameworks.

Which models are generated is controlled by the flags:
    --with-sinks
    --with-sources
    --with-summaries
    --with-negative-summaries
    --with-typebased-summaries (Experimental - only for C#)
If none of these flags are specified, all models are generated except for the type based models.

    --dry-run: Only run the queries, but don't write to file.

Example invocations:
$ python3 GenerateFlowModelExtensions.py /tmp/dbs/my_library_db mylibrary
$ python3 GenerateFlowModelExtensions.py /tmp/dbs/my_library_db mylibrary "Friendly Name of Framework"
$ python3 GenerateFlowModelExtensions.py /tmp/dbs/my_library_db mylibrary --with-sinks

Requirements: `codeql` should appear on your path.
""")
49
65
50
66
51
67
def setenvironment(self, target, database, friendlyName):
    """Resolve repository paths and output targets for a generation run.

    ``target`` is the requested output name; ".model.yml" is appended when
    missing and only its basename is used. ``database`` is stored as-is.
    ``friendlyName`` overrides the framework name derived from the file
    name when it is not None.

    Runs ``git rev-parse --show-toplevel`` to locate the repository root,
    creates a temporary working directory and ensures the generated-models
    directory exists.
    """
    suffix = ".model.yml"
    self.codeQlRoot = subprocess.check_output(
        ["git", "rev-parse", "--show-toplevel"]).decode("utf-8").strip()
    if not target.endswith(suffix):
        target += suffix
    filename = os.path.basename(target)
    # Derive the stem from the suffix length instead of a hard-coded slice.
    stem = filename[:-len(suffix)]
    self.friendlyname = friendlyName if friendlyName is not None else stem
    self.shortname = stem
    self.database = database
    self.generatedFrameworks = os.path.join(
        self.codeQlRoot, f"{self.language}/ql/lib/ext/generated/")
    self.frameworkTarget = os.path.join(self.generatedFrameworks, filename)
    self.typeBasedFrameworkTarget = os.path.join(
        self.generatedFrameworks, "TypeBased" + filename)
    self.workDir = tempfile.mkdtemp()
    os.makedirs(self.generatedFrameworks, exist_ok=True)
71
84
@@ -114,166 +127,93 @@ def make(language):
114
127
115
128
generator .setenvironment (sys .argv [2 ], sys .argv [1 ], friendlyName )
116
129
return generator
130
+
117
131
118
-
119
def runQuery(self, query):
    """Run one model-generator query against the database and return its data.

    ``query`` is the query file name; it is resolved under
    ``<codeQlRoot>/<language>/ql/src/utils/<dirname>``. The BQRS result is
    written to ``out.bqrs`` in the working directory and handed to
    ``helpers.readData``.
    """
    print(f"########## Querying {query}...")
    queryDir = f"{self.language}/ql/src/utils/{self.dirname}"
    queryFile = os.path.join(self.codeQlRoot, queryDir, query)
    resultBqrs = os.path.join(self.workDir, "out.bqrs")

    cmd = [
        'codeql', 'query', 'run', queryFile,
        '--database', self.database,
        '--output', resultBqrs,
        '--threads', '8',
    ]
    # NOTE(review): helpers.run_cmd presumably reports the message and aborts
    # when the command fails; confirm in helpers.
    helpers.run_cmd(cmd, "Failed to generate " + query)

    return helpers.readData(self.workDir, resultBqrs)
157
-
158
-
159
def asAddsTo(self, rows, predicate):
    """Wrap rendered rows in an ``addsTo`` entry for ``predicate``.

    Returns "" when ``rows`` is empty or whitespace only; otherwise fills
    ``helpers.addsToTemplate`` with the pack name ``codeql/<language>-all``,
    the predicate and the rows.
    """
    if not rows.strip():
        return ""
    pack = f"codeql/{self.language}-all"
    return helpers.addsToTemplate.format(pack, predicate, rows)
147
+
148
+
149
def getAddsTo(self, query, predicate):
    """Run ``query`` and return its results as an ``addsTo`` entry.

    Chains ``runQuery``, ``parseData`` and ``asAddsTo``; the result is ""
    when the query yields no rows.
    """
    return self.asAddsTo(parseData(self.runQuery(query)), predicate)
173
153
174
154
175
155
def makeContent(self):
    """Build the text of the main models-as-data extension file.

    Runs the summary, sink, source and negative-summary queries, in that
    order, for each kind whose ``generate*`` flag is set. Disabled kinds
    contribute an empty string. The sections are emitted under a single
    ``extensions:`` key in the order sinks, sources, summaries, negative
    summaries.
    """
    summaryAddsTo = (
        self.getAddsTo("CaptureSummaryModels.ql", helpers.summaryModelPredicate)
        if self.generateSummaries else ""
    )
    sinkAddsTo = (
        self.getAddsTo("CaptureSinkModels.ql", helpers.sinkModelPredicate)
        if self.generateSinks else ""
    )
    sourceAddsTo = (
        self.getAddsTo("CaptureSourceModels.ql", helpers.sourceModelPredicate)
        if self.generateSources else ""
    )
    negativeSummaryAddsTo = (
        self.getAddsTo("CaptureNegativeSummaryModels.ql", "extNegativeSummaryModel")
        if self.generateNegativeSummaries else ""
    )

    # The document starts with an empty line and ends with a newline.
    return "\n".join([
        "",
        "# THIS FILE IS AN AUTO-GENERATED MODELS AS DATA FILE. DO NOT EDIT.",
        f"# Definitions of taint steps in the {self.friendlyname} framework.",
        "",
        "extensions:",
        sinkAddsTo,
        sourceAddsTo,
        summaryAddsTo,
        negativeSummaryAddsTo,
        "",
    ])
228
186
229
187
def makeTypeBasedContent(self):
    """Build the text of the type-based summaries extension file.

    Runs ``CaptureTypeBasedSummaryModels.ql`` only when
    ``generateTypeBasedSummaries`` is set; otherwise the ``extensions:``
    key is emitted with an empty section.
    """
    typeBasedSummaryAddsTo = (
        self.getAddsTo("CaptureTypeBasedSummaryModels.ql", "extSummaryModel")
        if self.generateTypeBasedSummaries else ""
    )

    # The document starts with an empty line and ends with a newline.
    return "\n".join([
        "",
        "# THIS FILE IS AN AUTO-GENERATED MODELS AS DATA FILE. DO NOT EDIT.",
        f"# Definitions of type based summaries in the {self.friendlyname} framework.",
        "",
        "extensions:",
        typeBasedSummaryAddsTo,
        "",
    ])
248
200
249
201
def save(self, content, target):
    """Write ``content`` to the file at ``target`` and report the path.

    The file is overwritten if it exists. It is written as UTF-8 so the
    generated YAML does not depend on the platform default encoding.
    """
    with open(target, "w", encoding="utf-8") as targetYml:
        targetYml.write(content)
    print("Models as data extensions written to " + target)
261
205
262
206
263
207
def run(self):
    """Generate all requested models and write them to their target files.

    Both content builders are always invoked, so the enabled queries run
    even for a dry run. With ``dryRun`` set, a notice is printed and the
    process exits with status 0 before anything is written.

    The main file is written when any of sinks, sources, summaries or
    negative summaries was requested, since all four end up in that file.
    The type-based file is written only when type-based summaries were
    requested.
    """
    content = self.makeContent()
    typeBasedContent = self.makeTypeBasedContent()

    if self.dryRun:
        print("Models as data extensions generated, but not written to file.")
        sys.exit(0)

    writesMainFile = (
        self.generateSinks
        or self.generateSources
        or self.generateSummaries
        or self.generateNegativeSummaries
    )
    if writesMainFile:
        self.save(content, self.frameworkTarget)

    if self.generateTypeBasedSummaries:
        self.save(typeBasedContent, self.typeBasedFrameworkTarget)
0 commit comments