|
| 1 | +//global:registry-exclude |
| 2 | + |
| 3 | +package python |
| 4 | + |
| 5 | +import ( |
| 6 | + "fmt" |
| 7 | + "reflect" |
| 8 | + |
| 9 | + sitter "github.com/smacker/go-tree-sitter" |
| 10 | + "globstar.dev/analysis" |
| 11 | +) |
| 12 | + |
| 13 | +var DataFlowAnalyzer = &analysis.Analyzer{ |
| 14 | + Name: "py-dataflow-analyzer", |
| 15 | + Language: analysis.LangPy, |
| 16 | + Description: "Create a data flow graph for Python", |
| 17 | + Category: analysis.CategorySecurity, |
| 18 | + Severity: analysis.SeverityWarning, |
| 19 | + Run: createPyDFG, |
| 20 | + ResultType: reflect.TypeOf(&DataFlowGraph{}), |
| 21 | + Requires: []*analysis.Analyzer{ScopeAnalyzer}, |
| 22 | +} |
| 23 | + |
| 24 | +type DataFlowNode struct { |
| 25 | + Node *sitter.Node |
| 26 | + Sources []*DataFlowNode |
| 27 | + Scope *analysis.Scope |
| 28 | + Variable *analysis.Variable |
| 29 | + FuncDef *FunctionDefinition |
| 30 | +} |
| 31 | + |
| 32 | +type FunctionDefinition struct { |
| 33 | + Node *sitter.Node |
| 34 | + Parameters []*analysis.Variable |
| 35 | + Body *sitter.Node |
| 36 | + Scope *analysis.Scope |
| 37 | +} |
| 38 | + |
| 39 | +type ClassDefinition struct { |
| 40 | + Node *sitter.Node |
| 41 | + Properties []*analysis.Variable |
| 42 | + Methods []*FunctionDefinition |
| 43 | + Scope *analysis.Scope |
| 44 | +} |
| 45 | + |
| 46 | +type DataFlowGraph struct { |
| 47 | + Graph map[*analysis.Variable]*DataFlowNode |
| 48 | + ScopeTree *analysis.ScopeTree |
| 49 | + FunDefs map[string]*FunctionDefinition |
| 50 | + ClassDefs map[*analysis.Variable]*ClassDefinition |
| 51 | +} |
| 52 | + |
| 53 | +var functionDefinitions = make(map[string]*FunctionDefinition) |
| 54 | +var classDefinitions = make(map[*analysis.Variable]*ClassDefinition) |
| 55 | + |
| 56 | +func createPyDFG(pass *analysis.Pass) (interface{}, error) { |
| 57 | + scopeResult, err := buildScopeTree(pass) |
| 58 | + if err != nil { |
| 59 | + return nil, fmt.Errorf("failed to build the source tree") |
| 60 | + } |
| 61 | + |
| 62 | + scopeTree := scopeResult.(*analysis.ScopeTree) |
| 63 | + |
| 64 | + dfg := &DataFlowGraph{ |
| 65 | + Graph: make(map[*analysis.Variable]*DataFlowNode), |
| 66 | + ScopeTree: scopeTree, |
| 67 | + FunDefs: make(map[string]*FunctionDefinition), |
| 68 | + } |
| 69 | + |
| 70 | + analysis.Preorder(pass, func(node *sitter.Node) { |
| 71 | + if node == nil { |
| 72 | + return |
| 73 | + } |
| 74 | + |
| 75 | + currentScope := scopeTree.GetScope(node) |
| 76 | + if currentScope == nil { |
| 77 | + return |
| 78 | + } |
| 79 | + |
| 80 | + // track variable declarations and assignments |
| 81 | + if node.Type() == "assignment" { |
| 82 | + var nameNode, valueNode *sitter.Node |
| 83 | + |
| 84 | + nameNode = node.ChildByFieldName("left") |
| 85 | + valueNode = node.ChildByFieldName("right") |
| 86 | + |
| 87 | + if nameNode != nil && nameNode.Type() == "identifier" && valueNode != nil { |
| 88 | + var dfNode *DataFlowNode |
| 89 | + varName := nameNode.Content(pass.FileContext.Source) |
| 90 | + variable := currentScope.Lookup(varName) |
| 91 | + |
| 92 | + if variable == nil { |
| 93 | + dfNode = &DataFlowNode{ |
| 94 | + Node: nameNode, |
| 95 | + Sources: []*DataFlowNode{}, |
| 96 | + Scope: currentScope, |
| 97 | + Variable: variable, |
| 98 | + } |
| 99 | + } |
| 100 | + |
| 101 | + switch valueNode.Type() { |
| 102 | + case "identifier": |
| 103 | + // if value is another variable, link to its data flow node |
| 104 | + sourceVarName := valueNode.Content(pass.FileContext.Source) |
| 105 | + currVar := currentScope.Lookup(sourceVarName) |
| 106 | + if sourceNode, exists := dfg.Graph[currVar]; exists { |
| 107 | + dfNode.Sources = append(dfNode.Sources, sourceNode) |
| 108 | + } |
| 109 | + |
| 110 | + case "call": |
| 111 | + handleFunctionCallDataFlow(valueNode, dfNode, dfg.Graph, pass.FileContext.Source, currentScope) |
| 112 | + |
| 113 | + case "binary_operator": |
| 114 | + handleBinaryExprDataFlow(valueNode, dfNode, dfg.Graph, pass.FileContext.Source, currentScope) |
| 115 | + |
| 116 | + // analyze the variables in an f-string |
| 117 | + case "string": |
| 118 | + if valueNode.Content(pass.FileContext.Source)[0] == 'f' { |
| 119 | + handleFStringDataFlow(valueNode, dfNode, dfg.Graph, pass.FileContext.Source, currentScope) |
| 120 | + } |
| 121 | + |
| 122 | + // lambda expressions are also functions |
| 123 | + case "lambda": |
| 124 | + lambdaScope := scopeTree.GetScope(valueNode) |
| 125 | + lambdaBody := valueNode.ChildByFieldName("body") |
| 126 | + if lambdaBody == nil { |
| 127 | + return |
| 128 | + } |
| 129 | + |
| 130 | + funcDef := &FunctionDefinition{ |
| 131 | + Node: valueNode, |
| 132 | + Body: lambdaBody, |
| 133 | + Scope: lambdaScope, |
| 134 | + } |
| 135 | + |
| 136 | + for _, param := range lambdaScope.Variables { |
| 137 | + funcDef.Parameters = append(funcDef.Parameters, param) |
| 138 | + } |
| 139 | + |
| 140 | + functionDefinitions[varName] = funcDef |
| 141 | + dfNode.FuncDef = funcDef |
| 142 | + } |
| 143 | + dfg.Graph[variable] = dfNode |
| 144 | + |
| 145 | + } |
| 146 | + } |
| 147 | + |
| 148 | + if node.Type() == "function_definition" { |
| 149 | + funcNameNode := node.ChildByFieldName("name") |
| 150 | + if funcNameNode == nil { |
| 151 | + return |
| 152 | + } |
| 153 | + |
| 154 | + funcName := funcNameNode.Content(pass.FileContext.Source) |
| 155 | + funcDef := &FunctionDefinition{ |
| 156 | + Node: node, |
| 157 | + Body: node.ChildByFieldName("body"), |
| 158 | + Scope: currentScope, |
| 159 | + } |
| 160 | + |
| 161 | + funcVar := currentScope.Lookup(funcName) |
| 162 | + if funcVar == nil { |
| 163 | + return |
| 164 | + } |
| 165 | + |
| 166 | + for _, param := range currentScope.Variables { |
| 167 | + funcDef.Parameters = append(funcDef.Parameters, param) |
| 168 | + } |
| 169 | + |
| 170 | + functionDefinitions[funcName] = funcDef |
| 171 | + dfg.Graph[funcVar] = &DataFlowNode{ |
| 172 | + Node: funcNameNode, |
| 173 | + Sources: []*DataFlowNode{}, |
| 174 | + Scope: currentScope, |
| 175 | + Variable: funcVar, |
| 176 | + FuncDef: funcDef, |
| 177 | + } |
| 178 | + } |
| 179 | + |
| 180 | + if node.Type() == "class_definition" { |
| 181 | + var dfNode *DataFlowNode |
| 182 | + className := node.ChildByFieldName("name") |
| 183 | + if className == nil { |
| 184 | + return |
| 185 | + } |
| 186 | + |
| 187 | + varClassName := className.Content(pass.FileContext.Source) |
| 188 | + classNameVar := currentScope.Lookup(varClassName) |
| 189 | + classScope := scopeTree.GetScope(classNameVar.DeclNode) |
| 190 | + if classScope == nil { |
| 191 | + return |
| 192 | + } |
| 193 | + |
| 194 | + classBody := node.ChildByFieldName("body") |
| 195 | + if classBody == nil { |
| 196 | + return |
| 197 | + } |
| 198 | + |
| 199 | + var classMethods []*FunctionDefinition |
| 200 | + var classProperties []*analysis.Variable |
| 201 | + |
| 202 | + dfNode = &DataFlowNode{ |
| 203 | + Node: classNameVar.DeclNode, |
| 204 | + Scope: classScope, |
| 205 | + Variable: classNameVar, |
| 206 | + } |
| 207 | + |
| 208 | + dfg.Graph[dfNode.Variable] = dfNode |
| 209 | + |
| 210 | + for i := range int(classBody.NamedChildCount()) { |
| 211 | + classChild := classBody.NamedChild(i) |
| 212 | + if classChild == nil { |
| 213 | + return |
| 214 | + } |
| 215 | + |
| 216 | + if classChild.Type() == "function_definition" { |
| 217 | + classMethodNameNode := classChild.ChildByFieldName("name") |
| 218 | + if classMethodNameNode != nil && classMethodNameNode.Type() == "identifier" { |
| 219 | + methodDef := &FunctionDefinition{ |
| 220 | + Node: classChild, |
| 221 | + Body: classChild.ChildByFieldName("body"), |
| 222 | + Parameters: []*analysis.Variable{}, |
| 223 | + Scope: classScope, |
| 224 | + } |
| 225 | + |
| 226 | + params := node.ChildByFieldName("parameters") |
| 227 | + if params != nil { |
| 228 | + for i := range int(params.NamedChildCount()) { |
| 229 | + param := params.NamedChild(i) |
| 230 | + if param.Type() == "identifier" { |
| 231 | + paramName := param.Content(pass.FileContext.Source) |
| 232 | + paramVar := currentScope.Lookup(paramName) |
| 233 | + if paramVar != nil { |
| 234 | + methodDef.Parameters = append(methodDef.Parameters, paramVar) |
| 235 | + } |
| 236 | + } |
| 237 | + } |
| 238 | + } |
| 239 | + classMethods = append(classMethods, methodDef) |
| 240 | + } |
| 241 | + } else if classChild.Type() == "assignment" { |
| 242 | + classVarNameNode := classChild.ChildByFieldName("left") |
| 243 | + if classVarNameNode != nil && classVarNameNode.Type() == "identifier" { |
| 244 | + classVarName := classVarNameNode.Content(pass.FileContext.Source) |
| 245 | + classVar := classScope.Children[0].Lookup(classVarName) |
| 246 | + if classVar != nil { |
| 247 | + classProperties = append(classProperties, classVar) |
| 248 | + } |
| 249 | + } |
| 250 | + } |
| 251 | + } |
| 252 | + |
| 253 | + classDef := &ClassDefinition{ |
| 254 | + Node: node, |
| 255 | + Properties: classProperties, |
| 256 | + Methods: classMethods, |
| 257 | + Scope: classScope, |
| 258 | + } |
| 259 | + |
| 260 | + classDefinitions[classNameVar] = classDef |
| 261 | + } |
| 262 | + }) |
| 263 | + |
| 264 | + dfg.FunDefs = functionDefinitions |
| 265 | + dfg.ClassDefs = classDefinitions |
| 266 | + |
| 267 | + return dfg, nil |
| 268 | +} |
| 269 | + |
| 270 | +func handleFStringDataFlow(node *sitter.Node, dfNode *DataFlowNode, dfg map[*analysis.Variable]*DataFlowNode, source []byte, scope *analysis.Scope) { |
| 271 | + if node == nil || node.Type() != "string" { |
| 272 | + return |
| 273 | + } |
| 274 | + |
| 275 | + interpolations := analysis.ChildrenWithFieldName(node, "interpolation") |
| 276 | + for _, interpNode := range interpolations { |
| 277 | + exprNode := interpNode.ChildByFieldName("expression") |
| 278 | + if exprNode != nil && exprNode.Type() == "identifier" { |
| 279 | + varName := exprNode.Content(source) |
| 280 | + if variable := scope.Lookup(varName); variable != nil { |
| 281 | + if sourceNode, exists := dfg[variable]; exists { |
| 282 | + dfNode.Sources = append(dfNode.Sources, sourceNode) |
| 283 | + } |
| 284 | + } |
| 285 | + } |
| 286 | + } |
| 287 | +} |
| 288 | + |
| 289 | +func handleBinaryExprDataFlow(node *sitter.Node, dfNode *DataFlowNode, dfg map[*analysis.Variable]*DataFlowNode, source []byte, scope *analysis.Scope) { |
| 290 | + if node == nil || node.Type() != "binary_operator" { |
| 291 | + return |
| 292 | + } |
| 293 | + |
| 294 | + left := node.ChildByFieldName("left") |
| 295 | + right := node.ChildByFieldName("right") |
| 296 | + |
| 297 | + if left != nil && left.Type() == "identifier" { |
| 298 | + leftVar := left.Content(source) |
| 299 | + if variable := scope.Lookup(leftVar); variable != nil { |
| 300 | + if sourceNode, exists := dfg[variable]; exists { |
| 301 | + dfNode.Sources = append(dfNode.Sources, sourceNode) |
| 302 | + } |
| 303 | + } |
| 304 | + } |
| 305 | + |
| 306 | + if right != nil && right.Type() == "identifier" { |
| 307 | + rightVar := right.Content(source) |
| 308 | + if variable := scope.Lookup(rightVar); variable != nil { |
| 309 | + if sourceNode, exists := dfg[variable]; exists { |
| 310 | + dfNode.Sources = append(dfNode.Sources, sourceNode) |
| 311 | + } |
| 312 | + } |
| 313 | + } |
| 314 | + |
| 315 | + // process nested binary expression |
| 316 | + if left != nil && left.Type() == "binary_operator" { |
| 317 | + handleBinaryExprDataFlow(left, dfNode, dfg, source, scope) |
| 318 | + } |
| 319 | + |
| 320 | + if right != nil && right.Type() == "binary_operator" { |
| 321 | + handleBinaryExprDataFlow(right, dfNode, dfg, source, scope) |
| 322 | + } |
| 323 | +} |
| 324 | + |
| 325 | +func handleFunctionCallDataFlow(node *sitter.Node, dfNode *DataFlowNode, dfg map[*analysis.Variable]*DataFlowNode, source []byte, scope *analysis.Scope) { |
| 326 | + if node == nil || node.Type() != "call" { |
| 327 | + return |
| 328 | + } |
| 329 | + |
| 330 | + args := node.ChildByFieldName("arguments") |
| 331 | + if args == nil || args.Type() != "argument_list" { |
| 332 | + return |
| 333 | + } |
| 334 | + |
| 335 | + for i := range int(args.NamedChildCount()) { |
| 336 | + arg := args.NamedChild(i) |
| 337 | + if arg == nil { |
| 338 | + continue |
| 339 | + } |
| 340 | + |
| 341 | + if arg.Type() == "identifier" { |
| 342 | + argName := arg.Content(source) |
| 343 | + if variable := scope.Lookup(argName); variable != nil { |
| 344 | + if sourceNode, exists := dfg[variable]; exists { |
| 345 | + dfNode.Sources = append(dfNode.Sources, sourceNode) |
| 346 | + } |
| 347 | + } |
| 348 | + } |
| 349 | + } |
| 350 | +} |
0 commit comments