Skip to content

Commit d71667a

Browse files
committed
data flow analysis for python
1 parent e26b851 commit d71667a

File tree

2 files changed

+372
-0
lines changed

2 files changed

+372
-0
lines changed

checkers/python/py_dataflow.go

Lines changed: 350 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,350 @@
1+
//globstar:registry-exclude
2+
3+
package python
4+
5+
import (
6+
"fmt"
7+
"reflect"
8+
9+
sitter "github.com/smacker/go-tree-sitter"
10+
"globstar.dev/analysis"
11+
)
12+
13+
var DataFlowAnalyzer = &analysis.Analyzer{
14+
Name: "py-dataflow-analyzer",
15+
Language: analysis.LangPy,
16+
Description: "Create a data flow graph for Python",
17+
Category: analysis.CategorySecurity,
18+
Severity: analysis.SeverityWarning,
19+
Run: createPyDFG,
20+
ResultType: reflect.TypeOf(&DataFlowGraph{}),
21+
Requires: []*analysis.Analyzer{ScopeAnalyzer},
22+
}
23+
24+
type DataFlowNode struct {
25+
Node *sitter.Node
26+
Sources []*DataFlowNode
27+
Scope *analysis.Scope
28+
Variable *analysis.Variable
29+
FuncDef *FunctionDefinition
30+
}
31+
32+
type FunctionDefinition struct {
33+
Node *sitter.Node
34+
Parameters []*analysis.Variable
35+
Body *sitter.Node
36+
Scope *analysis.Scope
37+
}
38+
39+
type ClassDefinition struct {
40+
Node *sitter.Node
41+
Properties []*analysis.Variable
42+
Methods []*FunctionDefinition
43+
Scope *analysis.Scope
44+
}
45+
46+
type DataFlowGraph struct {
47+
Graph map[*analysis.Variable]*DataFlowNode
48+
ScopeTree *analysis.ScopeTree
49+
FunDefs map[string]*FunctionDefinition
50+
ClassDefs map[*analysis.Variable]*ClassDefinition
51+
}
52+
53+
var functionDefinitions = make(map[string]*FunctionDefinition)
54+
var classDefinitions = make(map[*analysis.Variable]*ClassDefinition)
55+
56+
func createPyDFG(pass *analysis.Pass) (interface{}, error) {
57+
scopeResult, err := buildScopeTree(pass)
58+
if err != nil {
59+
return nil, fmt.Errorf("failed to build the source tree")
60+
}
61+
62+
scopeTree := scopeResult.(*analysis.ScopeTree)
63+
64+
dfg := &DataFlowGraph{
65+
Graph: make(map[*analysis.Variable]*DataFlowNode),
66+
ScopeTree: scopeTree,
67+
FunDefs: make(map[string]*FunctionDefinition),
68+
}
69+
70+
analysis.Preorder(pass, func(node *sitter.Node) {
71+
if node == nil {
72+
return
73+
}
74+
75+
currentScope := scopeTree.GetScope(node)
76+
if currentScope == nil {
77+
return
78+
}
79+
80+
// track variable declarations and assignments
81+
if node.Type() == "assignment" {
82+
var nameNode, valueNode *sitter.Node
83+
84+
nameNode = node.ChildByFieldName("left")
85+
valueNode = node.ChildByFieldName("right")
86+
87+
if nameNode != nil && nameNode.Type() == "identifier" && valueNode != nil {
88+
var dfNode *DataFlowNode
89+
varName := nameNode.Content(pass.FileContext.Source)
90+
variable := currentScope.Lookup(varName)
91+
92+
if variable == nil {
93+
dfNode = &DataFlowNode{
94+
Node: nameNode,
95+
Sources: []*DataFlowNode{},
96+
Scope: currentScope,
97+
Variable: variable,
98+
}
99+
}
100+
101+
switch valueNode.Type() {
102+
case "identifier":
103+
// if value is another variable, link to its data flow node
104+
sourceVarName := valueNode.Content(pass.FileContext.Source)
105+
currVar := currentScope.Lookup(sourceVarName)
106+
if sourceNode, exists := dfg.Graph[currVar]; exists {
107+
dfNode.Sources = append(dfNode.Sources, sourceNode)
108+
}
109+
110+
case "call":
111+
handleFunctionCallDataFlow(valueNode, dfNode, dfg.Graph, pass.FileContext.Source, currentScope)
112+
113+
case "binary_operator":
114+
handleBinaryExprDataFlow(valueNode, dfNode, dfg.Graph, pass.FileContext.Source, currentScope)
115+
116+
// analyze the variables in an f-string
117+
case "string":
118+
if valueNode.Content(pass.FileContext.Source)[0] == 'f' {
119+
handleFStringDataFlow(valueNode, dfNode, dfg.Graph, pass.FileContext.Source, currentScope)
120+
}
121+
122+
// lambda expressions are also functions
123+
case "lambda":
124+
lambdaScope := scopeTree.GetScope(valueNode)
125+
lambdaBody := valueNode.ChildByFieldName("body")
126+
if lambdaBody == nil {
127+
return
128+
}
129+
130+
funcDef := &FunctionDefinition{
131+
Node: valueNode,
132+
Body: lambdaBody,
133+
Scope: lambdaScope,
134+
}
135+
136+
for _, param := range lambdaScope.Variables {
137+
funcDef.Parameters = append(funcDef.Parameters, param)
138+
}
139+
140+
functionDefinitions[varName] = funcDef
141+
dfNode.FuncDef = funcDef
142+
}
143+
dfg.Graph[variable] = dfNode
144+
145+
}
146+
}
147+
148+
if node.Type() == "function_definition" {
149+
funcNameNode := node.ChildByFieldName("name")
150+
if funcNameNode == nil {
151+
return
152+
}
153+
154+
funcName := funcNameNode.Content(pass.FileContext.Source)
155+
funcDef := &FunctionDefinition{
156+
Node: node,
157+
Body: node.ChildByFieldName("body"),
158+
Scope: currentScope,
159+
}
160+
161+
funcVar := currentScope.Lookup(funcName)
162+
if funcVar == nil {
163+
return
164+
}
165+
166+
for _, param := range currentScope.Variables {
167+
funcDef.Parameters = append(funcDef.Parameters, param)
168+
}
169+
170+
functionDefinitions[funcName] = funcDef
171+
dfg.Graph[funcVar] = &DataFlowNode{
172+
Node: funcNameNode,
173+
Sources: []*DataFlowNode{},
174+
Scope: currentScope,
175+
Variable: funcVar,
176+
FuncDef: funcDef,
177+
}
178+
}
179+
180+
if node.Type() == "class_definition" {
181+
var dfNode *DataFlowNode
182+
className := node.ChildByFieldName("name")
183+
if className == nil {
184+
return
185+
}
186+
187+
varClassName := className.Content(pass.FileContext.Source)
188+
classNameVar := currentScope.Lookup(varClassName)
189+
classScope := scopeTree.GetScope(classNameVar.DeclNode)
190+
if classScope == nil {
191+
return
192+
}
193+
194+
classBody := node.ChildByFieldName("body")
195+
if classBody == nil {
196+
return
197+
}
198+
199+
var classMethods []*FunctionDefinition
200+
var classProperties []*analysis.Variable
201+
202+
dfNode = &DataFlowNode{
203+
Node: classNameVar.DeclNode,
204+
Scope: classScope,
205+
Variable: classNameVar,
206+
}
207+
208+
dfg.Graph[dfNode.Variable] = dfNode
209+
210+
for i := range int(classBody.NamedChildCount()) {
211+
classChild := classBody.NamedChild(i)
212+
if classChild == nil {
213+
return
214+
}
215+
216+
if classChild.Type() == "function_definition" {
217+
classMethodNameNode := classChild.ChildByFieldName("name")
218+
if classMethodNameNode != nil && classMethodNameNode.Type() == "identifier" {
219+
methodDef := &FunctionDefinition{
220+
Node: classChild,
221+
Body: classChild.ChildByFieldName("body"),
222+
Parameters: []*analysis.Variable{},
223+
Scope: classScope,
224+
}
225+
226+
params := node.ChildByFieldName("parameters")
227+
if params != nil {
228+
for i := range int(params.NamedChildCount()) {
229+
param := params.NamedChild(i)
230+
if param.Type() == "identifier" {
231+
paramName := param.Content(pass.FileContext.Source)
232+
paramVar := currentScope.Lookup(paramName)
233+
if paramVar != nil {
234+
methodDef.Parameters = append(methodDef.Parameters, paramVar)
235+
}
236+
}
237+
}
238+
}
239+
classMethods = append(classMethods, methodDef)
240+
}
241+
} else if classChild.Type() == "assignment" {
242+
classVarNameNode := classChild.ChildByFieldName("left")
243+
if classVarNameNode != nil && classVarNameNode.Type() == "identifier" {
244+
classVarName := classVarNameNode.Content(pass.FileContext.Source)
245+
classVar := classScope.Children[0].Lookup(classVarName)
246+
if classVar != nil {
247+
classProperties = append(classProperties, classVar)
248+
}
249+
}
250+
}
251+
}
252+
253+
classDef := &ClassDefinition{
254+
Node: node,
255+
Properties: classProperties,
256+
Methods: classMethods,
257+
Scope: classScope,
258+
}
259+
260+
classDefinitions[classNameVar] = classDef
261+
}
262+
})
263+
264+
dfg.FunDefs = functionDefinitions
265+
dfg.ClassDefs = classDefinitions
266+
267+
return dfg, nil
268+
}
269+
270+
func handleFStringDataFlow(node *sitter.Node, dfNode *DataFlowNode, dfg map[*analysis.Variable]*DataFlowNode, source []byte, scope *analysis.Scope) {
271+
if node == nil || node.Type() != "string" {
272+
return
273+
}
274+
275+
interpolations := analysis.ChildrenWithFieldName(node, "interpolation")
276+
for _, interpNode := range interpolations {
277+
exprNode := interpNode.ChildByFieldName("expression")
278+
if exprNode != nil && exprNode.Type() == "identifier" {
279+
varName := exprNode.Content(source)
280+
if variable := scope.Lookup(varName); variable != nil {
281+
if sourceNode, exists := dfg[variable]; exists {
282+
dfNode.Sources = append(dfNode.Sources, sourceNode)
283+
}
284+
}
285+
}
286+
}
287+
}
288+
289+
func handleBinaryExprDataFlow(node *sitter.Node, dfNode *DataFlowNode, dfg map[*analysis.Variable]*DataFlowNode, source []byte, scope *analysis.Scope) {
290+
if node == nil || node.Type() != "binary_operator" {
291+
return
292+
}
293+
294+
left := node.ChildByFieldName("left")
295+
right := node.ChildByFieldName("right")
296+
297+
if left != nil && left.Type() == "identifier" {
298+
leftVar := left.Content(source)
299+
if variable := scope.Lookup(leftVar); variable != nil {
300+
if sourceNode, exists := dfg[variable]; exists {
301+
dfNode.Sources = append(dfNode.Sources, sourceNode)
302+
}
303+
}
304+
}
305+
306+
if right != nil && right.Type() == "identifier" {
307+
rightVar := right.Content(source)
308+
if variable := scope.Lookup(rightVar); variable != nil {
309+
if sourceNode, exists := dfg[variable]; exists {
310+
dfNode.Sources = append(dfNode.Sources, sourceNode)
311+
}
312+
}
313+
}
314+
315+
// process nested binary expression
316+
if left != nil && left.Type() == "binary_operator" {
317+
handleBinaryExprDataFlow(left, dfNode, dfg, source, scope)
318+
}
319+
320+
if right != nil && right.Type() == "binary_operator" {
321+
handleBinaryExprDataFlow(right, dfNode, dfg, source, scope)
322+
}
323+
}
324+
325+
func handleFunctionCallDataFlow(node *sitter.Node, dfNode *DataFlowNode, dfg map[*analysis.Variable]*DataFlowNode, source []byte, scope *analysis.Scope) {
326+
if node == nil || node.Type() != "call" {
327+
return
328+
}
329+
330+
args := node.ChildByFieldName("arguments")
331+
if args == nil || args.Type() != "argument_list" {
332+
return
333+
}
334+
335+
for i := range int(args.NamedChildCount()) {
336+
arg := args.NamedChild(i)
337+
if arg == nil {
338+
continue
339+
}
340+
341+
if arg.Type() == "identifier" {
342+
argName := arg.Content(source)
343+
if variable := scope.Lookup(argName); variable != nil {
344+
if sourceNode, exists := dfg[variable]; exists {
345+
dfNode.Sources = append(dfNode.Sources, sourceNode)
346+
}
347+
}
348+
}
349+
}
350+
}

checkers/python/scope.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//globstar:registry-exclude
2+
// scope resolution for Python files
3+
4+
package python
5+
6+
import (
7+
"globstar.dev/analysis"
8+
"reflect"
9+
)
10+
11+
var ScopeAnalyzer = &analysis.Analyzer{
12+
Name: "py-scope",
13+
ResultType: reflect.TypeOf(&analysis.ScopeTree{}),
14+
Run: buildScopeTree,
15+
Language: analysis.LangPy,
16+
}
17+
18+
func buildScopeTree(pass *analysis.Pass) (any, error) {
19+
// creates scope builder for python
20+
scope := analysis.MakeScopeTree(pass.Analyzer.Language, pass.FileContext.Ast, pass.FileContext.Source)
21+
return scope, nil
22+
}

0 commit comments

Comments
 (0)