diff --git a/.gitignore b/.gitignore
index 1191c253..bcd6e219 100644
--- a/.gitignore
+++ b/.gitignore
@@ -50,3 +50,4 @@ dist/
 .globstar/
 checkers/registry.go
 .idea/
+.vscode/
diff --git a/analysis/ts_scope.go b/analysis/ts_scope.go
index fee3f131..81d611bd 100644
--- a/analysis/ts_scope.go
+++ b/analysis/ts_scope.go
@@ -33,7 +33,6 @@ var ScopeNodes = []string{
 	"program",
 	"arrow_function",
 	"class_body",
-	// "class_declaration",
 	"method_definition",
 }
 
@@ -50,7 +49,7 @@ func (ts *TsScopeBuilder) DeclaresVariable(node *sitter.Node) bool {
 		typ == "function_declaration" ||
 		typ == "method_definition" ||
 		typ == "class_declaration" ||
-		typ == "export_statement" || typ == "assignment_expression" || typ == "public_field_definition"
+		typ == "export_statement" || typ == "assignment_expression" || typ == "public_field_definition" || typ == "call_expression" // To handle cases of inbuilt functions like setTimeout etc.
 }
 
 func (ts *TsScopeBuilder) scanDecl(idOrPattern, declarator *sitter.Node, decls []*Variable) []*Variable {
@@ -245,6 +244,15 @@ func (ts *TsScopeBuilder) CollectVariables(node *sitter.Node) []*Variable {
 				DeclNode: fieldName,
 			})
 		}
+	case "call_expression":
+		funcName := node.ChildByFieldName("function")
+		if funcName != nil && funcName.Type() == "identifier" {
+			declaredVars = append(declaredVars, &Variable{
+				Kind:     VarKindFunction,
+				Name:     funcName.Content(ts.source),
+				DeclNode: funcName,
+			})
+		}
 	}
 
 	return declaredVars
diff --git a/checkers/javascript/eval_express.go b/checkers/javascript/eval_express.go
new file mode 100644
index 00000000..15038b97
--- /dev/null
+++ b/checkers/javascript/eval_express.go
@@ -0,0 +1,169 @@
+package javascript
+
+import (
+	"slices"
+
+	sitter "github.com/smacker/go-tree-sitter"
+	"globstar.dev/analysis"
+)
+
+// evalExpressMessage is the diagnostic text reported for every finding.
+const evalExpressMessage = "Eval attempt on user input, code injection vulnerability."
+
+// EvalExpress flags calls to eval, setTimeout and Function (including
+// `new Function(...)`) whose arguments originate from user-controlled input.
+var EvalExpress = &analysis.Analyzer{
+	Name:        "eval_express",
+	Language:    analysis.LangJs,
+	Description: "Avoid dynamically evaluating untrusted input, which can lead to a code injection vulnerability.",
+	Category:    analysis.CategorySecurity,
+	Severity:    analysis.SeverityCritical,
+	Requires:    []*analysis.Analyzer{DataFlowAnalyzer},
+	Run:         detectEvalExpress,
+}
+
+// detectEvalExpress walks the syntax tree of the current file and reports
+// variable declarators and expression statements that hand tainted data to
+// eval, setTimeout or Function.
+//
+// It consumes the *DataFlowGraph produced by DataFlowAnalyzer. When that result
+// is missing or incomplete nothing is reported. The returned values are always
+// (nil, nil).
+func detectEvalExpress(pass *analysis.Pass) (interface{}, error) {
+	// Comma-ok keeps a missing or mistyped analyzer result from panicking.
+	dfg, ok := pass.ResultOf[DataFlowAnalyzer].(*DataFlowGraph)
+	if !ok || dfg == nil || dfg.ScopeTree == nil || dfg.Graph == nil || dfg.FuncCalls == nil {
+		return nil, nil
+	}
+	// Read calls from the graph result rather than the package-level map so the
+	// checker depends only on what DataFlowAnalyzer handed over.
+	scopeTree, flowGraph, funcCalls := dfg.ScopeTree, dfg.Graph, dfg.FuncCalls
+	source := pass.FileContext.Source
+
+	// Common user-input sources for JavaScript applications.
+	userInputSrc := []string{"req.query.input", "req.params.id", "req.body", "req.cookies.sessionId", "localStorage.getItem"}
+
+	// These methods call eval directly, or under the hood. This leads to vulnerability.
+	vulnMethods := []string{"eval", "setTimeout", "Function"}
+
+	// taintedNodes collects the data-flow nodes of variables initialised from
+	// one of userInputSrc. It never contains nil.
+	var taintedNodes []*DataFlowNode
+
+	// interpolatesTainted reports whether any template-string argument in args
+	// substitutes an identifier that resolves, in scope, to a tainted node.
+	interpolatesTainted := func(args *sitter.Node, scope *analysis.Scope) bool {
+		if args == nil || scope == nil {
+			return false
+		}
+		for i := 0; i < int(args.NamedChildCount()); i++ {
+			arg := args.NamedChild(i)
+			if arg == nil || arg.Type() != "template_string" {
+				continue
+			}
+			for j := 0; j < int(arg.NamedChildCount()); j++ {
+				sub := arg.NamedChild(j)
+				if sub == nil || sub.Type() != "template_substitution" {
+					continue
+				}
+				id := sub.NamedChild(0)
+				if id == nil || id.Type() != "identifier" {
+					continue
+				}
+				variable := scope.Lookup(id.Content(source))
+				if variable == nil {
+					continue
+				}
+				// Skip variables without a graph entry: their nil node must
+				// never be compared against the tainted list.
+				if dfNode := flowGraph[variable]; dfNode != nil && slices.Contains(taintedNodes, dfNode) {
+					return true
+				}
+			}
+		}
+		return false
+	}
+
+	analysis.Preorder(pass, func(node *sitter.Node) {
+		if node == nil {
+			return
+		}
+
+		switch node.Type() {
+		case "variable_declarator":
+			currScope := scopeTree.GetScope(node)
+			valueNode := node.ChildByFieldName("value")
+			if currScope == nil || valueNode == nil {
+				return
+			}
+
+			// nameDfNode stays nil when the declared name is not in the graph.
+			var nameDfNode *DataFlowNode
+			if nameNode := node.ChildByFieldName("name"); nameNode != nil {
+				if nameVar := currScope.Lookup(nameNode.Content(source)); nameVar != nil {
+					nameDfNode = flowGraph[nameVar]
+				}
+			}
+
+			switch valueNode.Type() {
+			case "member_expression", "identifier":
+				// Mark the declared variable as tainted when it is initialised
+				// directly from a known user-input source.
+				if nameDfNode != nil && slices.Contains(userInputSrc, valueNode.Content(source)) {
+					taintedNodes = append(taintedNodes, nameDfNode)
+				}
+
+			case "call_expression":
+				// Declarator initialised by a direct call to a vulnerable method.
+				funcNode := valueNode.ChildByFieldName("function")
+				if funcNode == nil || nameDfNode == nil {
+					return
+				}
+				if slices.Contains(vulnMethods, funcNode.Content(source)) && ContainsAny(nameDfNode.Sources, taintedNodes) {
+					pass.Report(pass, node, evalExpressMessage)
+				}
+
+			case "new_expression":
+				// Constructor form of Function. One report per declarator,
+				// however many tainted substitutions its arguments contain.
+				ctor := valueNode.ChildByFieldName("constructor")
+				if ctor == nil || ctor.Content(source) != "Function" {
+					return
+				}
+				if interpolatesTainted(valueNode.ChildByFieldName("arguments"), currScope) {
+					pass.Report(pass, node, evalExpressMessage)
+				}
+			}
+
+		case "expression_statement":
+			// Bare calls whose result is discarded.
+			callNode := node.NamedChild(0)
+			if callNode == nil || callNode.Type() != "call_expression" {
+				return
+			}
+			funcNode := callNode.ChildByFieldName("function")
+			if funcNode == nil || !slices.Contains(vulnMethods, funcNode.Content(source)) {
+				return
+			}
+			if call := funcCalls[callNode]; call != nil && ContainsAny(call.Sources, taintedNodes) {
+				pass.Report(pass, node, evalExpressMessage)
+			}
+		}
+	})
+
+	return nil, nil
+}
+
+// ContainsAny reports whether a and b share at least one element.
+func ContainsAny[T comparable](a, b []T) bool {
+	for _, x := range a {
+		if slices.Contains(b, x) {
+			return true
+		}
+	}
+	return false
+}
+
+// TODO:
+// - [ ] Add vuln detection for call expressions.
+// - [ ] Update the DFG to handle `new Function()` statements, instead of brute-forcing here.
diff --git a/checkers/javascript/js_dataflow.go b/checkers/javascript/js_dataflow.go
index bbd56b16..66b7d695 100644
--- a/checkers/javascript/js_dataflow.go
+++ b/checkers/javascript/js_dataflow.go
@@ -37,6 +37,12 @@ type FunctionDefinition struct {
 	Scope      *analysis.Scope
 }
 
+type FunctionCall struct {
+	Node    *sitter.Node
+	Sources []*DataFlowNode
+	DfNode  *DataFlowNode
+}
+
 type ClassDefinition struct {
 	Node       *sitter.Node
 	Properties []*analysis.Variable
@@ -49,10 +55,12 @@ type DataFlowGraph struct {
 	ScopeTree *analysis.ScopeTree
 	FuncDefs  map[string]*FunctionDefinition
 	ClassDefs map[*analysis.Variable]*ClassDefinition
+	FuncCalls map[*sitter.Node]*FunctionCall
 }
 
 var functionDefinitions = make(map[string]*FunctionDefinition)
 var classDefinitions = make(map[*analysis.Variable]*ClassDefinition)
+var functionCalls = make(map[*sitter.Node]*FunctionCall)
 
 // var DataFlowGraph = make(map[*analysis.Variable]*DataFlowNode)
 
@@ -85,6 +93,7 @@ func createDataFlowGraph(pass *analysis.Pass) (interface{}, error) {
 		}
 
 		// Track variable declarations and assignments
+		// TODO: Add sources for the nameNode.
if node.Type() == "variable_declarator" || node.Type() == "assignment_expression" { var nameNode, valueNode *sitter.Node @@ -175,6 +184,7 @@ func createDataFlowGraph(pass *analysis.Pass) (interface{}, error) { dfNode.FuncDef = funcDef } + dataFlowGraph.Graph[variable] = dfNode } @@ -216,36 +226,66 @@ func createDataFlowGraph(pass *analysis.Pass) (interface{}, error) { immidiateFunc := node.ChildByFieldName("function") // Used to verify that the call_expression is actually pointing to an IIFE(immidiately invoked function expression) // also filters out false positives of regular call expressions like console.log(), foo(x) etc. - if immidiateFunc == nil || immidiateFunc.Type() != "parenthesized_expression" { - return - } + if immidiateFunc != nil && immidiateFunc.Type() == "parenthesized_expression" { + funcExpr := immidiateFunc.NamedChild(0) + if funcExpr == nil { + return + } - funcExpr := immidiateFunc.NamedChild(0) - if funcExpr == nil { - return - } + funcDef := &FunctionDefinition{ + Node: funcExpr, + Body: funcExpr.ChildByFieldName("body"), + Scope: currentScope, + } - funcDef := &FunctionDefinition{ - Node: funcExpr, - Body: funcExpr.ChildByFieldName("body"), - Scope: currentScope, - } + params := node.ChildByFieldName("parameters") + if params != nil { + for i := 0; i < int(params.NamedChildCount()); i++ { + param := params.NamedChild(i) + if param.Type() == "identifier" { + paramName := param.Content(pass.FileContext.Source) + paramVar := currentScope.Lookup(paramName) + if paramVar != nil { + funcDef.Parameters = append(funcDef.Parameters, paramVar) + } - params := node.ChildByFieldName("parameters") - if params != nil { - for i := 0; i < int(params.NamedChildCount()); i++ { - param := params.NamedChild(i) - if param.Type() == "identifier" { - paramName := param.Content(pass.FileContext.Source) - paramVar := currentScope.Lookup(paramName) - if paramVar != nil { - funcDef.Parameters = append(funcDef.Parameters, paramVar) } + } + } + // Create a data flow 
node for the IIFE + } + if immidiateFunc != nil && immidiateFunc.Type() == "identifier" { + + funcname := immidiateFunc.Content(pass.FileContext.Source) + _, exists := functionDefinitions[funcname] + if !exists { + funcVar := currentScope.Lookup(funcname) + if funcVar == nil { + return + } + dfNode := &DataFlowNode{ + Node: immidiateFunc, + Sources: []*DataFlowNode{}, + Scope: currentScope, + Variable: funcVar, } + + handleCallExpressionDataFlow(node, dfNode, dataFlowGraph.Graph, pass.FileContext.Source, currentScope) + + functionCalls[node] = &FunctionCall{ + Node: immidiateFunc, + Sources: dfNode.Sources, + DfNode: dfNode, + } + } } - // Create a data flow node for the IIFE + + // Need a way to track function calls like setTimeout etc. + // Can do something like this: + // check if the function exits in the function definitions map. If it doesn't exist, then it must be a function call specific to the language. + // Then handle it accordingly, for sources etc. } if node.Type() == "class_declaration" { @@ -325,7 +365,6 @@ func createDataFlowGraph(pass *analysis.Pass) (interface{}, error) { } } - fmt.Println(classChild) } } @@ -346,6 +385,7 @@ func createDataFlowGraph(pass *analysis.Pass) (interface{}, error) { }) dataFlowGraph.FuncDefs = functionDefinitions dataFlowGraph.ClassDefs = classDefinitions + dataFlowGraph.FuncCalls = functionCalls return dataFlowGraph, nil } @@ -414,7 +454,6 @@ func handleTemplateStringDataFlow(node *sitter.Node, dfNode *DataFlowNode, DataF if variable := scope.Lookup(varName); variable != nil { if sourceNode, exists := DataFlowGraph[variable]; exists { dfNode.Sources = append(dfNode.Sources, sourceNode) - } } } @@ -427,7 +466,6 @@ func handleCallExpressionDataFlow(node *sitter.Node, dfNode *DataFlowNode, DataF if node == nil || node.Type() != "call_expression" { return } - args := node.ChildByFieldName("arguments") if args == nil { return @@ -436,6 +474,7 @@ func handleCallExpressionDataFlow(node *sitter.Node, dfNode *DataFlowNode, DataF 
// Check each argument for taint for i := 0; i < int(args.NamedChildCount()); i++ { arg := args.NamedChild(i) + if arg == nil { continue } @@ -447,7 +486,17 @@ func handleCallExpressionDataFlow(node *sitter.Node, dfNode *DataFlowNode, DataF dfNode.Sources = append(dfNode.Sources, sourceNode) } } + } + + // Add handling of template strings inside a function call + if arg.Type() == "template_string" { + // fmt.Println(arg.Content(sourceCode)) + handleTemplateStringDataFlow(arg, dfNode, DataFlowGraph, sourceCode, scope) + } + if arg.Type() == "binary_expression" { + handleBinaryExpressionDataFlow(arg, dfNode, DataFlowGraph, sourceCode, scope) } } + } diff --git a/checkers/javascript/js_dataflow_test.go b/checkers/javascript/js_dataflow_test.go index 1848dd4a..e8cdeb24 100644 --- a/checkers/javascript/js_dataflow_test.go +++ b/checkers/javascript/js_dataflow_test.go @@ -21,117 +21,117 @@ func TestDataFlowAnalysis(t *testing.T) { // - Update test-cases to handle dataflow implementation instead of sql-injection cases. // - Add other edge-cases including function blocks, Class Blocks etc. 
- // t.Run("variable_data_flow", func(t *testing.T) { - // source := ` - // var a = 10 - // function f(x){ - // a = x * 2 - // return a; - // } - - // f(a) - // console.log(a) - // ` - // parseResult := parseJsCode(t, []byte(source)) + t.Run("variable_data_flow", func(t *testing.T) { + source := ` + var a = 10 + function f(x){ + a = x * 2 + return a; + } - // pass := &ana.Pass{ - // Analyzer: DataFlowAnalyzer, - // FileContext: parseResult, - // } + f(a) + console.log(a) + ` + parseResult := parseJsCode(t, []byte(source)) - // dfgStruct, err := createDataFlowGraph(pass) - // assert.NoError(t, err) + pass := &ana.Pass{ + Analyzer: DataFlowAnalyzer, + FileContext: parseResult, + } - // dfg := dfgStruct.(*DataFlowGraph) + dfgStruct, err := createDataFlowGraph(pass) + assert.NoError(t, err) - // flowGraph := dfg.Graph - // assert.NotNil(t, flowGraph) + dfg := dfgStruct.(*DataFlowGraph) - // scopeTree := dfg.ScopeTree - // assert.NotNil(t, scopeTree) + flowGraph := dfg.Graph + assert.NotNil(t, flowGraph) - // aVar := scopeTree.Root.Children[0].Lookup("a") - // assert.NotNil(t, aVar) + scopeTree := dfg.ScopeTree + assert.NotNil(t, scopeTree) - // funcVar := scopeTree.Root.Children[0].Lookup("f") - // assert.NotNil(t, funcVar) + aVar := scopeTree.Root.Children[0].Lookup("a") + assert.NotNil(t, aVar) - // funcScope := scopeTree.GetScope(funcVar.DeclNode.ChildByFieldName("body")) - // assert.NotNil(t, funcScope) + funcVar := scopeTree.Root.Children[0].Lookup("f") + assert.NotNil(t, funcVar) - // aFuncVar := funcScope.Lookup("a") - // assert.NotNil(t, aFuncVar) + funcScope := scopeTree.GetScope(funcVar.DeclNode.ChildByFieldName("body")) + assert.NotNil(t, funcScope) - // // no re-assignment of a inside the function body. - // assert.Equal(t, aVar, aFuncVar) + aFuncVar := funcScope.Lookup("a") + assert.NotNil(t, aFuncVar) - // }) + // no re-assignment of a inside the function body. 
+ assert.Equal(t, aVar, aFuncVar) - // t.Run("variable_data_flow_through_multiple_functions", func(t *testing.T) { - // source := ` - // var a = 10 - // function f(x){ - // a = x * 2 - // return a; - // } - - // f(a) - // console.log(a) - - // function g(x) { - // var a = x * 2; - // return a; - // } - - // g(a) - // console.log(a) - // ` - // parseResult := parseJsCode(t, []byte(source)) + }) - // pass := &ana.Pass{ - // Analyzer: DataFlowAnalyzer, - // FileContext: parseResult, - // } + t.Run("variable_data_flow_through_multiple_functions", func(t *testing.T) { + source := ` + var a = 10 + function f(x){ + a = x * 2 + return a; + } - // dfgStruct, err := createDataFlowGraph(pass) - // assert.NoError(t, err) + f(a) + console.log(a) - // dfg := dfgStruct.(*DataFlowGraph) + function g(x) { + var a = x * 2; + return a; + } - // flowGraph := dfg.Graph - // assert.NotNil(t, flowGraph) + g(a) + console.log(a) + ` + parseResult := parseJsCode(t, []byte(source)) - // scopeTree := dfg.ScopeTree - // assert.NotNil(t, scopeTree) + pass := &ana.Pass{ + Analyzer: DataFlowAnalyzer, + FileContext: parseResult, + } + + dfgStruct, err := createDataFlowGraph(pass) + assert.NoError(t, err) - // aVar := scopeTree.Root.Children[0].Lookup("a") - // assert.NotNil(t, aVar) + dfg := dfgStruct.(*DataFlowGraph) - // f1Var := scopeTree.Root.Children[0].Lookup("f") - // assert.NotNil(t, f1Var) + flowGraph := dfg.Graph + assert.NotNil(t, flowGraph) - // f1Scope := scopeTree.GetScope(f1Var.DeclNode.ChildByFieldName("body")) - // assert.NotNil(t, f1Scope) + scopeTree := dfg.ScopeTree + assert.NotNil(t, scopeTree) - // aF1Var := f1Scope.Lookup("a") - // assert.NotNil(t, aF1Var) + aVar := scopeTree.Root.Children[0].Lookup("a") + assert.NotNil(t, aVar) - // // no re-assignment of a inside the function body. 
- // assert.Equal(t, aVar, aF1Var) + f1Var := scopeTree.Root.Children[0].Lookup("f") + assert.NotNil(t, f1Var) - // f2Var := scopeTree.Root.Children[0].Lookup("g") - // assert.NotNil(t, f2Var) + f1Scope := scopeTree.GetScope(f1Var.DeclNode.ChildByFieldName("body")) + assert.NotNil(t, f1Scope) - // f2Scope := scopeTree.GetScope(f2Var.DeclNode.ChildByFieldName("body")) - // assert.NotNil(t, f2Scope) + aF1Var := f1Scope.Lookup("a") + assert.NotNil(t, aF1Var) - // aF2Var := f2Scope.Lookup("a") - // assert.NotNil(t, aF2Var) + // no re-assignment of a inside the function body. + assert.Equal(t, aVar, aF1Var) - // // reassignment of `a` inside of g(x) causes it to be a different variable inside aF2Var - // assert.NotEqual(t, aVar, aF2Var) + f2Var := scopeTree.Root.Children[0].Lookup("g") + assert.NotNil(t, f2Var) - // }) + f2Scope := scopeTree.GetScope(f2Var.DeclNode.ChildByFieldName("body")) + assert.NotNil(t, f2Scope) + + aF2Var := f2Scope.Lookup("a") + assert.NotNil(t, aF2Var) + + // reassignment of `a` inside of g(x) causes it to be a different variable inside aF2Var + assert.NotEqual(t, aVar, aF2Var) + + }) t.Run("variable_assignment_data_flow", func(t *testing.T) { // Taint Logic not implemented after refactoring the data_flow_analyzer. 
@@ -157,9 +157,6 @@ func TestDataFlowAnalysis(t *testing.T) {
 		scopeTree := dfg.ScopeTree
 		assert.NotNil(t, scopeTree)
 
-		t.Log(scopeTree.Root.Children[0].Variables)
-		t.Logf("+++++++%v++++++++\n", dfg.Graph)
-
 		// for variable, node := range dfg.Graph {
 		// 	t.Logf("Variable in graph: %s, Kind: %v\n", variable.Name, variable.Kind)
 		// 	t.Logf("Node details: %+v\n", node)
@@ -210,3 +207,83 @@ func TestClassDataFlow(t *testing.T) {
 	assert.Greater(t, len(classDef[classVar].Properties), 0)
 
 }
+
+// TestCallExp checks that data-flow sources are recorded for variables
+// initialised from call expressions.
+func TestCallExp(t *testing.T) {
+	t.Run("function_call_expression", func(t *testing.T) {
+		source := `
+		const input = req.query.input;
+		var y = Function("foo", "bar",` + "`return ${input}(a,b)`" + `)
+		var x = new Function("bar", input)
+		`
+
+		parseResult := parseJsCode(t, []byte(source))
+		pass := &ana.Pass{
+			Analyzer:    DataFlowAnalyzer,
+			FileContext: parseResult,
+		}
+
+		dfgStruct, err := createDataFlowGraph(pass)
+		assert.NoError(t, err)
+		// Each guard below stops the subtest with a normal assertion failure
+		// instead of letting a later dereference or index panic.
+		dfg, ok := dfgStruct.(*DataFlowGraph)
+		if !assert.True(t, ok) || !assert.NotNil(t, dfg) {
+			return
+		}
+		scopeTree := dfg.ScopeTree
+		if !assert.NotNil(t, scopeTree) || !assert.NotNil(t, scopeTree.Root) || !assert.NotEmpty(t, scopeTree.Root.Children) {
+			return
+		}
+		flowGraph := dfg.Graph
+		assert.NotNil(t, flowGraph)
+
+		input := scopeTree.Root.Children[0].Lookup("input")
+		if !assert.NotNil(t, input) {
+			return
+		}
+
+		y := scopeTree.Root.Children[0].Lookup("y")
+		if !assert.NotNil(t, y) || !assert.NotNil(t, flowGraph[y]) {
+			return
+		}
+		assert.Contains(t, flowGraph[y].Sources, flowGraph[input])
+	})
+
+	// t.Run("call_expressions_without_definition", func(t *testing.T) {
+	// 	source := `
+	// 	const input = req.query.input;
+	// 	eval(input);
+	// 	eval("alert");`
+
+	// 	parseResult := parseJsCode(t, []byte(source))
+	// 	pass := &ana.Pass{
+	// 		Analyzer:    DataFlowAnalyzer,
+	// 		FileContext: parseResult,
+	// 	}
+
+	// 	dfgStruct, err := createDataFlowGraph(pass)
+	// 	assert.NoError(t, err)
+	// 	assert.NotNil(t, dfgStruct)
+	// 	dfg := dfgStruct.(*DataFlowGraph)
+	// 	scopeTree := dfg.ScopeTree
+	// 	assert.NotNil(t, scopeTree)
+	// 	flowGraph := dfg.Graph
+	// 	assert.NotNil(t, flowGraph)
+	// 	inputVar := scopeTree.Root.Children[0].Lookup("input")
+	// 	assert.NotNil(t, inputVar)
+	// 	dfVar := flowGraph[inputVar]
+
+	// 	assert.NotNil(t, dfVar, "node for input should exist in dfg")
+
+	// 	callVar := scopeTree.Root.Children[0].Lookup("eval")
+	// 	assert.NotNil(t, callVar)
+	// 	assert.NotNil(t, flowGraph[callVar], "node for eval should exist in dfg")
+	// 	sources := flowGraph[callVar].Sources
+	// 	assert.Greater(t, len(sources), 0)
+	// 	assert.Contains(t, sources, dfVar, "input should be a source for eval")
+
+	// })
+
+}
diff --git a/checkers/javascript/testdata/eval_express.test.js b/checkers/javascript/testdata/eval_express.test.js
new file mode 100644
index 00000000..eeb8ea08
--- /dev/null
+++ b/checkers/javascript/testdata/eval_express.test.js
@@ -0,0 +1,26 @@
+const input = req.query.input;
+
+// ok
+eval("alert");
+
+//
+eval(input);
+
+//
+var x = new Function("a", "b", `return ${input}(a,b)`);
+
+//
+var y = Function("a", "b", input);
+
+setTimeout(() => {
+  // ok
+  console.log("Delayed for 1 second." + input);
+}, 1000);
+
+setTimeout(function () {
+  // ok
+  console.log("Delayed for 1 second." + input);
+}, 1000);
+
+//
+setTimeout("console.log(" + input + ")", 1000);