Skip to content

Commit fb4458d

Browse files
committed
checkers(python): add checkers to detect event tainted HTML response in AWS Lambda handler functions
Signed-off-by: Maharshi Basu <basumaharshi10@gmail.com>
1 parent 17d8c32 commit fb4458d

File tree

3 files changed

+328
-5
lines changed

3 files changed

+328
-5
lines changed

checkers/checker.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,11 @@ func LoadCustomYamlCheckers(dir string) (map[analysis.Language][]analysis.YamlCh
5858
return checkersMap, err
5959
}
6060

61+
// Analyzer pairs a list of Go analyzers with a test directory.
// NOTE(review): TestDir is presumably the directory holding the fixtures that
// RunAnalyzerTests runs the Analyzers against — confirm against that function.
type Analyzer struct {
	TestDir   string
	Analyzers []*goAnalysis.Analyzer
}
65+
6166
func LoadGoCheckers() []*goAnalysis.Analyzer {
6267
analyzers := []*goAnalysis.Analyzer{}
6368

@@ -67,11 +72,6 @@ func LoadGoCheckers() []*goAnalysis.Analyzer {
6772
return analyzers
6873
}
6974

70-
type Analyzer struct {
71-
TestDir string
72-
Analyzers []*goAnalysis.Analyzer
73-
}
74-
7575
func RunAnalyzerTests(analyzerRegistry []Analyzer) (bool, []error) {
7676
passed := true
7777
errors := []error{}
Lines changed: 251 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,251 @@
1+
package python
2+
3+
import (
4+
"strings"
5+
6+
sitter "github.com/smacker/go-tree-sitter"
7+
"globstar.dev/analysis"
8+
)
9+
10+
var AwsLambdaTaintedHtmlResponse *analysis.Analyzer = &analysis.Analyzer{
11+
Name: "aws-lambda-tainted-html-resp",
12+
Language: analysis.LangPy,
13+
Description: "Directly inserting user input into HTML can create cross-site scripting (XSS) vulnerabilities. This potentially allows attackers to steal sensitive user data. Always use secure HTML rendering methods instead of manual HTML construction.",
14+
Category: analysis.CategorySecurity,
15+
Severity: analysis.SeverityWarning,
16+
Run: checkAwsLambdaTaintedHtmlResponse,
17+
}
18+
19+
func checkAwsLambdaTaintedHtmlResponse(pass *analysis.Pass) (interface{}, error) {
20+
htmlVarMap := make(map[string]bool)
21+
eventVarMap := make(map[string]bool)
22+
intermVarMap := make(map[string]bool)
23+
24+
// first pass: check for variable storing html string
25+
analysis.Preorder(pass, func(node *sitter.Node) {
26+
if node.Type() != "assignment" {
27+
return
28+
}
29+
30+
leftNode := node.ChildByFieldName("left")
31+
rightNode := node.ChildByFieldName("right")
32+
33+
if rightNode == nil {
34+
return
35+
}
36+
37+
// f-strings will not be detected here
38+
if rightNode.Type() == "string" && rightNode.Content(pass.FileContext.Source)[0] == 'f' {
39+
return
40+
}
41+
42+
if rightNode.Type() == "string" {
43+
htmlVarMap[leftNode.Content(pass.FileContext.Source)] = true
44+
}
45+
46+
if isEventSubscript(rightNode, pass.FileContext.Source) {
47+
eventVarMap[leftNode.Content(pass.FileContext.Source)] = true
48+
}
49+
})
50+
51+
// second pass: check for intermediate variables with string formatting
52+
analysis.Preorder(pass, func(node *sitter.Node) {
53+
if node.Type() != "assignment" {
54+
return
55+
}
56+
57+
leftNode := node.ChildByFieldName("left")
58+
rightNode := node.ChildByFieldName("right")
59+
60+
if rightNode == nil {
61+
return
62+
}
63+
64+
if isTaintedArg(rightNode, pass.FileContext.Source, htmlVarMap, eventVarMap, intermVarMap) {
65+
intermVarMap[leftNode.Content(pass.FileContext.Source)] = true
66+
}
67+
})
68+
69+
// detect insecure html response in an assignment
70+
analysis.Preorder(pass, func(node *sitter.Node) {
71+
if node.Type() != "assignment" {
72+
return
73+
}
74+
75+
// leftNode := node.ChildByFieldName("left")
76+
rightNode := node.ChildByFieldName("right")
77+
78+
if rightNode == nil {
79+
return
80+
}
81+
82+
if rightNode.Type() != "dictionary" {
83+
return
84+
}
85+
86+
allDictPairNodes := getNamedChildren(rightNode, 0)
87+
88+
for _, pairNode := range allDictPairNodes {
89+
if isDangerousPair(pairNode, pass.FileContext.Source, htmlVarMap, eventVarMap, intermVarMap) {
90+
pass.Report(pass, pairNode, "Detected potential XSS vulnerability - avoid direct user input in HTML response")
91+
}
92+
}
93+
})
94+
95+
// detected insecure html response in return statement
96+
analysis.Preorder(pass, func(node *sitter.Node) {
97+
if node.Type() != "return_statement" {
98+
return
99+
}
100+
101+
dictionaryNode := node.NamedChild(0)
102+
if dictionaryNode.Type() != "dictionary" {
103+
return
104+
}
105+
106+
allDictPairNodes := getNamedChildren(dictionaryNode, 0)
107+
108+
for _, pairNode := range allDictPairNodes {
109+
if isDangerousPair(pairNode, pass.FileContext.Source, htmlVarMap, eventVarMap, intermVarMap) {
110+
pass.Report(pass, pairNode, "Detected potential XSS vulnerability - avoid direct user input in HTML response")
111+
}
112+
}
113+
})
114+
115+
return nil, nil
116+
}
117+
118+
func isDangerousPair(node *sitter.Node, source []byte, htmlVarMap, eventVarMap, intermVarMap map[string]bool) bool {
119+
if node.Type() != "pair" {
120+
return false
121+
}
122+
keyNode := node.ChildByFieldName("key")
123+
if keyNode.Type() != "string" || trimQuotes(keyNode.Content(source)) != "body" {
124+
return false
125+
}
126+
127+
valueNode := node.ChildByFieldName("value")
128+
if isEventSubscript(valueNode, source) {
129+
return true
130+
}
131+
132+
return isEventSubscript(valueNode, source) || isTaintedArg(valueNode, source, htmlVarMap, eventVarMap, intermVarMap)
133+
134+
}
135+
136+
func isTaintedArg(arg *sitter.Node, source []byte, htmlVarMap, eventVarMap, intermVarMap map[string]bool) bool {
137+
switch arg.Type() {
138+
case "call":
139+
funcAttrNode := arg.ChildByFieldName("function")
140+
if funcAttrNode.Type() != "attribute" {
141+
return false
142+
}
143+
144+
funcNameObjectNode := funcAttrNode.ChildByFieldName("object")
145+
if funcNameObjectNode.Type() != "identifier" && funcNameObjectNode.Type() != "string" {
146+
return false
147+
}
148+
149+
ishtml := false
150+
for codevar := range htmlVarMap {
151+
if codevar == funcNameObjectNode.Content(source) {
152+
ishtml = true
153+
}
154+
}
155+
156+
// this check only applies when the function object is not a string
157+
if !ishtml && funcNameObjectNode.Type() == "identifier" {
158+
return false
159+
}
160+
161+
funcNameAttrVar := funcAttrNode.ChildByFieldName("attribute")
162+
if funcNameAttrVar.Type() != "identifier" && funcNameAttrVar.Content(source) != "format" {
163+
return false
164+
}
165+
166+
argsListNode := arg.ChildByFieldName("arguments")
167+
if argsListNode.Type() != "argument_list" {
168+
return false
169+
}
170+
171+
argsNode := getNamedChildren(argsListNode, 0)
172+
for _, callArg := range argsNode {
173+
if isEventSubscript(callArg, source) {
174+
return true
175+
}
176+
177+
if callArg.Type() == "identifier" {
178+
for eventvar := range eventVarMap {
179+
if eventvar == callArg.Content(source) {
180+
return true
181+
}
182+
}
183+
}
184+
}
185+
186+
case "binary_operator":
187+
leftNode := arg.ChildByFieldName("left")
188+
rightNode := arg.ChildByFieldName("right")
189+
190+
if leftNode.Type() != "identifier" {
191+
return false
192+
}
193+
194+
iscode := false
195+
for codevar := range htmlVarMap {
196+
if codevar == leftNode.Content(source) {
197+
iscode = true
198+
}
199+
}
200+
201+
if !iscode {
202+
return false
203+
}
204+
205+
if isEventSubscript(rightNode, source) {
206+
return true
207+
}
208+
209+
return eventVarMap[rightNode.Content(source)]
210+
211+
case "string":
212+
if arg.Content(source)[0] != 'f' {
213+
return false
214+
}
215+
stringChildrenNodes := getNamedChildren(arg, 0)
216+
for _, strchild := range stringChildrenNodes {
217+
if strchild.Type() == "interpolation" {
218+
exprNode := strchild.ChildByFieldName("expression")
219+
220+
if exprNode.Type() == "subscript" {
221+
return isEventSubscript(exprNode, source)
222+
} else if exprNode.Type() == "identifier" {
223+
return intermVarMap[exprNode.Content(source)]
224+
} else {
225+
return false
226+
}
227+
}
228+
}
229+
230+
case "identifier":
231+
return intermVarMap[arg.Content(source)] || eventVarMap[arg.Content(source)]
232+
233+
}
234+
235+
return false
236+
}
237+
238+
239+
func isEventSubscript(node *sitter.Node, source []byte) bool {
240+
if node.Type() != "subscript" {
241+
return false
242+
}
243+
244+
valueNode := node.ChildByFieldName("value")
245+
if valueNode.Type() != "identifier" {
246+
return false
247+
}
248+
249+
eventIdentifier := valueNode.Content(source)
250+
return strings.Contains(eventIdentifier, "event")
251+
}
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# Checker fixture: builds several response dictionaries whose "body" value is
# derived from `event`. Each pair preceded by `# <expect-error>` is expected to
# be reported; the pair preceded by `# <no-error>` is expected to stay silent.
def lambda_handler1(event, context):
    # Direct event subscript stored in a variable.
    eventarg = event['input']
    # Plain string template, formatted later via html3.format(...).
    html3 = "<div> {} </div>"
    # f-string interpolating an event subscript.
    html1 = f"<div>{event['input']}</div>"
    # .format() called on a string literal with an event-derived argument.
    html2 = "<div> %s </div".format(eventarg)
    # .format() called on a string variable with an event-derived argument.
    interm3 = html3.format(eventarg)
    foo = {
        # <no-error>
        "data": event['foo']
    }
    bar(foo)

    result = {
        "statusCode": 200,
        # <expect-error>
        "body": html1,
        "headers": {
            "Content-Type": "text/html"
        }
    }

    result = {
        "statusCode": 200,
        # <expect-error>
        "body": html2,
        "headers": {
            "Content-Type": "text/html"
        }
    }

    result = {
        "statusCode": 200,
        # <expect-error>
        "body": eventarg,
        "headers": {
            "Content-Type": "text/html"
        }
    }

    result = {
        "statusCode": 200,
        # <expect-error>
        "body": interm3,
        "headers": {
            "Content-Type": "text/html"
        }
    }

    result = {
        "statusCode": 200,
        # <expect-error>
        "body": event['url'],
        "headers": {
            "Content-Type": "text/html"
        }
    }
    return result
58+
59+
60+
# Checker fixture: returns the response dictionary directly (no intermediate
# `result` variable). The "body" pair marked `# <expect-error>` is expected to
# be reported.
def handler(event, context):
    # Direct event subscript stored in a variable.
    eventarg = event['input']
    html = "<div> %s </div>"
    # .format() called on a string variable with an event-derived argument.
    interm = html.format(eventarg)

    return {
        "statusCode": 200,
        # <expect-error>
        "body": interm,
        "headers": {
            "Content-Type": "text/html"
        }
    }

0 commit comments

Comments
 (0)