1+ package python
2+
3+ import (
4+ "strings"
5+
6+ sitter "github.com/smacker/go-tree-sitter"
7+ "globstar.dev/analysis"
8+ )
9+
10+ var AwsLambdaTaintedHtmlResponse * analysis.Analyzer = & analysis.Analyzer {
11+ Name : "aws-lambda-tainted-html-resp" ,
12+ Language : analysis .LangPy ,
13+ Description : "Directly inserting user input into HTML can create cross-site scripting (XSS) vulnerabilities. This potentially allows attackers to steal sensitive user data. Always use secure HTML rendering methods instead of manual HTML construction." ,
14+ Category : analysis .CategorySecurity ,
15+ Severity : analysis .SeverityWarning ,
16+ Run : checkAwsLambdaTaintedHtmlResponse ,
17+ }
18+
19+ func checkAwsLambdaTaintedHtmlResponse (pass * analysis.Pass ) (interface {}, error ) {
20+ htmlVarMap := make (map [string ]bool )
21+ eventVarMap := make (map [string ]bool )
22+ intermVarMap := make (map [string ]bool )
23+
24+ // first pass: check for variable storing html string
25+ analysis .Preorder (pass , func (node * sitter.Node ) {
26+ if node .Type () != "assignment" {
27+ return
28+ }
29+
30+ leftNode := node .ChildByFieldName ("left" )
31+ rightNode := node .ChildByFieldName ("right" )
32+
33+ if rightNode == nil {
34+ return
35+ }
36+
37+ // f-strings will not be detected here
38+ if rightNode .Type () == "string" && rightNode .Content (pass .FileContext .Source )[0 ] == 'f' {
39+ return
40+ }
41+
42+ if rightNode .Type () == "string" {
43+ htmlVarMap [leftNode .Content (pass .FileContext .Source )] = true
44+ }
45+
46+ if isEventSubscript (rightNode , pass .FileContext .Source ) {
47+ eventVarMap [leftNode .Content (pass .FileContext .Source )] = true
48+ }
49+ })
50+
51+ // second pass: check for intermediate variables with string formatting
52+ analysis .Preorder (pass , func (node * sitter.Node ) {
53+ if node .Type () != "assignment" {
54+ return
55+ }
56+
57+ leftNode := node .ChildByFieldName ("left" )
58+ rightNode := node .ChildByFieldName ("right" )
59+
60+ if rightNode == nil {
61+ return
62+ }
63+
64+ if isTaintedArg (rightNode , pass .FileContext .Source , htmlVarMap , eventVarMap , intermVarMap ) {
65+ intermVarMap [leftNode .Content (pass .FileContext .Source )] = true
66+ }
67+ })
68+
69+ // detect insecure html response in an assignment
70+ analysis .Preorder (pass , func (node * sitter.Node ) {
71+ if node .Type () != "assignment" {
72+ return
73+ }
74+
75+ // leftNode := node.ChildByFieldName("left")
76+ rightNode := node .ChildByFieldName ("right" )
77+
78+ if rightNode == nil {
79+ return
80+ }
81+
82+ if rightNode .Type () != "dictionary" {
83+ return
84+ }
85+
86+ allDictPairNodes := getNamedChildren (rightNode , 0 )
87+
88+ for _ , pairNode := range allDictPairNodes {
89+ if isDangerousPair (pairNode , pass .FileContext .Source , htmlVarMap , eventVarMap , intermVarMap ) {
90+ pass .Report (pass , pairNode , "Detected potential XSS vulnerability - avoid direct user input in HTML response" )
91+ }
92+ }
93+ })
94+
95+ // detected insecure html response in return statement
96+ analysis .Preorder (pass , func (node * sitter.Node ) {
97+ if node .Type () != "return_statement" {
98+ return
99+ }
100+
101+ dictionaryNode := node .NamedChild (0 )
102+ if dictionaryNode .Type () != "dictionary" {
103+ return
104+ }
105+
106+ allDictPairNodes := getNamedChildren (dictionaryNode , 0 )
107+
108+ for _ , pairNode := range allDictPairNodes {
109+ if isDangerousPair (pairNode , pass .FileContext .Source , htmlVarMap , eventVarMap , intermVarMap ) {
110+ pass .Report (pass , pairNode , "Detected potential XSS vulnerability - avoid direct user input in HTML response" )
111+ }
112+ }
113+ })
114+
115+ return nil , nil
116+ }
117+
118+ func isDangerousPair (node * sitter.Node , source []byte , htmlVarMap , eventVarMap , intermVarMap map [string ]bool ) bool {
119+ if node .Type () != "pair" {
120+ return false
121+ }
122+ keyNode := node .ChildByFieldName ("key" )
123+ if keyNode .Type () != "string" || trimQuotes (keyNode .Content (source )) != "body" {
124+ return false
125+ }
126+
127+ valueNode := node .ChildByFieldName ("value" )
128+ if isEventSubscript (valueNode , source ) {
129+ return true
130+ }
131+
132+ return isEventSubscript (valueNode , source ) || isTaintedArg (valueNode , source , htmlVarMap , eventVarMap , intermVarMap )
133+
134+ }
135+
136+ func isTaintedArg (arg * sitter.Node , source []byte , htmlVarMap , eventVarMap , intermVarMap map [string ]bool ) bool {
137+ switch arg .Type () {
138+ case "call" :
139+ funcAttrNode := arg .ChildByFieldName ("function" )
140+ if funcAttrNode .Type () != "attribute" {
141+ return false
142+ }
143+
144+ funcNameObjectNode := funcAttrNode .ChildByFieldName ("object" )
145+ if funcNameObjectNode .Type () != "identifier" && funcNameObjectNode .Type () != "string" {
146+ return false
147+ }
148+
149+ ishtml := false
150+ for codevar := range htmlVarMap {
151+ if codevar == funcNameObjectNode .Content (source ) {
152+ ishtml = true
153+ }
154+ }
155+
156+ // this check only applies when the function object is not a string
157+ if ! ishtml && funcNameObjectNode .Type () == "identifier" {
158+ return false
159+ }
160+
161+ funcNameAttrVar := funcAttrNode .ChildByFieldName ("attribute" )
162+ if funcNameAttrVar .Type () != "identifier" && funcNameAttrVar .Content (source ) != "format" {
163+ return false
164+ }
165+
166+ argsListNode := arg .ChildByFieldName ("arguments" )
167+ if argsListNode .Type () != "argument_list" {
168+ return false
169+ }
170+
171+ argsNode := getNamedChildren (argsListNode , 0 )
172+ for _ , callArg := range argsNode {
173+ if isEventSubscript (callArg , source ) {
174+ return true
175+ }
176+
177+ if callArg .Type () == "identifier" {
178+ for eventvar := range eventVarMap {
179+ if eventvar == callArg .Content (source ) {
180+ return true
181+ }
182+ }
183+ }
184+ }
185+
186+ case "binary_operator" :
187+ leftNode := arg .ChildByFieldName ("left" )
188+ rightNode := arg .ChildByFieldName ("right" )
189+
190+ if leftNode .Type () != "identifier" {
191+ return false
192+ }
193+
194+ iscode := false
195+ for codevar := range htmlVarMap {
196+ if codevar == leftNode .Content (source ) {
197+ iscode = true
198+ }
199+ }
200+
201+ if ! iscode {
202+ return false
203+ }
204+
205+ if isEventSubscript (rightNode , source ) {
206+ return true
207+ }
208+
209+ return eventVarMap [rightNode .Content (source )]
210+
211+ case "string" :
212+ if arg .Content (source )[0 ] != 'f' {
213+ return false
214+ }
215+ stringChildrenNodes := getNamedChildren (arg , 0 )
216+ for _ , strchild := range stringChildrenNodes {
217+ if strchild .Type () == "interpolation" {
218+ exprNode := strchild .ChildByFieldName ("expression" )
219+
220+ if exprNode .Type () == "subscript" {
221+ return isEventSubscript (exprNode , source )
222+ } else if exprNode .Type () == "identifier" {
223+ return intermVarMap [exprNode .Content (source )]
224+ } else {
225+ return false
226+ }
227+ }
228+ }
229+
230+ case "identifier" :
231+ return intermVarMap [arg .Content (source )] || eventVarMap [arg .Content (source )]
232+
233+ }
234+
235+ return false
236+ }
237+
238+
239+ func isEventSubscript (node * sitter.Node , source []byte ) bool {
240+ if node .Type () != "subscript" {
241+ return false
242+ }
243+
244+ valueNode := node .ChildByFieldName ("value" )
245+ if valueNode .Type () != "identifier" {
246+ return false
247+ }
248+
249+ eventIdentifier := valueNode .Content (source )
250+ return strings .Contains (eventIdentifier , "event" )
251+ }
0 commit comments