Skip to content

Commit 3e9c190

Browse files
author
Alvaro Muñoz
committed
Improve bash and source regexps
1 parent c6e3baf commit 3e9c190

File tree

17 files changed

+820
-437
lines changed

17 files changed

+820
-437
lines changed

ql/lib/codeql/actions/Helper.qll

Lines changed: 169 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -10,148 +10,202 @@ string normalizeExpr(string expr) {
1010
}
1111

1212
bindingset[regex]
13-
string wrapRegexp(string regex) {
14-
result =
15-
[
16-
"\\b" + regex + "\\b", "fromJSON\\(\\s*" + regex + "\\s*\\)",
17-
"toJSON\\(\\s*" + regex + "\\s*\\)"
18-
]
13+
string wrapRegexp(string regex) { result = "\\b" + regex + "\\b" }
14+
15+
bindingset[regex]
16+
string wrapJsonRegexp(string regex) {
17+
result = ["fromJSON\\(\\s*" + regex + "\\s*\\)", "toJSON\\(\\s*" + regex + "\\s*\\)"]
1918
}
2019

2120
bindingset[str]
2221
private string trimQuotes(string str) {
2322
result = str.trim().regexpReplaceAll("^(\"|')", "").regexpReplaceAll("(\"|')$", "")
2423
}
2524

26-
bindingset[line, var]
27-
predicate extractLineAssignment(string line, string var, string key, string value) {
28-
exists(string assignment |
29-
// single line assignment
30-
assignment =
31-
line.regexpCapture("(echo|Write-Output)\\s+(.*)>>\\s*(\"|')?\\$(\\{)?GITHUB_" +
32-
var.toUpperCase() + "(\\})?(\"|')?", 2) and
33-
count(assignment.splitAt("=")) = 2 and
34-
key = trimQuotes(assignment.splitAt("=", 0)) and
35-
value = trimQuotes(assignment.splitAt("=", 1))
25+
/** Checks if expr is a bash parameter expansion */
26+
bindingset[expr]
27+
predicate isBashParameterExpansion(string expr, string parameter, string operator, string params) {
28+
exists(string regexp |
29+
// $VAR
30+
regexp = "\\$([a-zA-Z_][a-zA-Z0-9_]+)\\b" and
31+
parameter = expr.regexpCapture(regexp, 1) and
32+
operator = "" and
33+
params = ""
34+
or
35+
// ${VAR}
36+
regexp = "\\$\\{([a-zA-Z_][a-zA-Z0-9_]*)\\}" and
37+
parameter = expr.regexpCapture(regexp, 1) and
38+
operator = "" and
39+
params = ""
3640
or
37-
// workflow command assignment
38-
assignment =
39-
line.regexpCapture("(echo|Write-Output)\\s+(\"|')?::set-" + var.toLowerCase() +
40-
"\\s+name=(.*)(\"|')?", 3).regexpReplaceAll("^\"", "").regexpReplaceAll("\"$", "") and
41-
key = trimQuotes(assignment.splitAt("::", 0)) and
42-
value = trimQuotes(assignment.splitAt("::", 1))
41+
// ${!VAR}
42+
regexp = "\\$\\{([!#])([a-zA-Z_][a-zA-Z0-9_]*)\\}" and
43+
parameter = expr.regexpCapture(regexp, 2) and
44+
operator = expr.regexpCapture(regexp, 1) and
45+
params = ""
46+
or
47+
// ${VAR<OP><PARAMS>}, ...
48+
regexp = "\\$\\{([a-zA-Z_][a-zA-Z0-9_]*)([#%/:^,\\-+]{1,2})?(.*?)\\}" and
49+
parameter = expr.regexpCapture(regexp, 1) and
50+
operator = expr.regexpCapture(regexp, 2) and
51+
params = expr.regexpCapture(regexp, 3)
4352
)
4453
}
4554

46-
bindingset[var]
47-
private string multilineAssignmentRegex(string var) {
48-
// eg:
49-
// echo "PR_TITLE<<EOF" >> $GITHUB_ENV
50-
// echo "$TITLE" >> $GITHUB_ENV
51-
// echo "EOF" >> $GITHUB_ENV
52-
result =
53-
".*(echo|Write-Output)\\s+(.*)<<[\\-]*\\s*([A-Z]*)EOF(.+)(echo|Write-Output)\\s+(\"|')?([A-Z]*)EOF(\"|')?\\s*>>\\s*(\"|')?\\$(\\{)?GITHUB_"
54-
+ var.toUpperCase() + "(\\})?(\"|')?.*"
55-
}
56-
57-
bindingset[var]
58-
private string multilineBlockAssignmentRegex(string var) {
59-
// eg:
60-
// {
61-
// echo 'JSON_RESPONSE<<EOF'
62-
// echo "$TITLE" >> "$GITHUB_ENV"
63-
// echo EOF
64-
// } >> "$GITHUB_ENV"
65-
result =
66-
".*\\{(\\s|::NEW_LINE::)*(echo|Write-Output)\\s+(.*)<<[\\-]*\\s*([A-Z]*)EOF(.+)(echo|Write-Output)\\s+(\"|')?([A-Z]*)EOF(\"|')?(\\s|::NEW_LINE::)*\\}\\s*>>\\s*(\"|')?\\$(\\{)?GITHUB_"
67-
+ var.toUpperCase() + "(\\})?(\"|')?.*"
55+
// TODO, the following test fails
56+
bindingset[raw_content]
57+
predicate extractVariableAndValue(string raw_content, string key, string value) {
58+
exists(string regexp, string content | content = trimQuotes(raw_content) |
59+
regexp = "(?msi).*^([a-zA-Z_][a-zA-Z0-9_]*)\\s*<<\\s*['\"]?(\\S+)['\"]?\\s*\n(.*?)\n\\2\\s*$" and
60+
key = trimQuotes(content.regexpCapture(regexp, 1)) and
61+
value = trimQuotes(content.regexpCapture(regexp, 3))
62+
or
63+
exists(string line |
64+
line = content.splitAt("\n") and
65+
regexp = "(?i)^([a-zA-Z_][a-zA-Z0-9_\\-]*)\\s*=\\s*(.*)$" and
66+
key = trimQuotes(line.regexpCapture(regexp, 1)) and
67+
value = trimQuotes(line.regexpCapture(regexp, 2))
68+
)
69+
)
6870
}
6971

70-
bindingset[var]
71-
private string multilineHereDocAssignmentRegex(string var) {
72-
// eg:
73-
// cat <<-EOF >> "$GITHUB_ENV"
74-
// echo "FOO=$TITLE"
75-
// EOF
76-
result =
77-
".*cat\\s*<<[\\-]*\\s*[A-Z]*EOF\\s*>>\\s*[\"']*\\$[\\{]*GITHUB_.*" + var.toUpperCase() +
78-
"[\\}]*[\"']*.*(echo|Write-Output)\\s+([^=]+)=(.*)::NEW_LINE::.*EOF.*"
72+
bindingset[script]
73+
predicate singleLineFileWrite(string script, string cmd, string file, string content, string filters) {
74+
exists(string regexp |
75+
regexp = "(?i)(echo|write-output)\\s*(.*?)\\s*(>>|>)\\s*(\\S+)" and
76+
cmd = script.regexpCapture(regexp, 1) and
77+
file = trimQuotes(script.regexpCapture(regexp, 4)) and
78+
filters = "" and
79+
content = script.regexpCapture(regexp, 2)
80+
)
7981
}
8082

81-
bindingset[script, var]
82-
predicate extractMultilineAssignment(string script, string var, string key, string value) {
83-
// multiline assignment
84-
exists(string flattenedScript |
85-
flattenedScript = script.replaceAll("\n", "::NEW_LINE::") and
86-
value =
87-
"$(" +
88-
trimQuotes(flattenedScript.regexpCapture(multilineAssignmentRegex(var), 4))
89-
.regexpReplaceAll("\\s*>>\\s*(\"|')?\\$(\\{)?GITHUB_" + var.toUpperCase() +
90-
"(\\})?(\"|')?", "")
91-
.replaceAll("::NEW_LINE::", "\n")
92-
.trim()
93-
.splitAt("\n") + ")" and
94-
key = trimQuotes(flattenedScript.regexpCapture(multilineAssignmentRegex(var), 2))
83+
bindingset[script]
84+
predicate singleLineWorkflowCmd(string script, string cmd, string key, string value) {
85+
exists(string regexp |
86+
regexp = "(?i)(echo|write-output)\\s*(['|\"])?::(set-[a-z]+)\\s*name\\s*=\\s*(.*?)::(.*)" and
87+
cmd = script.regexpCapture(regexp, 3) and
88+
key = script.regexpCapture(regexp, 4) and
89+
value = trimQuotes(script.regexpCapture(regexp, 5))
90+
or
91+
regexp = "(?i)(echo|write-output)\\s*(['|\"])?::(add-[a-z]+)\\s*::(.*)" and
92+
cmd = script.regexpCapture(regexp, 3) and
93+
key = "" and
94+
value = trimQuotes(script.regexpCapture(regexp, 4))
9595
)
96-
or
97-
// multiline block assignment
98-
exists(string flattenedScript |
99-
flattenedScript = script.replaceAll("\n", "::NEW_LINE::") and
100-
value =
101-
"$(" +
102-
trimQuotes(flattenedScript.regexpCapture(multilineBlockAssignmentRegex(var), 5))
103-
.regexpReplaceAll("\\s*>>\\s*(\"|')?\\$(\\{)?GITHUB_" + var.toUpperCase() +
104-
"(\\})?(\"|')?", "")
105-
.replaceAll("::NEW_LINE::", "\n")
106-
.trim()
107-
.splitAt("\n") + ")" and
108-
key = trimQuotes(flattenedScript.regexpCapture(multilineBlockAssignmentRegex(var), 3))
96+
}
97+
98+
bindingset[script]
99+
predicate heredocFileWrite(string script, string cmd, string file, string content, string filters) {
100+
exists(string regexp |
101+
regexp = "(?msi).*^(cat)\\s*(>>|>)\\s*(\\S+)\\s*<<\\s*['\"]?(\\S+)['\"]?\\s*\n(.*?)\n\\4\\s*$.*" and
102+
cmd = script.regexpCapture(regexp, 1) and
103+
file = trimQuotes(script.regexpCapture(regexp, 3)) and
104+
content = script.regexpCapture(regexp, 5) and
105+
filters = ""
106+
or
107+
regexp =
108+
"(?msi).*^(cat)\\s*(<<|<)\\s*[-]?['\"]?(\\S+)['\"]?\\s*([^>]*)(>>|>)\\s*(\\S+)\\s*\n(.*?)\n\\3\\s*$.*" and
109+
cmd = script.regexpCapture(regexp, 1) and
110+
file = trimQuotes(script.regexpCapture(regexp, 6)) and
111+
filters = script.regexpCapture(regexp, 4) and
112+
content = script.regexpCapture(regexp, 7)
109113
)
110-
or
111-
// multiline heredoc assignment
112-
exists(string flattenedScript |
113-
flattenedScript = script.replaceAll("\n", "::NEW_LINE::") and
114-
value =
115-
trimQuotes(flattenedScript.regexpCapture(multilineHereDocAssignmentRegex(var), 3))
116-
.regexpReplaceAll("\\s*>>\\s*(\"|')?\\$(\\{)?GITHUB_" + var.toUpperCase() +
117-
"(\\})?(\"|')?", "")
118-
.replaceAll("::NEW_LINE::", "\n")
119-
.trim()
120-
.splitAt("\n") and
121-
key = trimQuotes(flattenedScript.regexpCapture(multilineHereDocAssignmentRegex(var), 2))
114+
}
115+
116+
bindingset[script]
117+
predicate linesFileWrite(string script, string cmd, string file, string content, string filters) {
118+
exists(string regexp |
119+
regexp =
120+
"(?msi).*(echo\\s+['|\"]?(.*?<<(\\S+))['|\"]?\\s*>>\\s*(\\S+)\\s*[\r\n]+)" +
121+
"(((.*?)\\s*>>\\s*\\S+\\s*[\r\n]+)+)" +
122+
"(echo\\s+['|\"]?(EOF)['|\"]?\\s*>>\\s*\\S+\\s*[\r\n]*).*" and
123+
content =
124+
trimQuotes(script.regexpCapture(regexp, 2)) + "\n" + "$(" +
125+
trimQuotes(script.regexpCapture(regexp, 5)) +
126+
// TODO: there are some >> $GITHUB_ENV, >> $GITHUB_OUTPUT, >> "$GITHUB_ENV" leftovers in content
127+
//.regexpReplaceAll("\\s*(>|>>)\\s*\\$[{]*" + file + "(.*?)[}]*", "")
128+
")\n" + trimQuotes(script.regexpCapture(regexp, 3)) and
129+
cmd = "echo" and
130+
file = trimQuotes(script.regexpCapture(regexp, 4)) and
131+
filters = ""
122132
)
123133
}
124134

125-
bindingset[line]
126-
predicate extractPathAssignment(string line, string value) {
127-
exists(string path |
128-
// single path assignment
129-
path =
130-
line.regexpCapture("(echo|Write-Output)\\s+(.*)>>\\s*(\"|')?\\$(\\{)?GITHUB_PATH(\\})?(\"|')?",
131-
2) and
132-
value = trimQuotes(path)
133-
or
134-
// workflow command assignment
135-
path =
136-
line.regexpCapture("(echo|Write-Output)\\s+(\"|')?::add-path::(.*)(\"|')?", 3)
137-
.regexpReplaceAll("^\"", "")
138-
.regexpReplaceAll("\"$", "") and
139-
value = trimQuotes(path)
135+
bindingset[script]
136+
predicate blockFileWrite(string script, string cmd, string file, string content, string filters) {
137+
exists(string regexp |
138+
regexp =
139+
"(?msi).*^\\s*\\{\\s*[\r\n]" +
140+
//
141+
"(.*?)" +
142+
//
143+
"(\\s*\\}\\s*(>>|>)\\s*(\\S+))\\s*$.*" and
144+
content =
145+
script
146+
.regexpCapture(regexp, 1)
147+
.regexpReplaceAll("(?m)^[ ]*echo\\s*['\"](.*?)['\"]", "$1")
148+
.regexpReplaceAll("(?m)^[ ]*echo\\s*", "") and
149+
file = trimQuotes(script.regexpCapture(regexp, 4)) and
150+
cmd = "echo" and
151+
filters = ""
152+
)
153+
}
154+
155+
bindingset[script]
156+
predicate multiLineFileWrite(string script, string cmd, string file, string content, string filters) {
157+
heredocFileWrite(script, cmd, file, content, filters)
158+
or
159+
linesFileWrite(script, cmd, file, content, filters)
160+
or
161+
blockFileWrite(script, cmd, file, content, filters)
162+
}
163+
164+
bindingset[script, file_var]
165+
predicate extractFileWrite(string script, string file_var, string content) {
166+
// single line assignment
167+
exists(string file_expr, string raw_content |
168+
isBashParameterExpansion(file_expr, file_var, _, _) and
169+
singleLineFileWrite(script.splitAt("\n"), _, file_expr, raw_content, _) and
170+
content = trimQuotes(raw_content)
171+
)
172+
or
173+
// workflow command assignment
174+
exists(string key, string value, string cmd |
175+
(
176+
file_var = "GITHUB_ENV" and
177+
cmd = "set-env" and
178+
content = key + "=" + value
179+
or
180+
file_var = "GITHUB_OUTPUT" and
181+
cmd = "set-output" and
182+
content = key + "=" + value
183+
or
184+
file_var = "GITHUB_PATH" and
185+
cmd = "add-path" and
186+
content = value
187+
) and
188+
singleLineWorkflowCmd(script.splitAt("\n"), cmd, key, value)
189+
)
190+
or
191+
// multiline assignment
192+
exists(string file_expr, string raw_content |
193+
multiLineFileWrite(script, _, file_expr, raw_content, _) and
194+
isBashParameterExpansion(file_expr, file_var, _, _) and
195+
content = trimQuotes(raw_content)
140196
)
141197
}
142198

143-
predicate writeToGitHubEnv(Run run, string key, string value) {
144-
extractLineAssignment(run.getScript().splitAt("\n"), "ENV", key, value) or
145-
extractMultilineAssignment(run.getScript(), "ENV", key, value)
199+
predicate writeToGitHubEnv(Run run, string content) {
200+
extractFileWrite(run.getScript(), "GITHUB_ENV", content)
146201
}
147202

148-
predicate writeToGitHubOutput(Run run, string key, string value) {
149-
extractLineAssignment(run.getScript().splitAt("\n"), "OUTPUT", key, value) or
150-
extractMultilineAssignment(run.getScript(), "OUTPUT", key, value)
203+
predicate writeToGitHubOutput(Run run, string content) {
204+
extractFileWrite(run.getScript(), "GITHUB_OUTPUT", content)
151205
}
152206

153-
predicate writeToGitHubPath(Run run, string value) {
154-
extractPathAssignment(run.getScript().splitAt("\n"), value)
207+
predicate writeToGitHubPath(Run run, string content) {
208+
extractFileWrite(run.getScript(), "GITHUB_PATH", content)
155209
}
156210

157211
predicate inPrivilegedCompositeAction(AstNode node) {

ql/lib/codeql/actions/ast/internal/Ast.qll

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -749,12 +749,19 @@ class JobImpl extends AstNodeImpl, TJobNode {
749749
/** Holds if the job can be triggered by an external actor. */
750750
predicate isExternallyTriggerable() {
751751
// the job is triggered by an event that can be triggered externally
752-
externallyTriggerableEventsDataModel(this.getATriggerEvent().getName()) or
752+
externallyTriggerableEventsDataModel(this.getATriggerEvent().getName())
753+
or
753754
// the job is triggered by a workflow_call event that can be triggered externally
754755
this.getATriggerEvent().getName() = "workflow_call" and
755-
(exists(ExpressionImpl e, string external_trigger | e.getEnclosingJob() = this and e.getExpression().matches("%github.event" + external_trigger + "%") and externallyTriggerableEventsDataModel(external_trigger))
756-
or
757-
this.getEnclosingWorkflow().(ReusableWorkflowImpl).getACaller().isExternallyTriggerable())
756+
(
757+
exists(ExpressionImpl e, string external_trigger |
758+
e.getEnclosingJob() = this and
759+
e.getExpression().matches("%github.event" + external_trigger + "%") and
760+
externallyTriggerableEventsDataModel(external_trigger)
761+
)
762+
or
763+
this.getEnclosingWorkflow().(ReusableWorkflowImpl).getACaller().isExternallyTriggerable()
764+
)
758765
}
759766

760767
/** Holds if the job is privileged. */
@@ -781,9 +788,9 @@ class JobImpl extends AstNodeImpl, TJobNode {
781788
private predicate hasExplicitSecretAccess() {
782789
// the job accesses a secret other than GITHUB_TOKEN
783790
exists(SecretsExpressionImpl expr |
784-
(expr.getEnclosingJob() = this or not exists(expr.getEnclosingJob())) and
791+
(expr.getEnclosingJob() = this or not exists(expr.getEnclosingJob())) and
785792
expr.getEnclosingWorkflow() = this.getEnclosingWorkflow() and
786-
not expr.getFieldName() = "GITHUB_TOKEN"
793+
not expr.getFieldName() = "GITHUB_TOKEN"
787794
)
788795
}
789796

@@ -814,7 +821,7 @@ class JobImpl extends AstNodeImpl, TJobNode {
814821
// the Job is triggered by an event other than `pull_request`
815822
count(this.getATriggerEvent()) = 1 and
816823
not this.getATriggerEvent().getName() = "pull_request" and
817-
not this.getATriggerEvent().getName() = "workflow_call"
824+
not this.getATriggerEvent().getName() = "workflow_call"
818825
or
819826
// the Workflow is a Reusable Workflow only and there is
820827
// a privileged caller workflow or we can't find a caller

0 commit comments

Comments
 (0)