Skip to content

Commit 4fbd806

Browse files
committed
Copy AccessPathSyntax.qll to dataflow pack
1 parent 7819dcf commit 4fbd806

File tree

1 file changed

+182
-0
lines changed

1 file changed

+182
-0
lines changed
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
/**
2+
* Module for parsing access paths from MaD models, both the identifying access path used
3+
* by dynamic languages, and the input/output specifications for summary steps.
4+
*
5+
* This file is used by the shared data flow library and by the JavaScript libraries
6+
* (which do not use the shared data flow libraries).
7+
*/
8+
9+
/**
 * Holds if `capture1` and `capture2` are the first and second capture groups
 * obtained by applying `regexp` to `input`.
 *
 * Helper for pulling both ends of an interval out of a string in one call.
 */
bindingset[input, regexp]
private predicate regexpCaptureTwo(string input, string regexp, string capture1, string capture2) {
  input.regexpCapture(regexp, 1) = capture1 and
  input.regexpCapture(regexp, 2) = capture2
}
17+
18+
/** Companion module to the `AccessPath` class. */
module AccessPath {
  /** A string that should be parsed as an access path. */
  abstract class Range extends string {
    // Any string may be a `Range`; concrete subclasses pick the relevant ones.
    bindingset[this]
    Range() { any() }
  }

  /**
   * Parses an integer constant `n` or interval `n1..n2` (inclusive) and gets the value
   * of the constant or any value contained in the interval.
   */
  bindingset[arg]
  int parseInt(string arg) {
    // Plain constant, e.g. "3"
    result = arg.toInt()
    or
    // Match "n1..n2"
    exists(string lo, string hi |
      regexpCaptureTwo(arg, "(-?\\d+)\\.\\.(-?\\d+)", lo, hi) and
      result = [lo.toInt() .. hi.toInt()]
    )
  }

  /**
   * Parses a lower-bounded interval `n..` and gets the lower bound.
   */
  bindingset[arg]
  int parseLowerBound(string arg) { result = arg.regexpCapture("(-?\\d+)\\.\\.", 1).toInt() }

  /**
   * Parses an integer constant or interval (bounded or unbounded) that explicitly
   * references the arity, such as `N-1` or `N-3..N-1`.
   *
   * Supported forms are `N-x`, `N-x..`, `x..N-y`, `N-x..N-y` and `N-x..y`, where
   * `N` stands for `arity`.
   *
   * Note that expressions of form `N-x` will never resolve to a negative index,
   * even if `N` is zero (it will have no result in that case).
   */
  bindingset[arg, arity]
  private int parseIntWithExplicitArity(string arg, int arity) {
    // `and` binds tighter than `or`, so this bound only guards the `N-x` and `N-x..`
    // cases in the first `exists`; the `N-x..N-y` and `N-x..y` cases below repeat
    // the check themselves.
    result >= 0 and // do not allow N-1 to resolve to a negative index
    exists(string lo |
      // N-x
      lo = arg.regexpCapture("N-(\\d+)", 1) and
      result = arity - lo.toInt()
      or
      // N-x..
      // Every index from N-x up to the last index N-1. This must be a range (`..`),
      // not a two-element set literal, or the indices strictly between the two
      // endpoints would be skipped.
      lo = arg.regexpCapture("N-(\\d+)\\.\\.", 1) and
      result = [arity - lo.toInt() .. arity - 1]
    )
    or
    exists(string lo, string hi |
      // x..N-y
      regexpCaptureTwo(arg, "(-?\\d+)\\.\\.N-(\\d+)", lo, hi) and
      result = [lo.toInt() .. arity - hi.toInt()]
      or
      // N-x..N-y
      regexpCaptureTwo(arg, "N-(\\d+)\\.\\.N-(\\d+)", lo, hi) and
      result = [arity - lo.toInt() .. arity - hi.toInt()] and
      result >= 0
      or
      // N-x..y
      regexpCaptureTwo(arg, "N-(\\d+)\\.\\.(\\d+)", lo, hi) and
      result = [arity - lo.toInt() .. hi.toInt()] and
      result >= 0
    )
  }

  /**
   * Parses an integer constant or interval (bounded or unbounded) and gets any
   * of the integers contained within (of which there may be infinitely many).
   *
   * Because an open-ended interval `n..` has no upper bound, `result` is part of
   * the binding set and must be bound by the caller.
   *
   * Has no result for arguments involving an explicit arity, such as `N-1`.
   */
  bindingset[arg, result]
  int parseIntUnbounded(string arg) {
    result = parseInt(arg)
    or
    // Open-ended interval "n..": any value at or above the lower bound
    result >= parseLowerBound(arg)
  }

  /**
   * Parses an integer constant or interval (bounded or unbounded) that
   * may reference the arity of a call, such as `N-1` or `N-3..N-1`.
   *
   * An open-ended interval `n..` is capped at `arity - 1`.
   *
   * Note that expressions of form `N-x` will never resolve to a negative index,
   * even if `N` is zero (it will have no result in that case).
   */
  bindingset[arg, arity]
  int parseIntWithArity(string arg, int arity) {
    result = parseInt(arg)
    or
    // Open-ended interval "n..", bounded above by the last valid index
    result in [parseLowerBound(arg) .. arity - 1]
    or
    result = parseIntWithExplicitArity(arg, arity)
  }
}
113+
114+
/**
 * Gets the `n`th token of `path`, as a raw string.
 *
 * Tokens are located with `regexpFind` rather than by splitting on `.`, because a
 * token may itself contain dots (for example `Field[foo.Bar.x]`). Whether the
 * tokens account for every character of the path is checked separately, by the
 * length comparison in `AccessPath.hasSyntaxError`.
 */
private string getRawToken(AccessPath path, int n) {
  path.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _) = result
}
121+
122+
/**
 * A string that occurs as an access path (either identifying or input/output spec)
 * and which might be relevant for this database.
 */
class AccessPath extends string instanceof AccessPath::Range {
  /** Holds if this string is not a syntactically valid access path. */
  predicate hasSyntaxError() {
    // Each token contributes its own length plus one for the separating `.`;
    // subtracting one accounts for the last token having no trailing `.`.
    // If that total differs from the length of the string, some character was
    // neither part of a token nor a `.` seen by the lookahead pattern.
    this.length() != sum(int idx | | getRawToken(this, idx).length() + 1) - 1 and
    not this = ""
  }

  /** Gets the `n`th token on the access path (if there are no syntax errors). */
  AccessPathToken getToken(int n) {
    not this.hasSyntaxError() and
    getRawToken(this, n) = result
  }

  /** Gets the number of tokens on the path (if there are no syntax errors). */
  int getNumToken() {
    not this.hasSyntaxError() and
    result = count(int idx | exists(getRawToken(this, idx)))
  }
}
147+
148+
/**
 * A single token of an access path, such as `Argument[1]` or `ReturnValue`,
 * appearing in one or more access paths.
 */
class AccessPathToken extends string {
  AccessPathToken() { getRawToken(_, _) = this }

  /**
   * Gets capture group `part` of this token: group 1 is the text before any `[`,
   * group 2 is the text between the brackets, if present.
   */
  private string getPart(int part) {
    this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part) = result
  }

  /** Gets the name of the token, such as `Member` from `Member[x]`. */
  string getName() { this.getPart(1) = result }

  /**
   * Gets the argument list, such as `1,2` from `Member[1,2]`,
   * or has no result if there are no arguments.
   */
  string getArgumentList() { this.getPart(2) = result }

  /**
   * Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`.
   *
   * The argument list is split at `,` and each piece is trimmed.
   */
  string getArgument(int n) { this.getArgumentList().splitAt(",", n).trim() = result }

  /** Gets the `n`th argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */
  pragma[nomagic]
  string getArgument(string name, int n) { result = this.getArgument(n) and this.getName() = name }

  /** Gets an argument to this token, such as `x` or `y` from `Member[x,y]`. */
  string getAnArgument() { this.getArgument(_) = result }

  /** Gets an argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */
  string getAnArgument(string name) { this.getArgument(name, _) = result }

  /** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. */
  int getNumArgument() { result = count(int idx | exists(this.getArgument(idx))) }
}

0 commit comments

Comments
 (0)