|
/**
 * Module for parsing access paths from MaD models, both the identifying access path used
 * by dynamic languages, and the input/output specifications for summary steps.
 *
 * This file is used by the shared data flow library and by the JavaScript libraries
 * (which do not use the shared data flow libraries).
 */
| 8 | + |
/**
 * Holds if `capture1` and `capture2` are the contents of the first and second
 * capture groups, respectively, obtained by applying `regexp` to `input`.
 *
 * This is a convenience wrapper for extracting two capture groups in one call.
 */
bindingset[input, regexp]
private predicate regexpCaptureTwo(string input, string regexp, string capture1, string capture2) {
  input.regexpCapture(regexp, 1) = capture1 and
  input.regexpCapture(regexp, 2) = capture2
}
| 17 | + |
/**
 * Companion module to the `AccessPath` class.
 *
 * Contains the `Range` class that determines which strings are treated as access
 * paths, together with helper predicates for parsing integer constants and
 * intervals that appear as token arguments.
 */
module AccessPath {
  /**
   * A string that should be parsed as an access path.
   *
   * The class is abstract and its characteristic predicate places no restriction
   * on the string, so the set of access paths is exactly what the subclasses contribute.
   */
  abstract class Range extends string {
    bindingset[this]
    Range() { any() }
  }

  /**
   * Parses an integer constant `n` or interval `n1..n2` (inclusive) and gets the value
   * of the constant or any value contained in the interval.
   *
   * Has no result if `arg` has neither of these two forms.
   */
  bindingset[arg]
  int parseInt(string arg) {
    // Plain constant, such as "3" or "-1"
    result = arg.toInt()
    or
    // Match "n1..n2"
    exists(string lo, string hi |
      regexpCaptureTwo(arg, "(-?\\d+)\\.\\.(-?\\d+)", lo, hi) and
      result = [lo.toInt() .. hi.toInt()]
    )
  }

  /**
   * Parses a lower-bounded interval `n..` and gets the lower bound.
   */
  bindingset[arg]
  int parseLowerBound(string arg) { result = arg.regexpCapture("(-?\\d+)\\.\\.", 1).toInt() }

  /**
   * Parses an integer constant or interval (bounded or unbounded) that explicitly
   * references the arity, such as `N-1` or `N-3..N-1`, and gets any index it denotes
   * when `N` is taken to be `arity`.
   *
   * The recognized forms are `N-x`, `N-x..`, `x..N-y`, `N-x..N-y` and `N-x..y`.
   * All intervals are inclusive; the open-ended form `N-x..` extends up to the
   * index `N-1`.
   *
   * Note that expressions of form `N-x` will never resolve to a negative index,
   * even if `N` is zero (it will have no result in that case).
   */
  bindingset[arg, arity]
  private int parseIntWithExplicitArity(string arg, int arity) {
    // `and` binds tighter than `or`, so this guard covers the first `exists` only;
    // the arity-relative interval forms further down carry their own guard.
    result >= 0 and // do not allow N-1 to resolve to a negative index
    exists(string lo |
      // N-x
      lo = arg.regexpCapture("N-(\\d+)", 1) and
      result = arity - lo.toInt()
      or
      // N-x..
      // This must be a range (`..`), not a two-element set literal (`,`): otherwise
      // only the two end points would be produced and e.g. `N-3..` would skip `N-2`.
      lo = arg.regexpCapture("N-(\\d+)\\.\\.", 1) and
      result = [arity - lo.toInt() .. arity - 1]
    )
    or
    exists(string lo, string hi |
      // x..N-y
      // No `result >= 0` guard here: the lower bound is the literal `x` as written.
      regexpCaptureTwo(arg, "(-?\\d+)\\.\\.N-(\\d+)", lo, hi) and
      result = [lo.toInt() .. arity - hi.toInt()]
      or
      // N-x..N-y
      regexpCaptureTwo(arg, "N-(\\d+)\\.\\.N-(\\d+)", lo, hi) and
      result = [arity - lo.toInt() .. arity - hi.toInt()] and
      result >= 0
      or
      // N-x..y
      regexpCaptureTwo(arg, "N-(\\d+)\\.\\.(\\d+)", lo, hi) and
      result = [arity - lo.toInt() .. hi.toInt()] and
      result >= 0
    )
  }

  /**
   * Parses an integer constant or interval (bounded or unbounded) and gets any
   * of the integers contained within (of which there may be infinitely many).
   *
   * Because a lower-bounded interval `n..` has infinitely many members, `result`
   * is part of the binding set: callers must supply the candidate value.
   *
   * Has no result for arguments involving an explicit arity, such as `N-1`.
   */
  bindingset[arg, result]
  int parseIntUnbounded(string arg) {
    result = parseInt(arg)
    or
    result >= parseLowerBound(arg)
  }

  /**
   * Parses an integer constant or interval (bounded or unbounded) that
   * may reference the arity of a call, such as `N-1` or `N-3..N-1`.
   *
   * A lower-bounded interval `n..` is resolved against `arity`, yielding the
   * indices from `n` up to and including `arity - 1`.
   *
   * Note that expressions of form `N-x` will never resolve to a negative index,
   * even if `N` is zero (it will have no result in that case).
   */
  bindingset[arg, arity]
  int parseIntWithArity(string arg, int arity) {
    // Constant `n` or bounded interval `n1..n2`
    result = parseInt(arg)
    or
    // Lower-bounded interval `n..`, capped at the last index for this arity
    result in [parseLowerBound(arg) .. arity - 1]
    or
    // Any form mentioning `N`
    result = parseIntWithExplicitArity(arg, arity)
  }
}
| 113 | + |
/**
 * Gets the `n`th token on the access path `path`, as a plain string.
 *
 * Tokens are located by regular-expression search rather than by splitting on `.`,
 * because a token may itself contain dots, as in `Field[foo.Bar.x]`. Matching alone
 * does not guarantee that every character of the path belongs to a token; that is
 * verified separately by the length check in `AccessPath.hasSyntaxError`.
 */
private string getRawToken(AccessPath path, int n) {
  exists(string tokenPattern |
    // A word, optionally followed by a bracketed argument list, that is followed
    // by a `.` or the end of the string.
    tokenPattern = "\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)" and
    path.regexpFind(tokenPattern, n, _) = result
  )
}
| 121 | + |
/**
 * A string that occurs as an access path (either identifying or input/output spec)
 * which might be relevant for this database.
 */
class AccessPath extends string instanceof AccessPath::Range {
  /**
   * Holds if this string is not a syntactically valid access path.
   *
   * The empty string is never reported as having a syntax error.
   */
  predicate hasSyntaxError() {
    not this = "" and
    exists(int coveredLength |
      // Each token contributes its own length plus one for the separating `.`;
      // the last token has no trailing `.`, hence the final `- 1`.
      coveredLength = sum(int n | | getRawToken(this, n).length() + 1) - 1
    |
      // If the lengths match, all characters must have been included in a token
      // or seen by the `.` lookahead pattern. A mismatch means something was skipped.
      this.length() != coveredLength
    )
  }

  /** Gets the `n`th token on the access path (if there are no syntax errors). */
  AccessPathToken getToken(int n) {
    not this.hasSyntaxError() and
    getRawToken(this, n) = result
  }

  /** Gets the number of tokens on the path (if there are no syntax errors). */
  int getNumToken() {
    not this.hasSyntaxError() and
    count(int n | exists(getRawToken(this, n))) = result
  }
}
| 147 | + |
/**
 * A single token of an access path, such as `Argument[1]` or `ReturnValue`,
 * appearing in one or more access paths.
 */
class AccessPathToken extends string {
  AccessPathToken() { exists(AccessPath path, int n | getRawToken(path, n) = this) }

  /**
   * Gets capture group `part` of this token: group 1 is the text before any `[`,
   * group 2 is the text between `[` and `]` (if present).
   */
  private string getPart(int part) {
    this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part) = result
  }

  /** Gets the name of the token, such as `Member` from `Member[x]`. */
  string getName() { this.getPart(1) = result }

  /**
   * Gets the argument list, such as `1,2` from `Member[1,2]`,
   * or has no result if there are no arguments.
   */
  string getArgumentList() { this.getPart(2) = result }

  /**
   * Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`.
   *
   * Arguments are separated by `,` and surrounding whitespace is trimmed.
   */
  string getArgument(int n) {
    exists(string rawArgument |
      rawArgument = this.getArgumentList().splitAt(",", n) and
      result = rawArgument.trim()
    )
  }

  /** Gets the `n`th argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */
  pragma[nomagic]
  string getArgument(string name, int n) {
    this.getName() = name and
    this.getArgument(n) = result
  }

  /** Gets an argument to this token, such as `x` or `y` from `Member[x,y]`. */
  string getAnArgument() { this.getArgument(_) = result }

  /** Gets an argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */
  string getAnArgument(string name) { this.getArgument(name, _) = result }

  /** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. */
  int getNumArgument() { count(int n | exists(this.getArgument(n))) = result }
}
0 commit comments