Skip to content

Commit 8f2c7c1

Browse files
committed
Rust: parse formatting templates
1 parent e920a4c commit 8f2c7c1

File tree

7 files changed

+487
-4
lines changed

7 files changed

+487
-4
lines changed

rust/ql/.generated.list

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

rust/ql/.gitattributes

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
/**
2+
* This module provides the classes modeling formatting templates. See also https://doc.rust-lang.org/std/fmt
3+
*/
4+
5+
private import FormatArgsExpr
6+
private import LiteralExpr
7+
8+
/**
9+
* A regular expression for matching format elements in a formatting template. The
10+
* regular expression is generated from the following python code:
11+
*
12+
* ```python
13+
* identifier = "([A-Za-z_][A-Za-z0-9_]*)"
14+
* integer = "([0-9]+)"
15+
*
16+
* # argument := integer | identifier
17+
* argument = "({integer}|{identifier})".format(integer=integer, identifier=identifier)
18+
*
19+
* # parameter := argument '$'
20+
* parameter = "({argument}\\$)".format(argument=argument)
21+
*
22+
* # count := parameter | integer
23+
* count = "({parameter}|{integer})".format(integer=integer, parameter=parameter)
24+
*
25+
* # fill := character
26+
* fill = "(.)"
27+
*
28+
* # align := '<' | '^' | '>'
29+
* align = "([<^>])"
30+
*
31+
* # sign := '+' | '-'
32+
* sign = "([+-])"
33+
*
34+
* # width := count
35+
* width = count
36+
*
37+
* # precision := count | '*'
38+
* precision = "({count}|(\\*))".format(count=count)
39+
*
40+
* # type := '' | '?' | 'x?' | 'X?' | identifier
41+
* type = "(|\\?|x\\?|X\\?|{identifier})".format(identifier=identifier)
42+
*
43+
* # format_spec := [[fill]align][sign]['#']['0'][width]['.' precision]type
44+
* format_spec = "({fill}?{align})?{sign}?(#)?(0)?{width}?(\\.{precision})?{type}".format(fill=fill, align=align, sign=sign, width=width, precision=precision, type=type)
45+
*
46+
* # format := '{' [ argument ] [ ':' format_spec ] [ ws ] * '}'
47+
* format = "(\\{{{argument}?(:{format_spec})?\\s*\\}})".format(argument=argument, format_spec=format_spec)
48+
*
49+
* ```
50+
*/
51+
private string formatRegex() {
52+
// Capture groups read elsewhere in this file (each pair is integer / identifier):
// 3 / 4 - the argument directly after the opening `{`
// 15 / 16 - the argument of a `width$` parameter
// 23 / 24 - the argument of a `.precision$` parameter
// Any change to the grouping of this expression must keep those numbers in sync.
result =
53+
"(\\{(([0-9]+)|([A-Za-z_][A-Za-z0-9_]*))?(:((.)?([<^>]))?([+-])?(#)?(0)?(((([0-9]+)|([A-Za-z_][A-Za-z0-9_]*))\\$)|([0-9]+))?(\\.((((([0-9]+)|([A-Za-z_][A-Za-z0-9_]*))\\$)|([0-9]+))|(\\*)))?(|\\?|x\\?|X\\?|([A-Za-z_][A-Za-z0-9_]*)))?\\s*\\})"
54+
}
55+
56+
/**
 * Gets a regular expression matching a run of literal text in a formatting template:
 * one or more repetitions of either a single character other than `{` and `}`, or one
 * of the escaped braces `{{` and `}}`.
 */
private string textRegex() { result = "([^{}]|\\{\\{|\\}\\})+" }
57+
58+
/**
 * Gets a part of the formatting template of `parent`: the match with occurrence index
 * `occurrenceIndex`, found at offset `occurrenceOffset`, of the combined
 * `textRegex()`-or-`formatRegex()` expression in the text value of the template.
 *
 * Only templates that are a `LiteralExpr` are considered. The result may be either a
 * run of literal text or a format element.
 */
private string part(FormatArgsExpr parent, int occurrenceIndex, int occurrenceOffset) {
59+
result =
60+
parent
61+
.getTemplate()
62+
.(LiteralExpr)
63+
.getTextValue()
64+
// TODO: should also handle surrounding quotes and escaped characters
65+
.regexpFind(textRegex() + "|" + formatRegex(), occurrenceIndex, occurrenceOffset)
66+
}
67+
68+
/**
 * The elements of a formatting template.
 *
 * The only branch is `TFormat`: a part of the template of `parent` (see `part`) whose
 * `text` matches `formatRegex()`, identified by the occurrence `index` and `offset`
 * that `part` reported for it. Parts that are literal text get no value here.
 */
private newtype TFormatTemplateElem =
69+
TFormat(FormatArgsExpr parent, string text, int index, int offset) {
70+
text = part(parent, index, offset) and text.regexpMatch(formatRegex())
71+
}
72+
73+
/**
 * The role of an argument reference inside a format element: the reference directly
 * after the opening brace (`TElement`), the reference of a `width$` parameter
 * (`TWidth`), or the reference of a `.precision$` parameter (`TPrecision`).
 */
private newtype TFormatArgumentKind =
74+
TElement() or
75+
TWidth() or
76+
TPrecision()
77+
78+
/**
 * An argument reference inside the format element `parent`.
 *
 * `kind` is the role of the reference, `value` is the captured position or name,
 * `positional` holds the value `true` when the reference was captured by an integer
 * group and `false` when it was captured by an identifier group, and `offset` is the
 * offset of the format element plus the position of the reference within its text.
 */
private newtype TFormatArgumentT =
79+
TFormatArgument(
80+
TFormat parent, TFormatArgumentKind kind, string value, boolean positional, int offset
81+
) {
82+
exists(string text, int formatOffset, int group |
83+
// groups 3/4: the argument directly after the opening `{`, hence `+ 1`
group = [3, 4] and offset = formatOffset + 1 and kind = TElement()
84+
or
85+
// groups 15/16: the argument of a `width$` parameter; the width comes before the
// precision, so the first occurrence of `value$` in the text is used
group = [15, 16] and
86+
offset = formatOffset + min(text.indexOf(value + "$")) and
87+
kind = TWidth()
88+
or
89+
// groups 23/24: the argument of a `.precision$` parameter; the last occurrence of
// `value$` in the text is used
group = [23, 24] and
90+
offset = formatOffset + max(text.indexOf(value + "$")) and
91+
kind = TPrecision()
92+
|
93+
parent = TFormat(_, text, _, formatOffset) and
94+
value = text.regexpCapture(formatRegex(), group) and
95+
// in each pair the odd-numbered group is the integer alternative
if group % 2 = 1 then positional = true else positional = false
96+
)
97+
}
98+
99+
/**
100+
* A format element in a formatting template. For example the `{}` in:
101+
* ```rust
102+
* println!("Hello {}", "world");
103+
* ```
104+
*/
105+
class Format extends TFormat {
106+
private FormatArgsExpr parent;
107+
private string text;
108+
private int index;
109+
private int offset;
110+
111+
Format() { this = TFormat(parent, text, index, offset) }
112+
113+
/** Gets a textual representation of this element. */
114+
string toString() { result = text }
115+
116+
/**
117+
* Holds if this element is at the specified location.
118+
* The location spans column `startcolumn` of line `startline` to
119+
* column `endcolumn` of line `endline` in file `filepath`.
120+
* For more information, see
121+
* [Providing locations in CodeQL queries](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
122+
*/
123+
predicate hasLocationInfo(
124+
string filepath, int startline, int startcolumn, int endline, int endcolumn
125+
) {
126+
parent
127+
.getTemplate()
128+
.getLocation()
129+
.hasLocationInfo(filepath, startline, startcolumn - offset, _, _) and
130+
endline = startline and
131+
endcolumn = startcolumn + text.length() - 1
132+
}
133+
134+
/** Gets the parent of this `Format`. */
135+
FormatArgsExpr getParent() { result = parent }
136+
137+
/** Gets the index of this `Format` among all parts (literal text and format elements) of its template. */
138+
int getIndex() { result = index }
139+
140+
/**
141+
* Gets the name or position reference of this format, if any. For example `name` and `0` in:
142+
* ```rust
143+
* let name = "Alice";
144+
* println!("{name} in wonderland");
145+
* println!("{0} in wonderland", name);
146+
* ```
147+
*/
148+
FormatArgument getArgumentRef() {
149+
result.getParent() = this and result = TFormatArgument(_, TElement(), _, _, _)
150+
}
151+
152+
/**
153+
* Gets the name or position reference of the width parameter in this format, if any. For example `width` and `1` in:
154+
* ```rust
155+
* let width = 6;
156+
* println!("{:width$}", PI);
157+
* println!("{:1$}", PI, width);
158+
* ```
159+
*/
160+
FormatArgument getWidthArgument() {
161+
result.getParent() = this and result = TFormatArgument(_, TWidth(), _, _, _)
162+
}
163+
164+
/**
165+
* Gets the name or position reference of the precision parameter in this format, if any. For example `prec` and `1` in:
166+
* ```rust
167+
* let prec = 6;
168+
* println!("{:.prec$}", PI);
169+
* println!("{:.1$}", PI, prec);
170+
* ```
171+
*/
172+
FormatArgument getPrecisionArgument() {
173+
result.getParent() = this and result = TFormatArgument(_, TPrecision(), _, _, _)
174+
}
175+
}
176+
177+
/**
178+
* An argument in a format element in a formatting template. For example the `width`, `precision`, and `value` in:
179+
* ```rust
180+
* println!("Value {value:#width$.precision$}");
181+
* ```
182+
* or the `0`, `1` and `2` in:
183+
* ```rust
184+
* println!("Value {0:#1$.2$}", value, width, precision);
185+
* ```
186+
*/
187+
class FormatArgument extends TFormatArgumentT {
188+
private Format parent;
189+
string name;
190+
private int offset;
191+
192+
FormatArgument() { this = TFormatArgument(parent, _, name, _, offset) }
193+
194+
/** Gets a textual representation of this element. */
195+
string toString() { result = name }
196+
197+
/**
198+
* Holds if this element is at the specified location.
199+
* The location spans column `startcolumn` of line `startline` to
200+
* column `endcolumn` of line `endline` in file `filepath`.
201+
* For more information, see
202+
* [Providing locations in CodeQL queries](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
203+
*/
204+
predicate hasLocationInfo(
205+
string filepath, int startline, int startcolumn, int endline, int endcolumn
206+
) {
207+
// TODO: handle locations in multi-line comments
208+
// TODO: handle the case where the template is from a nested macro call
209+
parent
210+
.getParent()
211+
.getTemplate()
212+
.getLocation()
213+
.hasLocationInfo(filepath, startline, startcolumn - offset, _, _) and
214+
endline = startline and
215+
endcolumn = startcolumn + name.length() - 1
216+
}
217+
218+
/** Gets the parent of this `FormatArgument`. */
219+
Format getParent() { result = parent }
220+
}
221+
222+
/**
223+
* A positional `FormatArgument`. For example `0` in
224+
* ```rust
225+
* let name = "Alice";
226+
* println!("{0} in wonderland", name);
227+
* ```
228+
*/
229+
class PositionalFormatArgument extends FormatArgument {
230+
PositionalFormatArgument() { this = TFormatArgument(_, _, _, true, _) }
231+
232+
/** Gets the index of this positional argument. */
233+
int getIndex() { result = name.toInt() }
234+
}
235+
236+
/**
237+
* A named `FormatArgument`. For example `name` in
238+
* ```rust
239+
* let name = "Alice";
240+
* println!("{name} in wonderland");
241+
* ```
242+
*/
243+
class NamedFormatArgument extends FormatArgument {
244+
NamedFormatArgument() { this = TFormatArgument(_, _, _, false, _) }
245+
246+
/** Gets the name of this named argument. */
247+
string getName() { result = name }
248+
}
Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,31 @@
1-
// generated by codegen, remove this comment if you wish to edit this file
21
/**
32
* This module provides a hand-modifiable wrapper around the generated class `FormatArgsExpr`.
43
*
54
* INTERNAL: Do not use.
65
*/
76

87
private import codeql.rust.elements.internal.generated.FormatArgsExpr
8+
private import codeql.rust.elements.FormatTemplate
99

1010
/**
1111
* INTERNAL: This module contains the customizable definition of `FormatArgsExpr` and should not
1212
* be referenced directly.
1313
*/
1414
module Impl {
15+
// the following QLdoc is generated: if you need to edit it, do it in the schema file
1516
/**
1617
* A FormatArgsExpr. For example:
1718
* ```rust
1819
* todo!()
1920
* ```
2021
*/
21-
class FormatArgsExpr extends Generated::FormatArgsExpr { }
22+
class FormatArgsExpr extends Generated::FormatArgsExpr {
23+
/**
24+
* Gets the `index`th format of this `FormatArgsExpr`'s formatting template (0-based).
25+
*/
26+
Format getFormat(int index) {
27+
result =
28+
rank[index + 1](Format f, int i | f.getParent() = this and f.getIndex() = i | f order by i)
29+
}
30+
}
2231
}

0 commit comments

Comments
 (0)