Skip to content

Commit cb098df

Browse files
authored
Merge pull request #7334 from github/hmac/regexp-interpolations
Ruby: Resolve simple string interpolations
2 parents dfbde23 + f02aeaf commit cb098df

File tree

20 files changed

+2622
-1540
lines changed

20 files changed

+2622
-1540
lines changed

ruby/ql/lib/codeql/ruby/ast/Literal.qll

Lines changed: 85 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,14 @@ private import codeql.ruby.security.performance.RegExpTreeView as RETV
33
private import internal.AST
44
private import internal.Scope
55
private import internal.TreeSitter
6+
private import codeql.ruby.controlflow.CfgNodes
67

78
/**
89
* A literal.
910
*
1011
* This is the QL root class for all literals.
1112
*/
12-
class Literal extends Expr, TLiteral {
13-
/**
14-
* Gets the source text for this literal, if this is a simple literal.
15-
*
16-
* For complex literals, such as arrays, hashes, and strings with
17-
* interpolations, this predicate has no result.
18-
*/
19-
override string getValueText() { none() }
20-
}
13+
class Literal extends Expr, TLiteral { }
2114

2215
/**
2316
* A numeric literal, i.e. an integer, floating-point, rational, or complex
@@ -281,10 +274,10 @@ class StringComponent extends AstNode, TStringComponent {
281274
* "foo#{ bar() } baz"
282275
* ```
283276
*/
284-
class StringTextComponent extends StringComponent, TStringTextComponent {
277+
class StringTextComponent extends StringComponent, TStringTextComponentNonRegexp {
285278
private Ruby::Token g;
286279

287-
StringTextComponent() { this = TStringTextComponent(g) }
280+
StringTextComponent() { this = TStringTextComponentNonRegexp(g) }
288281

289282
final override string toString() { result = g.getValue() }
290283

@@ -296,10 +289,10 @@ class StringTextComponent extends StringComponent, TStringTextComponent {
296289
/**
297290
* An escape sequence component of a string or string-like literal.
298291
*/
299-
class StringEscapeSequenceComponent extends StringComponent, TStringEscapeSequenceComponent {
292+
class StringEscapeSequenceComponent extends StringComponent, TStringEscapeSequenceComponentNonRegexp {
300293
private Ruby::EscapeSequence g;
301294

302-
StringEscapeSequenceComponent() { this = TStringEscapeSequenceComponent(g) }
295+
StringEscapeSequenceComponent() { this = TStringEscapeSequenceComponentNonRegexp(g) }
303296

304297
final override string toString() { result = g.getValue() }
305298

@@ -312,10 +305,10 @@ class StringEscapeSequenceComponent extends StringComponent, TStringEscapeSequen
312305
* An interpolation expression component of a string or string-like literal.
313306
*/
314307
class StringInterpolationComponent extends StringComponent, StmtSequence,
315-
TStringInterpolationComponent {
308+
TStringInterpolationComponentNonRegexp {
316309
private Ruby::Interpolation g;
317310

318-
StringInterpolationComponent() { this = TStringInterpolationComponent(g) }
311+
StringInterpolationComponent() { this = TStringInterpolationComponentNonRegexp(g) }
319312

320313
final override string toString() { result = "#{...}" }
321314

@@ -326,6 +319,83 @@ class StringInterpolationComponent extends StringComponent, StmtSequence,
326319
final override string getAPrimaryQlClass() { result = "StringInterpolationComponent" }
327320
}
328321

322+
private class TRegExpComponent =
323+
TStringTextComponentRegexp or TStringEscapeSequenceComponentRegexp or
324+
TStringInterpolationComponentRegexp;
325+
326+
/**
327+
* The base class for a component of a regular expression literal.
328+
*/
329+
class RegExpComponent extends AstNode, TRegExpComponent {
330+
/** Gets the source text for this regex component, if any. */
331+
string getValueText() { none() }
332+
}
333+
334+
/**
335+
* A component of a regex literal that is simply text.
336+
*
337+
* For example, the following regex literals all contain `RegExpTextComponent`
338+
* components whose `getValueText()` returns `"foo"`:
339+
*
340+
* ```rb
341+
* /foo/
341+
* /#{ bar() }foo/
342+
* /foo#{ bar() } baz/
344+
* ```
345+
*/
346+
class RegExpTextComponent extends RegExpComponent, TStringTextComponentRegexp {
347+
private Ruby::Token g;
348+
349+
RegExpTextComponent() { this = TStringTextComponentRegexp(g) }
350+
351+
final override string toString() { result = g.getValue() }
352+
353+
// Exclude components that are children of a free-spacing regex.
354+
// We do this because `ParseRegExp.qll` cannot handle free-spacing regexes.
355+
final override string getValueText() {
356+
not this.getParent().(RegExpLiteral).hasFreeSpacingFlag() and result = g.getValue()
357+
}
358+
359+
final override string getAPrimaryQlClass() { result = "RegExpTextComponent" }
360+
}
361+
362+
/**
363+
* An escape sequence component of a regex literal.
364+
*/
365+
class RegExpEscapeSequenceComponent extends RegExpComponent, TStringEscapeSequenceComponentRegexp {
366+
private Ruby::EscapeSequence g;
367+
368+
RegExpEscapeSequenceComponent() { this = TStringEscapeSequenceComponentRegexp(g) }
369+
370+
final override string toString() { result = g.getValue() }
371+
372+
// Exclude components that are children of a free-spacing regex.
373+
// We do this because `ParseRegExp.qll` cannot handle free-spacing regexes.
374+
final override string getValueText() {
375+
not this.getParent().(RegExpLiteral).hasFreeSpacingFlag() and result = g.getValue()
376+
}
377+
378+
final override string getAPrimaryQlClass() { result = "RegExpEscapeSequenceComponent" }
379+
}
380+
381+
/**
382+
* An interpolation expression component of a regex literal.
383+
*/
384+
class RegExpInterpolationComponent extends RegExpComponent, StmtSequence,
385+
TStringInterpolationComponentRegexp {
386+
private Ruby::Interpolation g;
387+
388+
RegExpInterpolationComponent() { this = TStringInterpolationComponentRegexp(g) }
389+
390+
final override string toString() { result = "#{...}" }
391+
392+
final override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) }
393+
394+
final override string getValueText() { none() }
395+
396+
final override string getAPrimaryQlClass() { result = "RegExpInterpolationComponent" }
397+
}
398+
329399
/**
330400
* A string, symbol, regexp, or subshell literal.
331401
*/
@@ -410,17 +480,6 @@ class StringlikeLiteral extends Literal, TStringlikeLiteral {
410480
result = ""
411481
}
412482

413-
override string getValueText() {
414-
// 0 components should result in the empty string
415-
// if there are any interpolations, there should be no result
416-
// otherwise, concatenate all the components
417-
forall(StringComponent c | c = this.getComponent(_) |
418-
not c instanceof StringInterpolationComponent
419-
) and
420-
result =
421-
concat(StringComponent c, int i | c = this.getComponent(i) | c.getValueText() order by i)
422-
}
423-
424483
override string toString() {
425484
exists(string full, string summary |
426485
full =

ruby/ql/lib/codeql/ruby/ast/internal/AST.qll

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -272,10 +272,25 @@ private module Cached {
272272
TStmtSequenceSynth(AST::AstNode parent, int i) { mkSynthChild(StmtSequenceKind(), parent, i) } or
273273
TStringArrayLiteral(Ruby::StringArray g) or
274274
TStringConcatenation(Ruby::ChainedString g) or
275-
TStringEscapeSequenceComponent(Ruby::EscapeSequence g) or
276-
TStringInterpolationComponent(Ruby::Interpolation g) or
277-
TStringTextComponent(Ruby::Token g) {
278-
g instanceof Ruby::StringContent or g instanceof Ruby::HeredocContent
275+
TStringEscapeSequenceComponentNonRegexp(Ruby::EscapeSequence g) {
276+
not g.getParent() instanceof Ruby::Regex
277+
} or
278+
TStringEscapeSequenceComponentRegexp(Ruby::EscapeSequence g) {
279+
g.getParent() instanceof Ruby::Regex
280+
} or
281+
TStringInterpolationComponentNonRegexp(Ruby::Interpolation g) {
282+
not g.getParent() instanceof Ruby::Regex
283+
} or
284+
TStringInterpolationComponentRegexp(Ruby::Interpolation g) {
285+
g.getParent() instanceof Ruby::Regex
286+
} or
287+
TStringTextComponentNonRegexp(Ruby::Token g) {
288+
(g instanceof Ruby::StringContent or g instanceof Ruby::HeredocContent) and
289+
not g.getParent() instanceof Ruby::Regex
290+
} or
291+
TStringTextComponentRegexp(Ruby::Token g) {
292+
(g instanceof Ruby::StringContent or g instanceof Ruby::HeredocContent) and
293+
g.getParent() instanceof Ruby::Regex
279294
} or
280295
TSubExprReal(Ruby::Binary g) { g instanceof @ruby_binary_minus } or
281296
TSubExprSynth(AST::AstNode parent, int i) { mkSynthChild(SubExprKind(), parent, i) } or
@@ -489,9 +504,12 @@ private module Cached {
489504
n = TSplatParameter(result) or
490505
n = TStringArrayLiteral(result) or
491506
n = TStringConcatenation(result) or
492-
n = TStringEscapeSequenceComponent(result) or
493-
n = TStringInterpolationComponent(result) or
494-
n = TStringTextComponent(result) or
507+
n = TStringEscapeSequenceComponentNonRegexp(result) or
508+
n = TStringEscapeSequenceComponentRegexp(result) or
509+
n = TStringInterpolationComponentNonRegexp(result) or
510+
n = TStringInterpolationComponentRegexp(result) or
511+
n = TStringTextComponentNonRegexp(result) or
512+
n = TStringTextComponentRegexp(result) or
495513
n = TSubExprReal(result) or
496514
n = TSubshellLiteral(result) or
497515
n = TSymbolArrayLiteral(result) or
@@ -680,6 +698,14 @@ class TIntegerLiteral = TIntegerLiteralReal or TIntegerLiteralSynth;
680698

681699
class TBooleanLiteral = TTrueLiteral or TFalseLiteral;
682700

701+
class TStringTextComponent = TStringTextComponentNonRegexp or TStringTextComponentRegexp;
702+
703+
class TStringEscapeSequenceComponent =
704+
TStringEscapeSequenceComponentNonRegexp or TStringEscapeSequenceComponentRegexp;
705+
706+
class TStringInterpolationComponent =
707+
TStringInterpolationComponentNonRegexp or TStringInterpolationComponentRegexp;
708+
683709
class TStringComponent =
684710
TStringTextComponent or TStringEscapeSequenceComponent or TStringInterpolationComponent;
685711

0 commit comments

Comments
 (0)