Skip to content

Commit 4f7f924

Browse files
committed
Distinguish regex components from strings
Create a set of classes for components of regex literals, separate from those of string literals. This allows us to special-case components of free-spacing regexes (ones with the /x flag) to not have a `getValueText()`. This in turn is useful because our regex parser can't handle free-spacing regexes, so excluding them ensures that we don't generate erroneous ReDoS alerts.
1 parent ac9cac7 commit 4f7f924

File tree

7 files changed

+193
-78
lines changed

7 files changed

+193
-78
lines changed

ruby/ql/lib/codeql/ruby/ast/Literal.qll

Lines changed: 85 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,9 @@ class StringComponent extends AstNode, TStringComponent {
277277
class StringTextComponent extends StringComponent, TStringTextComponent {
278278
private Ruby::Token g;
279279

280-
StringTextComponent() { this = TStringTextComponent(g) }
280+
StringTextComponent() {
281+
this = TStringTextComponent(g) and not g.getParent() instanceof Ruby::Regex
282+
}
281283

282284
final override string toString() { result = g.getValue() }
283285

@@ -292,7 +294,9 @@ class StringTextComponent extends StringComponent, TStringTextComponent {
292294
class StringEscapeSequenceComponent extends StringComponent, TStringEscapeSequenceComponent {
293295
private Ruby::EscapeSequence g;
294296

295-
StringEscapeSequenceComponent() { this = TStringEscapeSequenceComponent(g) }
297+
StringEscapeSequenceComponent() {
298+
this = TStringEscapeSequenceComponent(g) and not g.getParent() instanceof Ruby::Regex
299+
}
296300

297301
final override string toString() { result = g.getValue() }
298302

@@ -308,7 +312,9 @@ class StringInterpolationComponent extends StringComponent, StmtSequence,
308312
TStringInterpolationComponent {
309313
private Ruby::Interpolation g;
310314

311-
StringInterpolationComponent() { this = TStringInterpolationComponent(g) }
315+
StringInterpolationComponent() {
316+
this = TStringInterpolationComponent(g) and not g.getParent() instanceof Ruby::Regex
317+
}
312318

313319
final override string toString() { result = "#{...}" }
314320

@@ -319,6 +325,82 @@ class StringInterpolationComponent extends StringComponent, StmtSequence,
319325
final override string getAPrimaryQlClass() { result = "StringInterpolationComponent" }
320326
}
321327

328+
/**
329+
* The base class for a component of a regular expression literal.
330+
*/
331+
class RegExpComponent extends AstNode, TStringComponent {
332+
private RegExpLiteral parent;
333+
334+
RegExpComponent() { toGenerated(this).getParent() = toGenerated(parent) }
335+
336+
string getValueText() { none() }
337+
}
338+
339+
/**
340+
* A component of a regex literal that is simply text.
341+
*
342+
* For example, the following regex literals all contain `RegExpTextComponent`
343+
* components whose `getValueText()` returns `"foo"`:
344+
*
345+
* ```rb
346+
* 'foo'
347+
* "#{ bar() }foo"
348+
* "foo#{ bar() } baz"
349+
* ```
350+
*/
351+
class RegExpTextComponent extends RegExpComponent, TStringTextComponent {
352+
private Ruby::Token g;
353+
354+
RegExpTextComponent() { this = TStringTextComponent(g) and g.getParent() instanceof Ruby::Regex }
355+
356+
final override string toString() { result = g.getValue() }
357+
358+
// Exclude components that are children of a free-spacing regex.
359+
// We do this because `ParseRegExp.qll` cannot handle free-spacing regexes.
360+
final override string getValueText() {
361+
not this.getParent().(RegExpLiteral).hasFreeSpacingFlag() and result = g.getValue()
362+
}
363+
364+
final override string getAPrimaryQlClass() { result = "RegExpTextComponent" }
365+
}
366+
367+
/**
368+
* An escape sequence component of a regex literal.
369+
*/
370+
class RegExpEscapeSequenceComponent extends RegExpComponent, TStringEscapeSequenceComponent {
371+
private Ruby::EscapeSequence g;
372+
373+
RegExpEscapeSequenceComponent() { this = TStringEscapeSequenceComponent(g) }
374+
375+
final override string toString() { result = g.getValue() }
376+
377+
// Exclude components that are children of a free-spacing regex.
378+
// We do this because `ParseRegExp.qll` cannot handle free-spacing regexes.
379+
final override string getValueText() {
380+
not this.getParent().(RegExpLiteral).hasFreeSpacingFlag() and result = g.getValue()
381+
}
382+
383+
final override string getAPrimaryQlClass() { result = "RegExpEscapeSequenceComponent" }
384+
}
385+
386+
/**
387+
* An interpolation expression component of a regex literal.
388+
*/
389+
class RegExpInterpolationComponent extends RegExpComponent, StmtSequence,
390+
TStringInterpolationComponent {
391+
private Ruby::Interpolation g;
392+
393+
RegExpInterpolationComponent() { this = TStringInterpolationComponent(g) }
394+
395+
final override string toString() { result = "#{...}" }
396+
397+
final override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) }
398+
399+
final override string getValueText() { none() }
400+
401+
final override string getAPrimaryQlClass() { result = "RegExpInterpolationComponent" }
402+
}
403+
322404
/**
323405
* A string, symbol, regexp, or subshell literal.
324406
*/

ruby/ql/lib/codeql/ruby/controlflow/CfgNodes.qll

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,13 @@ class StringComponentCfgNode extends AstCfgNode {
132132
string getValueText() { result = this.getNode().(StringComponent).getValueText() }
133133
}
134134

135+
/** A control-flow node that wraps a `RegExpComponent` AST expression. */
136+
class RegExpComponentCfgNode extends AstCfgNode {
137+
RegExpComponentCfgNode() { this.getNode() instanceof RegExpComponent }
138+
139+
string getValueText() { result = this.getNode().(RegExpComponent).getValueText() }
140+
}
141+
135142
private AstNode desugar(AstNode n) {
136143
result = n.getDesugared()
137144
or
@@ -474,6 +481,15 @@ module ExprNodes {
474481
final override string getValueText() { result = this.getLastStmt().getValueText() }
475482
}
476483

484+
/** A control-flow node that wraps a `RegExpInterpolationComponent` AST expression. */
485+
class RegExpInterpolationComponentCfgNode extends RegExpComponentCfgNode, StmtSequenceCfgNode {
486+
RegExpInterpolationComponentCfgNode() { this.getNode() instanceof RegExpInterpolationComponent }
487+
488+
// If last statement in the interpolation is a constant or local variable read,
489+
// attempt to look up its definition and return the definition's `getValueText()`.
490+
final override string getValueText() { result = this.getLastStmt().getValueText() }
491+
}
492+
477493
private class StringlikeLiteralChildMapping extends ExprChildMapping, StringlikeLiteral {
478494
override predicate relevantChild(AstNode n) { n = this.getComponent(_) }
479495
}
@@ -510,11 +526,27 @@ module ExprNodes {
510526
final override StringLiteral getExpr() { result = super.getExpr() }
511527
}
512528

529+
private class RegExpLiteralChildMapping extends ExprChildMapping, RegExpLiteral {
530+
override predicate relevantChild(AstNode n) { n = this.getComponent(_) }
531+
}
532+
513533
/** A control-flow node that wraps a `RegExpLiteral` AST expression. */
514534
class RegExpLiteralCfgNode extends ExprCfgNode {
515-
override RegExpLiteral e;
535+
override RegExpLiteralChildMapping e;
536+
537+
RegExpComponentCfgNode getComponent(int n) { e.hasCfgChild(e.getComponent(n), this, result) }
516538

517539
final override RegExpLiteral getExpr() { result = super.getExpr() }
540+
541+
language[monotonicAggregates]
542+
override string getValueText() {
543+
result =
544+
concat(RegExpComponentCfgNode c, int i |
545+
c = this.getComponent(i)
546+
|
547+
c.getValueText() order by i
548+
)
549+
}
518550
}
519551

520552
/** A control-flow node that wraps a `ComparisonOperation` AST expression. */

ruby/ql/lib/codeql/ruby/controlflow/internal/ControlFlowGraphImpl.qll

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1351,7 +1351,13 @@ module Trees {
13511351
}
13521352

13531353
private class StringComponentTree extends LeafTree, StringComponent {
1354-
StringComponentTree() { not this instanceof StringInterpolationComponent }
1354+
StringComponentTree() {
1355+
// Interpolations contain `StmtSequence`s, so they shouldn't be treated as leaf nodes.
1356+
not this instanceof StringInterpolationComponent and
1357+
// In the interests of brevity we treat regexes as string literals when constructing the CFG.
1358+
// Thus we must exclude regex interpolations here too.
1359+
not this instanceof RegExpInterpolationComponent
1360+
}
13551361
}
13561362

13571363
private class ToplevelTree extends BodyStmtTree, Toplevel {

ruby/ql/test/library-tests/ast/Ast.expected

Lines changed: 27 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -946,7 +946,7 @@ control/cases.rb:
946946
# 92| 0: [RegExpCharacterRange] 0-9
947947
# 92| 0: [RegExpConstant, RegExpNormalChar] 0
948948
# 92| 1: [RegExpConstant, RegExpNormalChar] 9
949-
# 92| getComponent: [StringTextComponent] .*abc[0-9]
949+
# 92| getComponent: [RegExpTextComponent] .*abc[0-9]
950950
# 93| getBranch: [InClause] in ... then ...
951951
# 93| getPattern: [RangeLiteral] _ .. _
952952
# 93| getBegin: [IntegerLiteral] 5
@@ -1761,13 +1761,13 @@ literals/literals.rb:
17611761
# 137| 0: [RegExpConstant, RegExpNormalChar] f
17621762
# 137| 1: [RegExpConstant, RegExpNormalChar] o
17631763
# 137| 2: [RegExpConstant, RegExpNormalChar] o
1764-
# 137| getComponent: [StringTextComponent] foo
1764+
# 137| getComponent: [RegExpTextComponent] foo
17651765
# 138| getStmt: [RegExpLiteral] /foo/
17661766
# 138| getParsed: [RegExpSequence] foo
17671767
# 138| 0: [RegExpConstant, RegExpNormalChar] f
17681768
# 138| 1: [RegExpConstant, RegExpNormalChar] o
17691769
# 138| 2: [RegExpConstant, RegExpNormalChar] o
1770-
# 138| getComponent: [StringTextComponent] foo
1770+
# 138| getComponent: [RegExpTextComponent] foo
17711771
# 139| getStmt: [RegExpLiteral] /foo+\sbar\S/
17721772
# 139| getParsed: [RegExpSequence] foo+\sbar\S
17731773
# 139| 0: [RegExpConstant, RegExpNormalChar] f
@@ -1779,10 +1779,10 @@ literals/literals.rb:
17791779
# 139| 5: [RegExpConstant, RegExpNormalChar] a
17801780
# 139| 6: [RegExpConstant, RegExpNormalChar] r
17811781
# 139| 7: [RegExpCharacterClassEscape] \S
1782-
# 139| getComponent: [StringTextComponent] foo+
1783-
# 139| getComponent: [StringEscapeSequenceComponent] \s
1784-
# 139| getComponent: [StringTextComponent] bar
1785-
# 139| getComponent: [StringEscapeSequenceComponent] \S
1782+
# 139| getComponent: [RegExpTextComponent] foo+
1783+
# 139| getComponent: [RegExpEscapeSequenceComponent] \s
1784+
# 139| getComponent: [RegExpTextComponent] bar
1785+
# 139| getComponent: [RegExpEscapeSequenceComponent] \S
17861786
# 140| getStmt: [RegExpLiteral] /foo#{...}bar#{...}#{...}/
17871787
# 140| getParsed: [RegExpSequence] foo2barbarbar
17881788
# 140| 0: [RegExpConstant, RegExpNormalChar] f
@@ -1798,35 +1798,31 @@ literals/literals.rb:
17981798
# 140| 10: [RegExpConstant, RegExpNormalChar] b
17991799
# 140| 11: [RegExpConstant, RegExpNormalChar] a
18001800
# 140| 12: [RegExpConstant, RegExpNormalChar] r
1801-
# 140| getComponent: [StringTextComponent] foo
1802-
# 140| getComponent: [StringInterpolationComponent] #{...}
1801+
# 140| getComponent: [RegExpTextComponent] foo
1802+
# 140| getComponent: [RegExpInterpolationComponent] #{...}
18031803
# 140| getStmt: [AddExpr] ... + ...
18041804
# 140| getAnOperand/getLeftOperand/getReceiver: [IntegerLiteral] 1
18051805
# 140| getAnOperand/getArgument/getRightOperand: [IntegerLiteral] 1
1806-
# 140| getComponent: [StringTextComponent] bar
1807-
# 140| getComponent: [StringInterpolationComponent] #{...}
1806+
# 140| getComponent: [RegExpTextComponent] bar
1807+
# 140| getComponent: [RegExpInterpolationComponent] #{...}
18081808
# 140| getStmt: [LocalVariableAccess] bar
1809-
# 140| getComponent: [StringInterpolationComponent] #{...}
1809+
# 140| getComponent: [RegExpInterpolationComponent] #{...}
18101810
# 140| getStmt: [ConstantReadAccess] BAR
18111811
# 141| getStmt: [RegExpLiteral] /foo/
1812-
# 141| getParsed: [RegExpSequence] foo
1813-
# 141| 0: [RegExpConstant, RegExpNormalChar] f
1814-
# 141| 1: [RegExpConstant, RegExpNormalChar] o
1815-
# 141| 2: [RegExpConstant, RegExpNormalChar] o
1816-
# 141| getComponent: [StringTextComponent] foo
1812+
# 141| getComponent: [RegExpTextComponent] foo
18171813
# 142| getStmt: [RegExpLiteral] //
18181814
# 143| getStmt: [RegExpLiteral] /foo/
18191815
# 143| getParsed: [RegExpSequence] foo
18201816
# 143| 0: [RegExpConstant, RegExpNormalChar] f
18211817
# 143| 1: [RegExpConstant, RegExpNormalChar] o
18221818
# 143| 2: [RegExpConstant, RegExpNormalChar] o
1823-
# 143| getComponent: [StringTextComponent] foo
1819+
# 143| getComponent: [RegExpTextComponent] foo
18241820
# 144| getStmt: [RegExpLiteral] /foo/
18251821
# 144| getParsed: [RegExpSequence] foo
18261822
# 144| 0: [RegExpConstant, RegExpNormalChar] f
18271823
# 144| 1: [RegExpConstant, RegExpNormalChar] o
18281824
# 144| 2: [RegExpConstant, RegExpNormalChar] o
1829-
# 144| getComponent: [StringTextComponent] foo
1825+
# 144| getComponent: [RegExpTextComponent] foo
18301826
# 145| getStmt: [RegExpLiteral] /foo+\sbar\S/
18311827
# 145| getParsed: [RegExpSequence] foo+\sbar\S
18321828
# 145| 0: [RegExpConstant, RegExpNormalChar] f
@@ -1838,10 +1834,10 @@ literals/literals.rb:
18381834
# 145| 5: [RegExpConstant, RegExpNormalChar] a
18391835
# 145| 6: [RegExpConstant, RegExpNormalChar] r
18401836
# 145| 7: [RegExpCharacterClassEscape] \S
1841-
# 145| getComponent: [StringTextComponent] foo+
1842-
# 145| getComponent: [StringEscapeSequenceComponent] \s
1843-
# 145| getComponent: [StringTextComponent] bar
1844-
# 145| getComponent: [StringEscapeSequenceComponent] \S
1837+
# 145| getComponent: [RegExpTextComponent] foo+
1838+
# 145| getComponent: [RegExpEscapeSequenceComponent] \s
1839+
# 145| getComponent: [RegExpTextComponent] bar
1840+
# 145| getComponent: [RegExpEscapeSequenceComponent] \S
18451841
# 146| getStmt: [RegExpLiteral] /foo#{...}bar#{...}#{...}/
18461842
# 146| getParsed: [RegExpSequence] foo2barbarbar
18471843
# 146| 0: [RegExpConstant, RegExpNormalChar] f
@@ -1857,22 +1853,18 @@ literals/literals.rb:
18571853
# 146| 10: [RegExpConstant, RegExpNormalChar] b
18581854
# 146| 11: [RegExpConstant, RegExpNormalChar] a
18591855
# 146| 12: [RegExpConstant, RegExpNormalChar] r
1860-
# 146| getComponent: [StringTextComponent] foo
1861-
# 146| getComponent: [StringInterpolationComponent] #{...}
1856+
# 146| getComponent: [RegExpTextComponent] foo
1857+
# 146| getComponent: [RegExpInterpolationComponent] #{...}
18621858
# 146| getStmt: [AddExpr] ... + ...
18631859
# 146| getAnOperand/getLeftOperand/getReceiver: [IntegerLiteral] 1
18641860
# 146| getAnOperand/getArgument/getRightOperand: [IntegerLiteral] 1
1865-
# 146| getComponent: [StringTextComponent] bar
1866-
# 146| getComponent: [StringInterpolationComponent] #{...}
1861+
# 146| getComponent: [RegExpTextComponent] bar
1862+
# 146| getComponent: [RegExpInterpolationComponent] #{...}
18671863
# 146| getStmt: [LocalVariableAccess] bar
1868-
# 146| getComponent: [StringInterpolationComponent] #{...}
1864+
# 146| getComponent: [RegExpInterpolationComponent] #{...}
18691865
# 146| getStmt: [ConstantReadAccess] BAR
18701866
# 147| getStmt: [RegExpLiteral] /foo/
1871-
# 147| getParsed: [RegExpSequence] foo
1872-
# 147| 0: [RegExpConstant, RegExpNormalChar] f
1873-
# 147| 1: [RegExpConstant, RegExpNormalChar] o
1874-
# 147| 2: [RegExpConstant, RegExpNormalChar] o
1875-
# 147| getComponent: [StringTextComponent] foo
1867+
# 147| getComponent: [RegExpTextComponent] foo
18761868
# 150| getStmt: [StringLiteral] "abcdefghijklmnopqrstuvwxyzabcdef"
18771869
# 150| getComponent: [StringTextComponent] abcdefghijklmnopqrstuvwxyzabcdef
18781870
# 151| getStmt: [StringLiteral] "foobarfoobarfoobarfoobarfooba..."
@@ -2399,7 +2391,7 @@ operations/operations.rb:
23992391
# 65| 2: [RegExpConstant, RegExpNormalChar] o
24002392
# 65| 3: [RegExpStar] .*
24012393
# 65| 0: [RegExpDot] .
2402-
# 65| getComponent: [StringTextComponent] foo.*
2394+
# 65| getComponent: [RegExpTextComponent] foo.*
24032395
# 66| getStmt: [NoRegExpMatchExpr] ... !~ ...
24042396
# 66| getAnOperand/getLeftOperand/getReceiver: [LocalVariableAccess] handle
24052397
# 66| getAnOperand/getArgument/getRightOperand: [RegExpLiteral] /.*bar/
@@ -2409,7 +2401,7 @@ operations/operations.rb:
24092401
# 66| 1: [RegExpConstant, RegExpNormalChar] b
24102402
# 66| 2: [RegExpConstant, RegExpNormalChar] a
24112403
# 66| 3: [RegExpConstant, RegExpNormalChar] r
2412-
# 66| getComponent: [StringTextComponent] .*bar
2404+
# 66| getComponent: [RegExpTextComponent] .*bar
24132405
# 69| getStmt: [AssignAddExpr] ... += ...
24142406
# 69| getAnOperand/getLeftOperand: [LocalVariableAccess] x
24152407
# 69| getAnOperand/getRightOperand: [IntegerLiteral] 128

ruby/ql/test/library-tests/ast/ValueText.expected

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -552,7 +552,6 @@
552552
| literals/literals.rb:140:12:140:12 | 1 | 1 |
553553
| literals/literals.rb:140:20:140:22 | bar | bar |
554554
| literals/literals.rb:140:26:140:28 | BAR | bar |
555-
| literals/literals.rb:141:1:141:8 | /foo/ | foo |
556555
| literals/literals.rb:142:1:142:4 | // | |
557556
| literals/literals.rb:143:1:143:7 | /foo/ | foo |
558557
| literals/literals.rb:144:1:144:8 | /foo/ | foo |
@@ -563,7 +562,6 @@
563562
| literals/literals.rb:146:14:146:14 | 1 | 1 |
564563
| literals/literals.rb:146:22:146:24 | bar | bar |
565564
| literals/literals.rb:146:28:146:30 | BAR | bar |
566-
| literals/literals.rb:147:1:147:10 | /foo/ | foo |
567565
| literals/literals.rb:150:1:150:34 | "abcdefghijklmnopqrstuvwxyzabcdef" | abcdefghijklmnopqrstuvwxyzabcdef |
568566
| literals/literals.rb:151:1:151:35 | "foobarfoobarfoobarfoobarfooba..." | foobarfoobarfoobarfoobarfoobarfoo |
569567
| literals/literals.rb:152:1:152:40 | "foobar\\\\foobar\\\\foobar\\\\fooba..." | foobar\\\\foobar\\\\foobar\\\\foobar\\\\foobar |

0 commit comments

Comments
 (0)