Skip to content

Commit 198bdca

Browse files
committed
JS: Make XSS MetacharEscapeSanitizer more precise
1 parent effa52f commit 198bdca

File tree

2 files changed

+63
-9
lines changed

2 files changed

+63
-9
lines changed

javascript/ql/src/semmle/javascript/Regexp.qll

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1114,4 +1114,64 @@ module RegExp {
11141114
or
11151115
result = node.asExpr().(StringLiteral).asRegExp()
11161116
}
1117+
1118+
/**
1119+
* A character that will be analyzed by `RegExp::alwaysMatchesMetaCharacter`.
1120+
*
1121+
* Currently only `<`, `'`, and `"` are considered to be meta-characters, but new meta-characters
1122+
* can be added by subclassing this class.
1123+
*/
1124+
abstract class MetaCharacter extends string {
1125+
bindingset[this]
1126+
MetaCharacter() { any() }
1127+
1128+
/**
1129+
* Holds if the given atomic term matches this meta-character.
1130+
*
1131+
* Does not hold for derived terms like alternatives and groups.
1132+
*
1133+
* By default, `.`, `\W`, `\S`, and `\D` are considered to match any meta-character,
1134+
* but the predicate can be overridden for meta-characters where this is not the case.
1135+
*/
1136+
predicate matchedByAtom(RegExpTerm term) {
1137+
term.(RegExpConstant).getConstantValue() = this
1138+
or
1139+
term instanceof RegExpDot
1140+
or
1141+
term.(RegExpCharacterClassEscape).getValue() = ["\\W", "\\S", "\\D"]
1142+
or
1143+
exists(string lo, string hi |
1144+
term.(RegExpCharacterRange).isRange(lo, hi) and
1145+
lo <= this and
1146+
this <= hi
1147+
)
1148+
}
1149+
}
1150+
1151+
private class DefaultMetaCharacter extends MetaCharacter {
1152+
DefaultMetaCharacter() { this = ["<", "'", "\""] }
1153+
}
1154+
1155+
/**
1156+
* Holds if `term` can match any occurence of `char` within a string (not taking into account
1157+
* the context in which `term` appears).
1158+
*
1159+
* This predicate is under-approximate and never considers sequences to guarantee a match.
1160+
*/
1161+
predicate alwaysMatchesMetaCharacter(RegExpTerm term, MetaCharacter char) {
1162+
not term.getParent() instanceof RegExpSequence and // restrict size of predicate
1163+
char.matchedByAtom(term)
1164+
or
1165+
alwaysMatchesMetaCharacter(term.(RegExpGroup).getAChild(), char)
1166+
or
1167+
alwaysMatchesMetaCharacter(term.(RegExpAlt).getAlternative(), char)
1168+
or
1169+
exists(RegExpCharacterClass class_ | term = class_ |
1170+
not class_.isInverted() and
1171+
char.matchedByAtom(class_.getAChild())
1172+
or
1173+
class_.isInverted() and
1174+
not char.matchedByAtom(class_.getAChild())
1175+
)
1176+
}
11171177
}

javascript/ql/src/semmle/javascript/security/dataflow/Xss.qll

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,19 +28,13 @@ module Shared {
2828
abstract class SanitizerGuard extends TaintTracking::SanitizerGuardNode { }
2929

3030
/**
31-
* A global regexp replacement involving an HTML meta-character, viewed as a sanitizer for
31+
* A global regexp replacement involving the `<`, `'`, or `"` meta-character, viewed as a sanitizer for
3232
* XSS vulnerabilities.
33-
*
34-
* The XSS queries do not attempt to reason about correctness or completeness of sanitizers,
35-
* so any such replacement stops taint propagation.
3633
*/
3734
class MetacharEscapeSanitizer extends Sanitizer, StringReplaceCall {
3835
MetacharEscapeSanitizer() {
39-
this.isGlobal() and
40-
exists(RegExpConstant c |
41-
c.getLiteral() = getRegExp().asExpr() and
42-
c.getValue().regexpMatch("['\"&<>]")
43-
)
36+
isGlobal() and
37+
RegExp::alwaysMatchesMetaCharacter(getRegExp().getRoot(), ["<", "'", "\""])
4438
}
4539
}
4640

0 commit comments

Comments
 (0)