|
| 1 | +/** |
| 2 | + * Provides a flow label for reasoning about URLs with a tainted query and fragment part, |
| 3 | + * which we collectively refer to as the "suffix" of the URL. |
| 4 | + */ |
| 5 | +import javascript |
| 6 | + |
| 7 | +/** |
| 8 | + * Provides a flow label for reasoning about URLs with a tainted query and fragment part, |
| 9 | + * which we collectively refer to as the "suffix" of the URL. |
| 10 | + */ |
| 11 | +module TaintedUrlSuffix { |
| 12 | + private import DataFlow |
| 13 | + |
| 14 | + /** |
| 15 | + * The flow label representing a URL with a tainted query and fragment part. |
| 16 | + * |
| 17 | + * Can also be accessed using `TaintedUrlSuffix::label()`. |
| 18 | + */ |
| 19 | + class TaintedUrlSuffixLabel extends FlowLabel { |
| 20 | + TaintedUrlSuffixLabel() { |
| 21 | + this = "tainted-url-suffix" |
| 22 | + } |
| 23 | + } |
| 24 | + |
| 25 | + /** |
| 26 | + * Gets the flow label representing a URL with a tainted query and fragment part. |
| 27 | + */ |
| 28 | + FlowLabel label() { result instanceof TaintedUrlSuffixLabel } |
| 29 | + |
| 30 | + /** Holds if `read` reads a property whose name refers to the scheme, domain, port, auth, or path of a URL, rather than its query or fragment. */ |
| 31 | + private predicate isSafeLocationProp(DataFlow::PropRead read) { |
| 32 | + // Ignore properties that refer to the scheme, domain, port, auth, or path. |
| 33 | + exists (string name | name = read.getPropertyName() | |
| 34 | + name = "protocol" or |
| 35 | + name = "scheme" or |
| 36 | + name = "host" or |
| 37 | + name = "hostname" or |
| 38 | + name = "domain" or |
| 39 | + name = "origin" or |
| 40 | + name = "port" or |
| 41 | + name = "path" or |
| 42 | + name = "pathname" or |
| 43 | + name = "username" or |
| 44 | + name = "password" or |
| 45 | + name = "auth" |
| 46 | + ) |
| 47 | + } |
| 48 | + |
| 49 | + /** |
| 50 | + * Holds if there is a flow step `src -> dst` involving the URL suffix taint label. |
| 51 | + * |
| 52 | + * This handles steps through string operations, promises, URL parsers, and URL accessors. |
| 53 | + */ |
| 54 | + predicate step(Node src, Node dst, FlowLabel srclbl, FlowLabel dstlbl) { |
| 55 | + // Inherit all ordinary taint steps, except those whose destination is a property read matched by `isSafeLocationProp` |
| 56 | + srclbl = label() and |
| 57 | + dstlbl = label() and |
| 58 | + TaintTracking::sharedTaintStep(src, dst) and |
| 59 | + not isSafeLocationProp(dst) |
| 60 | + or |
| 61 | + // Transition from URL suffix to full taint when extracting the query/fragment part. |
| 62 | + srclbl = label() and |
| 63 | + dstlbl.isTaint() and |
| 64 | + ( |
| 65 | + exists(MethodCallNode call, string name | |
| 66 | + src = call.getReceiver() and |
| 67 | + dst = call and |
| 68 | + name = call.getMethodName() |
| 69 | + | |
| 70 | + // Substring that is not a prefix, i.e. the first argument is not known to be the constant 0 |
| 71 | + name = ["substring", "substr", "slice"] and |
| 72 | + not call.getArgument(0).getIntValue() = 0 |
| 73 | + or |
| 74 | + // Split around '#' or '?' and extract the suffix |
| 75 | + name = "split" and |
| 76 | + call.getArgument(0).getStringValue() = ["#", "?"] and |
| 77 | + not exists(call.getAPropertyRead("0")) // Avoid false flow to the prefix |
| 78 | + or |
| 79 | + // Replace '#' and '?' with nothing |
| 80 | + name = "replace" and |
| 81 | + call.getArgument(0).getStringValue() = ["#", "?"] and |
| 82 | + call.getArgument(1).getStringValue() = "" |
| 83 | + or |
| 84 | + // The `get` call in `url.searchParams.get(x)` and `url.hashParams.get(x)` |
| 85 | + // The step should be safe since nothing else reachable by this flow label supports a method named 'get'. |
| 86 | + name = "get" |
| 87 | + or |
| 88 | + // Methods on URL objects from the Closure library |
| 89 | + name = "getDecodedQuery" or |
| 90 | + name = "getFragment" or |
| 91 | + name = "getParameterValue" or |
| 92 | + name = "getParameterValues" or |
| 93 | + name = "getQueryData" |
| 94 | + ) |
| 95 | + or |
| 96 | + exists(PropRead read | |
| 97 | + src = read.getBase() and |
| 98 | + dst = read and |
| 99 | + // Unlike the `search` property, the `query` property from `url.parse` does not include the `?`. |
| 100 | + read.getPropertyName() = "query" |
| 101 | + ) |
| 102 | + or |
| 103 | + // Assume calls to regexp.exec always extract query/fragment parameters. |
| 104 | + exists(MethodCallNode call | |
| 105 | + call = any(RegExpLiteral re).flow().(DataFlow::SourceNode).getAMethodCall("exec") and |
| 106 | + src = call.getArgument(0) and |
| 107 | + dst = call |
| 108 | + ) |
| 109 | + ) |
| 110 | + } |
| 111 | +} |
0 commit comments