|
| 1 | +/** |
| 2 | + * For internal use only. |
| 3 | + */ |
| 4 | + |
| 5 | +private import java |
| 6 | +private import semmle.code.Location as Location |
| 7 | +private import semmle.code.java.dataflow.DataFlow |
| 8 | +private import semmle.code.java.dataflow.TaintTracking |
| 9 | +private import semmle.code.java.security.PathCreation |
| 10 | +private import semmle.code.java.dataflow.ExternalFlow as ExternalFlow |
| 11 | +private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl |
| 12 | +private import semmle.code.java.security.ExternalAPIs as ExternalAPIs |
| 13 | +private import semmle.code.java.Expr as Expr |
| 14 | +private import semmle.code.java.security.QueryInjection |
| 15 | +private import semmle.code.java.security.RequestForgery |
| 16 | +private import semmle.code.java.dataflow.internal.ModelExclusions as ModelExclusions |
| 17 | +import AutomodelSharedCharacteristics as SharedCharacteristics |
| 18 | +import AutomodelEndpointTypes as AutomodelEndpointTypes |
| 19 | + |
/**
 * A metadata extractor. Any Java extraction mode needs to implement exactly
 * one instance of this class.
 */
abstract class MetadataExtractor extends string {
  // `bindingset[this]` plus the unconstrained `any()` body means this abstract class places no
  // restriction of its own on the string value; each concrete subclass fixes its own value.
  bindingset[this]
  MetadataExtractor() { any() }

  /**
   * Holds if the endpoint `e` is described by the given metadata tuple.
   *
   * In the framework-mode implementation in this file, `package`, `type`, `subtypes` and
   * `signature` are derived from the callable that declares the parameter, and `input` is the
   * index of the parameter within that callable.
   */
  abstract predicate hasMetadata(
    DataFlow::ParameterNode e, string package, string type, boolean subtypes, string name,
    string signature, int input
  );
}
| 33 | + |
/**
 * The kinds of related location that can be attached to an endpoint.
 *
 * As used by `FrameworkCandidatesImpl::getRelatedLocation` in this file:
 * - `MethodDoc` selects the Javadoc of the callable that contains the endpoint.
 * - `ClassDoc` selects the Javadoc of that callable's declaring type.
 */
newtype JavaRelatedLocationType =
  MethodDoc() or
  ClassDoc()
| 37 | + |
/**
 * The framework-mode implementation of `CandidateSig`.
 *
 * Key properties of this mode:
 * - Endpoints are parameters.
 * - Sink and neutral information is taken from MaD models.
 * - Where present, the Javadoc of the enclosing callable and of its declaring class are used as
 *   related locations.
 */
module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig {
  // The contract of each member below is documented in the QLDoc of the `CandidateSig` signature module.
  class Endpoint = DataFlow::ParameterNode;

  class EndpointType = AutomodelEndpointTypes::EndpointType;

  class NegativeEndpointType = AutomodelEndpointTypes::NegativeSinkType;

  class RelatedLocation = Location::Top;

  class RelatedLocationType = JavaRelatedLocationType;

  // MaD currently has no notion of sanitizers. TODO: check if this has large negative impact.
  predicate isSanitizer(Endpoint e, EndpointType t) { none() }

  RelatedLocation asLocation(Endpoint e) { e.asParameter() = result }

  predicate isKnownKind(string kind, string humanReadableKind, EndpointType type) {
    // Grouped by endpoint type; every (kind, humanReadableKind) pair belongs to exactly one type.
    type instanceof AutomodelEndpointTypes::TaintedPathSinkType and
    (
      kind = "read-file" and humanReadableKind = "read file"
      or
      kind = "create-file" and humanReadableKind = "create file"
    )
    or
    type instanceof AutomodelEndpointTypes::SqlSinkType and
    kind = "sql" and
    humanReadableKind = "mad modeled sql"
    or
    type instanceof AutomodelEndpointTypes::RequestForgerySinkType and
    (
      kind = "open-url" and humanReadableKind = "open url"
      or
      kind = "jdbc-url" and humanReadableKind = "jdbc url"
    )
    or
    type instanceof AutomodelEndpointTypes::CommandInjectionSinkType and
    kind = "command-injection" and
    humanReadableKind = "command injection"
  }

  predicate isSink(Endpoint e, string kind) {
    exists(string package, string type, string name, string signature, string ext, string input |
      // A model row matches either the exact parameter signature or the empty signature.
      ExternalFlow::sinkModel(package, type, _, name, [signature, ""], ext, input, kind, _) and
      sinkSpec(e, package, type, name, signature, ext, input)
    )
  }

  predicate isNeutral(Endpoint e) {
    exists(string package, string type, string name, string signature |
      ExternalFlow::neutralModel(package, type, name, [signature, ""], _, _) and
      sinkSpec(e, package, type, name, signature, _, _)
    )
  }

  /**
   * Holds if `e` corresponds to the MaD-style coordinates `package`, `type`, `name`, `signature`,
   * `ext` and `input`, all derived from the callable that contains `e`.
   *
   * `ext` is always the empty string. `input` is `Argument[this]` for parameter position -1 and
   * `Argument[<position>]` for every other position.
   */
  additional predicate sinkSpec(
    Endpoint e, string package, string type, string name, string signature, string ext, string input
  ) {
    exists(Callable callable, int paramIdx |
      callable = getCallable(e) and
      e.isParameterOf(_, paramIdx)
    |
      callable.hasQualifiedName(package, type, name) and
      signature = ExternalFlow::paramsString(callable) and
      ext = "" and
      (
        paramIdx = -1 and input = "Argument[this]"
        or
        paramIdx != -1 and input = "Argument[" + paramIdx + "]"
      )
    )
  }

  /**
   * Gets the related location of kind `type` for the endpoint `e`.
   *
   * A related location is the Javadoc comment of either the callable containing `e` or of that
   * callable's declaring type.
   */
  RelatedLocation getRelatedLocation(Endpoint e, RelatedLocationType type) {
    exists(Callable callable | callable = getCallable(e) |
      type = MethodDoc() and
      result = callable.(Documentable).getJavadoc()
      or
      type = ClassDoc() and
      result = callable.getDeclaringType().(Documentable).getJavadoc()
    )
  }

  /**
   * Gets the callable that contains the endpoint `e`.
   *
   * Each Java mode should implement this predicate.
   */
  additional Callable getCallable(Endpoint e) { e.getEnclosingCallable() = result }
}
| 134 | + |
| 135 | +module CharacteristicsImpl = SharedCharacteristics::SharedCharacteristics<FrameworkCandidatesImpl>; |
| 136 | + |
| 137 | +class EndpointCharacteristic = CharacteristicsImpl::EndpointCharacteristic; |
| 138 | + |
| 139 | +class Endpoint = FrameworkCandidatesImpl::Endpoint; |
| 140 | + |
| 141 | +/* |
| 142 | + * Predicates that are used to surface prompt examples and candidates for classification with an ML model. |
| 143 | + */ |
| 144 | + |
/**
 * A MetadataExtractor that extracts metadata for framework mode.
 */
class FrameworkModeMetadataExtractor extends MetadataExtractor {
  FrameworkModeMetadataExtractor() { this = "FrameworkModeMetadataExtractor" }

  /**
   * Gets the value to use for the `subtypes` column of a MaD declaration for `callable`.
   *
   * By convention, the subtypes property of the MaD declaration should only be
   * true when there _can_ exist any subtypes with a different implementation.
   * The result is therefore `false` when the callable or its declaring type is
   * static or final, and `true` otherwise.
   *
   * It would technically be ok to always use the value 'true', but this would
   * break convention.
   */
  boolean considerSubtypes(Callable callable) {
    if
      callable.isStatic() or
      callable.getDeclaringType().isStatic() or
      callable.isFinal() or
      callable.getDeclaringType().isFinal()
    then result = false
    else result = true
  }

  /**
   * Holds if the endpoint `e` is the parameter at index `input` of a callable described by the
   * MaD-style columns `package`, `type`, `subtypes`, `name` and `signature`.
   *
   * Only holds for endpoints that correspond to an explicit parameter, since `e.asParameter()`
   * must exist.
   */
  override predicate hasMetadata(
    Endpoint e, string package, string type, boolean subtypes, string name, string signature,
    int input
  ) {
    exists(Callable callable |
      e.asParameter() = callable.getParameter(input) and
      package = callable.getDeclaringType().getPackage().getName() and
      type = callable.getDeclaringType().getErasure().(RefType).nestedName() and
      subtypes = this.considerSubtypes(callable) and
      // `name` is the callable's name, matching how `sinkSpec` uses it via `hasQualifiedName`.
      // It is not the string form of the parameter node.
      name = callable.getName() and
      signature = ExternalFlow::paramsString(callable)
    )
  }
}
| 182 | + |
| 183 | +/* |
| 184 | + * EndpointCharacteristic classes that are specific to Automodel for Java. |
| 185 | + */ |
| 186 | + |
/**
 * A negative characteristic for parameters of is-style boolean methods, which are treated as unexploitable.
 *
 * If the enclosing callable's name starts with `is` and it returns a boolean (e.g. `isDirectory`), the call is
 * normally just a check that precedes the call doing the dangerous/interesting thing; the latter is what we want
 * modeled as the sink. Endpoints that are already known sinks are not matched by this characteristic.
 *
 * TODO: this might filter too much, it's possible that methods with more than one parameter contain interesting sinks
 */
private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
  UnexploitableIsCharacteristic() { this = "unexploitable (is-style boolean method)" }

  override predicate appliesToEndpoint(Endpoint e) {
    exists(Callable callable | callable = FrameworkCandidatesImpl::getCallable(e) |
      callable.getName().matches("is%") and
      callable.getReturnType() instanceof BooleanType
    ) and
    // never contradict an existing sink model
    not FrameworkCandidatesImpl::isSink(e, _)
  }
}
| 205 | + |
/**
 * A negative characteristic for parameters of existence-checking boolean methods, which are treated as
 * unexploitable.
 *
 * If the enclosing callable is named `exists` or `notExists` (compared case-insensitively) and returns a boolean,
 * the call is normally just a check that precedes the call doing the dangerous/interesting thing; the latter is what
 * we want modeled as the sink. Endpoints that are already known sinks are not matched by this characteristic.
 */
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
  UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" }

  override predicate appliesToEndpoint(Endpoint e) {
    exists(Callable callable, string loweredName |
      callable = FrameworkCandidatesImpl::getCallable(e) and
      loweredName = callable.getName().toLowerCase()
    |
      (loweredName = "exists" or loweredName = "notexists") and
      callable.getReturnType() instanceof BooleanType
    ) and
    // never contradict an existing sink model
    not FrameworkCandidatesImpl::isSink(e, _)
  }
}
| 226 | + |
/**
 * A negative characteristic for parameters of callables declared on an exception type, which are not sinks.
 *
 * It applies when the declaring type of the enclosing callable is `TypeThrowable` or has it among its
 * (transitive) supertypes.
 */
private class ExceptionCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
  ExceptionCharacteristic() { this = "exception" }

  override predicate appliesToEndpoint(Endpoint e) {
    exists(RefType declaringType |
      declaringType = FrameworkCandidatesImpl::getCallable(e).getDeclaringType()
    |
      declaringType.getASupertype*() instanceof TypeThrowable
    )
  }
}
| 238 | + |
/**
 * A characteristic that restricts candidates to parameters of callables recognized as `ModelApi`, i.e. APIs that
 * are considered worth modeling. It applies to every endpoint that is not a parameter of such an API.
 */
private class NotAModelApiParameter extends CharacteristicsImpl::UninterestingToModelCharacteristic {
  NotAModelApiParameter() { this = "not a model API parameter" }

  override predicate appliesToEndpoint(Endpoint e) {
    not e.asParameter() = any(ModelExclusions::ModelApi api).getAParameter()
  }
}
| 250 | + |
/**
 * A negative characteristic that filters out parameters of non-public callables. These are not interesting for the
 * standard Java modeling, which targets the publicly callable API surface.
 */
private class NonPublicMethodCharacteristic extends CharacteristicsImpl::UninterestingToModelCharacteristic
{
  NonPublicMethodCharacteristic() { this = "non-public method" }

  override predicate appliesToEndpoint(Endpoint e) {
    not exists(Callable callable |
      callable = FrameworkCandidatesImpl::getCallable(e) and
      callable.isPublic()
    )
  }
}
| 263 | + |
/**
 * Holds if `characteristic` applies to `endpoint` and has an implication for `endpointType` with the given
 * polarity (`isPositive`) and a `confidence` strictly above the medium confidence threshold.
 */
private predicate hasStrongIndication(
  Endpoint endpoint, EndpointCharacteristic characteristic,
  AutomodelEndpointTypes::EndpointType endpointType, boolean isPositive, float confidence
) {
  characteristic.appliesToEndpoint(endpoint) and
  characteristic.hasImplications(endpointType, isPositive, confidence) and
  confidence > SharedCharacteristics::mediumConfidence()
}

/**
 * Holds if `endpoint` has a self-contradictory combination of characteristics, which points at an error in our
 * endpoint characteristics. For every such endpoint this reports an offending `characteristic` together with the
 * `endpointType` and `confidence` it implies, and an `errorMessage` explaining why the combination is problematic.
 *
 * When `ignoreKnownModelingErrors` is `true`, pairs of characteristics that are known to overlap are not reported.
 *
 * Copied from
 * javascript/ql/experimental/adaptivethreatmodeling/test/endpoint_large_scale/ContradictoryEndpointCharacteristics.ql
 */
predicate erroneousEndpoints(
  Endpoint endpoint, EndpointCharacteristic characteristic,
  AutomodelEndpointTypes::EndpointType endpointType, float confidence, string errorMessage,
  boolean ignoreKnownModelingErrors
) {
  // Case 1: positive indicators, each with confidence strictly above medium, for two different types that are
  // either both sink types or both source types (this includes the negative type).
  errorMessage = "Endpoint has high-confidence positive indicators for multiple classes" and
  exists(EndpointCharacteristic other, AutomodelEndpointTypes::EndpointType otherType |
    hasStrongIndication(endpoint, characteristic, endpointType, true, confidence) and
    hasStrongIndication(endpoint, other, otherType, true, _) and
    otherType != endpointType and
    (
      endpointType instanceof AutomodelEndpointTypes::SinkType and
      otherType instanceof AutomodelEndpointTypes::SinkType
      or
      endpointType instanceof AutomodelEndpointTypes::SourceType and
      otherType instanceof AutomodelEndpointTypes::SourceType
    ) and
    (
      ignoreKnownModelingErrors = false
      or
      ignoreKnownModelingErrors = true and
      not knownOverlappingCharacteristics(characteristic, other)
    )
  )
  or
  // Case 2: a positive and a negative indicator, each with confidence strictly above medium, for the very same
  // type. This case is only reported when known modeling errors are not being ignored.
  errorMessage = "Endpoint has high-confidence positive and negative indicators for the same class" and
  ignoreKnownModelingErrors = false and
  exists(EndpointCharacteristic other |
    hasStrongIndication(endpoint, characteristic, endpointType, true, confidence) and
    hasStrongIndication(endpoint, other, endpointType, false, _)
  )
}
| 319 | + |
/**
 * Gets the name of a positive characteristic that is currently known to overlap with others in this list.
 *
 * NOTE(review): the names are matched as plain strings; "create path" does not appear among the human-readable
 * kinds listed in this file, so it is presumably defined elsewhere - confirm it is still current.
 */
private string knownOverlappingCharacteristicName() {
  result = ["mad taint step", "create path", "read file", "known non-sink"]
}

/**
 * Holds if `characteristic1` and `characteristic2` are two distinct members of the set of positive characteristics
 * that are currently known to overlap in their results. Such overlap indicates a problem with the underlying Java
 * modeling. Specifically, `PathCreation` is prone to FPs.
 */
private predicate knownOverlappingCharacteristics(
  EndpointCharacteristic characteristic1, EndpointCharacteristic characteristic2
) {
  characteristic1 = knownOverlappingCharacteristicName() and
  characteristic2 = knownOverlappingCharacteristicName() and
  characteristic1 != characteristic2
}
0 commit comments