Skip to content

Commit 510febf

Browse files
author
Stephan Brandauer
authored
Merge pull request github#12830 from github/kaeluka/parameter-candidate-extraction
Java: Automodel Framework Mode Extraction Queries
2 parents 82e780d + c31ad01 commit 510febf

7 files changed

+848
-0
lines changed
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/**
2+
* For internal use only.
3+
*
4+
* Defines the set of classes that endpoint scoring models can predict. Endpoint scoring models must
5+
* only predict classes defined within this file. This file is the source of truth for the integer
6+
* representation of each of these classes.
7+
*/
8+
9+
/**
 * A class that can be predicted by a classifier.
 *
 * Every value is a string; `getKind()` returns that string unchanged.
 */
10+
abstract class EndpointType extends string {
11+
/**
12+
* Holds for any bound string (the body is `any()`); concrete subclasses restrict `this` to the
* name of their sink / source type.
13+
*/
14+
bindingset[this]
15+
EndpointType() { any() }
16+
17+
/**
18+
* Gets the name of the sink/source kind for this endpoint type as used in models-as-data.
* The result is the string value of `this`.
19+
*
20+
* See https://github.com/github/codeql/blob/44213f0144fdd54bb679ca48d68b28dcf820f7a8/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll#LL353C11-L357C31
21+
*/
22+
final string getKind() { result = this }
23+
}
24+
25+
/**
 * A class for sink types that can be predicted by a classifier.
 *
 * The characteristic predicate adds no restriction of its own (`any()`); concrete subclasses
 * pin `this` to a specific string.
 */
26+
abstract class SinkType extends EndpointType {
27+
bindingset[this]
28+
SinkType() { any() }
29+
}
30+
31+
/**
 * A class for source types that can be predicted by a classifier.
 *
 * The characteristic predicate adds no restriction of its own (`any()`).
 */
32+
abstract class SourceType extends EndpointType {
33+
bindingset[this]
34+
SourceType() { any() }
35+
}
36+
37+
/** The `Negative` class for non-sinks. Its string value is `"non-sink"`. */
38+
class NegativeSinkType extends SinkType {
39+
NegativeSinkType() { this = "non-sink" }
40+
}
41+
42+
/** A sink relevant to the SQL injection query. Its string value is `"sql"`. */
43+
class SqlSinkType extends SinkType {
44+
SqlSinkType() { this = "sql" }
45+
}
46+
47+
/** A sink relevant to the tainted path injection query. Its string value is `"tainted-path"`. */
48+
class TaintedPathSinkType extends SinkType {
49+
TaintedPathSinkType() { this = "tainted-path" }
50+
}
51+
52+
/** A sink relevant to the SSRF (server-side request forgery) query. Its string value is `"ssrf"`. */
53+
class RequestForgerySinkType extends SinkType {
54+
RequestForgerySinkType() { this = "ssrf" }
55+
}
56+
57+
/** A sink relevant to the command injection query. Its string value is `"command-injection"`. */
58+
class CommandInjectionSinkType extends SinkType {
59+
CommandInjectionSinkType() { this = "command-injection" }
60+
}
Lines changed: 331 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,331 @@
1+
/**
2+
* For internal use only.
3+
*/
4+
5+
private import java
6+
private import semmle.code.Location as Location
7+
private import semmle.code.java.dataflow.DataFlow
8+
private import semmle.code.java.dataflow.TaintTracking
9+
private import semmle.code.java.security.PathCreation
10+
private import semmle.code.java.dataflow.ExternalFlow as ExternalFlow
11+
private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
12+
private import semmle.code.java.security.ExternalAPIs as ExternalAPIs
13+
private import semmle.code.java.Expr as Expr
14+
private import semmle.code.java.security.QueryInjection
15+
private import semmle.code.java.security.RequestForgery
16+
private import semmle.code.java.dataflow.internal.ModelExclusions as ModelExclusions
17+
import AutomodelSharedCharacteristics as SharedCharacteristics
18+
import AutomodelEndpointTypes as AutomodelEndpointTypes
19+
20+
/**
21+
* A metadata extractor. Any Java extraction mode needs to implement exactly
22+
* one instance of this class.
23+
*/
24+
abstract class MetadataExtractor extends string {
25+
bindingset[this]
26+
MetadataExtractor() { any() }
27+
28+
/**
 * Holds if the parameter node `e` is described by the given metadata columns.
 *
 * The exact meaning of each column is defined by the implementing subclass.
 * NOTE(review): the column names look like those of a models-as-data row
 * (`package`, `type`, `subtypes`, `name`, `signature`, `input`) - confirm.
 */
abstract predicate hasMetadata(
29+
DataFlow::ParameterNode e, string package, string type, boolean subtypes, string name,
30+
string signature, int input
31+
);
32+
}
33+
34+
/**
 * The kinds of related location that can accompany a Java endpoint:
 * the Javadoc of a method (`MethodDoc`) or the Javadoc of a class (`ClassDoc`).
 */
newtype JavaRelatedLocationType =
35+
MethodDoc() or
36+
ClassDoc()
37+
38+
/**
 * The framework-mode implementation of the candidate signature.
 *
 * Key properties of this mode:
 * - Endpoints are parameters.
 * - Information about sinks and neutrals is taken from MaD models.
 * - The Javadoc of the enclosing method and of its class, where available, serve as related locations.
 */
module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig {
  // The members below implement `CandidateSig`; see the QLDoc of that signature module for their contracts.
  class Endpoint = DataFlow::ParameterNode;

  class EndpointType = AutomodelEndpointTypes::EndpointType;

  class NegativeEndpointType = AutomodelEndpointTypes::NegativeSinkType;

  class RelatedLocation = Location::Top;

  class RelatedLocationType = JavaRelatedLocationType;

  // MaD does not currently model sanitizers, so nothing is reported here.
  // TODO: check if this has large negative impact.
  predicate isSanitizer(Endpoint e, EndpointType t) { none() }

  RelatedLocation asLocation(Endpoint e) { e.asParameter() = result }

  predicate isKnownKind(string kind, string humanReadableKind, EndpointType type) {
    // Both file-access kinds map to the tainted-path sink type.
    type instanceof AutomodelEndpointTypes::TaintedPathSinkType and
    (
      kind = "read-file" and humanReadableKind = "read file"
      or
      kind = "create-file" and humanReadableKind = "create file"
    )
    or
    type instanceof AutomodelEndpointTypes::SqlSinkType and
    kind = "sql" and
    humanReadableKind = "mad modeled sql"
    or
    // Both URL kinds map to the request-forgery sink type.
    type instanceof AutomodelEndpointTypes::RequestForgerySinkType and
    (
      kind = "open-url" and humanReadableKind = "open url"
      or
      kind = "jdbc-url" and humanReadableKind = "jdbc url"
    )
    or
    type instanceof AutomodelEndpointTypes::CommandInjectionSinkType and
    kind = "command-injection" and
    humanReadableKind = "command injection"
  }

  predicate isSink(Endpoint e, string kind) {
    exists(
      string pkg, string typeName, string callableName, string sig, string ext, string input
    |
      // A model matches either on the exact signature or on the empty (wildcard) signature.
      ExternalFlow::sinkModel(pkg, typeName, _, callableName, [sig, ""], ext, input, kind, _) and
      sinkSpec(e, pkg, typeName, callableName, sig, ext, input)
    )
  }

  predicate isNeutral(Endpoint e) {
    exists(string pkg, string typeName, string callableName, string sig |
      ExternalFlow::neutralModel(pkg, typeName, callableName, [sig, ""], _, _) and
      sinkSpec(e, pkg, typeName, callableName, sig, _, _)
    )
  }

  additional predicate sinkSpec(
    Endpoint e, string package, string type, string name, string signature, string ext, string input
  ) {
    exists(Callable callable | callable = getCallable(e) |
      callable.hasQualifiedName(package, type, name) and
      signature = ExternalFlow::paramsString(callable)
    ) and
    ext = "" and
    exists(int paramIdx | e.isParameterOf(_, paramIdx) |
      // Index -1 is rendered as `Argument[this]`; every other index is rendered numerically.
      paramIdx = -1 and input = "Argument[this]"
      or
      paramIdx != -1 and input = "Argument[" + paramIdx + "]"
    )
  }

  /**
   * Gets the related location of kind `type` for the endpoint `e`.
   *
   * A related location is the Javadoc comment of the enclosing callable (`MethodDoc`)
   * or of that callable's declaring type (`ClassDoc`).
   */
  RelatedLocation getRelatedLocation(Endpoint e, RelatedLocationType type) {
    exists(Callable callable | callable = getCallable(e) |
      type = MethodDoc() and
      result = callable.(Documentable).getJavadoc()
      or
      type = ClassDoc() and
      result = callable.getDeclaringType().(Documentable).getJavadoc()
    )
  }

  /**
   * Gets the callable that contains the endpoint `e`.
   *
   * Each Java mode should implement this predicate.
   */
  additional Callable getCallable(Endpoint e) { e.getEnclosingCallable() = result }
}
134+
135+
/** The shared characteristics library instantiated with the framework-mode candidate implementation. */
module CharacteristicsImpl = SharedCharacteristics::SharedCharacteristics<FrameworkCandidatesImpl>;
136+
137+
/** An alias for the endpoint characteristic class of `CharacteristicsImpl`. */
class EndpointCharacteristic = CharacteristicsImpl::EndpointCharacteristic;
138+
139+
/** An alias for the endpoint class of `FrameworkCandidatesImpl`. */
class Endpoint = FrameworkCandidatesImpl::Endpoint;
140+
141+
/*
142+
* Predicates that are used to surface prompt examples and candidates for classification with an ML model.
143+
*/
144+
145+
/**
 * A `MetadataExtractor` that extracts metadata for framework mode.
 */
class FrameworkModeMetadataExtractor extends MetadataExtractor {
  FrameworkModeMetadataExtractor() { this = "FrameworkModeMetadataExtractor" }

  /**
   * Gets the value to use for the `subtypes` property of a MaD declaration for `callable`.
   *
   * By convention, the subtypes property of the MaD declaration should only be
   * true when there _can_ exist any subtypes with a different implementation.
   * The result is `false` if the callable or its declaring type is static or final,
   * and `true` otherwise.
   *
   * It would technically be ok to always use the value 'true', but this would
   * break convention.
   *
   * NOTE(review): a static nested class can still be subclassed; confirm that treating
   * a static declaring type as "no subtypes" is intended.
   */
  boolean considerSubtypes(Callable callable) {
    if
      callable.isStatic() or
      callable.getDeclaringType().isStatic() or
      callable.isFinal() or
      callable.getDeclaringType().isFinal()
    then result = false
    else result = true
  }

  /**
   * Holds if `e` is the parameter at index `input` of some callable, where:
   * - `package` is the name of the package of the callable's declaring type,
   * - `type` is the nested name of the erasure of the callable's declaring type,
   * - `subtypes` is the result of `considerSubtypes` for the callable,
   * - `name` is the name of the callable, and
   * - `signature` is the callable's parameter string as computed by `ExternalFlow::paramsString`.
   */
  override predicate hasMetadata(
    Endpoint e, string package, string type, boolean subtypes, string name, string signature,
    int input
  ) {
    exists(Callable callable |
      e.asParameter() = callable.getParameter(input) and
      package = callable.getDeclaringType().getPackage().getName() and
      type = callable.getDeclaringType().getErasure().(RefType).nestedName() and
      subtypes = this.considerSubtypes(callable) and
      // `name` identifies the callable (consistent with `sinkSpec`, which binds it via
      // `hasQualifiedName`), not the parameter; `e.toString()` would yield the parameter's
      // string representation instead.
      name = callable.getName() and
      signature = ExternalFlow::paramsString(callable)
    )
  }
}
182+
183+
/*
184+
* EndpointCharacteristic classes that are specific to Automodel for Java.
185+
*/
186+
187+
/**
 * A negative characteristic for parameters of `is`-style boolean methods.
 *
 * It applies to an endpoint that is not a known sink when the enclosing callable's name starts with `is` and
 * the callable has a boolean return type (e.g. `isDirectory`). Such callables normally only perform checks and
 * precede the call that does the dangerous/interesting thing; it is the latter that should be modeled as the sink.
 *
 * TODO: this might filter too much, it's possible that methods with more than one parameter contain interesting sinks
 */
private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
  UnexploitableIsCharacteristic() { this = "unexploitable (is-style boolean method)" }

  override predicate appliesToEndpoint(Endpoint e) {
    exists(Callable callable | callable = FrameworkCandidatesImpl::getCallable(e) |
      callable.getReturnType() instanceof BooleanType and
      callable.getName().matches("is%")
    ) and
    not FrameworkCandidatesImpl::isSink(e, _)
  }
}
205+
206+
/**
 * A negative characteristic for parameters of existence-checking boolean methods.
 *
 * It applies to an endpoint that is not a known sink when the enclosing callable's name, compared
 * case-insensitively, is `exists` or `notExists` and the callable has a boolean return type. Such callables
 * normally only perform checks and precede the call that does the dangerous/interesting thing; it is the latter
 * that should be modeled as the sink.
 */
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
  UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" }

  override predicate appliesToEndpoint(Endpoint e) {
    exists(Callable callable, string lowerCaseName |
      callable = FrameworkCandidatesImpl::getCallable(e) and
      lowerCaseName = callable.getName().toLowerCase()
    |
      callable.getReturnType() instanceof BooleanType and
      (lowerCaseName = "exists" or lowerCaseName = "notexists")
    ) and
    not FrameworkCandidatesImpl::isSink(e, _)
  }
}
226+
227+
/**
 * A negative characteristic for parameters of callables declared in exception types, which are not sinks.
 *
 * It applies when the declaring type of the endpoint's callable is `TypeThrowable` or has it as a
 * (transitive) supertype.
 */
private class ExceptionCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
  ExceptionCharacteristic() { this = "exception" }

  override predicate appliesToEndpoint(Endpoint e) {
    exists(RefType declaringType, TypeThrowable throwable |
      declaringType = FrameworkCandidatesImpl::getCallable(e).getDeclaringType() and
      throwable = declaringType.getASupertype*()
    )
  }
}
238+
239+
/**
 * A characteristic that marks every endpoint that is not a parameter of a `ModelApi` as uninteresting to
 * model, so that candidates are limited to APIs that are considered worth modeling.
 */
private class NotAModelApiParameter extends CharacteristicsImpl::UninterestingToModelCharacteristic {
  NotAModelApiParameter() { this = "not a model API parameter" }

  override predicate appliesToEndpoint(Endpoint e) {
    not e.asParameter() = any(ModelExclusions::ModelApi api).getAParameter()
  }
}
250+
251+
/**
 * A negative characteristic that filters out endpoints whose enclosing callable is not public.
 * Non-public callables are considered uninteresting for the standard Java modeling.
 */
private class NonPublicMethodCharacteristic extends CharacteristicsImpl::UninterestingToModelCharacteristic
{
  NonPublicMethodCharacteristic() { this = "non-public method" }

  override predicate appliesToEndpoint(Endpoint e) {
    not exists(Callable callable |
      callable = FrameworkCandidatesImpl::getCallable(e) and
      callable.isPublic()
    )
  }
}
263+
264+
/**
 * Holds if `endpoint` has a self-contradictory combination of characteristics, which indicates an error in our
 * endpoint characteristics. Each result names one offending `characteristic`, the `endpointType` it implies with
 * the given `confidence`, and an `errorMessage` explaining why the combination is problematic.
 *
 * When `ignoreKnownModelingErrors` is `true`, pairs listed in `knownOverlappingCharacteristics` are not reported.
 *
 * Copied from
 * javascript/ql/experimental/adaptivethreatmodeling/test/endpoint_large_scale/ContradictoryEndpointCharacteristics.ql
 */
predicate erroneousEndpoints(
  Endpoint endpoint, EndpointCharacteristic characteristic,
  AutomodelEndpointTypes::EndpointType endpointType, float confidence, string errorMessage,
  boolean ignoreKnownModelingErrors
) {
  // Case 1: two characteristics are positive indicators, each with above-medium confidence, for two different
  // types of the same family (both sink types or both source types, including the negative type).
  errorMessage = "Endpoint has high-confidence positive indicators for multiple classes" and
  exists(
    EndpointCharacteristic otherCharacteristic, AutomodelEndpointTypes::EndpointType otherType,
    float otherConfidence
  |
    characteristic.appliesToEndpoint(endpoint) and
    characteristic.hasImplications(endpointType, true, confidence) and
    confidence > SharedCharacteristics::mediumConfidence() and
    otherCharacteristic.appliesToEndpoint(endpoint) and
    otherCharacteristic.hasImplications(otherType, true, otherConfidence) and
    otherConfidence > SharedCharacteristics::mediumConfidence() and
    endpointType != otherType and
    (
      endpointType instanceof AutomodelEndpointTypes::SinkType and
      otherType instanceof AutomodelEndpointTypes::SinkType
      or
      endpointType instanceof AutomodelEndpointTypes::SourceType and
      otherType instanceof AutomodelEndpointTypes::SourceType
    ) and
    (
      ignoreKnownModelingErrors = false
      or
      ignoreKnownModelingErrors = true and
      not knownOverlappingCharacteristics(characteristic, otherCharacteristic)
    )
  )
  or
  // Case 2: one characteristic is a positive indicator and another a negative indicator for the same type,
  // each with above-medium confidence. This case is only reported with `ignoreKnownModelingErrors = false`.
  errorMessage = "Endpoint has high-confidence positive and negative indicators for the same class" and
  ignoreKnownModelingErrors = false and
  exists(EndpointCharacteristic otherCharacteristic, float otherConfidence |
    characteristic.appliesToEndpoint(endpoint) and
    characteristic.hasImplications(endpointType, true, confidence) and
    confidence > SharedCharacteristics::mediumConfidence() and
    otherCharacteristic.appliesToEndpoint(endpoint) and
    otherCharacteristic.hasImplications(endpointType, false, otherConfidence) and
    otherConfidence > SharedCharacteristics::mediumConfidence()
  )
}
319+
320+
/**
321+
* Holds if `characteristic1` and `characteristic2` are among the pairs of currently known positive characteristics that
322+
* have some overlap in their results. This indicates a problem with the underlying Java modeling. Specifically,
323+
* `PathCreation` is prone to FPs.
*
* The pair is matched by name: both characteristics must be distinct members of the same fixed list of
* characteristic names given in the body.
324+
*/
325+
private predicate knownOverlappingCharacteristics(
326+
EndpointCharacteristic characteristic1, EndpointCharacteristic characteristic2
327+
) {
328+
characteristic1 != characteristic2 and
329+
characteristic1 = ["mad taint step", "create path", "read file", "known non-sink"] and
330+
characteristic2 = ["mad taint step", "create path", "read file", "known non-sink"]
331+
}

0 commit comments

Comments
 (0)