Skip to content

Commit dff8259

Browse files
author
Stephan Brandauer
committed
Java: support remote sources in automodel positive example extraction
1 parent fcabca4 commit dff8259

13 files changed

+110
-31
lines changed

java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,13 @@ module ApplicationCandidatesImpl implements SharedCharacteristics::CandidateSig
205205
isCustomSink(e, kind) and provenance = "custom-sink"
206206
}
207207

208+
predicate isSource(Endpoint e, string kind, string provenance) {
209+
exists(string package, string type, string name, string signature, string ext, string output |
210+
sourceSpec(e, package, type, name, signature, ext, output) and
211+
ExternalFlow::sourceModel(package, type, _, name, [signature, ""], ext, output, kind, provenance)
212+
)
213+
}
214+
208215
predicate isNeutral(Endpoint e) {
209216
exists(string package, string type, string name, string signature |
210217
sinkSpec(e, package, type, name, signature, _, _) and
@@ -222,6 +229,15 @@ module ApplicationCandidatesImpl implements SharedCharacteristics::CandidateSig
222229
input = e.getMaDInput()
223230
}
224231

232+
additional predicate sourceSpec(
233+
Endpoint e, string package, string type, string name, string signature, string ext, string output
234+
) {
235+
ApplicationModeGetCallable::getCallable(e).hasQualifiedName(package, type, name) and
236+
signature = ExternalFlow::paramsString(ApplicationModeGetCallable::getCallable(e)) and
237+
ext = "" and
238+
output = e.getMaDOutput()
239+
}
240+
225241
/**
226242
* Gets the related location for the given endpoint.
227243
*

java/ql/automodel/src/AutomodelApplicationModeExtractCandidates.ql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ where
8383
// a non-sink, and we surface only endpoints that have at least one such sink type.
8484
message =
8585
strictconcat(AutomodelEndpointTypes::SinkType sinkType |
86-
not CharacteristicsImpl::isKnownSink(endpoint, sinkType, _) and
86+
not CharacteristicsImpl::isKnownAs(endpoint, sinkType, _) and
8787
CharacteristicsImpl::isSinkCandidate(endpoint, sinkType)
8888
|
8989
sinkType, ", "

java/ql/automodel/src/AutomodelApplicationModeExtractPositiveExamples.ql

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ private import AutomodelEndpointTypes
1313
private import AutomodelJavaUtil
1414

1515
from
16-
Endpoint endpoint, SinkType sinkType, ApplicationModeMetadataExtractor meta,
16+
Endpoint endpoint, EndpointType endpointType, ApplicationModeMetadataExtractor meta,
1717
DollarAtString package, DollarAtString type, DollarAtString subtypes, DollarAtString name,
1818
DollarAtString signature, DollarAtString input, DollarAtString output, DollarAtString isVarargsArray
1919
where
@@ -22,10 +22,10 @@ where
2222
not erroneousEndpoints(endpoint, _, _, _, _, false) and
2323
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
2424
// Extract positive examples of sinks and sources belonging to the existing ATM query configurations.
25-
CharacteristicsImpl::isKnownSink(endpoint, sinkType, _) and
25+
CharacteristicsImpl::isKnownAs(endpoint, endpointType, _) and
2626
exists(CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()))
2727
select endpoint.asNode(),
28-
sinkType + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@.", //
28+
endpointType + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@.", //
2929
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()), "CallContext", //
3030
package, "package", //
3131
type, "type", //

java/ql/automodel/src/AutomodelEndpointTypes.qll

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,6 @@ abstract class SinkType extends EndpointType {
2828
SinkType() { any() }
2929
}
3030

31-
/** A class for source types that can be predicted by a classifier. */
32-
abstract class SourceType extends EndpointType {
33-
bindingset[this]
34-
SourceType() { any() }
35-
}
36-
3731
/** The `Negative` class for non-sinks. */
3832
class NegativeSinkType extends SinkType {
3933
NegativeSinkType() { this = "non-sink" }
@@ -58,3 +52,14 @@ class RequestForgerySinkType extends SinkType {
5852
class CommandInjectionSinkType extends SinkType {
5953
CommandInjectionSinkType() { this = "command-injection" }
6054
}
55+
56+
/** A class for source types that can be predicted by a classifier. */
57+
abstract class SourceType extends EndpointType {
58+
bindingset[this]
59+
SourceType() { any() }
60+
}
61+
62+
/** A source of remote data. */
63+
class RemoteSourceType extends SourceType {
64+
RemoteSourceType() { this = "remote" }
65+
}

java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,10 @@ module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig {
117117
)
118118
}
119119

120+
predicate isSource(Endpoint e, string kind, string provenance) {
121+
none() // TODO: implement
122+
}
123+
120124
predicate isNeutral(Endpoint e) {
121125
exists(string package, string type, string name, string signature |
122126
sinkSpec(e, package, type, name, signature, _, _) and

java/ql/automodel/src/AutomodelFrameworkModeExtractCandidates.ql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ where
4040
// a non-sink, and we surface only endpoints that have at least one such sink type.
4141
message =
4242
strictconcat(AutomodelEndpointTypes::SinkType sinkType |
43-
not CharacteristicsImpl::isKnownSink(endpoint, sinkType, _) and
43+
not CharacteristicsImpl::isKnownAs(endpoint, sinkType, _) and
4444
CharacteristicsImpl::isSinkCandidate(endpoint, sinkType)
4545
|
4646
sinkType, ", "

java/ql/automodel/src/AutomodelFrameworkModeExtractPositiveExamples.ql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ where
2222
not erroneousEndpoints(endpoint, _, _, _, _, false) and
2323
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, parameterName) and
2424
// Extract positive examples of sinks belonging to the existing ATM query configurations.
25-
CharacteristicsImpl::isKnownSink(endpoint, sinkType, _)
25+
CharacteristicsImpl::isKnownAs(endpoint, sinkType, _)
2626
select endpoint,
2727
sinkType + "\nrelated locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@.", //
2828
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", //

java/ql/automodel/src/AutomodelJavaUtil.qll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ predicate isKnownKind(string kind, AutomodelEndpointTypes::EndpointType type) {
3939
or
4040
kind = "command-injection" and
4141
type instanceof AutomodelEndpointTypes::CommandInjectionSinkType
42+
or
43+
kind = "remote" and
44+
type instanceof AutomodelEndpointTypes::RemoteSourceType
4245
}
4346

4447
/** Gets the models-as-data description for the method argument with the index `index`. */

java/ql/automodel/src/AutomodelSharedCharacteristics.qll

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ signature module CandidateSig {
6262
*/
6363
predicate isSink(Endpoint e, string kind, string provenance);
6464

65+
/**
66+
* Holds if `e` is a source with the label `kind`, and provenance `provenance`.
67+
*/
68+
predicate isSource(Endpoint e, string kind, string provenance);
69+
6570
/**
6671
* Holds if `e` is not a sink of any kind.
6772
*/
@@ -92,16 +97,16 @@ module SharedCharacteristics<CandidateSig Candidate> {
9297
predicate isNeutral = Candidate::isNeutral/1;
9398

9499
/**
95-
* Holds if `sink` is a known sink of type `endpointType`.
100+
* Holds if `endpoint` is modeled as `endpointType` (endpoint type must not be negative).
96101
*/
97-
predicate isKnownSink(
98-
Candidate::Endpoint sink, Candidate::EndpointType endpointType,
102+
predicate isKnownAs(
103+
Candidate::Endpoint endpoint, Candidate::EndpointType endpointType,
99104
EndpointCharacteristic characteristic
100105
) {
101106
// If the list of characteristics includes positive indicators with maximal confidence for this class, then it's a
102107
// known sink or source for the class.
103108
not endpointType instanceof Candidate::NegativeEndpointType and
104-
characteristic.appliesToEndpoint(sink) and
109+
characteristic.appliesToEndpoint(endpoint) and
105110
characteristic.hasImplications(endpointType, true, maximalConfidence())
106111
}
107112

@@ -209,6 +214,25 @@ module SharedCharacteristics<CandidateSig Candidate> {
209214
}
210215
}
211216

217+
/**
218+
* A high-confidence characteristic that indicates that an endpoint is a source of a specified type. These endpoints can
219+
* be used as positive samples for training or for a few-shot prompt.
220+
*/
221+
abstract class SourceCharacteristic extends EndpointCharacteristic {
222+
bindingset[this]
223+
SourceCharacteristic() { any() }
224+
225+
abstract Candidate::EndpointType getSourceType();
226+
227+
final override predicate hasImplications(
228+
Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
229+
) {
230+
endpointType = this.getSourceType() and
231+
isPositiveIndicator = true and
232+
confidence = maximalConfidence()
233+
}
234+
}
235+
212236
/**
213237
* A high-confidence characteristic that indicates that an endpoint is not a sink of any type. These endpoints can be
214238
* used as negative samples for training or for a few-shot prompt.
@@ -292,6 +316,25 @@ module SharedCharacteristics<CandidateSig Candidate> {
292316
override Candidate::EndpointType getSinkType() { result = endpointType }
293317
}
294318

319+
private class KnownSourceCharacteristic extends SourceCharacteristic {
320+
string madKind;
321+
Candidate::EndpointType endpointType;
322+
string provenance;
323+
324+
KnownSourceCharacteristic() {
325+
Candidate::isKnownKind(madKind, endpointType) and
326+
// bind "this" to a unique string differing from that of the SinkType classes
327+
this = madKind + "_" + provenance + "_characteristic" and
328+
Candidate::isSource(_, madKind, provenance)
329+
}
330+
331+
override predicate appliesToEndpoint(Candidate::Endpoint e) {
332+
Candidate::isSource(e, madKind, provenance)
333+
}
334+
335+
override Candidate::EndpointType getSourceType() { result = endpointType }
336+
}
337+
295338
/**
296339
* A negative characteristic that indicates that an endpoint was manually modeled as a neutral model.
297340
*/

0 commit comments

Comments
 (0)