Skip to content

Commit 710b215

Browse files
authored
Merge pull request github#11263 from github/tiferet/extract-training-data
ATM: Extract training data
2 parents 2ffb4b6 + fc078a4 commit 710b215

File tree

8 files changed

+246
-248
lines changed

8 files changed

+246
-248
lines changed

javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ abstract class EndpointCharacteristic extends string {
4545
EndpointType endpointClass, boolean isPositiveIndicator, float confidence
4646
);
4747

48+
/** Gets the high-confidence threshold, `0.8`: indicators with confidence at or above this value are considered to be high-confidence indicators. */
49+
final float getHighConfidenceThreshold() { result = 0.8 }
50+
4851
// The following are some confidence values that are used in practice by the subclasses. They are defined as named
4952
// constants here to make it easier to change them in the future.
5053
/** Gets the maximal confidence value, `1.0`. */
final float maximalConfidence() { result = 1.0 }

javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/DebugResultInclusion.ql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
import javascript
1313
import experimental.adaptivethreatmodeling.ATMConfig
14-
import extraction.ExtractEndpointData
14+
import extraction.ExtractEndpointDataTraining
1515

1616
string getAReasonSinkExcluded(DataFlow::Node sinkCandidate, Query query) {
1717
query instanceof NosqlInjectionQuery and
@@ -33,7 +33,7 @@ string getDescriptionForAlertCandidate(
3333
) {
3434
result = "excluded[reason=" + getAReasonSinkExcluded(sinkCandidate, query) + "]"
3535
or
36-
getAtmCfg(query).isKnownSink(sinkCandidate) and
36+
getDataFlowCfg(query).(AtmConfig).isKnownSink(sinkCandidate) and
3737
result = "excluded[reason=known-sink]"
3838
or
3939
not exists(getAReasonSinkExcluded(sinkCandidate, query)) and

javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointData.ql

Lines changed: 0 additions & 11 deletions
This file was deleted.

javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointData.qll

Lines changed: 0 additions & 215 deletions
This file was deleted.

javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointDataTraining.ql

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,8 @@
44
* Extracts training data we can use to train ML models for ML-powered queries.
55
*/
66

7-
import javascript
8-
import ExtractEndpointData as ExtractEndpointData
7+
private import ExtractEndpointDataTraining as ExtractEndpointDataTraining
98

10-
/**
 * Holds if `ExtractEndpointData::endpoints` holds for the same five arguments and, for the same
 * `endpoint` and `queryName`, it also reports a `sinkLabel` of `Sink` or `NotASink`, an
 * `isExcludedFromEndToEndEvaluation` value of `false`, and an `isConstantExpression` value of
 * `false`.
 */
query predicate endpoints(
11-
DataFlow::Node endpoint, string queryName, string key, string value, string valueType
12-
) {
13-
ExtractEndpointData::endpoints(endpoint, queryName, key, value, valueType) and
14-
// only select endpoints that are either Sink or NotASink
15-
ExtractEndpointData::endpoints(endpoint, queryName, "sinkLabel", ["Sink", "NotASink"], "string") and
16-
// do not select endpoints filtered out by end-to-end evaluation
17-
ExtractEndpointData::endpoints(endpoint, queryName, "isExcludedFromEndToEndEvaluation", "false",
18-
"boolean") and
19-
// only select endpoints that can be part of a tainted flow
20-
ExtractEndpointData::endpoints(endpoint, queryName, "isConstantExpression", "false", "boolean")
21-
}
9+
/** Query predicate `endpoints`, defined as an alias of the arity-5 predicate `ExtractEndpointDataTraining::reformattedTrainingEndpoints`. */
query predicate endpoints = ExtractEndpointDataTraining::reformattedTrainingEndpoints/5;
2210

23-
/**
 * Holds if `ExtractEndpointData::tokenFeatures` holds for `endpoint`, `featureName` and
 * `featureValue`, and `endpoint` appears in at least one row of the `endpoints` query predicate.
 */
query predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
24-
// restrict to endpoints selected by `endpoints`, whatever the other columns are
endpoints(endpoint, _, _, _, _) and
25-
ExtractEndpointData::tokenFeatures(endpoint, featureName, featureValue)
26-
}
11+
/** Query predicate `tokenFeatures`, defined as an alias of the arity-3 predicate `ExtractEndpointDataTraining::tokenFeatures`. */
query predicate tokenFeatures = ExtractEndpointDataTraining::tokenFeatures/3;

0 commit comments

Comments
 (0)