Skip to content

Commit cf4e37a

Browse files
committed
Implement the standard endpoint filters as EndpointCharacteristics
1 parent cb632b3 commit cf4e37a

File tree

2 files changed

+122
-1
lines changed

2 files changed

+122
-1
lines changed

javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll

Lines changed: 114 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ private import semmle.javascript.security.dataflow.SqlInjectionCustomizations
77
private import semmle.javascript.security.dataflow.DomBasedXssCustomizations
88
private import semmle.javascript.security.dataflow.NosqlInjectionCustomizations
99
private import semmle.javascript.security.dataflow.TaintedPathCustomizations
10+
private import CoreKnowledge as CoreKnowledge
11+
private import semmle.javascript.heuristics.SyntacticHeuristics
12+
private import semmle.javascript.filters.ClassifyFiles as ClassifyFiles
1013

1114
/**
1215
* A set of characteristics that a particular endpoint might have. This set of characteristics is used to make decisions
@@ -135,7 +138,8 @@ private class NosqlInjectionSinkCharacteristic extends EndpointCharacteristic {
135138
}
136139

137140
/*
138-
* Characteristics that are indicative of not being a sink of any type.
141+
* Characteristics that are indicative of not being a sink of any type, and have historically been used to select
142+
* negative samples for training.
139143
*/
140144

141145
/**
@@ -442,3 +446,112 @@ private class BuiltinCallNameCharacteristic extends ArgumentToBuiltinFunctionCha
442446
)
443447
}
444448
}
449+
450+
/*
451+
* Characteristics that have historically acted as endpoint filters to exclude endpoints from scoring at inference time.
452+
*/
453+
454+
/** A characteristic that has historically acted as an endpoint filter for inference-time scoring. */
455+
abstract class EndpointFilterCharacteristic extends EndpointCharacteristic {
456+
// The characteristic predicate below is `any()`, so this abstract class adds no constraint of its own;
// the concrete subclasses in this file pin `this` to a specific string label.
// NOTE(review): `bindingset[this]` is presumably needed because `any()` does not bind `this` - confirm
// against the QL annotations reference.
bindingset[this]
457+
EndpointFilterCharacteristic() { any() }
458+
}
459+
460+
/**
461+
* An EndpointFilterCharacteristic that indicates that an endpoint is unlikely to be a sink of any type.
462+
* Replaces https://github.com/github/codeql/blob/387e57546bf7352f7c1cfe781daa1a3799b7063e/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/StandardEndpointFilters.qll#LL15C24-L15C24
463+
*/
464+
abstract class StandardEndpointFilterCharacteristic extends EndpointFilterCharacteristic {
465+
// As in the parent class, the characteristic predicate is unconstrained (`any()`); subclasses choose the label.
bindingset[this]
466+
StandardEndpointFilterCharacteristic() { any() }
467+
468+
// Shared by every standard filter: holds only when `endpointClass` is a `NegativeType`, the indicator is
// positive, and the confidence equals `mediumConfidence()`.
override predicate getImplications(
469+
EndpointType endpointClass, boolean isPositiveIndicator, float confidence
470+
) {
471+
endpointClass instanceof NegativeType and
472+
isPositiveIndicator = true and
473+
confidence = mediumConfidence()
474+
}
475+
}
476+
477+
private class IsArgumentToModeledFunctionCharacteristic extends StandardEndpointFilterCharacteristic {
478+
IsArgumentToModeledFunctionCharacteristic() { this = "argument to modeled function" }
479+
480+
// Holds for `n` when it is an argument of an invocation that has at least one argument already
// recognised by `CoreKnowledge`.
override predicate getEndpoints(DataFlow::Node n) {
481+
exists(DataFlow::InvokeNode invk, DataFlow::Node known |
482+
invk.getAnArgument() = n and
483+
// `known` is any argument of the same invocation; it may be `n` itself or a sibling argument.
invk.getAnArgument() = known and
484+
(
485+
CoreKnowledge::isKnownLibrarySink(known)
486+
or
487+
CoreKnowledge::isKnownStepSrc(known)
488+
or
489+
CoreKnowledge::isOtherModeledArgument(known, _)
490+
)
491+
)
492+
}
493+
}
494+
495+
private class IsArgumentToSinklessLibraryCharacteristic extends StandardEndpointFilterCharacteristic {
496+
IsArgumentToSinklessLibraryCharacteristic() { this = "argument to sinkless library" }
497+
498+
// Holds for `n` when it is an argument of an invocation of one of the listed modules, or of a property
// read on such a module import.
override predicate getEndpoints(DataFlow::Node n) {
499+
exists(DataFlow::InvokeNode invk, DataFlow::SourceNode commonSafeLibrary, string libraryName |
500+
// Hard-coded list of module names this filter covers.
libraryName = ["slugify", "striptags", "marked"]
501+
|
502+
commonSafeLibrary = DataFlow::moduleImport(libraryName) and
503+
// Covers both invoking the import directly and invoking a property read from it.
invk = [commonSafeLibrary, commonSafeLibrary.getAPropertyRead()].getAnInvocation() and
504+
n = invk.getAnArgument()
505+
)
506+
}
507+
}
508+
509+
private class IsSanitizerCharacteristic extends StandardEndpointFilterCharacteristic {
510+
IsSanitizerCharacteristic() { this = "sanitizer" }
511+
512+
// Holds for `n` when it is an argument of a call whose callee name matches the pattern below.
override predicate getEndpoints(DataFlow::Node n) {
513+
exists(DataFlow::CallNode call | n = call.getAnArgument() |
514+
// Case-insensitive (`(?i)`); the surrounding `.*` allow the keyword anywhere in the callee name.
call.getCalleeName().regexpMatch("(?i).*(escape|valid(ate)?|sanitize|purify).*")
515+
)
516+
}
517+
}
518+
519+
private class IsPredicateCharacteristic extends StandardEndpointFilterCharacteristic {
520+
IsPredicateCharacteristic() { this = "predicate" }
521+
522+
// Holds for `n` when it is an argument of a call whose callee name matches the pattern below.
override predicate getEndpoints(DataFlow::Node n) {
523+
exists(DataFlow::CallNode call | n = call.getAnArgument() |
524+
// Matches names starting with `equals`, or with `is`/`has`/`can` followed by `_` or an uppercase letter.
// This pattern is case-sensitive (no `(?i)` flag).
// NOTE(review): the empty alternative in `(|is|has|can)` also matches any name that simply starts
// with `_` or an uppercase letter - confirm this is intended.
call.getCalleeName().regexpMatch("(equals|(|is|has|can)(_|[A-Z])).*")
525+
)
526+
}
527+
}
528+
529+
private class IsHashCharacteristic extends StandardEndpointFilterCharacteristic {
530+
IsHashCharacteristic() { this = "hash" }
531+
532+
// Holds for `n` when it is an argument of a call whose callee name matches the pattern below.
override predicate getEndpoints(DataFlow::Node n) {
533+
exists(DataFlow::CallNode call | n = call.getAnArgument() |
534+
// Anchored, case-insensitive: the whole name must be `sha` plus optional digits, `md5`, or `hash`.
call.getCalleeName().regexpMatch("(?i)^(sha\\d*|md5|hash)$")
535+
)
536+
}
537+
}
538+
539+
private class IsNumericCharacteristic extends StandardEndpointFilterCharacteristic {
540+
IsNumericCharacteristic() { this = "numeric" }
541+
542+
// Delegates to `isReadFrom` with a pattern that contains "index".
// NOTE(review): `isReadFrom` is not defined in the visible source (presumably from the
// SyntacticHeuristics import) - confirm its matching semantics. The label says "numeric" but the
// pattern only looks for "index".
override predicate getEndpoints(DataFlow::Node n) { isReadFrom(n, ".*index.*") }
543+
}
544+
545+
private class InIrrelevantFileCharacteristic extends StandardEndpointFilterCharacteristic {
546+
// The file category this characteristic instance stands for; tied to the label in the characteristic predicate.
private string category;
547+
548+
// One characteristic value exists per listed category, labelled "in <category> file".
InIrrelevantFileCharacteristic() {
549+
this = "in " + category + " file" and category = ["externs", "generated", "library", "test"]
550+
}
551+
552+
// Holds for `n` when `ClassifyFiles::classify` relates the file containing `n` to this instance's `category`.
override predicate getEndpoints(DataFlow::Node n) {
553+
// Ignore candidate sinks within externs, generated, library, and test code
554+
ClassifyFiles::classify(n.getFile(), category) and
555+
// NOTE(review): this repeats the constraint already stated in the characteristic predicate above.
this = "in " + category + " file"
556+
}
557+
}

javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointDataTraining.qll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,10 @@ query predicate trainingEndpoints(
7373
c instanceof LikelyNotASinkCharacteristic
7474
)
7575
) and
76+
// Don't surface endpoint filters as characteristics, because they were previously not surfaced.
77+
// TODO: Experiment with surfacing these to the modeling code by removing the following line (and then make
78+
// EndpointFilterCharacteristic private).
79+
not characteristic instanceof EndpointFilterCharacteristic and
7680
(
7781
// If the list of characteristics includes positive indicators with high confidence for this class, select this as a
7882
// training sample belonging to the class.
@@ -188,6 +192,10 @@ query predicate reformattedTrainingEndpoints(
188192
confidence3 >= characteristic3.getHighConfidenceThreshold() and
189193
not posClass instanceof NegativeType
190194
) and
195+
// Don't surface endpoint filters as notASinkReasons, because they were previously not surfaced.
196+
// TODO: Experiment with surfacing these to the modeling code by removing the following line (and then make
197+
// EndpointFilterCharacteristic private).
198+
not exists(EndpointFilterCharacteristic filterCharacteristic | value = filterCharacteristic) and
191199
valueType = "string"
192200
)
193201
)

0 commit comments

Comments
 (0)