@@ -239,13 +239,12 @@ module ApplicationCandidatesImpl implements SharedCharacteristics::CandidateSig
239
239
// Sanitizers are currently not modeled in MaD. TODO: check whether this has a large negative impact.
240
240
predicate isSanitizer ( Endpoint e , EndpointType t ) {
241
241
exists ( t ) and
242
- (
243
- e .asNode ( ) .getType ( ) instanceof BoxedType
244
- or
245
- e .asNode ( ) .getType ( ) instanceof PrimitiveType
246
- or
247
- e .asNode ( ) .getType ( ) instanceof NumberType
248
- )
242
+ AutomodelJavaUtil:: isUnexploitableType ( [
243
+ // for most endpoints, we can get the type from the node
244
+ e .asNode ( ) .getType ( ) ,
245
+ // but not for calls to void methods, where we need to go via the AST
246
+ e .asTop ( ) .( Expr ) .getType ( )
247
+ ] )
249
248
or
250
249
t instanceof AutomodelEndpointTypes:: PathInjectionSinkType and
251
250
e .asNode ( ) instanceof PathSanitizer:: PathInjectionSanitizer
@@ -372,62 +371,124 @@ class ApplicationModeMetadataExtractor extends string {
372
371
}
373
372
}
374
373
374
+ /**
375
+ * Holds if the given `endpoint` should be considered a candidate for the `extensibleType`: it satisfies `CharacteristicsImpl::isCandidate`, no `UninterestingToModelCharacteristic` applies to it, and `AutomodelJavaUtil::includeAutomodelCandidate` holds for its package, type, name and signature.
376
+ *
377
+ * The remaining parameters are the endpoint's metadata as bound by `ApplicationModeMetadataExtractor.hasMetadata`.
378
+ */
379
+ predicate isCandidate (
380
+ Endpoint endpoint , string package , string type , string subtypes , string name , string signature ,
381
+ string input , string output , string isVarargs , string extensibleType , string alreadyAiModeled
382
+ ) {
383
+ CharacteristicsImpl:: isCandidate ( endpoint , _) and
384
+ not exists ( CharacteristicsImpl:: UninterestingToModelCharacteristic u |
385
+ u .appliesToEndpoint ( endpoint )
386
+ ) and
387
+ any ( ApplicationModeMetadataExtractor meta )
388
+ .hasMetadata ( endpoint , package , type , subtypes , name , signature , input , output , isVarargs ,
389
+ alreadyAiModeled , extensibleType ) and
390
+ // If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
391
+ // candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
392
+ // already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
393
+ // assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
394
+ // types, and we don't need to reexamine it. The check below keeps only endpoints whose `alreadyAiModeled` value is empty or contains `ai-`.
395
+ alreadyAiModeled .matches ( [ "" , "%ai-%" ] ) and
396
+ AutomodelJavaUtil:: includeAutomodelCandidate ( package , type , name , signature )
397
+ }
398
+
399
+ /**
399
+ * Holds if the given `endpoint` is a negative example for the `extensibleType`
400
+ * because of the `characteristic`, with `confidence` being the lowest confidence of that characteristic's negative implications across the endpoint's potential types.
401
+ *
402
+ * The remaining parameters are the endpoint's metadata as bound by `ApplicationModeMetadataExtractor.hasMetadata`.
403
+ */
404
+ predicate isNegativeExample (
405
+ Endpoint endpoint , EndpointCharacteristic characteristic , float confidence , string package ,
406
+ string type , string subtypes , string name , string signature , string input , string output ,
407
+ string isVarargsArray , string extensibleType
408
+ ) {
409
+ characteristic .appliesToEndpoint ( endpoint ) and
410
+ // the node is known not to be an endpoint of any of its potential types
411
+ forall ( AutomodelEndpointTypes:: EndpointType tp |
412
+ tp = CharacteristicsImpl:: getAPotentialType ( endpoint )
413
+ |
414
+ characteristic .hasImplications ( tp , false , _)
415
+ ) and
416
+ // `confidence` is the lowest confidence across all potential endpoint types, and it must be at least highConfidence
417
+ confidence =
418
+ min ( float c |
419
+ characteristic .hasImplications ( CharacteristicsImpl:: getAPotentialType ( endpoint ) , false , c )
420
+ ) and
421
+ confidence >= SharedCharacteristics:: highConfidence ( ) and
422
+ any ( ApplicationModeMetadataExtractor meta )
423
+ .hasMetadata ( endpoint , package , type , subtypes , name , signature , input , output ,
424
+ isVarargsArray , _, extensibleType ) and
425
+ // It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
426
+ // as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here: any other applicable characteristic with a positive implication at maximalConfidence or above disqualifies the endpoint.
427
+ not exists ( EndpointCharacteristic characteristic2 , float confidence2 |
428
+ characteristic2 != characteristic
429
+ |
430
+ characteristic2 .appliesToEndpoint ( endpoint ) and
431
+ confidence2 >= SharedCharacteristics:: maximalConfidence ( ) and
432
+ characteristic2
433
+ .hasImplications ( CharacteristicsImpl:: getAPotentialType ( endpoint ) , true , confidence2 )
434
+ )
435
+ }
437
+
438
+ /**
439
+ * Holds if the given `endpoint` is a positive example for the `endpointType`, i.e. `CharacteristicsImpl::isKnownAs` holds for it and `CharacteristicsImpl::getRelatedLocationOrCandidate` has a result for it with `CallContext()`.
440
+ *
441
+ * The remaining parameters are the endpoint's metadata as bound by `ApplicationModeMetadataExtractor.hasMetadata`.
442
+ */
443
+ predicate isPositiveExample (
444
+ Endpoint endpoint , string endpointType , string package , string type , string subtypes , string name ,
445
+ string signature , string input , string output , string isVarargsArray , string extensibleType
446
+ ) {
447
+ any ( ApplicationModeMetadataExtractor meta )
448
+ .hasMetadata ( endpoint , package , type , subtypes , name , signature , input , output ,
449
+ isVarargsArray , _, extensibleType ) and
450
+ CharacteristicsImpl:: isKnownAs ( endpoint , endpointType , _) and
451
+ exists ( CharacteristicsImpl:: getRelatedLocationOrCandidate ( endpoint , CallContext ( ) ) )
452
+ }
453
+
375
454
/*
376
455
* EndpointCharacteristic classes that are specific to Automodel for Java.
377
456
*/
378
457
379
458
/**
380
- * A negative characteristic that indicates that parameters of an is-style boolean method should not be considered sinks,
381
- * and its return value should not be considered a source.
459
+ * A negative characteristic that indicates that parameters of an is-style boolean method should not be considered sinks.
382
460
*
383
461
* A sink is highly unlikely to be exploitable if its callable's name starts with `is` and the callable has a boolean return
384
462
* type (e.g. `isDirectory`). These kinds of calls normally only perform checks, and appear before the proper call that does
385
463
* the dangerous/interesting thing, so we want the latter to be modeled as the sink. Endpoints that are already known sinks are not affected.
386
464
*
387
465
* TODO: this might filter too much; it's possible that methods with more than one parameter contain interesting sinks.
388
466
*/
389
- private class UnexploitableIsCharacteristic extends CharacteristicsImpl:: NeitherSourceNorSinkCharacteristic
390
- {
467
+ private class UnexploitableIsCharacteristic extends CharacteristicsImpl:: NotASinkCharacteristic {
391
468
UnexploitableIsCharacteristic ( ) { this = "unexploitable (is-style boolean method)" }
392
469
393
470
override predicate appliesToEndpoint ( Endpoint e ) {
394
471
e .getCallable ( ) .getName ( ) .matches ( "is%" ) and
395
472
e .getCallable ( ) .getReturnType ( ) instanceof BooleanType and
396
- (
397
- e .getExtensibleType ( ) = "sinkModel" and
398
- not ApplicationCandidatesImpl:: isSink ( e , _, _)
399
- or
400
- e .getExtensibleType ( ) = "sourceModel" and
401
- not ApplicationCandidatesImpl:: isSource ( e , _, _) and
402
- e .getMaDOutput ( ) = "ReturnValue"
403
- )
473
+ not ApplicationCandidatesImpl:: isSink ( e , _, _)
404
474
}
405
475
}
406
476
407
477
/**
408
478
* A negative characteristic that indicates that parameters of an existence-checking boolean method should not be
409
- * considered sinks, and its return value should not be considered a source .
479
+ * considered sinks.
410
480
*
411
481
* A sink is highly unlikely to be exploitable if its callable's name is `exists` or `notExists` (compared case-insensitively) and the callable has a
412
482
* boolean return type. These kinds of calls normally only perform checks, and appear before the proper call that does the
413
483
* dangerous/interesting thing, so we want the latter to be modeled as the sink.
*
* NOTE(review): unlike `UnexploitableIsCharacteristic`, the added version of `appliesToEndpoint` below no longer contains a
* `not ApplicationCandidatesImpl::isSink(e, _, _)` condition, so it also applies to endpoints that are already known sinks.
* TODO confirm that dropping this condition is intended.
414
484
*/
415
- private class UnexploitableExistsCharacteristic extends CharacteristicsImpl:: NeitherSourceNorSinkCharacteristic
416
- {
485
+ private class UnexploitableExistsCharacteristic extends CharacteristicsImpl:: NotASinkCharacteristic {
417
486
UnexploitableExistsCharacteristic ( ) { this = "unexploitable (existence-checking boolean method)" }
418
487
419
488
override predicate appliesToEndpoint ( Endpoint e ) {
420
- exists ( Callable callable |
421
- callable = e .getCallable ( ) and
489
+ exists ( Callable callable | callable = e .getCallable ( ) |
422
490
callable .getName ( ) .toLowerCase ( ) = [ "exists" , "notexists" ] and
423
491
callable .getReturnType ( ) instanceof BooleanType
424
- |
425
- e .getExtensibleType ( ) = "sinkModel" and
426
- not ApplicationCandidatesImpl:: isSink ( e , _, _)
427
- or
428
- e .getExtensibleType ( ) = "sourceModel" and
429
- not ApplicationCandidatesImpl:: isSource ( e , _, _) and
430
- e .getMaDOutput ( ) = "ReturnValue"
431
492
)
432
493
}
433
494
}
0 commit comments