|
12 | 12 | * @tags internal extract automodel application-mode candidates
|
13 | 13 | */
|
14 | 14 |
|
| 15 | +import java |
15 | 16 | private import AutomodelApplicationModeCharacteristics
|
16 | 17 | private import AutomodelJavaUtil
|
17 | 18 |
|
| 19 | +/** |
| 20 | + * Gets one of at most `limit` sampled endpoints whose metadata matches the given `package`, `type`, `subtypes`, `name`, `signature` and `input`. |
| 21 | + * |
| 22 | + * The main purpose of this helper predicate is to avoid selecting too many candidates, as this may |
| 23 | + * cause the SARIF file to exceed the maximum size limit. |
| 24 | + */ |
| 25 | +bindingset[limit] |
| 26 | +private Endpoint getSampleForSignature( |
| 27 | + int limit, string package, string type, string subtypes, string name, string signature, |
| 28 | + string input |
| 29 | +) { |
| 30 | + exists(int n, int num_endpoints, ApplicationModeMetadataExtractor meta | |
| 31 | + num_endpoints = |
| 32 | + count(Endpoint e | meta.hasMetadata(e, package, type, subtypes, name, signature, input)) |
| 33 | + | |
| 34 | + result = |
| 35 | + rank[n](Endpoint e, Location loc | |
| 36 | + loc = e.getLocation() and |
| 37 | + meta.hasMetadata(e, package, type, subtypes, name, signature, input) |
| 38 | + | |
| 39 | + e |
| 40 | + order by |
| 41 | + loc.getFile().getAbsolutePath(), loc.getStartLine(), loc.getStartColumn(), |
| 42 | + loc.getEndLine(), loc.getEndColumn() |
| 43 | + ) and |
| 44 | + // To avoid samples that are too close together (the ranking above goes by file path first), we |
| 45 | + // select `limit` evenly spaced ranks, shift each by a randomly chosen prime offset (46337) so the |
| 46 | + // first endpoint is not always included, and reduce modulo the number of endpoints. Finally, we |
| 47 | + // add 1 to the result, as ranking results in a 1-indexed relation. |
| 48 | + // NOTE(review): if num_endpoints < limit the stride looks like it is 0 (a single sample) - confirm. |
| 49 | + n = 1 + (([0 .. limit - 1] * (num_endpoints / limit).floor() + 46337) % num_endpoints) |
| 50 | + ) |
| 51 | +} |
| 52 | + |
18 | 53 | from
|
19 | 54 | Endpoint endpoint, string message, ApplicationModeMetadataExtractor meta, DollarAtString package,
|
20 | 55 | DollarAtString type, DollarAtString subtypes, DollarAtString name, DollarAtString signature,
|
|
23 | 58 | not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
|
24 | 59 | u.appliesToEndpoint(endpoint)
|
25 | 60 | ) and
|
| 61 | + endpoint = getSampleForSignature(9, package, type, subtypes, name, signature, input) and |
26 | 62 | // If a node is already a known sink for any of our existing ATM queries and is already modeled as a MaD sink, we
|
27 | 63 | // don't include it as a candidate. Otherwise, we might include it as a candidate for query A, but the model will
|
28 | 64 | // label it as a sink for one of the sink types of query B, for which it's already a known sink. This would result in
|
|
0 commit comments