Skip to content

Commit 943b2a2

Browse files
committed
Python: Highlight problem with flow summaries and TAttributeContent
1 parent c85d99d commit 943b2a2

File tree

6 files changed

+62
-1
lines changed

6 files changed

+62
-1
lines changed

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import LocalSources
1010
private import semmle.python.essa.SsaCompute
1111
private import semmle.python.dataflow.new.internal.ImportStar
1212
private import FlowSummaryImpl as FlowSummaryImpl
13+
private import semmle.python.frameworks.data.ModelsAsData
1314

1415
/**
1516
* IPA type for data flow nodes.
@@ -597,7 +598,30 @@ newtype TContent =
597598
/** An element of a dictionary under any key. */
598599
TDictionaryElementAnyContent() or
599600
/** An object attribute. */
600-
TAttributeContent(string attr) { attr = any(Attribute a).getName() }
601+
TAttributeContent(string attr) {
602+
attr = any(Attribute a).getName()
603+
or
604+
// Flow summaries that target attributes rely on a TAttributeContent being
605+
// available. However, since the code above only constructs a TAttributeContent
606+
// based on the attribute names seen in the DB, we can end up in a scenario where
607+
// flow summaries don't work due to missing TAttributeContent. To get around this,
608+
// we need to add the attribute names used by flow summaries. This needs to be done
609+
// both for the summaries written in QL and the ones written in data-extension
610+
// files.
611+
//
612+
// 1) Summaries in QL. Sadly the following code leads to non-monotonic recursion
613+
// name = any(AccessPathToken a).getAnArgument("Attribute")
614+
// instead we use a qltest to alert if we write a new summary in QL that uses an
615+
// attribute -- see
616+
// python/ql/test/experimental/dataflow/summaries-checks/missing-attribute-content.ql
617+
none() // to be filled out in next commit
618+
or
619+
//
620+
// 2) summaries in data-extension files
621+
exists(string input, string output | ModelOutput::relevantSummaryModel(_, _, input, output, _) |
622+
attr = [input, output].regexpFind("(?<=(^|\\.)Attribute\\[)[^\\]]+(?=\\])", _, _).trim()
623+
)
624+
}
601625

602626
/**
603627
* A data-flow value can have associated content.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# an empty file, since we want the test to run on an empty db
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
| compiled re.Match | Argument[self].Attribute[pattern] | Attribute[pattern] |
2+
| compiled re.Match | ReturnValue.Attribute[re].Attribute[pattern] | Attribute[pattern] |
3+
| compiled re.Match | ReturnValue.Attribute[re].Attribute[pattern] | Attribute[re] |
4+
| compiled re.Match | ReturnValue.Attribute[string] | Attribute[string] |
5+
| compiled re.subn | ReturnValue.TupleElement[0] | TupleElement[0] |
6+
| re.Match | ReturnValue.Attribute[re].Attribute[pattern] | Attribute[pattern] |
7+
| re.Match | ReturnValue.Attribute[re].Attribute[pattern] | Attribute[re] |
8+
| re.Match | ReturnValue.Attribute[string] | Attribute[string] |
9+
| re.Match.expand | Argument[self].Attribute[string] | Attribute[string] |
10+
| re.Match.group | Argument[self].Attribute[string] | Attribute[string] |
11+
| re.Match.groupdict | Argument[self].Attribute[string] | Attribute[string] |
12+
| re.Match.groups | Argument[self].Attribute[string] | Attribute[string] |
13+
| re.Pattern | ReturnValue.Attribute[pattern] | Attribute[pattern] |
14+
| re.subn | ReturnValue.TupleElement[0] | TupleElement[0] |
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
import python
2+
import semmle.python.dataflow.new.FlowSummary
3+
import semmle.python.dataflow.new.internal.FlowSummaryImpl
4+
5+
query predicate invalidSpecComponent(SummarizedCallable sc, string s, string c) {
6+
(sc.propagatesFlowExt(s, _, _) or sc.propagatesFlowExt(_, s, _)) and
7+
Private::External::invalidSpecComponent(s, c)
8+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
| The attribute "pattern" is not a valid TAttributeContent, please add it to the hardcoded list of TAttributeContent in the dataflow library. |
2+
| The attribute "re" is not a valid TAttributeContent, please add it to the hardcoded list of TAttributeContent in the dataflow library. |
3+
| The attribute "string" is not a valid TAttributeContent, please add it to the hardcoded list of TAttributeContent in the dataflow library. |
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import python
2+
import semmle.python.dataflow.new.FlowSummary
3+
import semmle.python.dataflow.new.internal.FlowSummaryImpl
4+
5+
from SummarizedCallable sc, string s, string c, string attr
6+
where
7+
(sc.propagatesFlowExt(s, _, _) or sc.propagatesFlowExt(_, s, _)) and
8+
Private::External::invalidSpecComponent(s, c) and
9+
c = "Attribute[" + attr + "]"
10+
select "The attribute \"" + attr +
11+
"\" is not a valid TAttributeContent, please add it to the hardcoded list of TAttributeContent in the dataflow library."

0 commit comments

Comments
 (0)