Skip to content

Commit c740894

Browse files
committed
Python: Move MatchUnpacking to own file
I had hoped that git would be able to see this as a rename, and therefore I haven't done autoformat
1 parent a43f3a2 commit c740894

File tree

2 files changed

+316
-314
lines changed

2 files changed

+316
-314
lines changed

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 3 additions & 314 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ private import semmle.python.dataflow.new.internal.ImportStar
1212
//
1313
// This matches behavior in C#.
1414
private import semmle.python.Frameworks
15+
// part of the implementation for this module has been spread over multiple files to
16+
// make it more digestible.
17+
import MatchUnpacking
1518

1619
/** Gets the callable in which this node occurs. */
1720
DataFlowCallable nodeGetEnclosingCallable(Node n) { result = n.getEnclosingCallable() }
@@ -1659,320 +1662,6 @@ module IterableUnpacking {
16591662

16601663
import IterableUnpacking
16611664

1662-
/**
1663-
* There are a number of patterns available for the match statement.
1664-
* Each one transfers data and content differently to its parts.
1665-
*
1666-
* Furthermore, given a successful match, we can infer some data about
1667-
* the subject. Consider the example:
1668-
* ```python
1669-
* match choice:
1670-
* case 'Y':
1671-
* ...body
1672-
* ```
1673-
* Inside `body`, we know that `choice` has the value `'Y'`.
1674-
*
1675-
* A similar thing happens with the "as pattern". Consider the example:
1676-
* ```python
1677-
* match choice:
1678-
* case ('y'|'Y') as c:
1679-
* ...body
1680-
* ```
1681-
* By the binding rules, there is data flow from `choice` to `c`. But we
1682-
* can infer the value of `c` to be either `'y'` or `'Y'` if the match succeeds.
1683-
*
1684-
* We will treat such inferences separately as guards. First we will model the data flow
1685-
* stemming from the bindings and the matching of shape. Below, 'subject' is not necessarily the
1686-
* top-level subject of the match, but rather the part recursively matched by the current pattern.
1687-
* For instance, in the example:
1688-
* ```python
1689-
* match command:
1690-
* case ('quit' as c) | ('go', ('up'|'down') as c):
1691-
* ...body
1692-
* ```
1693-
* `command` is the subject of the first as-pattern, while the second component of `command`
1694-
* is the subject of the second as-pattern. As such, 'subject' refers to the pattern under evaluation.
1695-
*
1696-
* - as pattern: subject flows to alias as well as to the interior pattern
1697-
* - or pattern: subject flows to each alternative
1698-
* - literal pattern: flow from the literal to the pattern, to add information
1699-
* - capture pattern: subject flows to the variable
1700-
* - wildcard pattern: no flow
1701-
* - value pattern: flow from the value to the pattern, to add information
1702-
* - sequence pattern: each element reads from subject at the associated index
1703-
* - star pattern: subject flows to the variable, possibly via a conversion
1704-
* - mapping pattern: each value reads from subject at the associated key
1705-
* - double star pattern: subject flows to the variable, possibly via a conversion
1706-
* - key-value pattern: the value reads from the subject at the key (see mapping pattern)
1707-
* - class pattern: all keywords read the appropriate attribute from the subject
1708-
* - keyword pattern: the appropriate attribute is read from the subject (see class pattern)
1709-
*
1710-
* Inside the class pattern, we also find positional arguments. They are converted to
1711-
* keyword arguments using the `__match_args__` attribute on the class. We do not
1712-
* currently model this.
1713-
*/
1714-
module MatchUnpacking {
1715-
/**
1716-
* Holds when there is flow from the subject `nodeFrom` to the (top-level) pattern `nodeTo` of a `match` statement.
1717-
*
1718-
* The subject of a match flows to each top-level pattern
1719-
* (a pattern directly under a `case` statement).
1720-
*
1721-
* We could consider a model closer to use-use-flow, where the subject
1722-
* only flows to the first top-level pattern and from there to the
1723-
* following ones.
1724-
*/
1725-
predicate matchSubjectFlowStep(Node nodeFrom, Node nodeTo) {
1726-
exists(MatchStmt match, Expr subject, Pattern target |
1727-
subject = match.getSubject() and
1728-
target = match.getCase(_).(Case).getPattern()
1729-
|
1730-
nodeFrom.asExpr() = subject and
1731-
nodeTo.asCfgNode().getNode() = target
1732-
)
1733-
}
1734-
1735-
/**
1736-
* as pattern: subject flows to alias as well as to the interior pattern
1737-
* syntax (toplevel): `case pattern as alias:`
1738-
*/
1739-
predicate matchAsFlowStep(Node nodeFrom, Node nodeTo) {
1740-
exists(MatchAsPattern subject, Name alias | alias = subject.getAlias() |
1741-
// We make the subject flow to the interior pattern via the alias.
1742-
// That way, information can propagate from the interior pattern to the alias.
1743-
//
1744-
// the subject flows to the interior pattern
1745-
nodeFrom.asCfgNode().getNode() = subject and
1746-
nodeTo.asCfgNode().getNode() = subject.getPattern()
1747-
or
1748-
// the interior pattern flows to the alias
1749-
nodeFrom.asCfgNode().getNode() = subject.getPattern() and
1750-
nodeTo.asVar().getDefinition().(PatternAliasDefinition).getDefiningNode().getNode() = alias
1751-
)
1752-
}
1753-
1754-
/**
1755-
* or pattern: subject flows to each alternative
1756-
* syntax (toplevel): `case alt1 | alt2:`
1757-
*/
1758-
predicate matchOrFlowStep(Node nodeFrom, Node nodeTo) {
1759-
exists(MatchOrPattern subject, Pattern pattern | pattern = subject.getAPattern() |
1760-
nodeFrom.asCfgNode().getNode() = subject and
1761-
nodeTo.asCfgNode().getNode() = pattern
1762-
)
1763-
}
1764-
1765-
/**
1766-
* literal pattern: flow from the literal to the pattern, to add information
1767-
* syntax (toplevel): `case literal:`
1768-
*/
1769-
predicate matchLiteralFlowStep(Node nodeFrom, Node nodeTo) {
1770-
exists(MatchLiteralPattern pattern, Expr literal | literal = pattern.getLiteral() |
1771-
nodeFrom.asExpr() = literal and
1772-
nodeTo.asCfgNode().getNode() = pattern
1773-
)
1774-
}
1775-
1776-
/**
1777-
* capture pattern: subject flows to the variable
1778-
* syntax (toplevel): `case var:`
1779-
*/
1780-
predicate matchCaptureFlowStep(Node nodeFrom, Node nodeTo) {
1781-
exists(MatchCapturePattern capture, Name var | capture.getVariable() = var |
1782-
nodeFrom.asCfgNode().getNode() = capture and
1783-
nodeTo.asVar().getDefinition().(PatternCaptureDefinition).getDefiningNode().getNode() = var
1784-
)
1785-
}
1786-
1787-
/**
1788-
* value pattern: flow from the value to the pattern, to add information
1789-
* syntax (toplevel): `case Dotted.value:`
1790-
*/
1791-
predicate matchValueFlowStep(Node nodeFrom, Node nodeTo) {
1792-
exists(MatchValuePattern pattern, Expr value | value = pattern.getValue() |
1793-
nodeFrom.asExpr() = value and
1794-
nodeTo.asCfgNode().getNode() = pattern
1795-
)
1796-
}
1797-
1798-
/**
1799-
* sequence pattern: each element reads from subject at the associated index
1800-
* syntax (toplevel): `case [a, b]:`
1801-
*/
1802-
predicate matchSequenceReadStep(Node nodeFrom, Content c, Node nodeTo) {
1803-
exists(MatchSequencePattern subject, int index, Pattern element |
1804-
element = subject.getPattern(index)
1805-
|
1806-
nodeFrom.asCfgNode().getNode() = subject and
1807-
nodeTo.asCfgNode().getNode() = element and
1808-
(
1809-
// tuple content
1810-
c.(TupleElementContent).getIndex() = index
1811-
or
1812-
// list content
1813-
c instanceof ListElementContent
1814-
// set content is excluded from sequence patterns,
1815-
// see https://www.python.org/dev/peps/pep-0635/#sequence-patterns
1816-
)
1817-
)
1818-
}
1819-
1820-
/**
1821-
* star pattern: subject flows to the variable, possibly via a conversion
1822-
* syntax (toplevel): `case *var:`
1823-
*
1824-
* We decompose this flow into a read step and a store step. The read step
1825-
* reads both tuple and list content, the store step only stores list content.
1826-
* This way, we convert all content to list content.
1827-
*
1828-
* This is the read step.
1829-
*/
1830-
predicate matchStarReadStep(Node nodeFrom, Content c, Node nodeTo) {
1831-
exists(MatchSequencePattern subject, int index, MatchStarPattern star |
1832-
star = subject.getPattern(index)
1833-
|
1834-
nodeFrom.asCfgNode().getNode() = subject and
1835-
nodeTo = TStarPatternElementNode(star) and
1836-
(
1837-
// tuple content
1838-
c.(TupleElementContent).getIndex() >= index
1839-
or
1840-
// list content
1841-
c instanceof ListElementContent
1842-
// set content is excluded from sequence patterns,
1843-
// see https://www.python.org/dev/peps/pep-0635/#sequence-patterns
1844-
)
1845-
)
1846-
}
1847-
1848-
/**
1849-
* star pattern: subject flows to the variable, possibly via a conversion
1850-
* syntax (toplevel): `case *var:`
1851-
*
1852-
* We decompose this flow into a read step and a store step. The read step
1853-
* reads both tuple and list content, the store step only stores list content.
1854-
* This way, we convert all content to list content.
1855-
*
1856-
* This is the store step.
1857-
*/
1858-
predicate matchStarStoreStep(Node nodeFrom, Content c, Node nodeTo) {
1859-
exists(MatchStarPattern star |
1860-
nodeFrom = TStarPatternElementNode(star) and
1861-
nodeTo.asCfgNode().getNode() = star.getTarget() and
1862-
c instanceof ListElementContent
1863-
)
1864-
}
1865-
1866-
/**
1867-
* mapping pattern: each value reads from subject at the associated key
1868-
* syntax (toplevel): `case {"color": c, "height": x}:`
1869-
*/
1870-
predicate matchMappingReadStep(Node nodeFrom, Content c, Node nodeTo) {
1871-
exists(
1872-
MatchMappingPattern subject, MatchKeyValuePattern keyValue, MatchLiteralPattern key,
1873-
Pattern value
1874-
|
1875-
keyValue = subject.getAMapping() and
1876-
key = keyValue.getKey() and
1877-
value = keyValue.getValue()
1878-
|
1879-
nodeFrom.asCfgNode().getNode() = subject and
1880-
nodeTo.asCfgNode().getNode() = value and
1881-
c.(DictionaryElementContent).getKey() = key.getLiteral().(StrConst).getText()
1882-
)
1883-
}
1884-
1885-
/**
1886-
* double star pattern: subject flows to the variable, possibly via a conversion
1887-
* syntax (toplevel): `case {**var}:`
1888-
*
1889-
* Dictionary content flows to the double star, but all mentioned keys in the
1890-
* mapping pattern should be cleared.
1891-
*/
1892-
predicate matchMappingFlowStep(Node nodeFrom, Node nodeTo) {
1893-
exists(MatchMappingPattern subject, MatchDoubleStarPattern dstar |
1894-
dstar = subject.getAMapping()
1895-
|
1896-
nodeFrom.asCfgNode().getNode() = subject and
1897-
nodeTo.asCfgNode().getNode() = dstar.getTarget()
1898-
)
1899-
}
1900-
1901-
/**
1902-
* Keys that are mentioned in a mapping pattern will not be available
1903-
* to a double star pattern in the same mapping pattern.
1904-
*/
1905-
predicate matchMappingClearStep(Node n, Content c) {
1906-
exists(
1907-
MatchMappingPattern subject, MatchKeyValuePattern keyValue, MatchLiteralPattern key,
1908-
MatchDoubleStarPattern dstar
1909-
|
1910-
keyValue = subject.getAMapping() and
1911-
key = keyValue.getKey() and
1912-
dstar = subject.getAMapping()
1913-
|
1914-
n.asCfgNode().getNode() = dstar.getTarget() and
1915-
c.(DictionaryElementContent).getKey() = key.getLiteral().(StrConst).getText()
1916-
)
1917-
}
1918-
1919-
/**
1920-
* class pattern: all keywords read the appropriate attribute from the subject
1921-
* syntax (toplevel): `case ClassName(attr = val):`
1922-
*/
1923-
predicate matchClassReadStep(Node nodeFrom, Content c, Node nodeTo) {
1924-
exists(MatchClassPattern subject, MatchKeywordPattern keyword, Name attr, Pattern value |
1925-
keyword = subject.getKeyword(_) and
1926-
attr = keyword.getAttribute() and
1927-
value = keyword.getValue()
1928-
|
1929-
nodeFrom.asCfgNode().getNode() = subject and
1930-
nodeTo.asCfgNode().getNode() = value and
1931-
c.(AttributeContent).getAttribute() = attr.getId()
1932-
)
1933-
}
1934-
1935-
/** All flow steps associated with match. */
1936-
predicate matchFlowStep(Node nodeFrom, Node nodeTo) {
1937-
matchSubjectFlowStep(nodeFrom, nodeTo)
1938-
or
1939-
matchAsFlowStep(nodeFrom, nodeTo)
1940-
or
1941-
matchOrFlowStep(nodeFrom, nodeTo)
1942-
or
1943-
matchLiteralFlowStep(nodeFrom, nodeTo)
1944-
or
1945-
matchCaptureFlowStep(nodeFrom, nodeTo)
1946-
or
1947-
matchValueFlowStep(nodeFrom, nodeTo)
1948-
or
1949-
matchMappingFlowStep(nodeFrom, nodeTo)
1950-
}
1951-
1952-
/** All read steps associated with match. */
1953-
predicate matchReadStep(Node nodeFrom, Content c, Node nodeTo) {
1954-
matchClassReadStep(nodeFrom, c, nodeTo)
1955-
or
1956-
matchSequenceReadStep(nodeFrom, c, nodeTo)
1957-
or
1958-
matchMappingReadStep(nodeFrom, c, nodeTo)
1959-
or
1960-
matchStarReadStep(nodeFrom, c, nodeTo)
1961-
}
1962-
1963-
/** All store steps associated with match. */
1964-
predicate matchStoreStep(Node nodeFrom, Content c, Node nodeTo) {
1965-
matchStarStoreStep(nodeFrom, c, nodeTo)
1966-
}
1967-
1968-
/**
1969-
* All clear steps associated with match.
1970-
*/
1971-
predicate matchClearStep(Node n, Content c) { matchMappingClearStep(n, c) }
1972-
}
1973-
1974-
import MatchUnpacking
1975-
19761665
/** Data flows from a sequence to a call to `pop` on the sequence. */
19771666
predicate popReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
19781667
// set.pop or list.pop

0 commit comments

Comments
 (0)