@@ -12,6 +12,9 @@ private import semmle.python.dataflow.new.internal.ImportStar
12
12
//
13
13
// This matches behavior in C#.
14
14
private import semmle.python.Frameworks
15
+ // part of the implementation for this module has been spread over multiple files to
16
+ // make it more digestible.
17
+ import MatchUnpacking
15
18
16
19
/** Gets the callable in which this node occurs. */
17
20
DataFlowCallable nodeGetEnclosingCallable ( Node n ) { result = n .getEnclosingCallable ( ) }
@@ -1659,320 +1662,6 @@ module IterableUnpacking {
1659
1662
1660
1663
import IterableUnpacking
1661
1664
1662
- /**
1663
- * There are a number of patterns available for the match statement.
1664
- * Each one transfers data and content differently to its parts.
1665
- *
1666
- * Furthermore, given a successful match, we can infer some data about
1667
- * the subject. Consider the example:
1668
- * ```python
1669
- * match choice:
1670
- * case 'Y':
1671
- * ...body
1672
- * ```
1673
- * Inside `body`, we know that `choice` has the value `'Y'`.
1674
- *
1675
- * A similar thing happens with the "as pattern". Consider the example:
1676
- * ```python
1677
- * match choice:
1678
- * case ('y'|'Y') as c:
1679
- * ...body
1680
- * ```
1681
- * By the binding rules, there is data flow from `choice` to `c`. But we
1682
- * can infer the value of `c` to be either `'y'` or `'Y'` if the match succeeds.
1683
- *
1684
- * We will treat such inferences separately as guards. First we will model the data flow
1685
- * stemming from the bindings and the matching of shape. Below, 'subject' is not necessarily the
1686
- * top-level subject of the match, but rather the part recursively matched by the current pattern.
1687
- * For instance, in the example:
1688
- * ```python
1689
- * match command:
1690
- * case ('quit' as c) | ('go', ('up'|'down') as c):
1691
- * ...body
1692
- * ```
1693
- * `command` is the subject of the first as-pattern, while the second component of `command`
1694
- * is the subject of the second as-pattern. As such, 'subject' refers to the pattern under evaluation.
1695
- *
1696
- * - as pattern: subject flows to alias as well as to the interior pattern
1697
- * - or pattern: subject flows to each alternative
1698
- * - literal pattern: flow from the literal to the pattern, to add information
1699
- * - capture pattern: subject flows to the variable
1700
- * - wildcard pattern: no flow
1701
- * - value pattern: flow from the value to the pattern, to add information
1702
- * - sequence pattern: each element reads from subject at the associated index
1703
- * - star pattern: subject flows to the variable, possibly via a conversion
1704
- * - mapping pattern: each value reads from subject at the associated key
1705
- * - double star pattern: subject flows to the variable, possibly via a conversion
1706
- * - key-value pattern: the value reads from the subject at the key (see mapping pattern)
1707
- * - class pattern: all keywords read the appropriate attribute from the subject
1708
- * - keyword pattern: the appropriate attribute is read from the subject (see class pattern)
1709
- *
1710
- * Inside the class pattern, we also find positional arguments. They are converted to
1711
- * keyword arguments using the `__match_args__` attribute on the class. We do not
1712
- * currently model this.
1713
- */
1714
- module MatchUnpacking {
1715
- /**
1716
- * Holds when there is flow from the subject `nodeFrom` to the (top-level) pattern `nodeTo` of a `match` statement.
1717
- *
1718
- * The subject of a match flows to each top-level pattern
1719
- * (a pattern directly under a `case` statement).
1720
- *
1721
- * We could consider a model closer to use-use-flow, where the subject
1722
- * only flows to the first top-level pattern and from there to the
1723
- * following ones.
1724
- */
1725
- predicate matchSubjectFlowStep ( Node nodeFrom , Node nodeTo ) {
1726
- exists ( MatchStmt match , Expr subject , Pattern target |
1727
- subject = match .getSubject ( ) and
1728
- target = match .getCase ( _) .( Case ) .getPattern ( )
1729
- |
1730
- nodeFrom .asExpr ( ) = subject and
1731
- nodeTo .asCfgNode ( ) .getNode ( ) = target
1732
- )
1733
- }
1734
-
1735
- /**
1736
- * as pattern: subject flows to alias as well as to the interior pattern
1737
- * syntax (toplevel): `case pattern as alias:`
1738
- */
1739
- predicate matchAsFlowStep ( Node nodeFrom , Node nodeTo ) {
1740
- exists ( MatchAsPattern subject , Name alias | alias = subject .getAlias ( ) |
1741
- // We make the subject flow to the alias via the interior pattern.
1742
- // That way, information can propagate from the interior pattern to the alias.
1743
- //
1744
- // the subject flows to the interior pattern
1745
- nodeFrom .asCfgNode ( ) .getNode ( ) = subject and
1746
- nodeTo .asCfgNode ( ) .getNode ( ) = subject .getPattern ( )
1747
- or
1748
- // the interior pattern flows to the alias
1749
- nodeFrom .asCfgNode ( ) .getNode ( ) = subject .getPattern ( ) and
1750
- nodeTo .asVar ( ) .getDefinition ( ) .( PatternAliasDefinition ) .getDefiningNode ( ) .getNode ( ) = alias
1751
- )
1752
- }
1753
-
1754
- /**
1755
- * or pattern: subject flows to each alternative
1756
- * syntax (toplevel): `case alt1 | alt2:`
1757
- */
1758
- predicate matchOrFlowStep ( Node nodeFrom , Node nodeTo ) {
1759
- exists ( MatchOrPattern subject , Pattern pattern | pattern = subject .getAPattern ( ) |
1760
- nodeFrom .asCfgNode ( ) .getNode ( ) = subject and
1761
- nodeTo .asCfgNode ( ) .getNode ( ) = pattern
1762
- )
1763
- }
1764
-
1765
- /**
1766
- * literal pattern: flow from the literal to the pattern, to add information
1767
- * syntax (toplevel): `case literal:`
1768
- */
1769
- predicate matchLiteralFlowStep ( Node nodeFrom , Node nodeTo ) {
1770
- exists ( MatchLiteralPattern pattern , Expr literal | literal = pattern .getLiteral ( ) |
1771
- nodeFrom .asExpr ( ) = literal and
1772
- nodeTo .asCfgNode ( ) .getNode ( ) = pattern
1773
- )
1774
- }
1775
-
1776
- /**
1777
- * capture pattern: subject flows to the variable
1778
- * syntax (toplevel): `case var:`
1779
- */
1780
- predicate matchCaptureFlowStep ( Node nodeFrom , Node nodeTo ) {
1781
- exists ( MatchCapturePattern capture , Name var | capture .getVariable ( ) = var |
1782
- nodeFrom .asCfgNode ( ) .getNode ( ) = capture and
1783
- nodeTo .asVar ( ) .getDefinition ( ) .( PatternCaptureDefinition ) .getDefiningNode ( ) .getNode ( ) = var
1784
- )
1785
- }
1786
-
1787
- /**
1788
- * value pattern: flow from the value to the pattern, to add information
1789
- * syntax (toplevel): `case Dotted.value:`
1790
- */
1791
- predicate matchValueFlowStep ( Node nodeFrom , Node nodeTo ) {
1792
- exists ( MatchValuePattern pattern , Expr value | value = pattern .getValue ( ) |
1793
- nodeFrom .asExpr ( ) = value and
1794
- nodeTo .asCfgNode ( ) .getNode ( ) = pattern
1795
- )
1796
- }
1797
-
1798
- /**
1799
- * sequence pattern: each element reads from subject at the associated index
1800
- * syntax (toplevel): `case [a, b]:`
1801
- */
1802
- predicate matchSequenceReadStep ( Node nodeFrom , Content c , Node nodeTo ) {
1803
- exists ( MatchSequencePattern subject , int index , Pattern element |
1804
- element = subject .getPattern ( index )
1805
- |
1806
- nodeFrom .asCfgNode ( ) .getNode ( ) = subject and
1807
- nodeTo .asCfgNode ( ) .getNode ( ) = element and
1808
- (
1809
- // tuple content
1810
- c .( TupleElementContent ) .getIndex ( ) = index
1811
- or
1812
- // list content
1813
- c instanceof ListElementContent
1814
- // set content is excluded from sequence patterns,
1815
- // see https://www.python.org/dev/peps/pep-0635/#sequence-patterns
1816
- )
1817
- )
1818
- }
1819
-
1820
- /**
1821
- * star pattern: subject flows to the variable, possibly via a conversion
1822
- * syntax (toplevel): `case *var:`
1823
- *
1824
- * We decompose this flow into a read step and a store step. The read step
1825
- * reads both tuple and list content, the store step only stores list content.
1826
- * This way, we convert all content to list content.
1827
- *
1828
- * This is the read step.
1829
- */
1830
- predicate matchStarReadStep ( Node nodeFrom , Content c , Node nodeTo ) {
1831
- exists ( MatchSequencePattern subject , int index , MatchStarPattern star |
1832
- star = subject .getPattern ( index )
1833
- |
1834
- nodeFrom .asCfgNode ( ) .getNode ( ) = subject and
1835
- nodeTo = TStarPatternElementNode ( star ) and
1836
- (
1837
- // tuple content
1838
- c .( TupleElementContent ) .getIndex ( ) >= index
1839
- or
1840
- // list content
1841
- c instanceof ListElementContent
1842
- // set content is excluded from sequence patterns,
1843
- // see https://www.python.org/dev/peps/pep-0635/#sequence-patterns
1844
- )
1845
- )
1846
- }
1847
-
1848
- /**
1849
- * star pattern: subject flows to the variable, possibly via a conversion
1850
- * syntax (toplevel): `case *var:`
1851
- *
1852
- * We decompose this flow into a read step and a store step. The read step
1853
- * reads both tuple and list content, the store step only stores list content.
1854
- * This way, we convert all content to list content.
1855
- *
1856
- * This is the store step.
1857
- */
1858
- predicate matchStarStoreStep ( Node nodeFrom , Content c , Node nodeTo ) {
1859
- exists ( MatchStarPattern star |
1860
- nodeFrom = TStarPatternElementNode ( star ) and
1861
- nodeTo .asCfgNode ( ) .getNode ( ) = star .getTarget ( ) and
1862
- c instanceof ListElementContent
1863
- )
1864
- }
1865
-
1866
- /**
1867
- * mapping pattern: each value reads from subject at the associated key
1868
- * syntax (toplevel): `case {"color": c, "height": x}:`
1869
- */
1870
- predicate matchMappingReadStep ( Node nodeFrom , Content c , Node nodeTo ) {
1871
- exists (
1872
- MatchMappingPattern subject , MatchKeyValuePattern keyValue , MatchLiteralPattern key ,
1873
- Pattern value
1874
- |
1875
- keyValue = subject .getAMapping ( ) and
1876
- key = keyValue .getKey ( ) and
1877
- value = keyValue .getValue ( )
1878
- |
1879
- nodeFrom .asCfgNode ( ) .getNode ( ) = subject and
1880
- nodeTo .asCfgNode ( ) .getNode ( ) = value and
1881
- c .( DictionaryElementContent ) .getKey ( ) = key .getLiteral ( ) .( StrConst ) .getText ( )
1882
- )
1883
- }
1884
-
1885
- /**
1886
- * double star pattern: subject flows to the variable, possibly via a conversion
1887
- * syntax (toplevel): `case {**var}:`
1888
- *
1889
- * Dictionary content flows to the double star, but all mentioned keys in the
1890
- * mapping pattern should be cleared.
1891
- */
1892
- predicate matchMappingFlowStep ( Node nodeFrom , Node nodeTo ) {
1893
- exists ( MatchMappingPattern subject , MatchDoubleStarPattern dstar |
1894
- dstar = subject .getAMapping ( )
1895
- |
1896
- nodeFrom .asCfgNode ( ) .getNode ( ) = subject and
1897
- nodeTo .asCfgNode ( ) .getNode ( ) = dstar .getTarget ( )
1898
- )
1899
- }
1900
-
1901
- /**
1902
- * Dictionary keys that are mentioned in a mapping pattern will not be available
1903
- * to a double star pattern in the same mapping pattern.
1904
- */
1905
- predicate matchMappingClearStep ( Node n , Content c ) {
1906
- exists (
1907
- MatchMappingPattern subject , MatchKeyValuePattern keyValue , MatchLiteralPattern key ,
1908
- MatchDoubleStarPattern dstar
1909
- |
1910
- keyValue = subject .getAMapping ( ) and
1911
- key = keyValue .getKey ( ) and
1912
- dstar = subject .getAMapping ( )
1913
- |
1914
- n .asCfgNode ( ) .getNode ( ) = dstar .getTarget ( ) and
1915
- c .( DictionaryElementContent ) .getKey ( ) = key .getLiteral ( ) .( StrConst ) .getText ( )
1916
- )
1917
- }
1918
-
1919
- /**
1920
- * class pattern: all keywords read the appropriate attribute from the subject
1921
- * syntax (toplevel): `case ClassName(attr = val):`
1922
- */
1923
- predicate matchClassReadStep ( Node nodeFrom , Content c , Node nodeTo ) {
1924
- exists ( MatchClassPattern subject , MatchKeywordPattern keyword , Name attr , Pattern value |
1925
- keyword = subject .getKeyword ( _) and
1926
- attr = keyword .getAttribute ( ) and
1927
- value = keyword .getValue ( )
1928
- |
1929
- nodeFrom .asCfgNode ( ) .getNode ( ) = subject and
1930
- nodeTo .asCfgNode ( ) .getNode ( ) = value and
1931
- c .( AttributeContent ) .getAttribute ( ) = attr .getId ( )
1932
- )
1933
- }
1934
-
1935
- /** All flow steps associated with match. */
1936
- predicate matchFlowStep ( Node nodeFrom , Node nodeTo ) {
1937
- matchSubjectFlowStep ( nodeFrom , nodeTo )
1938
- or
1939
- matchAsFlowStep ( nodeFrom , nodeTo )
1940
- or
1941
- matchOrFlowStep ( nodeFrom , nodeTo )
1942
- or
1943
- matchLiteralFlowStep ( nodeFrom , nodeTo )
1944
- or
1945
- matchCaptureFlowStep ( nodeFrom , nodeTo )
1946
- or
1947
- matchValueFlowStep ( nodeFrom , nodeTo )
1948
- or
1949
- matchMappingFlowStep ( nodeFrom , nodeTo )
1950
- }
1951
-
1952
- /** All read steps associated with match. */
1953
- predicate matchReadStep ( Node nodeFrom , Content c , Node nodeTo ) {
1954
- matchClassReadStep ( nodeFrom , c , nodeTo )
1955
- or
1956
- matchSequenceReadStep ( nodeFrom , c , nodeTo )
1957
- or
1958
- matchMappingReadStep ( nodeFrom , c , nodeTo )
1959
- or
1960
- matchStarReadStep ( nodeFrom , c , nodeTo )
1961
- }
1962
-
1963
- /** All store steps associated with match. */
1964
- predicate matchStoreStep ( Node nodeFrom , Content c , Node nodeTo ) {
1965
- matchStarStoreStep ( nodeFrom , c , nodeTo )
1966
- }
1967
-
1968
- /**
1969
- * All clear steps associated with match.
1970
- */
1971
- predicate matchClearStep ( Node n , Content c ) { matchMappingClearStep ( n , c ) }
1972
- }
1973
-
1974
- import MatchUnpacking
1975
-
1976
1665
/** Data flows from a sequence to a call to `pop` on the sequence. */
1977
1666
predicate popReadStep ( CfgNode nodeFrom , Content c , CfgNode nodeTo ) {
1978
1667
// set.pop or list.pop
0 commit comments