@@ -249,6 +249,8 @@ module EssaFlow {
249
249
// Flow inside an unpacking assignment
250
250
iterableUnpackingFlowStep ( nodeFrom , nodeTo )
251
251
or
252
+ matchFlowStep ( nodeFrom , nodeTo )
253
+ or
252
254
// Overflow keyword argument
253
255
exists ( CallNode call , CallableValue callable |
254
256
call = callable .getACall ( ) and
@@ -982,6 +984,8 @@ predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
982
984
posOverflowStoreStep ( nodeFrom , c , nodeTo )
983
985
or
984
986
kwOverflowStoreStep ( nodeFrom , c , nodeTo )
987
+ or
988
+ matchStoreStep ( nodeFrom , c , nodeTo )
985
989
}
986
990
987
991
/** Data flows from an element of a list to the list. */
@@ -1124,6 +1128,8 @@ predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
1124
1128
or
1125
1129
iterableUnpackingReadStep ( nodeFrom , c , nodeTo )
1126
1130
or
1131
+ matchReadStep ( nodeFrom , c , nodeTo )
1132
+ or
1127
1133
popReadStep ( nodeFrom , c , nodeTo )
1128
1134
or
1129
1135
forReadStep ( nodeFrom , c , nodeTo )
@@ -1553,6 +1559,290 @@ module IterableUnpacking {
1553
1559
1554
1560
import IterableUnpacking
1555
1561
1562
+ /**
1563
+ * There are a number of patterns available for the match statement.
1564
+ * Each one transfers data and content differently to its parts.
1565
+ *
1566
+ * Furthermore, given a successful match, we can infer some data about
1567
+ * the subject. Consider the example:
1568
+ * ```python
1569
+ * match choice:
1570
+ * case 'Y':
1571
+ * ...body
1572
+ * ```
1573
+ * Inside `body`, we know that `choice` has the value `'Y'`.
1574
+ *
1575
+ * A similar thing happens with the "as pattern". Consider the example:
1576
+ * ```python
1577
+ * match choice:
1578
+ * case ('y'|'Y') as c:
1579
+ * ...body
1580
+ * ```
1581
+ * By the binding rules, there is data flow from `choice` to `c`. But we
1582
+ * can infer the value of `c` to be either `'y'` or `'Y'` if the match succeeds.
1583
+ *
1584
+ * We will treat such inference separately as guards. First we will model the data flow
1585
+ * stemming from the bindings and the matching of shape. Below, 'subject' is not necessarily the
1586
+ * top-level subject of the match, but rather the part recursively matched by the current pattern.
1587
+ * For instance, in the example:
1588
+ * ```python
1589
+ * match command:
1590
+ * case ('quit' as c) | ('go', ('up'|'down') as c):
1591
+ * ...body
1592
+ * ```
1593
+ * `command` is the subject of the first as-pattern, while the second component of `command` is the subject
1594
+ * of the second as-pattern. As such, 'subject' refers to the part matched by the pattern under evaluation.
1595
+ *
1596
+ * - as pattern: subject flows to alias as well as to the interior pattern
1597
+ * - or pattern: subject flows to each alternative
1598
+ * - literal pattern: no flow
1599
+ * - capture pattern: subject flows to the variable
1600
+ * - wildcard pattern: no flow
1601
+ * - value pattern: no flow
1602
+ * - sequence pattern: each element reads from subject at the associated index
1603
+ * - star pattern: subject flows to the variable, possibly via a conversion
1604
+ * - mapping pattern: each value reads from subject at the associated key
1605
+ * - double star pattern: subject flows to the variable, possibly via a conversion
1606
+ * - key-value pattern: the value reads from the subject at the key (see mapping pattern)
1607
+ * - class pattern: all keywords read the appropriate attribute from the subject
1608
+ * - keyword pattern: the appropriate attribute is read from the subject (see class pattern)
1609
+ *
1610
+ * Inside the class pattern, we also find positional arguments. They are converted to
1611
+ * keyword arguments using the `__match_args__` attribute on the class. We do not
1612
+ * currently model this.
1613
+ */
1614
+ module MatchUnpacking {
1615
+ /**
1616
+ * The subject of a match flows to each top-level pattern
1617
+ * (a pattern directly under a `case` statement).
1618
+ *
1619
+ * We could consider a model closer to use-use-flow, where the subject
1620
+ * only flows to the first top-level pattern and from there to the
1621
+ * following ones.
1622
+ */
1623
+ predicate matchSubjectFlowStep ( Node nodeFrom , Node nodeTo ) {
1624
+ exists ( MatchStmt match , Expr subject , Pattern target |
1625
+ subject = match .getSubject ( ) and
1626
+ target = match .getCase ( _) .( Case ) .getPattern ( )
1627
+ |
1628
+ nodeFrom .asExpr ( ) = subject and
1629
+ nodeTo .asCfgNode ( ) .getNode ( ) = target
1630
+ )
1631
+ }
1632
+
1633
+ /**
1634
+ * as pattern: subject flows to alias as well as to the interior pattern
1635
+ * syntax (toplevel): `case pattern as alias:`
1636
+ */
1637
+ predicate matchAsFlowStep ( Node nodeFrom , Node nodeTo ) {
1638
+ exists ( MatchAsPattern subject , Name alias | alias = subject .getAlias ( ) |
1639
+ nodeFrom .asCfgNode ( ) .getNode ( ) = subject and
1640
+ (
1641
+ // the subject flows to the alias
1642
+ nodeTo .asVar ( ) .getDefinition ( ) .( PatternAliasDefinition ) .getDefiningNode ( ) .getNode ( ) = alias
1643
+ or
1644
+ // the subject flows to the interior pattern
1645
+ nodeTo .asCfgNode ( ) .getNode ( ) = subject .getPattern ( )
1646
+ )
1647
+ )
1648
+ }
1649
+
1650
+ /**
1651
+ * or pattern: subject flows to each alternative
1652
+ * syntax (toplevel): `case alt1 | alt2:`
1653
+ */
1654
+ predicate matchOrFlowStep ( Node nodeFrom , Node nodeTo ) {
1655
+ exists ( MatchOrPattern subject , Pattern pattern | pattern = subject .getAPattern ( ) |
1656
+ nodeFrom .asCfgNode ( ) .getNode ( ) = subject and
1657
+ nodeTo .asCfgNode ( ) .getNode ( ) = pattern
1658
+ )
1659
+ }
1660
+
1661
+ /**
1662
+ * capture pattern: subject flows to the variable
1663
+ * syntax (toplevel): `case var:`
1664
+ */
1665
+ predicate matchCaptureFlowStep ( Node nodeFrom , Node nodeTo ) {
1666
+ exists ( MatchCapturePattern capture , Name var | capture .getVariable ( ) = var |
1667
+ nodeFrom .asCfgNode ( ) .getNode ( ) = capture and
1668
+ nodeTo .asVar ( ) .getDefinition ( ) .( PatternCaptureDefinition ) .getDefiningNode ( ) .getNode ( ) = var
1669
+ )
1670
+ }
1671
+
1672
+ /**
1673
+ * sequence pattern: each element reads from subject at the associated index
1674
+ * syntax (toplevel): `case [a, b]:`
1675
+ */
1676
+ predicate matchSequenceReadStep ( Node nodeFrom , Content c , Node nodeTo ) {
1677
+ exists ( MatchSequencePattern subject , int index , Pattern element |
1678
+ element = subject .getPattern ( index )
1679
+ |
1680
+ nodeFrom .asCfgNode ( ) .getNode ( ) = subject and
1681
+ nodeTo .asCfgNode ( ) .getNode ( ) = element and
1682
+ (
1683
+ // tuple content
1684
+ c .( TupleElementContent ) .getIndex ( ) = index
1685
+ or
1686
+ // list content
1687
+ c instanceof ListElementContent
1688
+ // set content is excluded from sequence patterns,
1689
+ // see https://www.python.org/dev/peps/pep-0635/#sequence-patterns
1690
+ )
1691
+ )
1692
+ }
1693
+
1694
+ /**
1695
+ * star pattern: subject flows to the variable, possibly via a conversion
1696
+ * syntax (toplevel): `case *var:`
1697
+ *
1698
+ * We decompose this flow into a read step and a store step. The read step
1699
+ * reads both tuple and list content, the store step only stores list content.
1700
+ * This way, we convert all content to list content.
1701
+ *
1702
+ * This is the read step.
1703
+ */
1704
+ predicate matchStarReadStep ( Node nodeFrom , Content c , Node nodeTo ) {
1705
+ exists ( MatchSequencePattern subject , int index , MatchStarPattern star |
1706
+ star = subject .getPattern ( index )
1707
+ |
1708
+ nodeFrom .asCfgNode ( ) .getNode ( ) = subject and
1709
+ nodeTo = TStarPatternElementNode ( star ) and
1710
+ (
1711
+ // tuple content
1712
+ c .( TupleElementContent ) .getIndex ( ) >= index
1713
+ or
1714
+ // list content
1715
+ c instanceof ListElementContent
1716
+ // set content is excluded from sequence patterns,
1717
+ // see https://www.python.org/dev/peps/pep-0635/#sequence-patterns
1718
+ )
1719
+ )
1720
+ }
1721
+
1722
+ /**
1723
+ * star pattern: subject flows to the variable, possibly via a conversion
1724
+ * syntax (toplevel): `case *var:`
1725
+ *
1726
+ * We decompose this flow into a read step and a store step. The read step
1727
+ * reads both tuple and list content, the store step only stores list content.
1728
+ * This way, we convert all content to list content.
1729
+ *
1730
+ * This is the store step.
1731
+ */
1732
+ predicate matchStarStoreStep ( Node nodeFrom , Content c , Node nodeTo ) {
1733
+ exists ( MatchStarPattern star |
1734
+ nodeFrom = TStarPatternElementNode ( star ) and
1735
+ nodeTo .asCfgNode ( ) .getNode ( ) = star .getTarget ( ) and
1736
+ c instanceof ListElementContent
1737
+ )
1738
+ }
1739
+
1740
+ /**
1741
+ * mapping pattern: each value reads from subject at the associated key
1742
+ * syntax (toplevel): `case {"color": c, "height": x}:`
1743
+ */
1744
+ predicate matchMappingReadStep ( Node nodeFrom , Content c , Node nodeTo ) {
1745
+ exists (
1746
+ MatchMappingPattern subject , MatchKeyValuePattern keyValue , MatchLiteralPattern key ,
1747
+ Pattern value
1748
+ |
1749
+ keyValue = subject .getAMapping ( ) and
1750
+ key = keyValue .getKey ( ) and
1751
+ value = keyValue .getValue ( )
1752
+ |
1753
+ nodeFrom .asCfgNode ( ) .getNode ( ) = subject and
1754
+ nodeTo .asCfgNode ( ) .getNode ( ) = value and
1755
+ c .( DictionaryElementContent ) .getKey ( ) = key .getLiteral ( ) .( StrConst ) .getText ( )
1756
+ )
1757
+ }
1758
+
1759
+ /**
1760
+ * double star pattern: subject flows to the variable, possibly via a conversion
1761
+ * syntax (toplevel): `case {**var}:`
1762
+ *
1763
+ * Dictionary content flows to the double star, but all mentioned keys in the
1764
+ * mapping pattern should be cleared.
1765
+ */
1766
+ predicate matchMappingFlowStep ( Node nodeFrom , Node nodeTo ) {
1767
+ exists ( MatchMappingPattern subject , MatchDoubleStarPattern dstar |
1768
+ dstar = subject .getAMapping ( )
1769
+ |
1770
+ nodeFrom .asCfgNode ( ) .getNode ( ) = subject and
1771
+ nodeTo .asCfgNode ( ) .getNode ( ) = dstar .getTarget ( )
1772
+ )
1773
+ }
1774
+
1775
+ /**
1776
+ * Bindings that are mentioned in a mapping pattern will not be available
1777
+ * to a double star pattern in the same mapping pattern.
1778
+ */
1779
+ predicate matchMappingClearStep ( Node n , Content c ) {
1780
+ exists (
1781
+ MatchMappingPattern subject , MatchKeyValuePattern keyValue , MatchLiteralPattern key ,
1782
+ MatchDoubleStarPattern dstar
1783
+ |
1784
+ keyValue = subject .getAMapping ( ) and
1785
+ key = keyValue .getKey ( ) and
1786
+ dstar = subject .getAMapping ( )
1787
+ |
1788
+ n .asCfgNode ( ) .getNode ( ) = dstar .getTarget ( ) and
1789
+ c .( DictionaryElementContent ) .getKey ( ) = key .getLiteral ( ) .( StrConst ) .getText ( )
1790
+ )
1791
+ }
1792
+
1793
+ /**
1794
+ * class pattern: all keywords read the appropriate attribute from the subject
1795
+ * syntax (toplevel): `case ClassName(attr = val):`
1796
+ */
1797
+ predicate matchClassReadStep ( Node nodeFrom , Content c , Node nodeTo ) {
1798
+ exists ( MatchClassPattern subject , MatchKeywordPattern keyword , Name attr , Pattern value |
1799
+ keyword = subject .getKeyword ( _) and
1800
+ attr = keyword .getAttribute ( ) and
1801
+ value = keyword .getValue ( )
1802
+ |
1803
+ nodeFrom .asCfgNode ( ) .getNode ( ) = subject and
1804
+ nodeTo .asCfgNode ( ) .getNode ( ) = value and
1805
+ c .( AttributeContent ) .getAttribute ( ) = attr .getId ( )
1806
+ )
1807
+ }
1808
+
1809
+ /** All flow steps associated with match. */
1810
+ predicate matchFlowStep ( Node nodeFrom , Node nodeTo ) {
1811
+ matchSubjectFlowStep ( nodeFrom , nodeTo )
1812
+ or
1813
+ matchAsFlowStep ( nodeFrom , nodeTo )
1814
+ or
1815
+ matchOrFlowStep ( nodeFrom , nodeTo )
1816
+ or
1817
+ matchCaptureFlowStep ( nodeFrom , nodeTo )
1818
+ or
1819
+ matchMappingFlowStep ( nodeFrom , nodeTo )
1820
+ }
1821
+
1822
+ /** All read steps associated with match. */
1823
+ predicate matchReadStep ( Node nodeFrom , Content c , Node nodeTo ) {
1824
+ matchClassReadStep ( nodeFrom , c , nodeTo )
1825
+ or
1826
+ matchSequenceReadStep ( nodeFrom , c , nodeTo )
1827
+ or
1828
+ matchMappingReadStep ( nodeFrom , c , nodeTo )
1829
+ or
1830
+ matchStarReadStep ( nodeFrom , c , nodeTo )
1831
+ }
1832
+
1833
+ /** All store steps associated with match. */
1834
+ predicate matchStoreStep ( Node nodeFrom , Content c , Node nodeTo ) {
1835
+ matchStarStoreStep ( nodeFrom , c , nodeTo )
1836
+ }
1837
+
1838
+ /**
1839
+ * All clear steps associated with match
1840
+ */
1841
+ predicate matchClearStep ( Node n , Content c ) { matchMappingClearStep ( n , c ) }
1842
+ }
1843
+
1844
+ import MatchUnpacking
1845
+
1556
1846
/** Data flows from a sequence to a call to `pop` on the sequence. */
1557
1847
predicate popReadStep ( CfgNode nodeFrom , Content c , CfgNode nodeTo ) {
1558
1848
// set.pop or list.pop
@@ -1635,18 +1925,28 @@ predicate kwUnpackReadStep(CfgNode nodeFrom, DictionaryElementContent c, Node no
1635
1925
}
1636
1926
1637
1927
/**
1638
- * Holds if values stored inside content `c` are cleared at node `n`. For example,
1639
- * any value stored inside `f` is cleared at the pre-update node associated with `x`
1640
- * in `x.f = newValue`.
1928
+ * Clear content at key `name` of the synthesized dictionary `TKwOverflowNode(call, callable)`,
1929
+ * whenever `call` unpacks `name`.
1641
1930
*/
1642
- predicate clearsContent ( Node n , Content c ) {
1931
+ predicate kwOverflowClearStep ( Node n , Content c ) {
1643
1932
exists ( CallNode call , CallableValue callable , string name |
1644
1933
call_unpacks ( call , _, callable , name , _) and
1645
1934
n = TKwOverflowNode ( call , callable ) and
1646
1935
c .( DictionaryElementContent ) .getKey ( ) = name
1647
1936
)
1648
1937
}
1649
1938
1939
+ /**
1940
+ * Holds if values stored inside content `c` are cleared at node `n`. For example,
1941
+ * any value stored inside `f` is cleared at the pre-update node associated with `x`
1942
+ * in `x.f = newValue`.
1943
+ */
1944
+ predicate clearsContent ( Node n , Content c ) {
1945
+ kwOverflowClearStep ( n , c )
1946
+ or
1947
+ matchClearStep ( n , c )
1948
+ }
1949
+
1650
1950
//--------
1651
1951
// Fancy context-sensitive guards
1652
1952
//--------
0 commit comments