Skip to content

Commit 2821b01

Browse files
authored
Merge pull request github#2915 from tausbn/python-add-points-to-for-missing-builtin-return-types
Approved by RasmusWL
2 parents f4e5079 + ae1268f commit 2821b01

File tree

10 files changed

+143
-21
lines changed

10 files changed

+143
-21
lines changed

python/ql/src/semmle/python/objects/Callables.qll

Lines changed: 48 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -215,6 +215,10 @@ class BuiltinFunctionObjectInternal extends CallableObjectInternal, TBuiltinFunc
215215
cls = ObjectInternal::builtin("bool") and obj = ObjectInternal::bool(_)
216216
) and
217217
origin = CfgOrigin::unknown()
218+
or
219+
this.returnTypeUnknown() and
220+
obj = ObjectInternal::unknown() and
221+
origin = CfgOrigin::unknown()
218222
}
219223

220224
override ControlFlowNode getOrigin() {
@@ -231,26 +235,15 @@ class BuiltinFunctionObjectInternal extends CallableObjectInternal, TBuiltinFunc
231235

232236
Builtin getReturnType() {
233237
exists(Builtin func |
234-
func = this.getBuiltin() |
235-
/* Enumerate the types of a few builtin functions, that the CPython analysis misses. */
236-
func = Builtin::builtin("hex") and result = Builtin::special("str")
237-
or
238-
func = Builtin::builtin("oct") and result = Builtin::special("str")
239-
or
240-
func = Builtin::builtin("intern") and result = Builtin::special("str")
241-
or
242-
func = Builtin::builtin("__import__") and result = Builtin::special("ModuleType")
243-
or
244-
/* Fix a few minor inaccuracies in the CPython analysis */
245-
ext_rettype(func, result) and not (
246-
func = Builtin::builtin("__import__")
247-
or
248-
func = Builtin::builtin("compile") and result = Builtin::special("NoneType")
249-
or
250-
func = Builtin::builtin("sum")
251-
or
252-
func = Builtin::builtin("filter")
253-
)
238+
func = this.getBuiltin() and
239+
result = getBuiltinFunctionReturnType(func)
240+
)
241+
}
242+
243+
private predicate returnTypeUnknown() {
244+
exists(Builtin func |
245+
func = this.getBuiltin() and
246+
not exists(getBuiltinFunctionReturnType(func))
254247
)
255248
}
256249

@@ -293,7 +286,30 @@ class BuiltinFunctionObjectInternal extends CallableObjectInternal, TBuiltinFunc
293286

294287
}
295288

289+
private Builtin getBuiltinFunctionReturnType(Builtin func) {
290+
/* Enumerate the types of a few builtin functions, that the CPython analysis misses. */
291+
func = Builtin::builtin("hex") and result = Builtin::special("str")
292+
or
293+
func = Builtin::builtin("oct") and result = Builtin::special("str")
294+
or
295+
func = Builtin::builtin("intern") and result = Builtin::special("str")
296+
or
297+
func = Builtin::builtin("__import__") and result = Builtin::special("ModuleType")
298+
or
299+
/* Fix a few minor inaccuracies in the CPython analysis */
300+
ext_rettype(func, result) and not (
301+
func = Builtin::builtin("__import__")
302+
or
303+
func = Builtin::builtin("compile") and result = Builtin::special("NoneType")
304+
or
305+
func = Builtin::builtin("sum")
306+
or
307+
func = Builtin::builtin("filter")
308+
)
309+
}
310+
296311
/** Class representing methods of built-in classes (otherwise known as method-descriptors) such as `list.append`. */
312+
297313
class BuiltinMethodObjectInternal extends CallableObjectInternal, TBuiltinMethodObject {
298314

299315
override Builtin getBuiltin() {
@@ -328,15 +344,27 @@ class BuiltinMethodObjectInternal extends CallableObjectInternal, TBuiltinMethod
328344
cls = ObjectInternal::builtin("bool") and obj = ObjectInternal::bool(_)
329345
) and
330346
origin = CfgOrigin::unknown()
347+
or
348+
this.returnTypeUnknown() and
349+
obj = ObjectInternal::unknown() and
350+
origin = CfgOrigin::unknown()
331351
}
332352

333353
Builtin getReturnType() {
354+
/* If we have a record of the return type in our stubs, use that. */
334355
exists(Builtin func |
335356
func = this.getBuiltin() |
336357
ext_rettype(func, result)
337358
)
338359
}
339360

361+
private predicate returnTypeUnknown() {
362+
exists(Builtin func |
363+
func = this.getBuiltin() |
364+
not ext_rettype(func, _)
365+
)
366+
}
367+
340368
override ControlFlowNode getOrigin() {
341369
none()
342370
}

python/ql/test/2/library-tests/types/functions/ReturnTypes.expected

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,4 +21,4 @@
2121
| 112 | multi_return | builtin-class int |
2222
| 118 | do_something | builtin-class int |
2323
| 123 | with_flow | builtin-class int |
24-
| 128 | return_default | builtin-class tuple |
24+
| 128 | return_default | builtin-class tuple |

python/ql/test/library-tests/PointsTo/new/Call.expected

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,8 @@
2323
| l_calls.py:51 | ControlFlowNode for g() | g |
2424
| l_calls.py:52 | ControlFlowNode for Attribute() | F.m |
2525
| l_calls.py:53 | ControlFlowNode for Attribute() | F.m |
26+
| l_calls.py:59 | ControlFlowNode for Attribute() | int.bit_length |
27+
| l_calls.py:63 | ControlFlowNode for Attribute() | dict.get |
2628
| q_super.py:4 | ControlFlowNode for Attribute() | object.__init__ |
2729
| q_super.py:12 | ControlFlowNode for Attribute() | Base2.__init__ |
2830
| q_super.py:22 | ControlFlowNode for Attribute() | Base1.meth |

python/ql/test/library-tests/PointsTo/new/NameSpace.expected

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -123,7 +123,9 @@
123123
| l_calls.py:0 | Module code.l_calls | E | class E |
124124
| l_calls.py:0 | Module code.l_calls | F | class F |
125125
| l_calls.py:0 | Module code.l_calls | Owner | class Owner |
126+
| l_calls.py:0 | Module code.l_calls | a | Builtin-method bit_length |
126127
| l_calls.py:0 | Module code.l_calls | bar | Function bar |
128+
| l_calls.py:0 | Module code.l_calls | c | Builtin-method get |
127129
| l_calls.py:0 | Module code.l_calls | f | Function f |
128130
| l_calls.py:0 | Module code.l_calls | foo | Function foo |
129131
| l_calls.py:0 | Module code.l_calls | g | Function g |

python/ql/test/library-tests/PointsTo/new/PointsToMissing.expected

Whitespace-only changes.
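[File path header missing from the extracted page — presumably python/ql/test/library-tests/PointsTo/new/PointsToMissing.ql; confirm against the commit]

Lines changed: 26 additions & 0 deletions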
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,26 @@
1+
import python
2+
import Util
3+
import semmle.python.pointsto.PointsTo
4+
import semmle.python.objects.ObjectInternal
5+
6+
/* This test should return _no_ results. */
7+
8+
predicate relevant_node(ControlFlowNode n) {
9+
exists(CallNode c |
10+
c.getFunction().(NameNode).getId() = "check" and
11+
n = c.getAnArg()
12+
)
13+
or
14+
exists(Comment c, string filepath, int bl |
15+
n.getNode().getScope().getLocation().hasLocationInfo(filepath, bl, _, _, _) and
16+
c.getLocation().hasLocationInfo(filepath, bl, _, _, _) and
17+
c.getText().matches("%check")
18+
and not n.(NameNode).isStore()
19+
)
20+
}
21+
22+
from ControlFlowNode f
23+
where
24+
relevant_node(f) and
25+
not PointsTo::pointsTo(f, _, _, _)
26+
select locate(f.getLocation(), "abchlr"), f.toString()

python/ql/test/library-tests/PointsTo/new/PointsToWithContext.expected

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -725,6 +725,25 @@ WARNING: Predicate points_to has been deprecated and may be removed in future (P
725725
| l_calls.py:53 | ControlFlowNode for Attribute() | 'b' | builtin-class str | 53 | import |
726726
| l_calls.py:53 | ControlFlowNode for F | class F | builtin-class type | 47 | import |
727727
| l_calls.py:53 | ControlFlowNode for t | Tuple | builtin-class tuple | 41 | import |
728+
| l_calls.py:58 | ControlFlowNode for Attribute | Builtin-method bit_length | builtin-class method_descriptor | 58 | import |
729+
| l_calls.py:58 | ControlFlowNode for a | Builtin-method bit_length | builtin-class method_descriptor | 58 | import |
730+
| l_calls.py:58 | ControlFlowNode for int | builtin-class int | builtin-class type | 58 | import |
731+
| l_calls.py:59 | ControlFlowNode for Attribute | Builtin-method bit_length | builtin-class method_descriptor | 59 | import |
732+
| l_calls.py:59 | ControlFlowNode for Attribute() | Attribute() | builtin-class int | 59 | import |
733+
| l_calls.py:59 | ControlFlowNode for IntegerLiteral | int 5 | builtin-class int | 59 | import |
734+
| l_calls.py:59 | ControlFlowNode for b | Attribute() | builtin-class int | 59 | import |
735+
| l_calls.py:59 | ControlFlowNode for int | builtin-class int | builtin-class type | 59 | import |
736+
| l_calls.py:62 | ControlFlowNode for Attribute | Builtin-method get | builtin-class method_descriptor | 62 | import |
737+
| l_calls.py:62 | ControlFlowNode for c | Builtin-method get | builtin-class method_descriptor | 62 | import |
738+
| l_calls.py:62 | ControlFlowNode for dict | builtin-class dict | builtin-class type | 62 | import |
739+
| l_calls.py:63 | ControlFlowNode for Attribute | Builtin-method get | builtin-class method_descriptor | 63 | import |
740+
| l_calls.py:63 | ControlFlowNode for Dict | Dict | builtin-class dict | 63 | import |
741+
| l_calls.py:63 | ControlFlowNode for IntegerLiteral | int 5 | builtin-class int | 63 | import |
742+
| l_calls.py:63 | ControlFlowNode for Str | 'foo' | builtin-class str | 63 | import |
743+
| l_calls.py:63 | ControlFlowNode for dict | builtin-class dict | builtin-class type | 63 | import |
744+
| l_calls.py:64 | ControlFlowNode for a | Builtin-method bit_length | builtin-class method_descriptor | 58 | import |
745+
| l_calls.py:64 | ControlFlowNode for b | Attribute() | builtin-class int | 59 | import |
746+
| l_calls.py:64 | ControlFlowNode for c | Builtin-method get | builtin-class method_descriptor | 62 | import |
728747
| m_attributes.py:3 | ControlFlowNode for C | class C | builtin-class type | 3 | import |
729748
| m_attributes.py:3 | ControlFlowNode for ClassExpr | class C | builtin-class type | 3 | import |
730749
| m_attributes.py:3 | ControlFlowNode for object | builtin-class object | builtin-class type | 3 | import |

python/ql/test/library-tests/PointsTo/new/PointsToWithType.expected

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -798,6 +798,25 @@ WARNING: Predicate points_to has been deprecated and may be removed in future (P
798798
| l_calls.py:53 | ControlFlowNode for Attribute() | 'b' | builtin-class str | 53 |
799799
| l_calls.py:53 | ControlFlowNode for F | class F | builtin-class type | 47 |
800800
| l_calls.py:53 | ControlFlowNode for t | Tuple | builtin-class tuple | 41 |
801+
| l_calls.py:58 | ControlFlowNode for Attribute | Builtin-method bit_length | builtin-class method_descriptor | 58 |
802+
| l_calls.py:58 | ControlFlowNode for a | Builtin-method bit_length | builtin-class method_descriptor | 58 |
803+
| l_calls.py:58 | ControlFlowNode for int | builtin-class int | builtin-class type | 58 |
804+
| l_calls.py:59 | ControlFlowNode for Attribute | Builtin-method bit_length | builtin-class method_descriptor | 59 |
805+
| l_calls.py:59 | ControlFlowNode for Attribute() | Attribute() | builtin-class int | 59 |
806+
| l_calls.py:59 | ControlFlowNode for IntegerLiteral | int 5 | builtin-class int | 59 |
807+
| l_calls.py:59 | ControlFlowNode for b | Attribute() | builtin-class int | 59 |
808+
| l_calls.py:59 | ControlFlowNode for int | builtin-class int | builtin-class type | 59 |
809+
| l_calls.py:62 | ControlFlowNode for Attribute | Builtin-method get | builtin-class method_descriptor | 62 |
810+
| l_calls.py:62 | ControlFlowNode for c | Builtin-method get | builtin-class method_descriptor | 62 |
811+
| l_calls.py:62 | ControlFlowNode for dict | builtin-class dict | builtin-class type | 62 |
812+
| l_calls.py:63 | ControlFlowNode for Attribute | Builtin-method get | builtin-class method_descriptor | 63 |
813+
| l_calls.py:63 | ControlFlowNode for Dict | Dict | builtin-class dict | 63 |
814+
| l_calls.py:63 | ControlFlowNode for IntegerLiteral | int 5 | builtin-class int | 63 |
815+
| l_calls.py:63 | ControlFlowNode for Str | 'foo' | builtin-class str | 63 |
816+
| l_calls.py:63 | ControlFlowNode for dict | builtin-class dict | builtin-class type | 63 |
817+
| l_calls.py:64 | ControlFlowNode for a | Builtin-method bit_length | builtin-class method_descriptor | 58 |
818+
| l_calls.py:64 | ControlFlowNode for b | Attribute() | builtin-class int | 59 |
819+
| l_calls.py:64 | ControlFlowNode for c | Builtin-method get | builtin-class method_descriptor | 62 |
801820
| s_scopes.py:4 | ControlFlowNode for True | bool True | builtin-class bool | 4 |
802821
| s_scopes.py:4 | ControlFlowNode for float | bool True | builtin-class bool | 4 |
803822
| s_scopes.py:7 | ControlFlowNode for C2 | class C2 | builtin-class type | 7 |

python/ql/test/library-tests/PointsTo/new/Values.expected

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -585,6 +585,22 @@
585585
| l_calls.py:53 | ControlFlowNode for Attribute() | import | 'b' | builtin-class str |
586586
| l_calls.py:53 | ControlFlowNode for F | import | class F | builtin-class type |
587587
| l_calls.py:53 | ControlFlowNode for t | import | ('a', 'b', 'c', ) | builtin-class tuple |
588+
| l_calls.py:58 | ControlFlowNode for Attribute | import | builtin method bit_length | builtin-class method_descriptor |
589+
| l_calls.py:58 | ControlFlowNode for int | import | builtin-class int | builtin-class type |
590+
| l_calls.py:59 | ControlFlowNode for Attribute | import | builtin method bit_length | builtin-class method_descriptor |
591+
| l_calls.py:59 | ControlFlowNode for Attribute() | import | instance of int | builtin-class int |
592+
| l_calls.py:59 | ControlFlowNode for IntegerLiteral | import | int 5 | builtin-class int |
593+
| l_calls.py:59 | ControlFlowNode for int | import | builtin-class int | builtin-class type |
594+
| l_calls.py:62 | ControlFlowNode for Attribute | import | builtin method get | builtin-class method_descriptor |
595+
| l_calls.py:62 | ControlFlowNode for dict | import | builtin-class dict | builtin-class type |
596+
| l_calls.py:63 | ControlFlowNode for Attribute | import | builtin method get | builtin-class method_descriptor |
597+
| l_calls.py:63 | ControlFlowNode for Dict | import | Dict | builtin-class dict |
598+
| l_calls.py:63 | ControlFlowNode for IntegerLiteral | import | int 5 | builtin-class int |
599+
| l_calls.py:63 | ControlFlowNode for Str | import | 'foo' | builtin-class str |
600+
| l_calls.py:63 | ControlFlowNode for dict | import | builtin-class dict | builtin-class type |
601+
| l_calls.py:64 | ControlFlowNode for a | import | builtin method bit_length | builtin-class method_descriptor |
602+
| l_calls.py:64 | ControlFlowNode for b | import | instance of int | builtin-class int |
603+
| l_calls.py:64 | ControlFlowNode for c | import | builtin method get | builtin-class method_descriptor |
588604
| m_attributes.py:3 | ControlFlowNode for ClassExpr | import | class C | builtin-class type |
589605
| m_attributes.py:3 | ControlFlowNode for object | import | builtin-class object | builtin-class type |
590606
| m_attributes.py:5 | ControlFlowNode for FunctionExpr | import | Function C.__init__ | builtin-class function |

python/ql/test/library-tests/PointsTo/new/code/l_calls.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -52,3 +52,13 @@ def m(self, x, y, z=None):
5252
F().m(*t)
5353
F.m(*t)
5454

55+
# Calls to built-in methods
56+
57+
# Methods with a known return type.
58+
a = int.bit_length
59+
b = int.bit_length(5)
60+
61+
# Methods without a known return type.
62+
c = dict.get
63+
d = dict.get({"foo":5}, 5)
64+
check(a,b,c,d)

0 commit comments

Comments
 (0)