Skip to content

Commit d248fbf

Browse files
Merge pull request github#18301 from joefarebrother/python-model-missing-builtins
Python: Add models for builtins `map`, `filter`, `zip`, and `enumerate`.
2 parents 4e59ac4 + 344dd2d commit d248fbf

File tree

4 files changed

+366
-1
lines changed

4 files changed

+366
-1
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* Additional data flow models for the builtin functions `map`, `filter`, `zip`, and `enumerate` have been added.

python/ql/lib/semmle/python/frameworks/Stdlib.qll

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4523,6 +4523,124 @@ module StdlibPrivate {
45234523
}
45244524
}
45254525

4526+
/** A flow summary for `map`. */
4527+
class MapSummary extends SummarizedCallable {
4528+
MapSummary() { this = "builtins.map" }
4529+
4530+
override DataFlow::CallCfgNode getACall() { result = API::builtin("map").getACall() }
4531+
4532+
override DataFlow::ArgumentNode getACallback() {
4533+
result = API::builtin("map").getAValueReachableFromSource()
4534+
}
4535+
4536+
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
4537+
exists(int i | exists(any(Call c).getArg(i)) |
4538+
(
4539+
input = "Argument[" + (i + 1).toString() + "].ListElement"
4540+
or
4541+
input = "Argument[" + (i + 1).toString() + "].SetElement"
4542+
or
4543+
// We reduce generality slightly by not tracking tuple contents on iterable arguments beyond the first, for performance.
4544+
// TODO: Once we have TupleElementAny, this generality can be increased.
4545+
i = 0 and
4546+
exists(DataFlow::TupleElementContent tc, int j | j = tc.getIndex() |
4547+
input = "Argument[1].TupleElement[" + j.toString() + "]"
4548+
)
4549+
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
4550+
) and
4551+
output = "Argument[0].Parameter[" + i.toString() + "]" and
4552+
preservesValue = true
4553+
)
4554+
or
4555+
input = "Argument[0].ReturnValue" and
4556+
output = "ReturnValue.ListElement" and
4557+
preservesValue = true
4558+
}
4559+
}
4560+
4561+
/** A flow summary for `filter`. */
4562+
class FilterSummary extends SummarizedCallable {
4563+
FilterSummary() { this = "builtins.filter" }
4564+
4565+
override DataFlow::CallCfgNode getACall() { result = API::builtin("filter").getACall() }
4566+
4567+
override DataFlow::ArgumentNode getACallback() {
4568+
result = API::builtin("filter").getAValueReachableFromSource()
4569+
}
4570+
4571+
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
4572+
(
4573+
input = "Argument[1].ListElement"
4574+
or
4575+
input = "Argument[1].SetElement"
4576+
or
4577+
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
4578+
input = "Argument[1].TupleElement[" + i.toString() + "]"
4579+
)
4580+
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
4581+
) and
4582+
(output = "Argument[0].Parameter[0]" or output = "ReturnValue.ListElement") and
4583+
preservesValue = true
4584+
}
4585+
}
4586+
4587+
/** A flow summary for `enumerate`. */
4588+
class EnumerateSummary extends SummarizedCallable {
4589+
EnumerateSummary() { this = "builtins.enumerate" }
4590+
4591+
override DataFlow::CallCfgNode getACall() { result = API::builtin("enumerate").getACall() }
4592+
4593+
override DataFlow::ArgumentNode getACallback() {
4594+
result = API::builtin("enumerate").getAValueReachableFromSource()
4595+
}
4596+
4597+
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
4598+
(
4599+
input = "Argument[0].ListElement"
4600+
or
4601+
input = "Argument[0].SetElement"
4602+
or
4603+
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
4604+
input = "Argument[0].TupleElement[" + i.toString() + "]"
4605+
)
4606+
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
4607+
) and
4608+
output = "ReturnValue.ListElement.TupleElement[1]" and
4609+
preservesValue = true
4610+
}
4611+
}
4612+
4613+
/** A flow summary for `zip`. */
4614+
class ZipSummary extends SummarizedCallable {
4615+
ZipSummary() { this = "builtins.zip" }
4616+
4617+
override DataFlow::CallCfgNode getACall() { result = API::builtin("zip").getACall() }
4618+
4619+
override DataFlow::ArgumentNode getACallback() {
4620+
result = API::builtin("zip").getAValueReachableFromSource()
4621+
}
4622+
4623+
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
4624+
exists(int i | exists(any(Call c).getArg(i)) |
4625+
(
4626+
input = "Argument[" + i.toString() + "].ListElement"
4627+
or
4628+
input = "Argument[" + i.toString() + "].SetElement"
4629+
or
4630+
// We reduce generality slightly by not tracking tuple contents on arguments beyond the first two, for performance.
4631+
// TODO: Once we have TupleElementAny, this generality can be increased.
4632+
i in [0 .. 1] and
4633+
exists(DataFlow::TupleElementContent tc, int j | j = tc.getIndex() |
4634+
input = "Argument[" + i.toString() + "].TupleElement[" + j.toString() + "]"
4635+
)
4636+
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
4637+
) and
4638+
output = "ReturnValue.ListElement.TupleElement[" + i.toString() + "]" and
4639+
preservesValue = true
4640+
)
4641+
}
4642+
}
4643+
45264644
// ---------------------------------------------------------------------------
45274645
// Flow summaries for container methods
45284646
// ---------------------------------------------------------------------------

python/ql/test/library-tests/dataflow/coverage/test_builtins.py

Lines changed: 243 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,3 +366,246 @@ def test_next_dict():
366366
i = iter(d)
367367
n = next(i)
368368
SINK(n) #$ MISSING: flow="SOURCE, l:-3 -> n"
369+
370+
### map
371+
372+
@expects(4)
373+
def test_map_list():
374+
l1 = [SOURCE]
375+
l2 = [NONSOURCE]
376+
377+
def f(p1,p2):
378+
SINK(p1) #$ flow="SOURCE, l:-4 -> p1"
379+
SINK_F(p2)
380+
381+
return p1,p2
382+
383+
rl = list(map(f, l1, l2))
384+
SINK(rl[0][0]) #$ flow="SOURCE, l:-10 -> rl[0][0]"
385+
SINK_F(rl[0][1])
386+
387+
@expects(4)
388+
def test_map_set():
389+
s1 = {SOURCE}
390+
s2 = {NONSOURCE}
391+
392+
def f(p1,p2):
393+
SINK(p1) #$ flow="SOURCE, l:-4 -> p1"
394+
SINK_F(p2)
395+
396+
return p1,p2
397+
398+
rl = list(map(f, s1, s2))
399+
SINK(rl[0][0]) #$ flow="SOURCE, l:-10 -> rl[0][0]"
400+
SINK_F(rl[0][1])
401+
402+
@expects(4)
403+
def test_map_tuple():
404+
t1 = (SOURCE,)
405+
t2 = (NONSOURCE,)
406+
407+
def f(p1,p2):
408+
SINK(p1) #$ flow="SOURCE, l:-4 -> p1"
409+
SINK_F(p2)
410+
411+
return p1,p2
412+
413+
rl = list(map(f, t1, t2))
414+
SINK(rl[0][0]) #$ flow="SOURCE, l:-10 -> rl[0][0]"
415+
SINK_F(rl[0][1])
416+
417+
418+
@expects(4)
419+
def test_map_dict():
420+
d1 = {SOURCE: "v1"}
421+
d2 = {NONSOURCE: "v2"}
422+
423+
def f(p1,p2):
424+
SINK(p1) #$ MISSING: flow="SOURCE, l:-4 -> p1"
425+
SINK_F(p2)
426+
427+
return p1,p2
428+
429+
rl = list(map(f, d1, d2))
430+
SINK(rl[0][0]) #$ MISSING: flow="SOURCE, l:-10 -> rl[0][0]"
431+
SINK_F(rl[0][1])
432+
433+
@expects(4)
434+
def test_map_multi_list():
435+
l1 = [SOURCE]
436+
l2 = [SOURCE]
437+
438+
def f(p1,p2):
439+
SINK(p1) #$ flow="SOURCE, l:-4 -> p1"
440+
SINK(p2) #$ flow="SOURCE, l:-4 -> p2"
441+
return p1,p2
442+
443+
rl = list(map(f, l1, l2))
444+
SINK(rl[0][0]) #$ flow="SOURCE, l:-9 -> rl[0][0]"
445+
SINK(rl[0][1]) #$ flow="SOURCE, l:-9 -> rl[0][1]"
446+
447+
@expects(4)
448+
def test_map_multi_tuple():
449+
l1 = (SOURCE,)
450+
l2 = (SOURCE,)
451+
452+
def f(p1,p2):
453+
SINK(p1) #$ flow="SOURCE, l:-4 -> p1"
454+
SINK(p2) #$ MISSING: flow="SOURCE, l:-4 -> p2" # Tuples are not tracked beyond the first list argument for performance.
455+
return p1,p2
456+
457+
rl = list(map(f, l1, l2))
458+
SINK(rl[0][0]) #$ flow="SOURCE, l:-9 -> rl[0][0]"
459+
SINK(rl[0][1]) #$ MISSING: flow="SOURCE, l:-9 -> rl[0][1]"
460+
461+
### filter
462+
463+
@expects(2)
464+
def test_filter_list():
465+
l = [SOURCE]
466+
467+
def f(p):
468+
SINK(p) #$ flow="SOURCE, l:-3 -> p"
469+
return True
470+
471+
rl = list(filter(f,l))
472+
SINK(rl[0]) #$ flow="SOURCE, l:-7 -> rl[0]"
473+
474+
@expects(2)
475+
def test_filter_set():
476+
s = {SOURCE}
477+
478+
def f(p):
479+
SINK(p) #$ flow="SOURCE, l:-3 -> p"
480+
return True
481+
482+
rl = list(filter(f,s))
483+
SINK(rl[0]) #$ flow="SOURCE, l:-7 -> rl[0]"
484+
485+
@expects(2)
486+
def test_filter_tuple():
487+
t = (SOURCE,)
488+
489+
def f(p):
490+
SINK(p) #$ flow="SOURCE, l:-3 -> p"
491+
return True
492+
493+
rl = list(filter(f,t))
494+
SINK(rl[0]) #$ flow="SOURCE, l:-7 -> rl[0]"
495+
496+
@expects(2)
497+
def test_filter_dict():
498+
d = {SOURCE: "v"}
499+
500+
def f(p):
501+
SINK(p) #$ MISSING: flow="SOURCE, l:-3 -> p"
502+
return True
503+
504+
rl = list(filter(f,d))
505+
SINK(rl[0]) #$ MISSING: flow="SOURCE, l:-7 -> rl[0]"
506+
507+
@expects(1)
508+
def test_enumerate_list():
509+
l = [SOURCE]
510+
511+
e = list(enumerate(l))
512+
513+
SINK(e[0][1]) #$ flow="SOURCE, l:-4 -> e[0][1]"
514+
515+
@expects(1)
516+
def test_enumerate_set():
517+
s = {SOURCE}
518+
519+
e = list(enumerate(s))
520+
521+
SINK(e[0][1]) #$ flow="SOURCE, l:-4 -> e[0][1]"
522+
523+
@expects(1)
524+
def test_enumerate_tuple():
525+
t = (SOURCE,)
526+
527+
e = list(enumerate(t))
528+
529+
SINK(e[0][1]) #$ flow="SOURCE, l:-4 -> e[0][1]"
530+
531+
@expects(2)
532+
def test_enumerate_list_for():
533+
l = [SOURCE]
534+
535+
for i, x in enumerate(l):
536+
SINK(x) #$ flow="SOURCE, l:-3 -> x"
537+
538+
for t in enumerate(l):
539+
SINK(t[1]) #$ flow="SOURCE, l:-6 -> t[1]"
540+
541+
@expects(1)
542+
def test_enumerate_dict():
543+
d = {SOURCE:"v"}
544+
545+
e = list(enumerate(d))
546+
547+
SINK(e[0][1]) # $ MISSING: flow="SOURCE, l:-4 -> e[0][1]"
548+
549+
@expects(8)
550+
def test_zip_list():
551+
l1 = [SOURCE, SOURCE]
552+
l2 = [SOURCE, NONSOURCE]
553+
l3 = [NONSOURCE, SOURCE]
554+
l4 = [NONSOURCE, NONSOURCE]
555+
556+
z = list(zip(l1,l2,l3,l4))
557+
558+
SINK(z[0][0]) #$ flow="SOURCE, l:-7 -> z[0][0]"
559+
SINK(z[0][1]) #$ flow="SOURCE, l:-7 -> z[0][1]"
560+
SINK_F(z[0][2]) #$ SPURIOUS: flow="SOURCE, l:-7 -> z[0][2]"
561+
SINK_F(z[0][3])
562+
SINK(z[1][0]) #$ flow="SOURCE, l:-11 -> z[1][0]"
563+
SINK_F(z[1][1]) #$ SPURIOUS: flow="SOURCE, l:-11 -> z[1][1]"
564+
SINK(z[1][2]) #$ flow="SOURCE, l:-11 -> z[1][2]"
565+
SINK_F(z[1][3])
566+
567+
@expects(4)
568+
def test_zip_set():
569+
s1 = {SOURCE}
570+
s2 = {NONSOURCE}
571+
s3 = {SOURCE}
572+
s4 = {NONSOURCE}
573+
574+
z = list(zip(s1,s2,s3,s4))
575+
576+
SINK(z[0][0]) #$ flow="SOURCE, l:-7 -> z[0][0]"
577+
SINK_F(z[0][1])
578+
SINK(z[0][2]) #$ flow="SOURCE, l:-7 -> z[0][2]"
579+
SINK_F(z[0][3])
580+
581+
@expects(8)
582+
def test_zip_tuple():
583+
t1 = (SOURCE, SOURCE)
584+
t2 = (SOURCE, NONSOURCE)
585+
t3 = (NONSOURCE, SOURCE)
586+
t4 = (NONSOURCE, NONSOURCE)
587+
588+
z = list(zip(t1,t2,t3,t4))
589+
590+
SINK(z[0][0]) #$ flow="SOURCE, l:-7 -> z[0][0]"
591+
SINK(z[0][1]) #$ flow="SOURCE, l:-7 -> z[0][1]"
592+
SINK_F(z[0][2])
593+
SINK_F(z[0][3])
594+
SINK(z[1][0]) #$ flow="SOURCE, l:-11 -> z[1][0]"
595+
SINK_F(z[1][1]) #$ SPURIOUS: flow="SOURCE, l:-11 -> z[1][1]"
596+
SINK(z[1][2]) #$ MISSING: flow="SOURCE, l:-11 -> z[1][2]" # Tuple contents are not tracked beyond the first two arguments for performance.
597+
SINK_F(z[1][3])
598+
599+
@expects(4)
600+
def test_zip_dict():
601+
d1 = {SOURCE: "v"}
602+
d2 = {NONSOURCE: "v"}
603+
d3 = {SOURCE: "v"}
604+
d4 = {NONSOURCE: "v"}
605+
606+
z = list(zip(d1,d2,d3,d4))
607+
608+
SINK(z[0][0]) #$ MISSING: flow="SOURCE, l:-7 -> z[0][0]"
609+
SINK_F(z[0][1])
610+
SINK(z[0][2]) #$ MISSING: flow="SOURCE, l:-7 -> z[0][2]"
611+
SINK_F(z[0][3])

python/ql/test/library-tests/dataflow/variable-capture/test_library_calls.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,4 +45,4 @@ def set(x):
4545
for x in map(set, [1]):
4646
pass
4747

48-
SINK(captured["x"]) #$ MISSING: captured
48+
SINK(captured["x"]) #$ captured

0 commit comments

Comments
 (0)