Skip to content

Commit 867a390

Browse files
authored
Merge pull request github#14114 from yoff/python/allow-namespace-packages
Python: Allow namespace packages
2 parents abef848 + dcc7785 commit 867a390

File tree

17 files changed

+146
-40
lines changed

17 files changed

+146
-40
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* Namespace packages in the form of regular packages with missing `__init__.py` files are now allowed. This enables the analysis to resolve modules and functions inside such packages.

python/ql/lib/semmle/python/Module.qll

Lines changed: 96 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -179,21 +179,6 @@ private predicate legalDottedName(string name) {
179179
bindingset[name]
180180
private predicate legalShortName(string name) { name.regexpMatch("(\\p{L}|_)(\\p{L}|\\d|_)*") }
181181

182-
/**
183-
* Holds if `f` is potentially a source package.
184-
* Does it have an __init__.py file (or --respect-init=False for Python 2) and is it within the source archive?
185-
*/
186-
private predicate isPotentialSourcePackage(Folder f) {
187-
f.getRelativePath() != "" and
188-
isPotentialPackage(f)
189-
}
190-
191-
private predicate isPotentialPackage(Folder f) {
192-
exists(f.getFile("__init__.py"))
193-
or
194-
py_flags_versioned("options.respect_init", "False", _) and major_version() = 2 and exists(f)
195-
}
196-
197182
private string moduleNameFromBase(Container file) {
198183
// We used to also require `isPotentialPackage(f)` to hold in this case,
199184
// but we saw modules not getting resolved because their folder did not
@@ -236,31 +221,114 @@ private predicate transitively_imported_from_entry_point(File file) {
236221
)
237222
}
238223

224+
/**
225+
* Holds if the folder `f` is a regular Python package named `name`,
226+
* containing an `__init__.py` file.
227+
*/
228+
private predicate isRegularPackage(Folder f, string name) {
229+
legalShortName(name) and
230+
name = f.getStem() and
231+
exists(f.getFile("__init__.py"))
232+
}
233+
234+
/** Gets the name of a module imported in package `c`. */
235+
private string moduleImportedInPackage(Container c) {
236+
legalShortName(result) and
237+
// it has to be imported in this folder
238+
result =
239+
any(ImportExpr i | i.getLocation().getFile().getParent() = c)
240+
.getName()
241+
// strip everything after the first `.`
242+
.regexpReplaceAll("\\..*", "") and
243+
result != ""
244+
}
245+
246+
/** Holds if the file `file` could be resolved to a module named `name`. */
247+
private predicate isPotentialModuleFile(File file, string name) {
248+
legalShortName(name) and
249+
name = file.getStem() and
250+
file.getExtension() = ["py", "pyc", "so", "pyd"] and
251+
// it has to be imported in this folder
252+
name = moduleImportedInPackage(file.getParent())
253+
}
254+
255+
/**
256+
* Holds if the folder `f` is a namespace package named `name`.
257+
*
258+
* See https://peps.python.org/pep-0420/#specification
259+
* for details on namespace packages.
260+
*/
261+
private predicate isNameSpacePackage(Folder f, string name) {
262+
legalShortName(name) and
263+
name = f.getStem() and
264+
not isRegularPackage(f, name) and
265+
// it has to be imported in a file
266+
// either in this folder or next to this folder
267+
name = moduleImportedInPackage([f, f.getParent()]) and
268+
// no sibling regular package
269+
// and no sibling module
270+
not exists(Folder sibling | sibling.getParent() = f.getParent() |
271+
isRegularPackage(sibling.getFolder(name), name)
272+
or
273+
isPotentialModuleFile(sibling.getAFile(), name)
274+
)
275+
}
276+
277+
/**
278+
* Holds if the folder `f` is a package (either a regular package
279+
* or a namespace package) named `name`.
280+
*/
281+
private predicate isPackage(Folder f, string name) {
282+
isRegularPackage(f, name)
283+
or
284+
isNameSpacePackage(f, name)
285+
}
286+
287+
/**
288+
* Holds if the file `f` is a module named `name`.
289+
*/
290+
private predicate isModuleFile(File file, string name) {
291+
isPotentialModuleFile(file, name) and
292+
not isPackage(file.getParent(), _)
293+
}
294+
295+
/**
296+
* Holds if the folder `f` is a package named `name`
297+
* and does not reside inside another package.
298+
*/
299+
private predicate isOutermostPackage(Folder f, string name) {
300+
isPackage(f, name) and
301+
not isPackage(f.getParent(), _)
302+
}
303+
304+
/** Gets the name of the module that `c` resolves to, if any. */
239305
cached
240-
string moduleNameFromFile(Container file) {
306+
string moduleNameFromFile(Container c) {
307+
// package
308+
isOutermostPackage(c, result)
309+
or
310+
// module
311+
isModuleFile(c, result)
312+
or
241313
Stages::AST::ref() and
242314
exists(string basename |
243-
basename = moduleNameFromBase(file) and
315+
basename = moduleNameFromBase(c) and
244316
legalShortName(basename)
245317
|
246-
result = moduleNameFromFile(file.getParent()) + "." + basename
318+
// recursive case
319+
result = moduleNameFromFile(c.getParent()) + "." + basename
247320
or
248321
// If `c` is a transitive import of a file that's executed directly, we allow references
249322
// to it by its `basename`.
250-
transitively_imported_from_entry_point(file) and
323+
transitively_imported_from_entry_point(c) and
251324
result = basename
252325
)
253326
or
254-
isPotentialSourcePackage(file) and
255-
result = file.getStem() and
256-
(
257-
not isPotentialSourcePackage(file.getParent()) or
258-
not legalShortName(file.getParent().getBaseName())
259-
)
260-
or
261-
result = file.getStem() and file.getParent() = file.getImportRoot()
327+
//
328+
// standard library
329+
result = c.getStem() and c.getParent() = c.getImportRoot()
262330
or
263-
result = file.getStem() and isStubRoot(file.getParent())
331+
result = c.getStem() and isStubRoot(c.getParent())
264332
}
265333

266334
private predicate isStubRoot(Folder f) {

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,7 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
400400
override Scope getScope() { result = mod }
401401

402402
override string toString() {
403-
result = "ModuleVariableNode in " + mod.toString() + " for " + var.getId()
403+
result = "ModuleVariableNode in " + concat( | | mod.toString(), ",") + " for " + var.getId()
404404
}
405405

406406
/** Gets the module in which this variable appears. */

python/ql/test/2/library-tests/six/pointsto.expected

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,4 @@
55
| six.moves.range | builtin-class xrange |
66
| six.moves.urllib | Package six.moves.urllib |
77
| six.moves.urllib.parse | Module six.moves.urllib_parse |
8-
| six.moves.urllib.parse.urlsplit | Function urlsplit |
98
| six.moves.zip | Builtin-function zip |

python/ql/test/3/library-tests/modules/entry_point/modules.expected

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
| module | hash_bang/module.py:0:0:0:0 | Module module |
22
| module | name_main/module.py:0:0:0:0 | Module module |
3+
| namespace_package | hash_bang/namespace_package:0:0:0:0 | Package namespace_package |
4+
| namespace_package | name_main/namespace_package:0:0:0:0 | Package namespace_package |
5+
| namespace_package | no_py_extension/namespace_package:0:0:0:0 | Package namespace_package |
6+
| namespace_package.namespace_package_main | hash_bang/namespace_package/namespace_package_main.py:0:0:0:0 | Module namespace_package.namespace_package_main |
7+
| namespace_package.namespace_package_main | name_main/namespace_package/namespace_package_main.py:0:0:0:0 | Module namespace_package.namespace_package_main |
8+
| namespace_package.namespace_package_main | no_py_extension/namespace_package/namespace_package_main.py:0:0:0:0 | Module namespace_package.namespace_package_main |
9+
| namespace_package.namespace_package_module | hash_bang/namespace_package/namespace_package_module.py:0:0:0:0 | Module namespace_package.namespace_package_module |
10+
| namespace_package.namespace_package_module | name_main/namespace_package/namespace_package_module.py:0:0:0:0 | Module namespace_package.namespace_package_module |
11+
| namespace_package.namespace_package_module | no_py_extension/namespace_package/namespace_package_module.py:0:0:0:0 | Module namespace_package.namespace_package_module |
312
| package | hash_bang/package:0:0:0:0 | Package package |
413
| package | name_main/package:0:0:0:0 | Package package |
514
| package | no_py_extension/package:0:0:0:0 | Package package |

python/ql/test/3/library-tests/six/pointsto.expected

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,4 @@
55
| six.moves.range | builtin-class range |
66
| six.moves.urllib | Package six.moves.urllib |
77
| six.moves.urllib.parse | Module six.moves.urllib_parse |
8-
| six.moves.urllib.parse.urlsplit | Function urlsplit |
98
| six.moves.zip | builtin-class zip |

python/ql/test/experimental/library-tests/CallGraph-implicit-init/example.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,13 @@
1616

1717
from foo.bar.a import afunc
1818
from foo_explicit.bar.a import explicit_afunc
19+
from not_root.baz.foo import foo_func
20+
from not_root.baz.bar.a import afunc as afunc2
1921

2022
afunc() # $ pt,tt="foo/bar/a.py:afunc"
2123

2224
explicit_afunc() # $ pt,tt="foo_explicit/bar/a.py:explicit_afunc"
25+
26+
foo_func() # $ pt,tt="not_root/baz/foo.py:foo_func"
27+
28+
afunc2() # $ pt,tt="not_root/baz/bar/a.py:afunc"
Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
def afunc():
2-
print("afunc called")
3-
return 1
2+
print("afunc called")
3+
return 1
4+
5+
from foo.foo import foo_func
6+
foo_func() # $ pt,tt="foo/foo.py:foo_func"
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
def foo_func():
2+
print("foo_func called")
Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
def explicit_afunc():
2-
print("explicit_afunc called")
3-
return 1
2+
print("explicit_afunc called")
3+
return 1
4+
5+
from foo_explicit.foo_explicit import foo_explicit_func
6+
foo_explicit_func() # $ pt,tt="foo_explicit/foo_explicit.py:foo_explicit_func"

0 commit comments

Comments
 (0)