Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* The compiler will issue a warning when adding any alias that might conflict with any other alias (#1045)

### Fixed
* Basilisp now respects the value of Python's `sys.dont_write_bytecode` flag when generating bytecode (#1054)
* Fix a bug where Basilisp did not respect the value of Python's `sys.dont_write_bytecode` flag when generating bytecode (#1054)
* Fix a bug where Basilisp import names existed in the same namespace as `def` names, which caused some unexpected behavior (#1045)

## [v0.2.2]
### Added
Expand Down
4 changes: 4 additions & 0 deletions src/basilisp/lang/compiler/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2595,6 +2595,10 @@ def _import_ast(form: ISeq, ctx: AnalyzerContext) -> Import:
"Python module alias must be a symbol", form=f
)
module_alias = module_alias_sym.name
if "." in module_alias:
raise ctx.AnalyzerException(
"Python module alias must not contain '.'", form=f
)
Comment on lines +2598 to +2601
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This never actually worked, but it was not covered before. Imports always generated pretty 1-1 Lisp to Python code, so this naturally generated exactly the code that it appeared to generate. Unfortunately, the top-level alias name was never defined so Python would error out:

basilisp.user=> (import [re :as reg.ex])
<module 're' from '/Users/christopher/.pyenv/versions/3.12.1/lib/python3.12/re/__init__.py'>

basilisp.user=> (reg.ex/fullmatch #"[A-Z]+" "YES")
Traceback (most recent call last):
  File "/Users/christopher/Projects/basilisp/src/basilisp/cli.py", line 577, in repl
    result = eval_str(lsrc, ctx, ns, eof)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/christopher/Projects/basilisp/src/basilisp/cli.py", line 53, in eval_str
    last = compiler.compile_and_exec_form(form, ctx, ns)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/christopher/Projects/basilisp/src/basilisp/lang/compiler/__init__.py", line 194, in compile_and_exec_form
    return getattr(ns.module, final_wrapped_name)()
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<REPL Input>", line 1, in __lisp_expr___6612
NameError: name 'reg' is not defined```


ctx.put_new_symbol(
module_alias_sym,
Expand Down
85 changes: 72 additions & 13 deletions src/basilisp/lang/compiler/generator.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# pylint: disable=too-many-lines

import ast
import base64
import collections
import contextlib
import functools
import hashlib
import logging
import re
import uuid
Expand Down Expand Up @@ -2275,6 +2277,43 @@ def _if_to_py_ast(ctx: GeneratorContext, node: If) -> GeneratedPyAST[ast.expr]:
)


_IMPORT_HASH_TRANSLATE_TABLE = str.maketrans({"=": "", "+": "", "/": ""})


@functools.lru_cache
def _import_hash(s: str) -> str:
"""Generate a short, consistent, hash which can be appended to imported module
names to effectively separate them from objects of the same name defined in the
module.

Aliases in Clojure exist in a separate "namespace" from interned values, but
Basilisp generates Python modules (which are essentially just a single shared
namespace), so it is possible that imported module names could clash with `def`'ed
names.

Below, we generate a truncated URL-safe Base64 representation of the MD5 hash of
the input string (typically the first '.' delimited component of the potentially
qualified module name), removing any '-' characters since those are not safe for
Python identifiers.

The hash doesn't need to be cryptographically secure, but it does need to be
consistent across sessions such that when cached namespace modules are reloaded,
the new session can find objects generated by the session which generated the
cache file. Since we are not concerned with being able to round-trip this data,
destructive modifications are not an issue."""
digest = hashlib.md5(s.encode()).digest()
return base64.b64encode(digest).decode().translate(_IMPORT_HASH_TRANSLATE_TABLE)[:6]


def _import_name(root: str, *submodules: str) -> Tuple[str, str]:
"""Return a tuple of the root import name (with hash suffix) for an import and the
full import name (if submodules are provided)."""
safe_root = f"{root}_{_import_hash(root)}"
if not submodules:
return safe_root, safe_root
return safe_root, ".".join([safe_root, *submodules])


@_with_ast_loc_deps
def _import_to_py_ast(ctx: GeneratorContext, node: Import) -> GeneratedPyAST[ast.expr]:
"""Return a Python AST node for a Basilisp `import*` expression."""
Expand All @@ -2290,10 +2329,11 @@ def _import_to_py_ast(ctx: GeneratorContext, node: Import) -> GeneratedPyAST[ast
# import if parent and child are both imported:
# (import* collections collections.abc)
if alias.alias is not None:
py_import_alias = munge(alias.alias)
py_import_alias, _ = _import_name(munge(alias.alias))
import_func = _IMPORTLIB_IMPORT_MODULE_FN_NAME
else:
py_import_alias = safe_name.split(".", maxsplit=1)[0]
py_import_alias, *submodules = safe_name.split(".", maxsplit=1)
py_import_alias, _ = _import_name(py_import_alias, *submodules)
import_func = _BUILTINS_IMPORT_FN_NAME

ctx.symbol_table.context_boundary.new_symbol(
Expand Down Expand Up @@ -3268,26 +3308,45 @@ def _interop_prop_to_py_ast(

@_with_ast_loc
def _maybe_class_to_py_ast(
_: GeneratorContext, node: MaybeClass
ctx: GeneratorContext, node: MaybeClass
) -> GeneratedPyAST[ast.expr]:
"""Generate a Python AST node for accessing a potential Python module
variable name."""
"""Generate a Python AST node for accessing a potential Python module variable
name."""
assert node.op == NodeOp.MAYBE_CLASS
return GeneratedPyAST(
node=ast.Name(id=_MODULE_ALIASES.get(node.class_, node.class_), ctx=ast.Load())
)
if (mod_name := _MODULE_ALIASES.get(node.class_)) is None:
current_ns = ctx.current_ns

# For imported modules only, we should generate the name reference using a
# unique, consistent hash name (just as they are imported) to avoid clashing
# with names def'ed later in the namespace.
name = sym.symbol(node.form.name)
if (alias := current_ns.import_aliases.val_at(name)) is not None:
_, mod_name = _import_name(munge(alias.name))
elif name in current_ns.imports:
root, *submodules = node.class_.split(".", maxsplit=1)
_, mod_name = _import_name(root, *submodules)

# Names which are not module references should be passed through.
if mod_name is None:
mod_name = node.class_

return GeneratedPyAST(node=ast.Name(id=mod_name, ctx=ast.Load()))


@_with_ast_loc
def _maybe_host_form_to_py_ast(
_: GeneratorContext, node: MaybeHostForm
) -> GeneratedPyAST[ast.expr]:
"""Generate a Python AST node for accessing a potential Python module
variable name with a namespace."""
"""Generate a Python AST node for accessing a potential Python module variable name
with a namespace."""
assert node.op == NodeOp.MAYBE_HOST_FORM
return GeneratedPyAST(
node=_load_attr(f"{_MODULE_ALIASES.get(node.class_, node.class_)}.{node.field}")
)
if (mod_name := _MODULE_ALIASES.get(node.class_)) is None:
# At import time, the compiler generates a unique, consistent name for the root
# level Python name to avoid clashing with names later def'ed in the namespace.
# This is the same logic applied to completing the reference.
root, *submodules = node.class_.split(".", maxsplit=1)
__, mod_name = _import_name(root, *submodules)
return GeneratedPyAST(node=_load_attr(f"{mod_name}.{node.field}"))


#########################
Expand Down
46 changes: 46 additions & 0 deletions tests/basilisp/compiler_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3311,6 +3311,10 @@ def test_import_module_must_be_symbol(self, lcompile: CompileFn, code: str):
with pytest.raises(compiler.CompilerException):
lcompile(code)

def test_import_alias_may_not_contain_dot(self, lcompile: CompileFn):
with pytest.raises(compiler.CompilerException):
lcompile("(import* [re :as reg.expr])")

@pytest.mark.parametrize(
"code",
[
Expand Down Expand Up @@ -6225,6 +6229,48 @@ def test_aliased_namespace_not_hidden_by_python_module(
monkeypatch.chdir(cwd)
os.unlink(module_file_path)

@pytest.mark.parametrize(
"code",
[
"""
(import re)

(defn re [s]
(re/fullmatch #"[A-Z]+" s))

(re "YES")
""",
"""
(import [re :as regex])

(defn regex [s]
(regex/fullmatch #"[A-Z]+" s))

(regex "YES")
""",
],
)
def test_imports_dont_shadow_def_names(self, lcompile: CompileFn, code: str):
match = lcompile(code)
assert match is not None

def test_imports_names_resolve(self, lcompile: CompileFn):
import re

assert lcompile("(import re) re") is re
assert lcompile("(import [re :as regex]) regex") is re

import urllib.parse

assert (
lcompile("(import urllib.parse) urllib.parse/urlparse")
is urllib.parse.urlparse
)
assert (
lcompile("(import [urllib.parse :as urlparse]) urlparse/urlparse")
is urllib.parse.urlparse
)

def test_aliased_var_does_not_resolve(
self, lcompile: CompileFn, ns: runtime.Namespace
):
Expand Down