diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 702f75aed..77b6c272d 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -193,7 +193,7 @@ class Expr: :ref:`Expressions` in the online documentation for more information. """ - def __init__(self, expr: expr_internal.Expr) -> None: + def __init__(self, expr: expr_internal.RawExpr) -> None: """This constructor should not be called by the end user.""" self.expr = expr @@ -383,7 +383,7 @@ def literal(value: Any) -> Expr: value = pa.scalar(value, type=pa.string_view()) if not isinstance(value, pa.Scalar): value = pa.scalar(value) - return Expr(expr_internal.Expr.literal(value)) + return Expr(expr_internal.RawExpr.literal(value)) @staticmethod def string_literal(value: str) -> Expr: @@ -398,13 +398,13 @@ def string_literal(value: str) -> Expr: """ if isinstance(value, str): value = pa.scalar(value, type=pa.string()) - return Expr(expr_internal.Expr.literal(value)) + return Expr(expr_internal.RawExpr.literal(value)) return Expr.literal(value) @staticmethod def column(value: str) -> Expr: """Creates a new expression representing a column.""" - return Expr(expr_internal.Expr.column(value)) + return Expr(expr_internal.RawExpr.column(value)) def alias(self, name: str) -> Expr: """Assign a name to the expression.""" diff --git a/python/tests/test_wrapper_coverage.py b/python/tests/test_wrapper_coverage.py index d7f6f6e35..a2de2d32b 100644 --- a/python/tests/test_wrapper_coverage.py +++ b/python/tests/test_wrapper_coverage.py @@ -28,37 +28,62 @@ from enum import EnumMeta as EnumType -def missing_exports(internal_obj, wrapped_obj) -> None: - # Special case enums - just make sure they exist since dir() - # and other functions get overridden. +def missing_exports(internal_obj, wrapped_obj) -> None: # noqa: C901 + """ + Identify if any of the rust exposted structs or functions do not have wrappers. + + Special handling for: + - Raw* classes: Internal implementation details that shouldn't be exposed + - _global_ctx: Internal implementation detail + - __self__, __class__: Python special attributes + """ + # Special case enums - EnumType overrides a some of the internal functions, + # so check all of the values exist and move on if isinstance(wrapped_obj, EnumType): + expected_values = [v for v in dir(internal_obj) if not v.startswith("__")] + for value in expected_values: + assert value in dir(wrapped_obj) return - for attr in dir(internal_obj): - if attr in ["_global_ctx"]: - continue - assert attr in dir(wrapped_obj) + for internal_attr_name in dir(internal_obj): + wrapped_attr_name = internal_attr_name.removeprefix("Raw") + assert wrapped_attr_name in dir(wrapped_obj) - internal_attr = getattr(internal_obj, attr) - wrapped_attr = getattr(wrapped_obj, attr) + internal_attr = getattr(internal_obj, internal_attr_name) + wrapped_attr = getattr(wrapped_obj, wrapped_attr_name) - if internal_attr is not None and wrapped_attr is None: - pytest.fail(f"Missing attribute: {attr}") + # There are some auto generated attributes that can be None, such as + # __kwdefaults__ and __doc__. As long as these are None on the internal + # object, it's okay to skip them. However if they do exist on the internal + # object they must also exist on the wrapped object. + if internal_attr is not None: + if wrapped_attr is None: + pytest.fail(f"Missing attribute: {internal_attr_name}") - if attr in ["__self__", "__class__"]: + if internal_attr_name in ["__self__", "__class__"]: continue + if isinstance(internal_attr, list): assert isinstance(wrapped_attr, list) + + # We have cases like __all__ that are a list and we want to be certain that + # every value in the list in the internal object is also in the wrapper list for val in internal_attr: - assert val in wrapped_attr + if isinstance(val, str) and val.startswith("Raw"): + assert val[3:] in wrapped_attr + else: + assert val in wrapped_attr elif hasattr(internal_attr, "__dict__"): + # Check all submodules recursively missing_exports(internal_attr, wrapped_attr) def test_datafusion_missing_exports() -> None: """Check for any missing python exports. - This test verifies that every exposed class, attribute, and function in - the internal (pyo3) module is also exposed in our python wrappers. + This test verifies that every exposed class, attribute, + and function in the internal (pyo3) module - datafusion._internal + is also exposed in our python wrappers - datafusion - + i.e., the ones exposed to the public. """ missing_exports(datafusion._internal, datafusion) diff --git a/src/expr.rs b/src/expr.rs index e750be6a4..d3c528eb4 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -101,7 +101,7 @@ pub mod window; use sort_expr::{to_sort_expressions, PySortExpr}; /// A PyExpr that can be used on a DataFrame -#[pyclass(name = "Expr", module = "datafusion.expr", subclass)] +#[pyclass(name = "RawExpr", module = "datafusion.expr", subclass)] #[derive(Debug, Clone)] pub struct PyExpr { pub expr: Expr,