diff --git a/docs/source/user-guide/common-operations/expressions.rst b/docs/source/user-guide/common-operations/expressions.rst index 6014c9d2e..77f3359f5 100644 --- a/docs/source/user-guide/common-operations/expressions.rst +++ b/docs/source/user-guide/common-operations/expressions.rst @@ -77,12 +77,25 @@ approaches. df = ctx.from_pydict({"a": [[1, 2, 3], [4, 5, 6]]}) df.select(col("a")[0].alias("a0")) - .. warning:: Indexing an element of an array via ``[]`` starts at index 0 whereas :py:func:`~datafusion.functions.array_element` starts at index 1. +To check if an array is empty, you can use the function :py:func:`datafusion.functions.array_empty`. +This function returns a boolean indicating whether the array is empty. + +.. ipython:: python + + from datafusion import SessionContext, col + from datafusion.functions import array_empty + + ctx = SessionContext() + df = ctx.from_pydict({"a": [[], [1, 2, 3]]}) + df.select(array_empty(col("a")).alias("is_empty")) + +In this example, the ``is_empty`` column will contain ``True`` for the first row and ``False`` for the second row. + Structs ------- diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 727321979..570a6ce5e 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -51,6 +51,7 @@ "array_dims", "array_distinct", "array_element", + "array_empty", "array_except", "array_extract", "array_has", @@ -1160,6 +1161,11 @@ def array_element(array: Expr, n: Expr) -> Expr: return Expr(f.array_element(array.expr, n.expr)) +def array_empty(array: Expr) -> Expr: + """Returns a boolean indicating whether the array is empty.""" + return Expr(f.array_empty(array.expr)) + + def array_extract(array: Expr, n: Expr) -> Expr: """Extracts the element with the index n from the array. 
diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 9353f872d..e6fd41d8b 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -309,6 +309,10 @@ def py_flatten(arr): lambda col: f.array_element(col, literal(1)), lambda data: [r[0] for r in data], ], + [ + lambda col: f.array_empty(col), + lambda data: [len(r) == 0 for r in data], + ], [ lambda col: f.array_extract(col, literal(1)), lambda data: [r[0] for r in data], diff --git a/src/functions.rs b/src/functions.rs index 24d33af39..4facb6cf7 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -572,6 +572,7 @@ array_fn!(array_to_string, array delimiter); array_fn!(array_dims, array); array_fn!(array_distinct, array); array_fn!(array_element, array element); +array_fn!(array_empty, array); array_fn!(array_length, array); array_fn!(array_has, first_array second_array); array_fn!(array_has_all, first_array second_array); @@ -1003,6 +1004,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(array_dims))?; m.add_wrapped(wrap_pyfunction!(array_distinct))?; m.add_wrapped(wrap_pyfunction!(array_element))?; + m.add_wrapped(wrap_pyfunction!(array_empty))?; m.add_wrapped(wrap_pyfunction!(array_length))?; m.add_wrapped(wrap_pyfunction!(array_has))?; m.add_wrapped(wrap_pyfunction!(array_has_all))?;