diff --git a/src/daft-core/src/lit/python.rs b/src/daft-core/src/lit/python.rs index b3b2e89517..00b859e5cc 100644 --- a/src/daft-core/src/lit/python.rs +++ b/src/daft-core/src/lit/python.rs @@ -198,7 +198,11 @@ impl<'py> IntoPyObject<'py> for Literal { "Key and value counts should be equal in map literal" ); - Ok(PyList::new(py, keys.to_literals().zip(values.to_literals()))?.into_any()) + let map = PyDict::new(py); + for (key, value) in keys.to_literals().into_iter().zip(values.to_literals()) { + map.set_item(key.into_pyobject(py)?, value.into_pyobject(py)?)?; + } + Ok(map.into_any()) } Self::Tensor { data, shape } => { let pyarrow = py.import(pyo3::intern!(py, "pyarrow"))?; diff --git a/tests/expressions/test_expressions.py b/tests/expressions/test_expressions.py index cf08214d44..08c3e7cb4f 100644 --- a/tests/expressions/test_expressions.py +++ b/tests/expressions/test_expressions.py @@ -656,7 +656,7 @@ def test_list_value_counts(): value_counts = result.to_pydict()["value_counts"] # Expected output - expected = [[("a", 2), ("b", 1), ("c", 1)], [("b", 2), ("c", 1)], [("a", 3)], [], [("d", 2)]] + expected = [{"a": 2, "b": 1, "c": 1}, {"b": 2, "c": 1}, {"a": 3}, {}, {"d": 2}] # Check the result assert value_counts == expected @@ -689,16 +689,8 @@ def test_list_value_counts_nested(): # Apply list_value_counts operation and expect an exception result = mp.eval_expression_list([col("nested_list_col").value_counts().alias("value_counts")]) - result_dict = result.to_pydict() - - assert result_dict["value_counts"] == [ - [([1, 2], 1), ([3, 4], 1)], - [([1, 2], 1), ([5, 6], 1)], - [([3, 4], 1), ([1, 2], 1)], - [], - [], - [([1, 2], 2)], - ] + with pytest.raises(TypeError): + result.to_pydict() def test_list_value_counts_fixed_size(): @@ -727,12 +719,12 @@ def test_list_value_counts_fixed_size(): # Verify the value counts result_dict = result.to_pydict() assert result_dict["value_counts"] == [ - [(1, 1), (2, 1), (3, 1)], - [(4, 2), (3, 1)], - [(4, 1), (5, 1), (6, 1)], - [(1, 1), (2, 1), (3, 1)], - [(7, 1), (8, 1), (9, 1)], - [], + {1: 1, 2: 1, 3: 1}, + {4: 2, 3: 1}, + {4: 1, 5: 1, 6: 1}, + {1: 1, 2: 1, 3: 1}, + {7: 1, 8: 1, 9: 1}, + {}, ] @@ -754,7 +746,7 @@ def test_list_value_counts_degenerate(): result_null = null_mp.eval_expression_list([col("null_list_col").value_counts().alias("value_counts")]) # Check the result for null values - assert result_null.to_pydict() == {"value_counts": [[], []]} + assert result_null.to_pydict() == {"value_counts": [{}, {}]} @pytest.mark.parametrize( diff --git a/tests/recordbatch/test_from_py.py b/tests/recordbatch/test_from_py.py index ab3f09a55f..bd718486d7 100644 --- a/tests/recordbatch/test_from_py.py +++ b/tests/recordbatch/test_from_py.py @@ -303,7 +303,7 @@ def test_from_pydict_arrow_map_array() -> None: # Perform expected Daft cast, where the inner string and int arrays are cast to large string and int arrays. expected = arrow_arr.cast(pa.map_(pa.int64(), pa.float64())) assert daft_recordbatch.to_arrow()["a"].combine_chunks() == expected - assert daft_recordbatch.to_pydict()["a"] == data + assert daft_recordbatch.to_pydict()["a"] == [{1: 2.0, 3: 4.0}, None, {5: 6.0, 7: 8.0}] def test_from_pydict_arrow_struct_array() -> None: @@ -524,7 +524,7 @@ def test_from_arrow_map_array() -> None: # Perform expected Daft cast, where the inner string and int arrays are cast to large string and int arrays. expected = arrow_arr.cast(pa.map_(pa.float32(), pa.int32())) assert daft_recordbatch.to_arrow()["a"].combine_chunks() == expected - assert daft_recordbatch.to_pydict()["a"] == data + assert daft_recordbatch.to_pydict()["a"] == [{1.0: 1, 2.0: 2}, {3.0: 3, 4.0: 4}] @pytest.mark.skipif( diff --git a/tests/series/test_concat.py b/tests/series/test_concat.py index f8f0b9cd24..f9dce47b5f 100644 --- a/tests/series/test_concat.py +++ b/tests/series/test_concat.py @@ -105,8 +105,8 @@ def test_series_concat_map_array(chunks) -> None: counter = 0 for i in range(chunks): for j in range(i): - assert concated_list[counter][0][1] == i + j - assert concated_list[counter][1][1] == float(i * j) + assert concated_list[counter]["a"] == i + j + assert concated_list[counter]["b"] == float(i * j) counter += 1 diff --git a/tests/series/test_if_else.py b/tests/series/test_if_else.py index 73fad1e6dc..953140ed83 100644 --- a/tests/series/test_if_else.py +++ b/tests/series/test_if_else.py @@ -249,7 +249,7 @@ def test_series_if_else_fixed_size_list(if_true, if_false, expected) -> None: [[("a", 8), ("b", 9)], [("c", 10)], None, [("a", 12), ("b", 13)]], type=pa.map_(pa.string(), pa.int64()), ), - [[("a", 1), ("b", 2)], [("c", 10)], None, [("a", 5), ("c", 7)]], + [{"a": 1, "b": 2}, {"c": 10}, None, {"a": 5, "c": 7}], ), # Same length, different super-castable data type ( @@ -261,7 +261,7 @@ def test_series_if_else_fixed_size_list(if_true, if_false, expected) -> None: [[("a", 8), ("b", 9)], [("c", 10)], None, [("a", 12), ("b", 13)]], type=pa.map_(pa.string(), pa.int64()), ), - [[("a", 1), ("b", 2)], [("c", 10)], None, [("a", 5), ("c", 7)]], + [{"a": 1, "b": 2}, {"c": 10}, None, {"a": 5, "c": 7}], ), # Broadcast left ( @@ -270,7 +270,7 @@ def test_series_if_else_fixed_size_list(if_true, if_false, expected) -> None: [[("a", 8), ("b", 9)], [("c", 10)], None, [("a", 12), ("b", 13)]], type=pa.map_(pa.string(), pa.int64()), ), - [[("a", 1), ("b", 2)], [("c", 10)], None, [("a", 1), ("b", 2)]], + [{"a": 1, "b": 2}, {"c": 10}, None, {"a": 1, "b": 2}], ), # Broadcast right ( @@ -279,13 +279,13 @@ def test_series_if_else_fixed_size_list(if_true, if_false, expected) -> None: type=pa.map_(pa.string(), pa.int64()), ), pa.array([[("a", 8), ("b", 9)]], type=pa.map_(pa.string(), pa.int64())), - [[("a", 1), ("b", 2)], [("a", 8), ("b", 9)], None, [("a", 5), ("c", 7)]], + [{"a": 1, "b": 2}, {"a": 8, "b": 9}, None, {"a": 5, "c": 7}], ), # Broadcast both ( pa.array([[("a", 1), ("b", 2)]], type=pa.map_(pa.string(), pa.int64())), pa.array([[("a", 8), ("b", 9)]], type=pa.map_(pa.string(), pa.int64())), - [[("a", 1), ("b", 2)], [("a", 8), ("b", 9)], None, [("a", 1), ("b", 2)]], + [{"a": 1, "b": 2}, {"a": 8, "b": 9}, None, {"a": 1, "b": 2}], ), ], )