|
| 1 | +""" |
| 2 | +This script demonstrates how to register a filtered DataFrame as a table |
| 3 | +using DataFusion's `ctx.register_table` method and then query it. |
| 4 | +""" |
| 5 | + |
| 6 | +from datafusion import SessionContext, col, literal |
| 7 | +import pyarrow as pa |
| 8 | +import pytest |
| 9 | + |
| 10 | +def test_register_filtered_dataframe(): |
| 11 | + # Create a new session context |
| 12 | + ctx = SessionContext() |
| 13 | + |
| 14 | + # Create sample data as a dictionary |
| 15 | + data = { |
| 16 | + "a": [1, 2, 3, 4, 5], |
| 17 | + "b": [10, 20, 30, 40, 50] |
| 18 | + } |
| 19 | + |
| 20 | + # Create a DataFrame from the dictionary |
| 21 | + df = ctx.from_pydict(data, "my_table") |
| 22 | + |
| 23 | + # Filter the DataFrame (for example, keep rows where a > 2) |
| 24 | + df_filtered = df.filter(col("a") > literal(2)) |
| 25 | + |
| 26 | + # Register the filtered DataFrame as a table called "view1" |
| 27 | + ctx.register_table("view1", df_filtered) |
| 28 | + |
| 29 | + # Now run a SQL query against the registered table "view1" |
| 30 | + df_view = ctx.sql("SELECT * FROM view1") |
| 31 | + |
| 32 | + # Collect the results (as a list of Arrow RecordBatches) |
| 33 | + results = df_view.collect() |
| 34 | + |
| 35 | + # Convert results to a list of dictionaries for easier assertion |
| 36 | + result_dicts = [batch.to_pydict() for batch in results] |
| 37 | + |
| 38 | + # Expected results |
| 39 | + expected_results = [ |
| 40 | + {"a": [3, 4, 5], "b": [30, 40, 50]} |
| 41 | + ] |
| 42 | + |
| 43 | + # Assert the results match the expected results |
| 44 | + assert result_dicts == expected_results |
0 commit comments