1+ import pytest
12from polars import LazyFrame , Object , String
23from polars .testing import assert_frame_equal
34
45from rdw_ingestion_tools .api import concatenate_to_string_lazyframe , get_polars_schema
56
67
def test_get_polars_schema_empty_data():
    """Tests that an empty response yields an empty schema dictionary."""
    # No rows and no object columns: there is nothing to derive a schema from.
    assert get_polars_schema(object_columns=[], data=[]) == {}
1513
1614
def test_concatenate_to_string_lazyframe_empty_response():
    """Tests that empty response data produces an empty LazyFrame.

    The result is compared against a LazyFrame built from an empty schema.
    """
    expected = LazyFrame(schema={})

    actual = concatenate_to_string_lazyframe(objs=[], object_columns=[])

    assert_frame_equal(actual, expected)
2622
2723
2824def test_get_polars_schema_primitive_types ():
29- """
30- Schemas generated from response data use type `String`
25+ """Tests that schemas generated from response data use type `String`
3126 for all primitive types.
32-
3327 """
3428 data = [
3529 {
@@ -57,10 +51,8 @@ def test_get_polars_schema_primitive_types():
5751
5852
5953def test_get_polars_schema_list_types ():
60- """
61- Tests that generated schemas from response data use type `Object`
54+ """Tests that generated schemas from response data use type `Object`
6255 for list columns.
63-
6456 """
6557 data = [{"col1" : [1 , 2 , 3 ], "col2" : [{"key" : "value" }], "col3" : False }]
6658
@@ -72,10 +64,8 @@ def test_get_polars_schema_list_types():
7264
7365
7466def test_get_polars_schema_json_types ():
75- """
76- Tests that generated schemas from response data with JSON columns
67+ """Tests that generated schemas from response data with JSON columns
7768 normalise the column names in the schema.
78-
7969 """
8070 data = [{"col1" : {"key" : {"inner_key" : "value" }}, "col2" : {"key" : "value" }}]
8171
@@ -86,18 +76,19 @@ def test_get_polars_schema_json_types():
8676 assert schema == expected_schema
8777
8878
89- def test_concatenate_to_string_lazyframe ():
90- """
91- Tests that response data is concatenated and normalised into LazyFrames
79+ @ pytest . mark . parametrize ( "batch_size" , [ 1 , 2 ])
80+ def test_concatenate_to_string_lazyframe ( batch_size ):
81+ """ Tests that response data is concatenated and normalised into LazyFrames
9282 with column type `String`.
93-
9483 """
9584 data = [
9685 {"col1" : 1 , "col2" : [1 , 2 , 3 ], "col3" : {"key" : "value1" }},
9786 {"col1" : 2 , "col2" : [1 , 2 , 3 ], "col3" : {"key" : "value2" }},
9887 ]
9988
100- lf = concatenate_to_string_lazyframe (objs = data , object_columns = ["col2" ])
89+ lf = concatenate_to_string_lazyframe (
90+ objs = data , object_columns = ["col2" ], batch_size = batch_size
91+ )
10192
10293 expected_lf = LazyFrame (
10394 {
@@ -108,3 +99,27 @@ def test_concatenate_to_string_lazyframe():
10899 )
109100
110101 assert_frame_equal (lf , expected_lf )
102+
103+
@pytest.mark.parametrize("batch_size", [1, 2, 3])
def test_concatenate_to_string_lazyframe_uses_all_rows(batch_size):
    """Tests that JSON keys from every row end up as columns in the result.

    The rows deliberately disagree on which JSON columns and keys they carry
    (`key2` only appears in the last row), and the test is repeated for each
    parametrized `batch_size` value passed through to the function.
    """
    rows = [
        {"column1": {"key1": "1"}},
        {"column2": {"key1": "1"}},
        {"column2": {"key1": "1", "key2": "2"}},
    ]

    actual = concatenate_to_string_lazyframe(
        objs=rows,
        object_columns=[],
        batch_size=batch_size,
    )

    # One flattened `<column>_<key>` column per distinct key; rows that lack
    # a key are expected to hold None in that column.
    expected = LazyFrame(
        {
            "column1_key1": ["1", None, None],
            "column2_key1": [None, "1", "1"],
            "column2_key2": [None, None, "2"],
        }
    )

    assert_frame_equal(actual, expected)
0 commit comments