@@ -394,13 +394,12 @@ def get_schema(self, table_name, column_indices=None):
394394 column_indices = column_indices ,
395395 )["columns" ]
396396
397- def search_schema (self , table_name , filters , start_index , max_results ):
397+ def search_schema (self , table_name , filters , sort_order = "original" ):
398398 return self .do_json_rpc (
399399 table_name ,
400400 "search_schema" ,
401401 filters = filters ,
402- start_index = start_index ,
403- max_results = max_results ,
402+ sort_order = sort_order ,
404403 )
405404
406405 def get_state (self , table_name ):
@@ -907,11 +906,11 @@ def _match_types_filter(data_types):
907906def test_search_schema (dxf : DataExplorerFixture ):
908907 # Test search_schema RPC for pandas and polars
909908
910- # Make a few thousand column names we can search for
909+ # Make a small set of column names (20 in total) that we can search for
911910 column_names = [
912911 f"{ prefix } _{ i } "
913- for prefix in ["aaa " , "bbb " , "ccc " , "ddd " ]
914- for i in range ({"aaa " : 1000 , "bbb " : 100 , "ccc " : 50 , "ddd " : 10 }[prefix ])
912+ for prefix in ["apple " , "banana " , "cherry " , "date " ]
913+ for i in range ({"apple " : 10 , "banana " : 5 , "cherry " : 3 , "date " : 2 }[prefix ])
915914 ]
916915
917916 data_examples = {
@@ -939,54 +938,94 @@ def test_search_schema(dxf: DataExplorerFixture):
939938 dxf .register_table ("test_df" , test_df )
940939 dxf .register_table ("dfp" , dfp )
941940
942- aaa_filter = _text_search_filter ("aaa" )
943- bbb_filter = _text_search_filter ("bbb" )
944- ccc_filter = _text_search_filter ("ccc" )
945- ddd_filter = _text_search_filter ("ddd" )
941+ apple_filter = _text_search_filter ("apple" )
942+ banana_filter = _text_search_filter ("banana" )
946943
947944 for name in ["test_df" , "dfp" ]:
948- full_schema = dxf .get_schema (name , list (range (len (column_names ))))
945+ # Test filtering by text
946+ result = dxf .search_schema (name , [apple_filter ])
947+ expected_apple_indices = [i for i , col in enumerate (column_names ) if "apple" in col ]
948+ assert result ["matches" ] == expected_apple_indices
949+
950+ result = dxf .search_schema (name , [banana_filter ])
951+ expected_banana_indices = [i for i , col in enumerate (column_names ) if "banana" in col ]
952+ assert result ["matches" ] == expected_banana_indices
953+
954+ # Test filtering by data type
955+ string_filter = _match_types_filter ([ColumnDisplayType .String ])
956+ result = dxf .search_schema (name , [string_filter ])
957+ # String columns should be at indices 1, 6, 11, 16 (every 5th starting from 1)
958+ expected_string_indices = [i for i in range (len (column_names )) if i % 5 == 1 ]
959+ assert result ["matches" ] == expected_string_indices
960+
961+ # Test combining filters
962+ result = dxf .search_schema (name , [apple_filter , string_filter ])
963+ # Apple columns that are also strings
964+ expected_combined = [i for i in expected_apple_indices if i % 5 == 1 ]
965+ assert result ["matches" ] == expected_combined
966+
967+ # Test sorting
968+ result = dxf .search_schema (name , [], "original" )
969+ expected_all_indices = list (range (len (column_names )))
970+ assert result ["matches" ] == expected_all_indices
971+
972+ result = dxf .search_schema (name , [], "ascending" )
973+ # Should be sorted by column name alphabetically
974+ expected_sorted = sorted (range (len (column_names )), key = lambda i : column_names [i ])
975+ assert result ["matches" ] == expected_sorted
976+
977+ result = dxf .search_schema (name , [], "descending" )
978+ # Should be sorted by column name reverse alphabetically
979+ expected_reverse_sorted = sorted (
980+ range (len (column_names )), key = lambda i : column_names [i ], reverse = True
981+ )
982+ assert result ["matches" ] == expected_reverse_sorted
949983
950- # (search_term, start_index, max_results, ex_total, ex_matches)
951- cases = [
952- ([aaa_filter ], 0 , 100 , 1000 , full_schema [:100 ]),
953- (
954- [aaa_filter , _match_types_filter ([ColumnDisplayType .String ])],
955- 0 ,
956- 100 ,
957- 200 ,
958- full_schema [:500 ][1 ::5 ],
959- ),
960- (
961- [
962- aaa_filter ,
963- _match_types_filter ([ColumnDisplayType .Boolean , ColumnDisplayType .Number ]),
964- ],
965- 0 ,
966- 120 ,
967- 600 ,
968- [x for i , x in enumerate (full_schema [:200 ]) if i % 5 in (0 , 2 , 3 )],
969- ),
970- ([aaa_filter ], 100 , 100 , 1000 , full_schema [100 :200 ]),
971- ([aaa_filter ], 950 , 100 , 1000 , full_schema [950 :1000 ]),
972- ([aaa_filter ], 1000 , 100 , 1000 , []),
973- ([bbb_filter ], 0 , 10 , 100 , full_schema [1000 :1010 ]),
974- ([ccc_filter ], 0 , 10 , 50 , full_schema [1100 :1110 ]),
975- ([ddd_filter ], 0 , 10 , 10 , full_schema [1150 :1160 ]),
976- ]
977984
978- for (
979- filters ,
980- start_index ,
981- max_results ,
982- ex_total ,
983- ex_matches ,
984- ) in cases :
985- result = dxf .search_schema (name , filters , start_index , max_results )
986-
987- assert result ["total_num_matches" ] == ex_total
988- matches = result ["matches" ]["columns" ]
989- assert matches == ex_matches
985+ def test_search_schema_sort_by_name (dxf : DataExplorerFixture ):
986+ # Test comprehensive sort-by-name functionality
987+
988+ # Create a dataframe with deliberately mixed-case and varied column names
989+ column_names = ["Zebra" , "apple" , "BANANA" , "Cherry" , "date" , "Elephant" , "fig" ]
990+ data = {name : [1 , 2 , 3 , 4 , 5 ] for name in column_names }
991+
992+ test_df = pd .DataFrame (data )
993+ dfp = pl .DataFrame (data )
994+
995+ dxf .register_table ("sort_test_df" , test_df )
996+ dxf .register_table ("sort_test_dfp" , dfp )
997+
998+ for name in ["sort_test_df" , "sort_test_dfp" ]:
999+ # Test original order (should be same as column order)
1000+ result = dxf .search_schema (name , [], "original" )
1001+ expected_original = list (range (len (column_names )))
1002+ assert result ["matches" ] == expected_original
1003+
1004+ # Test ascending sort (case-sensitive alphabetical)
1005+ result = dxf .search_schema (name , [], "ascending" )
1006+ expected_ascending = sorted (range (len (column_names )), key = lambda i : column_names [i ])
1007+ assert result ["matches" ] == expected_ascending
1008+
1009+ # Test descending sort
1010+ result = dxf .search_schema (name , [], "descending" )
1011+ expected_descending = sorted (
1012+ range (len (column_names )), key = lambda i : column_names [i ], reverse = True
1013+ )
1014+ assert result ["matches" ] == expected_descending
1015+
1016+ # Test that sorting works with filters too
1017+ filter_with_a = _text_search_filter ("a" ) # Case-insensitive match expected: "Zebra", "apple", "BANANA", "date", "Elephant"
1018+
1019+ result = dxf .search_schema (name , [filter_with_a ], "ascending" )
1020+ filtered_indices = [i for i , col in enumerate (column_names ) if "a" in col .lower ()]
1021+ expected_filtered_ascending = sorted (filtered_indices , key = lambda i : column_names [i ])
1022+ assert result ["matches" ] == expected_filtered_ascending
1023+
1024+ result = dxf .search_schema (name , [filter_with_a ], "descending" )
1025+ expected_filtered_descending = sorted (
1026+ filtered_indices , key = lambda i : column_names [i ], reverse = True
1027+ )
1028+ assert result ["matches" ] == expected_filtered_descending
9901029
9911030
9921031def test_pandas_get_data_values (dxf : DataExplorerFixture ):
0 commit comments