Skip to content

Commit 7a92add

Browse files
authored
Data Explorer: Implement searchSchema API for DuckDB backend (#8894)
This brings the DuckDB backend up to feature parity with Python and R. I can add sorting-by-data-type to the three backends in follow-up PRs.
1 parent e32f855 commit 7a92add

File tree

3 files changed

+442
-31
lines changed

3 files changed

+442
-31
lines changed

extensions/positron-duckdb/src/extension.ts

Lines changed: 134 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import {
88
BackendState,
99
ColumnDisplayType,
1010
ColumnFilter,
11+
ColumnFilterType,
1112
ColumnFrequencyTable,
1213
ColumnFrequencyTableParams,
1314
ColumnHistogram,
@@ -35,6 +36,7 @@ import {
3536
FilterBetween,
3637
FilterComparison,
3738
FilterComparisonOp,
39+
FilterMatchDataTypes,
3840
FilterResult,
3941
FilterSetMembership,
4042
FilterTextSearch,
@@ -47,6 +49,9 @@ import {
4749
ReturnColumnProfilesEvent,
4850
RowFilter,
4951
RowFilterType,
52+
SearchSchemaParams,
53+
SearchSchemaResult,
54+
SearchSchemaSortOrder,
5055
SetRowFiltersParams,
5156
SetSortColumnsParams,
5257
SupportStatus,
@@ -813,7 +818,120 @@ export class DuckDBTableView {
813818
type_name: entry.column_type,
814819
type_display
815820
};
816-
})
821+
}),
822+
};
823+
}
824+
825+
async searchSchema(
826+
params: SearchSchemaParams,
827+
): RpcResponse<SearchSchemaResult> {
828+
// Get all column indices
829+
const allIndices: number[] = [];
830+
for (let i = 0; i < this.fullSchema.length; i++) {
831+
allIndices.push(i);
832+
}
833+
834+
// Apply filters if any
835+
let filteredIndices = allIndices;
836+
if (params.filters && params.filters.length > 0) {
837+
filteredIndices = allIndices.filter((index) => {
838+
const entry = this.fullSchema[index];
839+
const columnName = entry.column_name;
840+
const columnType = entry.column_type;
841+
842+
// Get display type for this column
843+
let displayType = SCHEMA_TYPE_MAPPING.get(columnType);
844+
if (displayType === undefined) {
845+
displayType = ColumnDisplayType.Unknown;
846+
}
847+
if (columnType.startsWith('DECIMAL')) {
848+
displayType = ColumnDisplayType.Number;
849+
}
850+
851+
// Apply each filter
852+
return params.filters.every((filter) => {
853+
switch (filter.filter_type) {
854+
case ColumnFilterType.TextSearch: {
855+
const textFilter =
856+
filter.params as FilterTextSearch;
857+
const searchTerm = textFilter.case_sensitive
858+
? textFilter.term
859+
: textFilter.term.toLowerCase();
860+
const columnNameToMatch = textFilter.case_sensitive
861+
? columnName
862+
: columnName.toLowerCase();
863+
864+
switch (textFilter.search_type) {
865+
case TextSearchType.Contains:
866+
return columnNameToMatch.includes(
867+
searchTerm,
868+
);
869+
case TextSearchType.NotContains:
870+
return !columnNameToMatch.includes(
871+
searchTerm,
872+
);
873+
case TextSearchType.StartsWith:
874+
return columnNameToMatch.startsWith(
875+
searchTerm,
876+
);
877+
case TextSearchType.EndsWith:
878+
return columnNameToMatch.endsWith(
879+
searchTerm,
880+
);
881+
case TextSearchType.RegexMatch:
882+
try {
883+
const regex = new RegExp(
884+
textFilter.term,
885+
textFilter.case_sensitive
886+
? ''
887+
: 'i',
888+
);
889+
return regex.test(columnName);
890+
} catch {
891+
return false;
892+
}
893+
default:
894+
return false;
895+
}
896+
}
897+
case ColumnFilterType.MatchDataTypes: {
898+
const typeFilter =
899+
filter.params as FilterMatchDataTypes;
900+
return typeFilter.display_types.includes(
901+
displayType,
902+
);
903+
}
904+
default:
905+
return false;
906+
}
907+
});
908+
});
909+
}
910+
911+
// Sort the filtered indices
912+
switch (params.sort_order) {
913+
case SearchSchemaSortOrder.Ascending:
914+
filteredIndices.sort((a, b) => {
915+
const nameA = this.fullSchema[a].column_name.toLowerCase();
916+
const nameB = this.fullSchema[b].column_name.toLowerCase();
917+
return nameA.localeCompare(nameB);
918+
});
919+
break;
920+
case SearchSchemaSortOrder.Descending:
921+
filteredIndices.sort((a, b) => {
922+
const nameA = this.fullSchema[a].column_name.toLowerCase();
923+
const nameB = this.fullSchema[b].column_name.toLowerCase();
924+
return nameB.localeCompare(nameA);
925+
});
926+
break;
927+
case SearchSchemaSortOrder.Original:
928+
default:
929+
// Keep original order
930+
break;
931+
}
932+
933+
return {
934+
matches: filteredIndices,
817935
};
818936
}
819937

@@ -1117,8 +1235,17 @@ END`;
11171235
]
11181236
},
11191237
search_schema: {
1120-
support_status: SupportStatus.Unsupported,
1121-
supported_types: []
1238+
support_status: SupportStatus.Supported,
1239+
supported_types: [
1240+
{
1241+
column_filter_type: ColumnFilterType.TextSearch,
1242+
support_status: SupportStatus.Supported,
1243+
},
1244+
{
1245+
column_filter_type: ColumnFilterType.MatchDataTypes,
1246+
support_status: SupportStatus.Supported,
1247+
}
1248+
],
11221249
},
11231250
set_column_filters: {
11241251
support_status: SupportStatus.Unsupported,
@@ -1272,11 +1399,12 @@ END`;
12721399
other_count: 0
12731400
};
12741401
break;
1275-
case ColumnProfileType.SummaryStats:
1402+
case ColumnProfileType.SummaryStats: {
12761403
// Create null summary stats appropriate for the column type
12771404
const columnSchema = this.fullSchema[request.column_index];
12781405
result.summary_stats = this.createEmptySummaryStats(columnSchema);
12791406
break;
1407+
}
12801408
}
12811409
}
12821410
return result;
@@ -1664,8 +1792,9 @@ export class DataExplorerRpcHandler implements vscode.Disposable {
16641792
return table.setRowFilters(rpc.params as SetRowFiltersParams);
16651793
case DataExplorerBackendRequest.SetSortColumns:
16661794
return table.setSortColumns(rpc.params as SetSortColumnsParams);
1667-
case DataExplorerBackendRequest.SetColumnFilters:
16681795
case DataExplorerBackendRequest.SearchSchema:
1796+
return table.searchSchema(rpc.params as SearchSchemaParams);
1797+
case DataExplorerBackendRequest.SetColumnFilters:
16691798
return `${rpc.method} not yet implemented`;
16701799
default:
16711800
return `unrecognized data explorer method: ${rpc.method} `;

extensions/positron-duckdb/src/interfaces.ts

Lines changed: 72 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ export interface DataExplorerRpc {
1616
uri?: string;
1717
params: OpenDatasetParams |
1818
GetSchemaParams |
19+
SearchSchemaParams |
1920
GetDataValuesParams |
2021
GetRowLabelsParams |
2122
GetColumnProfilesParams |
@@ -73,14 +74,10 @@ export interface OpenDatasetResult {
7374
*/
7475
export interface SearchSchemaResult {
7576
/**
76-
* A schema containing matching columns up to the max_results limit
77+
* The indices of the matching columns, returned in the indicated
78+
* sort order
7779
*/
78-
matches: TableSchema;
79-
80-
/**
81-
* The total number of columns matching the filter
82-
*/
83-
total_num_matches: number;
80+
matches: Array<number>;
8481

8582
}
8683

@@ -100,6 +97,28 @@ export interface ExportedData {
10097

10198
}
10299

100+
/**
101+
* Code snippet for the data view
102+
*/
103+
export interface ConvertedCode {
104+
/**
105+
* Lines of code that implement filters and sort keys
106+
*/
107+
converted_code: Array<string>;
108+
109+
}
110+
111+
/**
112+
* Syntax to use for code conversion
113+
*/
114+
export interface CodeSyntaxName {
115+
/**
116+
* The name of the code syntax, e.g. pandas, polars, or dplyr.
117+
*/
118+
code_syntax_name: string;
119+
120+
}
121+
103122
/**
104123
* The result of applying filters to a table
105124
*/
@@ -815,7 +834,7 @@ export interface ColumnFrequencyTable {
815834
/**
816835
* The formatted top values
817836
*/
818-
values: Array<string>;
837+
values: Array<ColumnValue>;
819838

820839
/**
821840
* Counts of top values
@@ -1006,21 +1025,19 @@ export interface SetSortColumnsFeatures {
10061025
}
10071026

10081027
/**
1009-
* Feature flags for 'convert_to_code' RPC
1028+
* Feature flags for convert to code RPC
10101029
*/
10111030
export interface ConvertToCodeFeatures {
10121031
/**
10131032
* The support status for this RPC method
1014-
* */
1033+
*/
10151034
support_status: SupportStatus;
1035+
10161036
/**
1017-
* The supported code syntax names
1037+
* The syntaxes for converted code
10181038
*/
1019-
supported_code_syntaxes?: Array<CodeSyntaxName>;
1020-
}
1039+
code_syntaxes?: Array<CodeSyntaxName>;
10211040

1022-
export interface CodeSyntaxName {
1023-
code_syntax_name: string;
10241041
}
10251042

10261043
/**
@@ -1143,6 +1160,15 @@ export type Selection = DataSelectionSingleCell | DataSelectionCellRange | DataS
11431160
/// Union of selection specifications for array_selection
11441161
export type ArraySelection = DataSelectionRange | DataSelectionIndices;
11451162

1163+
/**
1164+
* Possible values for SortOrder in SearchSchema
1165+
*/
1166+
export enum SearchSchemaSortOrder {
1167+
Original = 'original',
1168+
Ascending = 'ascending',
1169+
Descending = 'descending'
1170+
}
1171+
11461172
/**
11471173
* Possible values for ColumnDisplayType
11481174
*/
@@ -1294,20 +1320,15 @@ export interface GetSchemaParams {
12941320
*/
12951321
export interface SearchSchemaParams {
12961322
/**
1297-
* Column filters to apply when searching
1323+
* Column filters to apply when searching; may be empty
12981324
*/
12991325
filters: Array<ColumnFilter>;
13001326

13011327
/**
1302-
* Index (starting from zero) of first result to fetch (for paging)
1303-
*/
1304-
start_index: number;
1305-
1306-
/**
1307-
* Maximum number of resulting column schemas to fetch from the start
1308-
* index
1328+
* How to sort results: original in-schema order, alphabetical ascending
1329+
* or descending
13091330
*/
1310-
max_results: number;
1331+
sort_order: SearchSchemaSortOrder;
13111332
}
13121333

13131334
/**
@@ -1355,6 +1376,31 @@ export interface ExportDataSelectionParams {
13551376
format: ExportFormat;
13561377
}
13571378

1379+
/**
1380+
* Parameters for the ConvertToCode method.
1381+
*/
1382+
export interface ConvertToCodeParams {
1383+
/**
1384+
* Zero or more column filters to apply
1385+
*/
1386+
column_filters: Array<ColumnFilter>;
1387+
1388+
/**
1389+
* Zero or more row filters to apply
1390+
*/
1391+
row_filters: Array<RowFilter>;
1392+
1393+
/**
1394+
* Zero or more sort keys to apply
1395+
*/
1396+
sort_keys: Array<ColumnSortKey>;
1397+
1398+
/**
1399+
* The code syntax to use for conversion
1400+
*/
1401+
code_syntax_name: CodeSyntaxName;
1402+
}
1403+
13581404
/**
13591405
* Parameters for the SetColumnFilters method.
13601406
*/
@@ -1471,6 +1517,8 @@ export enum DataExplorerBackendRequest {
14711517
GetDataValues = 'get_data_values',
14721518
GetRowLabels = 'get_row_labels',
14731519
ExportDataSelection = 'export_data_selection',
1520+
ConvertToCode = 'convert_to_code',
1521+
SuggestCodeSyntax = 'suggest_code_syntax',
14741522
SetColumnFilters = 'set_column_filters',
14751523
SetRowFilters = 'set_row_filters',
14761524
SetSortColumns = 'set_sort_columns',

0 commit comments

Comments
 (0)