Skip to content

Commit 7a0912e

Browse files
authored
Data Explorer: Add "cell_indices" table selection type for export_data_selection RPC (#9199)
This protocol work is needed to support exporting what the user is seeing with pinned rows and/or columns. An implementation is provided for Python and DuckDB, and I will work on a corresponding PR for R.
1 parent a7389ed commit 7a0912e

File tree

8 files changed

+171
-24
lines changed

8 files changed

+171
-24
lines changed

extensions/positron-duckdb/src/extension.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import {
2727
DataExplorerRpc,
2828
DataExplorerUiEvent,
2929
DataSelectionCellRange,
30+
DataSelectionCellIndices,
3031
DataSelectionIndices,
3132
DataSelectionRange,
3233
DataSelectionSingleCell,
@@ -1626,6 +1627,20 @@ END`;
16261627
FROM ${this.tableName}`;
16271628
return await exportQueryOutput(query, columns);
16281629
}
1630+
case TableSelectionKind.CellIndices: {
1631+
const selection = params.selection.selection as DataSelectionCellIndices;
1632+
const rowIndices = selection.row_indices;
1633+
const columnIndices = selection.column_indices;
1634+
const columns = columnIndices.map(i => this.fullSchema[i]);
1635+
1636+
// Create a VALUES clause to preserve the order of row indices
1637+
const orderValues = rowIndices.map((rowId, idx) => `(${rowId}, ${idx})`).join(', ');
1638+
const query = `SELECT ${getColumnSelectors(columns).join(',')}
1639+
FROM ${this.tableName}
1640+
JOIN (VALUES ${orderValues}) AS row_order(rowid, sort_order) ON ${this.tableName}.rowid = row_order.rowid
1641+
ORDER BY row_order.sort_order`;
1642+
return await exportQueryOutput(query, columns);
1643+
}
16291644
}
16301645
}
16311646
private async _getShape(whereClause: string = ''): Promise<[number, number]> {

extensions/positron-duckdb/src/interfaces.ts

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,6 @@ export interface DataExplorerResponse {
5757
// AUTO-GENERATED from data_explorer.json; do not edit. Copy from
5858
// positronDataExplorerComm.ts instead.
5959

60-
6160
/**
6261
* Result in Methods
6362
*/
@@ -74,8 +73,8 @@ export interface OpenDatasetResult {
7473
*/
7574
export interface SearchSchemaResult {
7675
/**
77-
* The column indices of the matching column indices in the indicated
78-
* sort order
76+
* The column indices that match the search parameters in the indicated
77+
* sort order.
7978
*/
8079
matches: Array<number>;
8180

@@ -203,6 +202,11 @@ export interface ColumnSchema {
203202
*/
204203
column_name: string;
205204

205+
/**
206+
* Display label for column (e.g., from R's label attribute)
207+
*/
208+
column_label?: string;
209+
206210
/**
207211
* The position of the column within the table without any column filters
208212
*/
@@ -1099,6 +1103,23 @@ export interface DataSelectionCellRange {
10991103

11001104
}
11011105

1106+
/**
1107+
* A rectangular selection of data cells defined by arrays of
1108+
* row and column indices
1109+
*/
1110+
export interface DataSelectionCellIndices {
1111+
/**
1112+
* The selected row indices
1113+
*/
1114+
row_indices: Array<number>;
1115+
1116+
/**
1117+
* The selected column indices
1118+
*/
1119+
column_indices: Array<number>;
1120+
1121+
}
1122+
11021123
/**
11031124
* A contiguous selection bounded by inclusive start and end indices
11041125
*/
@@ -1155,7 +1176,7 @@ export type ColumnFilterParams = FilterTextSearch | FilterMatchDataTypes;
11551176
export type ColumnProfileParams = ColumnHistogramParams | ColumnHistogramParams | ColumnFrequencyTableParams | ColumnFrequencyTableParams;
11561177

11571178
/// A union of selection types
1158-
export type Selection = DataSelectionSingleCell | DataSelectionCellRange | DataSelectionRange | DataSelectionIndices;
1179+
export type Selection = DataSelectionSingleCell | DataSelectionCellRange | DataSelectionCellIndices | DataSelectionRange | DataSelectionIndices;
11591180

11601181
/// Union of selection specifications for array_selection
11611182
export type ArraySelection = DataSelectionRange | DataSelectionIndices;
@@ -1275,7 +1296,8 @@ export enum TableSelectionKind {
12751296
ColumnRange = 'column_range',
12761297
RowRange = 'row_range',
12771298
ColumnIndices = 'column_indices',
1278-
RowIndices = 'row_indices'
1299+
RowIndices = 'row_indices',
1300+
CellIndices = 'cell_indices'
12791301
}
12801302

12811303
/**

extensions/positron-duckdb/src/test/extension.test.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -737,6 +737,23 @@ suite('Positron DuckDB Extension Test Suite', () => {
737737
};
738738
await testColumnIndices([0, 2], 'int_col,float_col\n1,1.1\n2,2.2\n3,3.3\n4,NULL\nNULL,5.5e+20');
739739

740+
// Test CellIndices selection
741+
const testCellIndices = async (rowIndices: number[], columnIndices: number[], expected: string) => {
742+
await testSelection(TableSelectionKind.CellIndices, { row_indices: rowIndices, column_indices: columnIndices }, expected);
743+
};
744+
await testCellIndices([0, 2], [0, 2], 'int_col,float_col\n1,1.1\n3,3.3');
745+
await testCellIndices([1, 3], [1, 2], 'str_col,float_col\nb,2.2\nNULL,NULL');
746+
await testCellIndices([0, 1, 4], [0], 'int_col\n1\n2\nNULL');
747+
// Test non-strictly-increasing row indices (order should be preserved)
748+
await testCellIndices([4, 0, 2], [0], 'int_col\nNULL\n1\n3');
749+
await testCellIndices([3, 1, 0], [1, 2], 'str_col,float_col\nNULL,NULL\nb,2.2\na,1.1');
750+
await testCellIndices([2, 4, 1], [0, 1], `int_col,str_col\n3,c\nNULL,${longString}\n2,b`);
751+
// Test non-strictly-increasing column indices (order should be preserved)
752+
await testCellIndices([0, 1], [2, 0, 1], 'float_col,int_col,str_col\n1.1,1,a\n2.2,2,b');
753+
await testCellIndices([1, 2], [1, 2, 0], 'str_col,float_col,int_col\nb,2.2,2\nc,3.3,3');
754+
// Test both row and column indices out of order
755+
await testCellIndices([2, 0], [2, 0], 'float_col,int_col\n3.3,3\n1.1,1');
756+
740757
// Test TSV format
741758
await testSelection(TableSelectionKind.CellRange,
742759
{

extensions/positron-python/python_files/posit/positron/data_explorer.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
ConvertToCodeParams,
5353
DataExplorerBackendMessageContent,
5454
DataExplorerFrontendEvent,
55+
DataSelectionCellIndices,
5556
DataSelectionCellRange,
5657
DataSelectionIndices,
5758
DataSelectionRange,
@@ -425,6 +426,9 @@ def export_data_selection(self, params: ExportDataSelectionParams):
425426
slice(sel.first_column_index, sel.last_column_index + 1),
426427
fmt,
427428
)
429+
elif kind == TableSelectionKind.CellIndices:
430+
assert isinstance(sel, DataSelectionCellIndices)
431+
return self._export_tabular(sel.row_indices, sel.column_indices, fmt)
428432
elif kind == TableSelectionKind.RowRange:
429433
assert isinstance(sel, DataSelectionRange)
430434
return self._export_tabular(

extensions/positron-python/python_files/posit/positron/data_explorer_comm.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,8 @@ class TableSelectionKind(str, enum.Enum):
203203

204204
RowIndices = "row_indices"
205205

206+
CellIndices = "cell_indices"
207+
206208

207209
@enum.unique
208210
class ExportFormat(str, enum.Enum):
@@ -1165,6 +1167,21 @@ class DataSelectionCellRange(BaseModel):
11651167
)
11661168

11671169

1170+
class DataSelectionCellIndices(BaseModel):
1171+
"""
1172+
A rectangular cell selection defined by arrays of row and column
1173+
indices
1174+
"""
1175+
1176+
row_indices: List[StrictInt] = Field(
1177+
description="The selected row indices",
1178+
)
1179+
1180+
column_indices: List[StrictInt] = Field(
1181+
description="The selected column indices",
1182+
)
1183+
1184+
11681185
class DataSelectionRange(BaseModel):
11691186
"""
11701187
A contiguous selection bounded by inclusive start and end indices
@@ -1231,6 +1248,7 @@ class ColumnSelection(BaseModel):
12311248
Selection = Union[
12321249
DataSelectionSingleCell,
12331250
DataSelectionCellRange,
1251+
DataSelectionCellIndices,
12341252
DataSelectionRange,
12351253
DataSelectionIndices,
12361254
]
@@ -1835,6 +1853,8 @@ class ReturnColumnProfilesParams(BaseModel):
18351853

18361854
DataSelectionCellRange.update_forward_refs()
18371855

1856+
DataSelectionCellIndices.update_forward_refs()
1857+
18381858
DataSelectionRange.update_forward_refs()
18391859

18401860
DataSelectionIndices.update_forward_refs()

extensions/positron-python/python_files/posit/positron/tests/test_data_explorer.py

Lines changed: 38 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2283,6 +2283,16 @@ def _select_cell_range(
22832283
}
22842284

22852285

2286+
def _select_cell_indices(row_indices: List[int], column_indices: List[int]):
2287+
return {
2288+
"kind": "cell_indices",
2289+
"selection": {
2290+
"row_indices": row_indices,
2291+
"column_indices": column_indices,
2292+
},
2293+
}
2294+
2295+
22862296
def _select_range(first_index: int, last_index: int, kind: str):
22872297
return {
22882298
"kind": kind,
@@ -2313,6 +2323,25 @@ def _select_row_indices(indices: List[int]):
23132323
return _select_indices(indices, "row_indices")
23142324

23152325

2326+
def _strip_newline(x):
2327+
"""Helper to strip only the final newline character for cross-platform compatibility."""
2328+
if x[-1] == "\n":
2329+
x = x[:-1]
2330+
return x
2331+
2332+
2333+
def _pandas_export_table(x, fmt):
2334+
"""Helper to export pandas DataFrame to various formats with proper line ending handling."""
2335+
buf = StringIO()
2336+
if fmt == "csv":
2337+
x.to_csv(buf, index=False)
2338+
elif fmt == "tsv":
2339+
x.to_csv(buf, sep="\t", index=False)
2340+
elif fmt == "html":
2341+
x.to_html(buf, index=False)
2342+
return _strip_newline(buf.getvalue())
2343+
2344+
23162345
def test_export_data_selection(dxf: DataExplorerFixture):
23172346
length = 100
23182347
ncols = 20
@@ -2345,23 +2374,16 @@ def test_export_data_selection(dxf: DataExplorerFixture):
23452374
(_select_row_range(1, 5), (slice(1, 6), slice(None))),
23462375
(_select_row_indices([0, 3, 5, 7]), ([0, 3, 5, 7], slice(None))),
23472376
(_select_column_indices([0, 3, 5, 7]), (slice(None), [0, 3, 5, 7])),
2377+
# Test cell_indices selections - Cartesian product of specified rows/columns
2378+
(_select_cell_indices([1, 3, 5], [10, 15, 19]), ([1, 3, 5], [10, 15, 19])),
2379+
(_select_cell_indices([0, 2, 4], [0, 1, 2]), ([0, 2, 4], [0, 1, 2])),
2380+
(_select_cell_indices([10], [5, 10, 15]), ([10], [5, 10, 15])),
2381+
(_select_cell_indices([1, 3], [7]), ([1, 3], [7])),
2382+
# Test cell_indices with non-strictly-increasing indices (order preservation)
2383+
(_select_cell_indices([5, 1, 3], [15, 10, 19]), ([5, 1, 3], [15, 10, 19])),
2384+
(_select_cell_indices([4, 0, 2], [2, 0, 1]), ([4, 0, 2], [2, 0, 1])),
23482385
]
23492386

2350-
def strip_newline(x):
2351-
if x[-1] == "\n":
2352-
x = x[:-1]
2353-
return x
2354-
2355-
def pandas_export_table(x, fmt):
2356-
buf = StringIO()
2357-
if fmt == "csv":
2358-
x.to_csv(buf, index=False)
2359-
elif fmt == "tsv":
2360-
x.to_csv(buf, sep="\t", index=False)
2361-
elif fmt == "html":
2362-
x.to_html(buf, index=False)
2363-
return strip_newline(buf.getvalue())
2364-
23652387
def pandas_export_cell(x, i, j):
23662388
return str(x.iloc[i, j])
23672389

@@ -2383,7 +2405,7 @@ def polars_export_cell(x, i, j):
23832405
return str(x[i, j])
23842406

23852407
data_cases = {
2386-
("test_df", pandas_export_cell, pandas_export_table, pandas_iloc),
2408+
("test_df", pandas_export_cell, _pandas_export_table, pandas_iloc),
23872409
("dfp", polars_export_cell, polars_export_table, polars_iloc),
23882410
}
23892411

positron/comms/data_explorer-backend-openrpc.json

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1591,7 +1591,8 @@
15911591
"column_range",
15921592
"row_range",
15931593
"column_indices",
1594-
"row_indices"
1594+
"row_indices",
1595+
"cell_indices"
15951596
]
15961597
},
15971598
"selection": {
@@ -1605,6 +1606,10 @@
16051606
"name": "cell_range",
16061607
"$ref": "#/components/schemas/data_selection_cell_range"
16071608
},
1609+
{
1610+
"name": "cell_indices",
1611+
"$ref": "#/components/schemas/data_selection_cell_indices"
1612+
},
16081613
{
16091614
"name": "index_range",
16101615
"$ref": "#/components/schemas/data_selection_range"
@@ -1663,6 +1668,30 @@
16631668
}
16641669
}
16651670
},
1671+
"data_selection_cell_indices": {
1672+
"type": "object",
1673+
"description": "A rectangular cell selection defined by arrays of row and column indices",
1674+
"required": [
1675+
"row_indices",
1676+
"column_indices"
1677+
],
1678+
"properties": {
1679+
"row_indices": {
1680+
"type": "array",
1681+
"description": "The selected row indices",
1682+
"items": {
1683+
"type": "integer"
1684+
}
1685+
},
1686+
"column_indices": {
1687+
"type": "array",
1688+
"description": "The selected column indices",
1689+
"items": {
1690+
"type": "integer"
1691+
}
1692+
}
1693+
}
1694+
},
16661695
"data_selection_range": {
16671696
"type": "object",
16681697
"description": "A contiguous selection bounded by inclusive start and end indices",

src/vs/workbench/services/languageRuntime/common/positronDataExplorerComm.ts

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1057,6 +1057,23 @@ export interface DataSelectionCellRange {
10571057

10581058
}
10591059

1060+
/**
1061+
* A rectangular cell selection defined by arrays of row and column
1062+
* indices
1063+
*/
1064+
export interface DataSelectionCellIndices {
1065+
/**
1066+
* The selected row indices
1067+
*/
1068+
row_indices: Array<number>;
1069+
1070+
/**
1071+
* The selected column indices
1072+
*/
1073+
column_indices: Array<number>;
1074+
1075+
}
1076+
10601077
/**
10611078
* A contiguous selection bounded by inclusive start and end indices
10621079
*/
@@ -1113,7 +1130,7 @@ export type ColumnFilterParams = FilterTextSearch | FilterMatchDataTypes;
11131130
export type ColumnProfileParams = ColumnHistogramParams | ColumnHistogramParams | ColumnFrequencyTableParams | ColumnFrequencyTableParams;
11141131

11151132
/// A union of selection types
1116-
export type Selection = DataSelectionSingleCell | DataSelectionCellRange | DataSelectionRange | DataSelectionIndices;
1133+
export type Selection = DataSelectionSingleCell | DataSelectionCellRange | DataSelectionCellIndices | DataSelectionRange | DataSelectionIndices;
11171134

11181135
/// Union of selection specifications for array_selection
11191136
export type ArraySelection = DataSelectionRange | DataSelectionIndices;
@@ -1233,7 +1250,8 @@ export enum TableSelectionKind {
12331250
ColumnRange = 'column_range',
12341251
RowRange = 'row_range',
12351252
ColumnIndices = 'column_indices',
1236-
RowIndices = 'row_indices'
1253+
RowIndices = 'row_indices',
1254+
CellIndices = 'cell_indices'
12371255
}
12381256

12391257
/**

0 commit comments

Comments
 (0)