Skip to content

Commit 70e42b8

Browse files
feat: add absolute index to column info (#434)
* Add start() to ExcelSheetData and CalamineDataProvider
* Add field absolute_index to ColumnInfo and ColumnInfoNoDtype
* Fix tests
* Fix ColumnInfo python representation
* ci: add tests for the shifted data case

---------

Co-authored-by: Luka Peschke <luka.peschke@toucantoco.com>
1 parent 871a62b commit 70e42b8

File tree

11 files changed: +289 -9 lines changed

python/fastexcel/_fastexcel.pyi

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,21 @@ DTypeFrom = Literal["provided_for_all", "provided_by_index", "provided_by_name",
1414
SheetVisible = Literal["visible", "hidden", "veryhidden"]
1515

1616
class ColumnInfoNoDtype:
17-
def __init__(self, *, name: str, index: int, column_name_from: ColumnNameFrom) -> None: ...
17+
def __init__(
18+
self,
19+
*,
20+
name: str,
21+
index: int,
22+
absolute_index: int,
23+
column_name_from: ColumnNameFrom,
24+
) -> None: ...
1825
@property
1926
def name(self) -> str: ...
2027
@property
2128
def index(self) -> int: ...
2229
@property
30+
def absolute_index(self) -> int: ...
31+
@property
2332
def column_name_from(self) -> ColumnNameFrom: ...
2433

2534
class ColumnInfo:
@@ -28,6 +37,7 @@ class ColumnInfo:
2837
*,
2938
name: str,
3039
index: int,
40+
absolute_index: int,
3141
column_name_from: ColumnNameFrom,
3242
dtype: DType,
3343
dtype_from: DTypeFrom,
@@ -37,6 +47,8 @@ class ColumnInfo:
3747
@property
3848
def index(self) -> int: ...
3949
@property
50+
def absolute_index(self) -> int: ...
51+
@property
4052
def dtype(self) -> DType: ...
4153
@property
4254
def column_name_from(self) -> ColumnNameFrom: ...

python/tests/test_column_selection.py

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,20 @@ def excel_reader_single_sheet() -> fastexcel.ExcelReader:
2323
def expected_column_info() -> list[fastexcel.ColumnInfo]:
2424
return [
2525
fastexcel.ColumnInfo(
26-
name="Month", index=0, column_name_from="looked_up", dtype="float", dtype_from="guessed"
26+
name="Month",
27+
index=0,
28+
absolute_index=0,
29+
column_name_from="looked_up",
30+
dtype="float",
31+
dtype_from="guessed",
2732
),
2833
fastexcel.ColumnInfo(
29-
name="Year", index=1, column_name_from="looked_up", dtype="float", dtype_from="guessed"
34+
name="Year",
35+
index=1,
36+
absolute_index=1,
37+
column_name_from="looked_up",
38+
dtype="float",
39+
dtype_from="guessed",
3040
),
3141
]
3242

@@ -117,27 +127,44 @@ def single_sheet_with_unnamed_columns_expected() -> dict[str, list[Any]]:
117127
def sheet_with_unnamed_columns_expected_column_info() -> list[fastexcel.ColumnInfo]:
118128
return [
119129
fastexcel.ColumnInfo(
120-
name="col1", index=0, column_name_from="looked_up", dtype="float", dtype_from="guessed"
130+
name="col1",
131+
index=0,
132+
absolute_index=0,
133+
column_name_from="looked_up",
134+
dtype="float",
135+
dtype_from="guessed",
121136
),
122137
fastexcel.ColumnInfo(
123138
name="__UNNAMED__1",
124139
index=1,
140+
absolute_index=1,
125141
column_name_from="generated",
126142
dtype="float",
127143
dtype_from="guessed",
128144
),
129145
fastexcel.ColumnInfo(
130-
name="col3", index=2, column_name_from="looked_up", dtype="string", dtype_from="guessed"
146+
name="col3",
147+
index=2,
148+
absolute_index=2,
149+
column_name_from="looked_up",
150+
dtype="string",
151+
dtype_from="guessed",
131152
),
132153
fastexcel.ColumnInfo(
133154
name="__UNNAMED__3",
134155
index=3,
156+
absolute_index=3,
135157
column_name_from="generated",
136158
dtype="float",
137159
dtype_from="guessed",
138160
),
139161
fastexcel.ColumnInfo(
140-
name="col5", index=4, column_name_from="looked_up", dtype="string", dtype_from="guessed"
162+
name="col5",
163+
index=4,
164+
absolute_index=4,
165+
column_name_from="looked_up",
166+
dtype="string",
167+
dtype_from="guessed",
141168
),
142169
]
143170

@@ -497,26 +524,30 @@ def test_use_columns_with_column_names() -> None:
497524
name="__UNNAMED__0",
498525
column_name_from="generated",
499526
index=0,
527+
absolute_index=0,
500528
dtype="float",
501529
dtype_from="guessed",
502530
),
503531
fastexcel.ColumnInfo(
504532
name="bools_renamed",
505533
index=1,
534+
absolute_index=1,
506535
dtype="boolean",
507536
dtype_from="guessed",
508537
column_name_from="provided",
509538
),
510539
fastexcel.ColumnInfo(
511540
name="dates_renamed",
512541
index=2,
542+
absolute_index=2,
513543
dtype="datetime",
514544
dtype_from="guessed",
515545
column_name_from="provided",
516546
),
517547
fastexcel.ColumnInfo(
518548
name="__UNNAMED__3",
519549
index=3,
550+
absolute_index=3,
520551
dtype="float",
521552
dtype_from="guessed",
522553
column_name_from="generated",

python/tests/test_dtypes.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,48 +266,55 @@ def test_one_dtype_for_all() -> None:
266266
fastexcel.ColumnInfo(
267267
name="Employee ID",
268268
index=0,
269+
absolute_index=0,
269270
dtype="string",
270271
dtype_from="provided_for_all",
271272
column_name_from="looked_up",
272273
),
273274
fastexcel.ColumnInfo(
274275
name="Employee Name",
275276
index=1,
277+
absolute_index=1,
276278
dtype="string",
277279
dtype_from="provided_for_all",
278280
column_name_from="looked_up",
279281
),
280282
fastexcel.ColumnInfo(
281283
name="Date",
282284
index=2,
285+
absolute_index=2,
283286
dtype="string",
284287
dtype_from="provided_for_all",
285288
column_name_from="looked_up",
286289
),
287290
fastexcel.ColumnInfo(
288291
name="Details",
289292
index=3,
293+
absolute_index=3,
290294
dtype="string",
291295
dtype_from="provided_for_all",
292296
column_name_from="looked_up",
293297
),
294298
fastexcel.ColumnInfo(
295299
name="Asset ID",
296300
index=4,
301+
absolute_index=4,
297302
dtype="string",
298303
dtype_from="provided_for_all",
299304
column_name_from="looked_up",
300305
),
301306
fastexcel.ColumnInfo(
302307
name="Mixed dates",
303308
index=5,
309+
absolute_index=5,
304310
dtype="string",
305311
dtype_from="provided_for_all",
306312
column_name_from="looked_up",
307313
),
308314
fastexcel.ColumnInfo(
309315
name="Mixed bools",
310316
index=6,
317+
absolute_index=6,
311318
dtype="string",
312319
dtype_from="provided_for_all",
313320
column_name_from="looked_up",
@@ -342,13 +349,15 @@ def test_fallback_infer_dtypes(mocker: MockerFixture) -> None:
342349
fastexcel.ColumnInfo(
343350
name="id",
344351
index=0,
352+
absolute_index=0,
345353
dtype="float",
346354
dtype_from="guessed",
347355
column_name_from="looked_up",
348356
),
349357
fastexcel.ColumnInfo(
350358
name="label",
351359
index=1,
360+
absolute_index=1,
352361
dtype="string",
353362
dtype_from="guessed",
354363
column_name_from="looked_up",
@@ -517,20 +526,23 @@ def test_guess_dtypes_with_div0_error() -> None:
517526
fastexcel.ColumnInfo(
518527
name="dividend",
519528
index=0,
529+
absolute_index=0,
520530
dtype="float",
521531
dtype_from="guessed",
522532
column_name_from="looked_up",
523533
),
524534
fastexcel.ColumnInfo(
525535
name="divisor",
526536
index=1,
537+
absolute_index=1,
527538
dtype="float",
528539
dtype_from="guessed",
529540
column_name_from="looked_up",
530541
),
531542
fastexcel.ColumnInfo(
532543
name="quotient",
533544
index=2,
545+
absolute_index=2,
534546
dtype="float",
535547
dtype_from="guessed",
536548
column_name_from="looked_up",

python/tests/test_shifted_data.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
import fastexcel
2+
3+
from .utils import path_for_fixture
4+
5+
6+
def test_sheet_with_offset():
7+
reader = fastexcel.read_excel(path_for_fixture("sheet-and-table-with-offset.xlsx"))
8+
sheet = reader.load_sheet("without-table")
9+
10+
assert sheet.available_columns() == [
11+
fastexcel.ColumnInfo(
12+
name="Column at H10",
13+
index=0,
14+
absolute_index=7,
15+
dtype="float",
16+
dtype_from="guessed",
17+
column_name_from="looked_up",
18+
),
19+
fastexcel.ColumnInfo(
20+
name="Column at I10",
21+
index=1,
22+
absolute_index=8,
23+
dtype="float",
24+
dtype_from="guessed",
25+
column_name_from="looked_up",
26+
),
27+
fastexcel.ColumnInfo(
28+
name="__UNNAMED__2",
29+
index=2,
30+
absolute_index=9,
31+
dtype="string",
32+
dtype_from="guessed",
33+
column_name_from="generated",
34+
),
35+
fastexcel.ColumnInfo(
36+
name="Column at K10",
37+
index=3,
38+
absolute_index=10,
39+
dtype="float",
40+
dtype_from="guessed",
41+
column_name_from="looked_up",
42+
),
43+
]
44+
45+
46+
def test_table_with_offset():
47+
reader = fastexcel.read_excel(path_for_fixture("sheet-and-table-with-offset.xlsx"))
48+
table = reader.load_table("TableAtD5")
49+
50+
assert table.available_columns() == [
51+
fastexcel.ColumnInfo(
52+
name="Column at D5",
53+
index=0,
54+
absolute_index=3,
55+
dtype="float",
56+
dtype_from="guessed",
57+
column_name_from="provided",
58+
),
59+
fastexcel.ColumnInfo(
60+
name="Column at E5",
61+
index=1,
62+
absolute_index=4,
63+
dtype="float",
64+
dtype_from="guessed",
65+
column_name_from="provided",
66+
),
67+
]

python/tests/test_tables.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,27 +42,31 @@ def test_load_table(path: str) -> None:
4242
fastexcel.ColumnInfo(
4343
name="User Id",
4444
index=0,
45+
absolute_index=0,
4546
dtype="float",
4647
dtype_from="guessed",
4748
column_name_from="provided",
4849
),
4950
fastexcel.ColumnInfo(
5051
name="FirstName",
5152
index=1,
53+
absolute_index=1,
5254
dtype="string",
5355
dtype_from="guessed",
5456
column_name_from="provided",
5557
),
5658
fastexcel.ColumnInfo(
5759
name="LastName",
5860
index=2,
61+
absolute_index=2,
5962
dtype="string",
6063
dtype_from="guessed",
6164
column_name_from="provided",
6265
),
6366
fastexcel.ColumnInfo(
6467
name="Date",
6568
index=3,
69+
absolute_index=3,
6670
dtype="datetime",
6771
dtype_from="guessed",
6872
column_name_from="provided",

src/data/mod.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,14 @@ impl ExcelSheetData<'_> {
8686
}
8787
}
8888
}
89+
90+
pub(crate) fn start(&self) -> Option<(usize, usize)> {
91+
let start = match self {
92+
ExcelSheetData::Owned(range) => range.start(),
93+
ExcelSheetData::Ref(range) => range.start(),
94+
};
95+
start.map(|(r, c)| (r as usize, c as usize))
96+
}
8997
}
9098

9199
impl From<Range<CalData>> for ExcelSheetData<'_> {

0 commit comments

Comments
 (0)