feat: do not require full range for usecols (#368)

PrettyWood · web-flow · commit f0fdf3fef342 · 2025-08-28T16:58:11.000+02:00
diff --git a/python/fastexcel/__init__.py b/python/fastexcel/__init__.py
@@ -327,8 +327,12 @@ def load_sheet(
                             - A list of strings and ints, the column names and/or indices
                               (starting at 0)
                             - A string, a comma separated list of Excel column letters and column
-                              ranges (e.g. `“A:E”` or `“A,C,E:F”`, which would result in
-                              `A,B,C,D,E` and `A,C,E,F`)
+                              ranges (e.g. `"A:E"` or `"A,C,E:F"`, which would result in
+                              `A,B,C,D,E` and `A,C,E,F`). Also supports open-ended ranges
+                              (e.g. `"B:"` to select all columns from B onwards) and from-beginning
+                              ranges (e.g. `":C"` to select columns from A to C). These can be
+                              combined for "except" patterns (e.g. `":C,E:"` to select everything
+                              except column D)
                             - A callable, a function that takes a column and returns a boolean
                               indicating whether the column should be used
         :param dtypes: An optional dtype (for all columns)
@@ -443,8 +447,12 @@ def load_table(
                             - A list of strings and ints, the column names and/or indices
                               (starting at 0)
                             - A string, a comma separated list of Excel column letters and column
-                              ranges (e.g. `“A:E”` or `“A,C,E:F”`, which would result in
-                              `A,B,C,D,E` and `A,C,E,F`)
+                              ranges (e.g. `"A:E"` or `"A,C,E:F"`, which would result in
+                              `A,B,C,D,E` and `A,C,E,F`). Also supports open-ended ranges
+                              (e.g. `"B:"` to select all columns from B onwards) and from-beginning
+                              ranges (e.g. `":C"` to select columns from A to C). These can be
+                              combined for "except" patterns (e.g. `":C,E:"` to select everything
+                              except column D)
                             - A callable, a function that takes a column and returns a boolean
                               indicating whether the column should be used
         :param dtypes: An optional dtype (for all columns)
diff --git a/python/tests/test_column_selection.py b/python/tests/test_column_selection.py
@@ -303,6 +303,138 @@ def test_single_sheet_with_unnamed_columns_and_str_range(
     pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
 
 
+def test_single_sheet_with_unnamed_columns_and_open_ended_range(
+    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
+    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],
+    sheet_with_unnamed_columns_expected_column_info: list[fastexcel.ColumnInfo],
+) -> None:
+    # Test "B:" (should get columns B, C, D, E - indices 1, 2, 3, 4)
+    use_columns_str = "B:"
+    expected = {
+        k: v
+        for k, v in single_sheet_with_unnamed_columns_expected.items()
+        if k in ["__UNNAMED__1", "col3", "__UNNAMED__3", "col5"]
+    }
+    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
+        "With unnamed columns", use_columns=use_columns_str
+    )
+    assert sheet.selected_columns == sheet_with_unnamed_columns_expected_column_info[1:]
+    assert sheet.available_columns() == sheet_with_unnamed_columns_expected_column_info
+    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
+    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
+
+
+def test_single_sheet_with_unnamed_columns_and_open_ended_range_from_start(
+    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
+    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],
+    sheet_with_unnamed_columns_expected_column_info: list[fastexcel.ColumnInfo],
+) -> None:
+    # Test "A:" (should get all columns)
+    use_columns_str = "A:"
+    expected = single_sheet_with_unnamed_columns_expected
+    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
+        "With unnamed columns", use_columns=use_columns_str
+    )
+    assert sheet.selected_columns == sheet_with_unnamed_columns_expected_column_info
+    assert sheet.available_columns() == sheet_with_unnamed_columns_expected_column_info
+    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
+    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
+
+
+def test_single_sheet_with_unnamed_columns_and_mixed_open_ended_range(
+    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
+    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],
+    sheet_with_unnamed_columns_expected_column_info: list[fastexcel.ColumnInfo],
+) -> None:
+    # Test "A,C:" (should get column A and columns from C onwards - indices 0, 2, 3, 4)
+    use_columns_str = "A,C:"
+    expected = {
+        k: v
+        for k, v in single_sheet_with_unnamed_columns_expected.items()
+        if k in ["col1", "col3", "__UNNAMED__3", "col5"]
+    }
+    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
+        "With unnamed columns", use_columns=use_columns_str
+    )
+    expected_selected_cols = [
+        sheet_with_unnamed_columns_expected_column_info[0]
+    ] + sheet_with_unnamed_columns_expected_column_info[2:]
+    assert sheet.selected_columns == expected_selected_cols
+    assert sheet.available_columns() == sheet_with_unnamed_columns_expected_column_info
+    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
+    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
+
+
+def test_single_sheet_with_unnamed_columns_and_from_beginning_range(
+    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
+    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],
+    sheet_with_unnamed_columns_expected_column_info: list[fastexcel.ColumnInfo],
+) -> None:
+    # Test :C (should get columns A, B, C - indices 0, 1, 2)
+    use_columns_str = ":C"
+    expected = {
+        k: v
+        for k, v in single_sheet_with_unnamed_columns_expected.items()
+        if k in ["col1", "__UNNAMED__1", "col3"]
+    }
+    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
+        "With unnamed columns", use_columns=use_columns_str
+    )
+    assert sheet.selected_columns == sheet_with_unnamed_columns_expected_column_info[:3]
+    assert sheet.available_columns() == sheet_with_unnamed_columns_expected_column_info
+    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
+    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
+
+
+def test_single_sheet_with_unnamed_columns_and_from_beginning_range_single_column(
+    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
+    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],
+    sheet_with_unnamed_columns_expected_column_info: list[fastexcel.ColumnInfo],
+) -> None:
+    # Test :A (should get only column A - index 0)
+    use_columns_str = ":A"
+    expected = {
+        k: v for k, v in single_sheet_with_unnamed_columns_expected.items() if k in ["col1"]
+    }
+    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
+        "With unnamed columns", use_columns=use_columns_str
+    )
+    assert sheet.selected_columns == [sheet_with_unnamed_columns_expected_column_info[0]]
+    assert sheet.available_columns() == sheet_with_unnamed_columns_expected_column_info
+    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
+    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
+
+
+def test_single_sheet_with_unnamed_columns_and_complex_mixed_pattern(
+    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
+    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],
+    sheet_with_unnamed_columns_expected_column_info: list[fastexcel.ColumnInfo],
+) -> None:
+    # Test "A,:B,D,E:" (A, then A,B from ":B" (duplicate A dropped), D, and E from "E:")
+    # This effectively becomes A,B,D,E (columns 0,1,3,4)
+    use_columns_str = "A,:B,D,E:"
+    expected = {
+        k: v
+        for k, v in single_sheet_with_unnamed_columns_expected.items()
+        if k in ["col1", "__UNNAMED__1", "__UNNAMED__3", "col5"]
+    }
+    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
+        "With unnamed columns", use_columns=use_columns_str
+    )
+    # Expected: column A, then A,B (from ":B"), D, and E (from "E:")
+    # After deduplication: 0,1,3,4
+    expected_selected_cols = [
+        sheet_with_unnamed_columns_expected_column_info[0],  # A
+        sheet_with_unnamed_columns_expected_column_info[1],  # B
+        sheet_with_unnamed_columns_expected_column_info[3],  # D
+        sheet_with_unnamed_columns_expected_column_info[4],  # E
+    ]
+    assert sheet.selected_columns == expected_selected_cols
+    assert sheet.available_columns() == sheet_with_unnamed_columns_expected_column_info
+    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
+    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
+
+
 def test_single_sheet_invalid_column_indices_negative_integer(
     excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
 ) -> None:
diff --git a/src/types/python/excelsheet/mod.rs b/src/types/python/excelsheet/mod.rs