airbytehq · lleadbet · Nov 20, 2025 · Nov 20, 2025
diff --git a/...ions/connectors/source-file/integration_tests/sample_files/test-with-multiple-sheets.xlsx b/...ions/connectors/source-file/integration_tests/sample_files/test-with-multiple-sheets.xlsx
diff --git a/airbyte-integrations/connectors/source-file/source_file/client.py b/airbyte-integrations/connectors/source-file/source_file/client.py
@@ -517,11 +517,21 @@ def openpyxl_chunk_reader(self, file, **kwargs):
         skiprows = kwargs.get("skiprows", 0)
         user_provided_column_names = kwargs.get("names")
         chunk_size = 500
-
+        sheet_name = kwargs.get("sheet_name", None)
         # Load workbook with data-only to avoid loading formulas
         work_book = load_workbook(filename=file, data_only=True, read_only=True)
 
-        for sheetname in work_book.sheetnames:
+        # Iterate through sheets
+        # pandas' read_excel allows specifying sheet_name as str, int, list of str/int or None (all sheets)
+        for idx, sheetname in enumerate(work_book.sheetnames):
+            # Handle sheet_name filtering
+            if sheet_name and isinstance(sheet_name, str) and sheetname != sheet_name:
+                continue
+            elif sheet_name and isinstance(sheet_name, int) and idx != sheet_name:
+                continue
+            elif sheet_name and isinstance(sheet_name, list) and sheetname not in sheet_name and idx not in sheet_name:
+                continue
+
             work_sheet = work_book[sheetname]
             data = list(work_sheet.iter_rows(values_only=True))
 

diff --git a/airbyte-integrations/connectors/source-file/unit_tests/test_client.py b/airbyte-integrations/connectors/source-file/unit_tests/test_client.py
@@ -99,6 +99,30 @@ def test_load_dataframes_xlsx(config, absolute_path, test_files, file_name, shou
         expected = read_excel(f, engine="openpyxl")
         assert read_file.equals(expected)
 
+@pytest.mark.parametrize("file_name, should_raise_error, sheet_name", [
+    ("test-with-multiple-sheets.xlsx", False, "unit_tests"), # single sheet by name
+    ("test-with-multiple-sheets.xlsx", False, 0), # single sheet by index
+    ("test-with-multiple-sheets.xlsx", False, ["unit_tests"]), # list of sheet names
+    ("test-with-multiple-sheets.xlsx", False, [0]) # list of sheet indices
+    ])
+def test_load_dataframes_xlsx_with_sheets(config, absolute_path, test_files, file_name, should_raise_error, sheet_name):
+    """Check that the `sheet_name` reader option selects the same sheet as pandas' read_excel."""
+    config["format"] = "excel"
+    config["reader_options"] = {"sheet_name": sheet_name}
+    client = Client(**config)
+    f = f"{absolute_path}/{test_files}/{file_name}"
+    if should_raise_error:
+        with pytest.raises(AirbyteTracedException):
+            next(client.load_dataframes(fp=f))
+    else:
+        # Only the first frame yielded by the client is compared.
+        read_file = next(client.load_dataframes(fp=f))
+        expected = read_excel(f, engine="openpyxl", sheet_name=sheet_name)
+        if isinstance(sheet_name, list):
+            # read_excel returns a dict keyed by sheet when given a list;
+            # compare against the frame of the first requested sheet.
+            expected = expected[sheet_name[0]]
+        assert read_file.equals(expected)
 
 @pytest.mark.parametrize("file_format, file_path", [("json", "formats/json/demo.json"), ("jsonl", "formats/jsonl/jsonl_nested.jsonl")])
 def test_load_nested_json(client, config, absolute_path, test_files, file_format, file_path):