airbytehq · agarctfi · Nov 20, 2025 · Nov 20, 2025 · github-actions · Nov 20, 2025
diff --git a/airbyte-integrations/connectors/source-file/metadata.yaml b/airbyte-integrations/connectors/source-file/metadata.yaml
@@ -10,7 +10,7 @@ data:
   connectorSubtype: file
   connectorType: source
   definitionId: 778daa7c-feaf-4db6-96f3-70fd645acc77
-  dockerImageTag: 0.6.0
+  dockerImageTag: 0.7.0
   dockerRepository: airbyte/source-file
   documentationUrl: https://docs.airbyte.com/integrations/sources/file
   githubIssueLabel: source-file

diff --git a/airbyte-integrations/connectors/source-file/pyproject.toml b/airbyte-integrations/connectors/source-file/pyproject.toml
@@ -3,7 +3,7 @@ requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
-version = "0.6.0"
+version = "0.7.0"
 name = "source-file"
 description = "Source implementation for File"
 authors = ["Airbyte <[email protected]>"]

diff --git a/airbyte-integrations/connectors/source-file/source_file/client.py b/airbyte-integrations/connectors/source-file/source_file/client.py
@@ -521,7 +521,49 @@ def openpyxl_chunk_reader(self, file, **kwargs):
         # Load workbook with data-only to avoid loading formulas
         work_book = load_workbook(filename=file, data_only=True, read_only=True)
 
-        for sheetname in work_book.sheetnames:
+        sheet_name_option = kwargs.get("sheet_name")
+        if sheet_name_option is None:
+            sheet_name_option = kwargs.get("sheet_names")
+
+        if sheet_name_option is None:
+            target_sheets = work_book.sheetnames
+        else:
+            if isinstance(sheet_name_option, (list, tuple, set)):
+                requested_sheets = list(sheet_name_option)
+            else:
+                requested_sheets = [sheet_name_option]
+
+            normalized_sheets = []
+            for requested_sheet in requested_sheets:
+                if isinstance(requested_sheet, int):
+                    try:
+                        normalized_sheets.append(work_book.sheetnames[requested_sheet])
+                    except IndexError as err:
+                        raise AirbyteTracedException(
+                            message="Sheet index is out of range for the provided Excel file.",
+                            internal_message=f"Sheet index {requested_sheet} does not exist.",
+                            failure_type=FailureType.config_error,
+                        ) from err
+                elif isinstance(requested_sheet, str):
+                    normalized_sheets.append(requested_sheet)
+                else:
+                    raise AirbyteTracedException(
+                        message="Invalid sheet_name reader option provided.",
+                        internal_message="sheet_name must be a string, integer index, or a list of those values.",
+                        failure_type=FailureType.config_error,
+                    )
+
+            missing_sheets = [sheet for sheet in normalized_sheets if sheet not in work_book.sheetnames]
+            if missing_sheets:
+                raise AirbyteTracedException(
+                    message="One or more sheet names were not found in the Excel file.",
+                    internal_message=f"Missing sheets: {missing_sheets}",
+                    failure_type=FailureType.config_error,
+                )
+
+            target_sheets = normalized_sheets
+
+        for sheetname in target_sheets:
             work_sheet = work_book[sheetname]
             data = list(work_sheet.iter_rows(values_only=True))
 

diff --git a/airbyte-integrations/connectors/source-file/unit_tests/test_client.py b/airbyte-integrations/connectors/source-file/unit_tests/test_client.py
@@ -288,6 +288,16 @@ def generate_excel_file(data):
     return tmp_file
 
 
+def generate_multi_sheet_excel_file(sheet_data):
+    """Helper to generate an Excel file with multiple sheets."""
+    tmp_file = NamedTemporaryFile(suffix=".xlsx", delete=False)  # delete=False: the file stays on disk after the handle is closed
+    with pd.ExcelWriter(tmp_file.name, engine="openpyxl") as writer:  # the workbook is written via the path, not the open handle
+        for sheet_name, data in sheet_data.items():  # one worksheet per mapping entry, in mapping order
+            pd.DataFrame(data).to_excel(writer, index=False, header=False, sheet_name=sheet_name)  # rows written as-is: no index column, no header row
+    tmp_file.seek(0)  # rewind the still-open handle before handing it to the caller
+    return tmp_file
+
+
 def test_excel_reader_option_names(config):
     """
     Test the 'names' option for the Excel reader.
@@ -344,3 +354,57 @@ def test_excel_reader_option_header(config):
         read_file = next(client.load_dataframes(fp=tmp.name))
         assert isinstance(read_file, pd.DataFrame)
         assert read_file.to_dict(orient="records") == expected_data
+
+
+def test_excel_reader_option_sheet_name(config):
+    config["format"] = "excel"
+    config["reader_options"] = {"sheet_name": "SheetB"}  # request a single worksheet by name
+    client = Client(**config)
+
+    sheet_data = {
+        "SheetA": [["A1", "A2"], ["Value1", "Value2"]],
+        "SheetB": [["B1", "B2"], ["Keep1", "Keep2"]],
+    }
+    expected_data = [{"B1": "Keep1", "B2": "Keep2"}]  # only SheetB's data row, keyed by SheetB's first row; nothing from SheetA
+
+    with generate_multi_sheet_excel_file(sheet_data) as tmp:
+        records = []
+        for df_chunk in client.load_dataframes(fp=tmp.name):  # gather records from every yielded chunk
+            records.extend(df_chunk.to_dict(orient="records"))
+        assert records == expected_data
+
+
+def test_excel_reader_option_sheet_name_list(config):
+    config["format"] = "excel"
+    config["reader_options"] = {"sheet_name": ["SheetB", "SheetC"]}  # request several worksheets with a list of names
+    client = Client(**config)
+
+    sheet_data = {
+        "SheetA": [["A1", "A2"], ["Value1", "Value2"]],
+        "SheetB": [["B1", "B2"], ["Keep1", "Keep2"]],
+        "SheetC": [["C1", "C2"], ["Keep3", "Keep4"]],
+    }
+    expected_data = [  # SheetA is excluded; the SheetB record precedes the SheetC record
+        {"B1": "Keep1", "B2": "Keep2"},
+        {"C1": "Keep3", "C2": "Keep4"},
+    ]
+
+    with generate_multi_sheet_excel_file(sheet_data) as tmp:
+        records = []
+        for df_chunk in client.load_dataframes(fp=tmp.name):  # gather records from every yielded chunk
+            records.extend(df_chunk.to_dict(orient="records"))
+        assert records == expected_data
+
+
+def test_excel_reader_option_sheet_name_missing(config):
+    config["format"] = "excel"
+    config["reader_options"] = {"sheet_name": "Missing"}  # no worksheet with this name is written below
+    client = Client(**config)
+
+    sheet_data = {
+        "SheetA": [["A1", "A2"], ["Value1", "Value2"]],
+    }
+
+    with generate_multi_sheet_excel_file(sheet_data) as tmp:
+        with pytest.raises(AirbyteTracedException):  # the error must surface when the first chunk is requested
+            next(client.load_dataframes(fp=tmp.name))
diff --git a/docs/integrations/sources/file.md b/docs/integrations/sources/file.md
@@ -186,6 +186,10 @@ For example, you can use the `{"orient" : "records"}` to change how orientation
 
 If you need to read Excel Binary Workbook, please specify `excel_binary` format in `File Format` select.
 
+#### Excel-specific reader options
+
+- `sheet_name` (alias: `sheet_names`, used only when `sheet_name` is not set): Limit the sync to one or more worksheets inside the workbook. Accepts a single sheet name (string), a zero-based sheet index (integer), or an array mixing names and indexes (for example, `{"sheet_name": ["Finance", 2]}`). When omitted, every sheet in the file is read sequentially and appended into the same destination table. If a requested sheet name does not exist or a sheet index is out of range, the connector fails with a configuration error.
+
 :::caution
 This connector does not support syncing unstructured data files such as raw text, audio, or videos.
 :::
@@ -298,6 +302,7 @@ In order to read large files from a remote location, this connector uses the [sm
 
 | Version    | Date       | Pull Request                                             | Subject                                                                                                                                                                |
 |:-----------|:-----------|:---------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| 0.7.0      | 2025-11-20 | [69774](https://github.com/airbytehq/airbyte/pull/69774) | Add the `sheet_name` (or `sheet_names`) Excel reader option in `reader_options` to select which worksheets are read                                                                                                |
 | 0.6.0      | 2025-11-03 | [69148](https://github.com/airbytehq/airbyte/pull/69148) | Promoting release candidate 0.6.0-rc.1 to a main version.                                                                                                                                             |
 | 0.6.0-rc.1 | 2025-10-22 | [68588](https://github.com/airbytehq/airbyte/pull/68588) | Update to airbyte-cdk ^v7                                                                                                                                              |
 | 0.5.46     | 2025-10-21 | [68484](https://github.com/airbytehq/airbyte/pull/68484) | Update dependencies                                                                                                                                                    |