diff --git a/NOTICE b/NOTICE
index 7f8d102..3b91b93 100644
--- a/NOTICE
+++ b/NOTICE
@@ -2,8 +2,17 @@ Copyright (2023) Databricks, Inc.
 
 This Software includes software developed at Databricks (https://www.databricks.com/) and its use is subject to the included LICENSE file.
 
+____________________
 This Software contains code from the following open source projects, licensed under the Apache 2.0 license:
 
 Databricks SDK for Python - https://github.com/databricks/databricks-sdk-py
 Copyright 2023 Databricks, Inc.  All rights reserved.
 License - https://github.com/databricks/databricks-sdk-py/blob/main/LICENSE
+
+
+____________________
+This Software contains code from the following open source projects, licensed under the GNU Lesser General Public License (LGPL) v2.1:
+
+chardet - https://github.com/chardet/chardet
+Copyright 2005-2024 Mark Pilgrim, Maintainer: Dan Blanchard
+License - https://github.com/chardet/chardet/blob/main/LICENSE
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index d0fc7e1..da57d7d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,7 +19,10 @@ classifiers = [
 dependencies = ["databricks-sdk>=0.16.0"]
 
 [project.optional-dependencies]
-yaml = ["PyYAML>=6.0.0,<7.0.0"]
+yaml = [
+    "PyYAML>=6.0.0,<7.0.0",
+    "chardet>=5.1.0,<6.0.0",
+]
 
 [project.urls]
 Issues = "https://github.com/databrickslabs/blueprint/issues"
diff --git a/src/databricks/labs/blueprint/paths.py b/src/databricks/labs/blueprint/paths.py
index 1cf29f2..5c85837 100644
--- a/src/databricks/labs/blueprint/paths.py
+++ b/src/databricks/labs/blueprint/paths.py
@@ -18,6 +18,7 @@
 from typing import BinaryIO, Literal, NoReturn, TextIO, TypeVar
 from urllib.parse import quote_from_bytes as urlquote_from_bytes
 
+import chardet
 from databricks.sdk import WorkspaceClient
 from databricks.sdk.errors import DatabricksError, ResourceDoesNotExist
 from databricks.sdk.service.files import FileInfo
@@ -1150,6 +1151,7 @@ def decode_with_bom(
           a text-based IO wrapper that will decode the underlying binary-mode file as text.
     """
     use_encoding: str | None
+    _chardet_confidence_threshold: float = 0.6
     if encoding is not None:
         use_encoding = encoding
     else:
@@ -1157,7 +1159,12 @@ def decode_with_bom(
         if use_encoding is None and detect_xml:
             use_encoding = _detect_encoding_xml(file, preserve_position=True)
     if use_encoding is None:
-        use_encoding = locale.getpreferredencoding()
+        result = chardet.detect(file.read())
+        use_encoding = result["encoding"] or locale.getpreferredencoding()
+        if result["confidence"] < _chardet_confidence_threshold:
+            logger.debug(f"Low confidence ({result['confidence']}) in detected encoding: {result}")
+            use_encoding = locale.getpreferredencoding()
+        file.seek(0)
     return io.TextIOWrapper(file, encoding=use_encoding, errors=errors, newline=newline)
 
 
diff --git a/tests/unit/test_paths.py b/tests/unit/test_paths.py
index 82c7b51..6c4ab9b 100644
--- a/tests/unit/test_paths.py
+++ b/tests/unit/test_paths.py
@@ -1128,6 +1128,7 @@ def test_read_xml_file_default_utf8(tmp_path: Path, monkeypatch) -> None:
     path.write_text(example, encoding="utf-8")
 
     # Verify the monkey-patching means we're not defaulting to UTF-8.
+    # With chardet this would likely work, except that the confidence score for this example (0.506) is below the 0.6 threshold, so the preferred locale encoding is still used.
     monkeypatch.setattr(locale, "getpreferredencoding", lambda: "Windows-1252")
     assert locale.getpreferredencoding() != "UTF-8"
     assert read_text(path, detect_xml=False) != example