Skip to content

Commit 9bebff8

Browse files
hhvrc and Copilot authored
fix: Support unicode dotenv files (#393)
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 97a57b3 commit 9bebff8

File tree

1 file changed

+81
-26
lines changed

1 file changed

+81
-26
lines changed

scripts/utils/dotenv.py

Lines changed: 81 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
1-
import os
21
from pathlib import Path
32
from typing import Mapping
43

5-
LOGLEVEL_MAP = {
4+
LOGLEVEL_MAP: dict[str, tuple[int, str]] = {
65
'none': (0, 'LOG_NONE'),
76
'log_none': (0, 'LOG_NONE'),
87
'error': (1, 'LOG_ERROR'),
@@ -20,17 +19,78 @@
2019
}
2120

2221

22+
def read_text_with_fallback(
23+
path: str | Path,
24+
encodings: list[str] | tuple[str, ...] | None = None,
25+
) -> str:
26+
"""
27+
Read a text file using multiple attempted encodings in order.
28+
29+
Handles BOM automatically via utf-8-sig and utf-16 encodings.
30+
Raises a clean, descriptive error if all encodings fail.
31+
"""
32+
33+
if encodings is None:
34+
# You can reorder these depending on what you expect most commonly.
35+
encodings = [
36+
'utf-8-sig', # handles UTF-8 BOM automatically
37+
'utf-16', # auto-detects LE/BE with BOM
38+
'utf-16-le',
39+
'utf-16-be',
40+
'latin-1', # fallback that never fails (for decoding)
41+
]
42+
43+
path = Path(path)
44+
raw = path.read_bytes()
45+
46+
last_error: UnicodeError | None = None
47+
48+
for encoding in encodings:
49+
try:
50+
text = raw.decode(encoding)
51+
return text
52+
except UnicodeError as e:
53+
last_error = e
54+
continue
55+
56+
# If we reach here, all decoding attempts failed (only possible if latin-1 is not in encodings).
57+
raise UnicodeDecodeError(
58+
'multi-encoding-reader',
59+
raw,
60+
0,
61+
len(raw),
62+
f"failed to decode file '{path}' using encodings: {', '.join(encodings)}",
63+
) from last_error
64+
65+
2366
class DotEnv:
2467
def __read_dotenv(self, path: str | Path):
    """
    Parse one dotenv file and merge its ``KEY=VALUE`` pairs into ``self.dotenv_vars``.

    Blank lines, lines starting with ``#``, lines without ``=`` and lines
    with an empty key are ignored. Keys and values are stripped of
    surrounding whitespace, and one pair of matching surrounding quotes
    (single or double) is removed from the value. Keys read later overwrite
    keys already present.

    Args:
        path: The dotenv file to read.
    """
    text_data = read_text_with_fallback(path)

    # Split on \n, \r\n and \r only. str.splitlines() additionally breaks on
    # U+0085, U+2028, U+2029, form feed and similar characters, which would
    # cut a value in two (U+0085 can appear when a legacy-encoded file is
    # decoded through the latin-1 fallback).
    normalized = text_data.replace('\r\n', '\n').replace('\r', '\n')

    for line in normalized.split('\n'):
        line = line.strip()

        # Skip empty lines and comments
        if not line or line.startswith('#'):
            continue

        # Ignore lines that don't contain '=' instead of raising
        if '=' not in line:
            continue

        # Only the first '=' separates key from value; later ones belong to the value.
        key, value = line.split('=', 1)
        key = key.strip()
        value = value.strip()

        # Skip lines with empty keys
        if not key:
            continue

        # Strip optional surrounding quotes (must match)
        if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
            value = value[1:-1]

        self.dotenv_vars[key] = value
3494

3595
def __init__(self, path: str | Path, environment: str):
3696
self.dotenv_vars: dict[str, str] = {}
@@ -45,42 +105,37 @@ def __init__(self, path: str | Path, environment: str):
45105
env_specific_name = '.env.' + environment
46106

47107
# Read the .env files.
48-
for path in paths:
49-
env_file = path / '.env'
108+
for base in paths:
109+
env_file = base / '.env'
50110
if env_file.exists():
51111
self.__read_dotenv(env_file)
52112

53-
env_file = path / env_specific_name
113+
env_file = base / env_specific_name
54114
if env_file.exists():
55115
self.__read_dotenv(env_file)
56116

57-
env_file = path / '.env.local'
117+
env_file = base / '.env.local'
58118
if env_file.exists():
59119
self.__read_dotenv(env_file)
60120

61-
def get_string(self, key: str):
121+
def get_string(self, key: str) -> str | None:
62122
return self.dotenv_vars.get(key)
63123

64124
def get_all_prefixed(self, prefix: str) -> Mapping[str, str]:
65-
result: dict[str, str] = {}
66-
for key, value in self.dotenv_vars.items():
67-
if key.startswith(prefix):
68-
result[key] = value
69-
return result
125+
return {k: v for k, v in self.dotenv_vars.items() if k.startswith(prefix)}
70126

71127
def get_loglevel(self, key: str) -> int | None:
72128
value = self.get_string(key)
73-
if value == None:
129+
if value is None:
74130
return None
75131

76-
value = value.lower()
77-
78-
tup = LOGLEVEL_MAP.get(value)
79-
if tup == None:
80-
raise ValueError('Environment variable ' + key + ' (' + value + ') is not a valid log level.')
132+
normalized = value.strip().lower()
133+
tup = LOGLEVEL_MAP.get(normalized)
134+
if tup is None:
135+
raise ValueError(f'Environment variable {key} ({value}) is not a valid log level.')
81136

82137
return tup[0]
83138

84139

85-
def read(workdir: str, environment_name: str) -> DotEnv:
140+
def read(workdir: str | Path, environment_name: str) -> DotEnv:
    """Build a ``DotEnv`` for ``workdir`` using the given environment name."""
    dotenv = DotEnv(path=workdir, environment=environment_name)
    return dotenv

0 commit comments

Comments
 (0)