Skip to content

Commit 2762519

Browse files
committed
Update dotenv.py
1 parent 25192bd commit 2762519

File tree

1 file changed

+82
-37
lines changed

1 file changed

+82
-37
lines changed

scripts/utils/dotenv.py

Lines changed: 82 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
1-
import os
21
from pathlib import Path
32
from typing import Mapping
43

5-
LOGLEVEL_MAP = {
4+
LOGLEVEL_MAP: dict[str, tuple[int, str]] = {
65
'none': (0, 'LOG_NONE'),
76
'log_none': (0, 'LOG_NONE'),
87
'error': (1, 'LOG_ERROR'),
@@ -20,33 +19,84 @@
2019
}
2120

2221

23-
class DotEnv:
24-
def __read_dotenv(self, path: str | Path):
25-
text_data = ''
22+
def read_text_with_fallback(
23+
path: str | Path,
24+
encodings: list[str] | tuple[str, ...] | None = None,
25+
) -> str:
26+
"""
27+
Read a text file using multiple attempted encodings in order.
28+
29+
Supports BOM-stripping for UTF-8, UTF-16-LE, UTF-16-BE.
30+
Raises a clean, descriptive error if all encodings fail.
31+
"""
32+
33+
if encodings is None:
34+
# You can reorder these depending on what you expect most commonly.
35+
encodings = [
36+
'utf-8-sig', # handles UTF-8 BOM automatically
37+
'utf-16', # auto-detects LE/BE with BOM
38+
'utf-16-le',
39+
'utf-16-be',
40+
'latin-1', # fallback that never fails (for decoding)
41+
]
42+
43+
path = Path(path)
44+
raw = path.read_bytes()
45+
46+
last_error: UnicodeError | None = None
47+
48+
for encoding in encodings:
49+
try:
50+
# Special handling for UTF-16 because utf-16 may incorrectly detect encoding without BOM.
51+
if encoding in ('utf-16', 'utf-16-le', 'utf-16-be'):
52+
try:
53+
text = raw.decode(encoding)
54+
except UnicodeError as e:
55+
last_error = e
56+
continue
57+
else:
58+
text = raw.decode(encoding)
59+
60+
return text
61+
62+
except UnicodeError as e:
63+
last_error = e
64+
continue
65+
66+
# If we reach here, all decoding attempts failed (only possible if latin-1 is not in encodings).
67+
raise UnicodeDecodeError(
68+
'multi-encoding-reader',
69+
raw,
70+
0,
71+
len(raw),
72+
f"failed to decode file '{path}' using encodings: {', '.join(encodings)}",
73+
) from last_error
2674

27-
with open(path, 'rb') as f: # Open the file in binary mode first to detect BOM
28-
raw_data = f.read()
2975

30-
# Check for BOM and strip it if present
31-
if raw_data.startswith(b'\xef\xbb\xbf'): # UTF-8 BOM
32-
text_data = raw_data[3:].decode('utf-8')
33-
elif raw_data.startswith(b'\xff\xfe'): # UTF-16 LE BOM
34-
text_data = raw_data[2:].decode('utf-16le')
35-
elif raw_data.startswith(b'\xfe\xff'): # UTF-16 BE BOM
36-
text_data = raw_data[2:].decode('utf-16be')
76+
class DotEnv:
77+
def __read_dotenv(self, path: str | Path) -> None:
    """
    Parse one dotenv file and merge its entries into ``self.dotenv_vars``.

    Each non-empty, non-comment line of the form ``KEY=VALUE`` is stored;
    later assignments to the same key overwrite earlier ones. Whitespace
    around the key and the value is removed, and one pair of matching
    surrounding quotes (single or double) is stripped from the value.
    Lines without ``=`` or with an empty key are ignored.

    Args:
        path: Location of the dotenv file to read.
    """
    text_data = read_text_with_fallback(path)

    for line in text_data.splitlines():
        line = line.strip()

        # Skip empty lines and comments
        if not line or line.startswith('#'):
            continue

        # Ignore lines that don't contain '=' instead of raising
        key, separator, value = line.partition('=')
        if not separator:
            continue

        key = key.strip()
        if not key:
            # '=value' has no variable name to store the value under.
            continue

        value = value.strip()

        # Strip optional surrounding quotes. The length check keeps a value
        # that is a single lone quote character from collapsing to ''.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
            value = value[1:-1]

        self.dotenv_vars[key] = value
50100

51101
def __init__(self, path: str | Path, environment: str):
52102
self.dotenv_vars: dict[str, str] = {}
@@ -61,42 +111,37 @@ def __init__(self, path: str | Path, environment: str):
61111
env_specific_name = '.env.' + environment
62112

63113
# Read the .env files.
64-
for path in paths:
65-
env_file = path / '.env'
114+
for base in paths:
115+
env_file = base / '.env'
66116
if env_file.exists():
67117
self.__read_dotenv(env_file)
68118

69-
env_file = path / env_specific_name
119+
env_file = base / env_specific_name
70120
if env_file.exists():
71121
self.__read_dotenv(env_file)
72122

73-
env_file = path / '.env.local'
123+
env_file = base / '.env.local'
74124
if env_file.exists():
75125
self.__read_dotenv(env_file)
76126

77-
def get_string(self, key: str):
127+
def get_string(self, key: str) -> str | None:
78128
return self.dotenv_vars.get(key)
79129

80130
def get_all_prefixed(self, prefix: str) -> Mapping[str, str]:
81-
result: dict[str, str] = {}
82-
for key, value in self.dotenv_vars.items():
83-
if key.startswith(prefix):
84-
result[key] = value
85-
return result
131+
return {k: v for k, v in self.dotenv_vars.items() if k.startswith(prefix)}
86132

87133
def get_loglevel(self, key: str) -> int | None:
    """
    Resolve the variable ``key`` to a numeric log level.

    The stored text is trimmed and lower-cased before being looked up in
    ``LOGLEVEL_MAP``; the first element of the matching entry is returned.

    Returns:
        The numeric level, or ``None`` when ``key`` is not set.

    Raises:
        ValueError: If the variable is set but is not a known level name.
    """
    value = self.get_string(key)
    if value is None:
        return None

    entry = LOGLEVEL_MAP.get(value.strip().lower())
    if entry is not None:
        return entry[0]

    raise ValueError(f'Environment variable {key} ({value}) is not a valid log level.')
99144

100145

101-
def read(workdir: str, environment_name: str) -> DotEnv:
146+
def read(workdir: str | Path, environment_name: str) -> DotEnv:
    """Construct a ``DotEnv`` from ``workdir`` for the environment ``environment_name``."""
    loaded = DotEnv(workdir, environment=environment_name)
    return loaded

0 commit comments

Comments
 (0)