1- import os
21from pathlib import Path
32from typing import Mapping
43
5- LOGLEVEL_MAP = {
4+ LOGLEVEL_MAP : dict [ str , tuple [ int , str ]] = {
65 'none' : (0 , 'LOG_NONE' ),
76 'log_none' : (0 , 'LOG_NONE' ),
87 'error' : (1 , 'LOG_ERROR' ),
2019}
2120
2221
23- class DotEnv :
24- def __read_dotenv (self , path : str | Path ):
25- text_data = ''
22+ def read_text_with_fallback (
23+ path : str | Path ,
24+ encodings : list [str ] | tuple [str , ...] | None = None ,
25+ ) -> str :
26+ """
27+ Read a text file using multiple attempted encodings in order.
28+
29+ Supports BOM-stripping for UTF-8, UTF-16-LE, UTF-16-BE.
30+ Raises a clean, descriptive error if all encodings fail.
31+ """
32+
33+ if encodings is None :
34+ # You can reorder these depending on what you expect most commonly.
35+ encodings = [
36+ 'utf-8-sig' , # handles UTF-8 BOM automatically
37+ 'utf-16' , # auto-detects LE/BE with BOM
38+ 'utf-16-le' ,
39+ 'utf-16-be' ,
40+ 'latin-1' , # fallback that never fails (for decoding)
41+ ]
42+
43+ path = Path (path )
44+ raw = path .read_bytes ()
45+
46+ last_error : UnicodeError | None = None
47+
48+ for encoding in encodings :
49+ try :
50+ # Special handling for UTF-16 because utf-16 may incorrectly detect encoding without BOM.
51+ if encoding in ('utf-16' , 'utf-16-le' , 'utf-16-be' ):
52+ try :
53+ text = raw .decode (encoding )
54+ except UnicodeError as e :
55+ last_error = e
56+ continue
57+ else :
58+ text = raw .decode (encoding )
59+
60+ return text
61+
62+ except UnicodeError as e :
63+ last_error = e
64+ continue
65+
66+ # If we reach here, all decoding attempts failed (only possible if latin-1 is not in encodings).
67+ raise UnicodeDecodeError (
68+ 'multi-encoding-reader' ,
69+ raw ,
70+ 0 ,
71+ len (raw ),
72+ f"failed to decode file '{ path } ' using encodings: { ', ' .join (encodings )} " ,
73+ ) from last_error
2674
27- with open (path , 'rb' ) as f : # Open the file in binary mode first to detect BOM
28- raw_data = f .read ()
2975
30- # Check for BOM and strip it if present
31- if raw_data .startswith (b'\xef \xbb \xbf ' ): # UTF-8 BOM
32- text_data = raw_data [3 :].decode ('utf-8' )
33- elif raw_data .startswith (b'\xff \xfe ' ): # UTF-16 LE BOM
34- text_data = raw_data [2 :].decode ('utf-16le' )
35- elif raw_data .startswith (b'\xfe \xff ' ): # UTF-16 BE BOM
36- text_data = raw_data [2 :].decode ('utf-16be' )
76+ class DotEnv :
def __read_dotenv(self, path: str | Path) -> None:
    """Parse one dotenv file and merge its entries into ``self.dotenv_vars``.

    Each non-empty line that is not a ``#`` comment is split at the first
    ``=`` into a key and a value; both are stripped of surrounding
    whitespace. One pair of matching surrounding quotes (single or double)
    is removed from the value. Lines without ``=`` or with an empty key are
    ignored. Entries read later overwrite earlier ones with the same key.

    Args:
        path: Location of the dotenv file; decoded via
            ``read_text_with_fallback``.
    """
    text_data = read_text_with_fallback(path)

    for line in text_data.splitlines():
        line = line.strip()

        # Skip empty lines and comments
        if not line or line.startswith('#'):
            continue

        # Ignore lines that don't contain '=' instead of raising
        key, separator, value = line.partition('=')
        if not separator:
            continue

        key = key.strip()
        if not key:
            # "=value" has no variable name to store the value under.
            continue

        value = value.strip()

        # Strip optional surrounding quotes. The length check keeps a lone
        # quote character from matching as both the opening and closing quote.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
            value = value[1:-1]

        self.dotenv_vars[key] = value
50100
51101 def __init__ (self , path : str | Path , environment : str ):
52102 self .dotenv_vars : dict [str , str ] = {}
@@ -61,42 +111,37 @@ def __init__(self, path: str | Path, environment: str):
61111 env_specific_name = '.env.' + environment
62112
63113 # Read the .env files.
64- for path in paths :
65- env_file = path / '.env'
114+ for base in paths :
115+ env_file = base / '.env'
66116 if env_file .exists ():
67117 self .__read_dotenv (env_file )
68118
69- env_file = path / env_specific_name
119+ env_file = base / env_specific_name
70120 if env_file .exists ():
71121 self .__read_dotenv (env_file )
72122
73- env_file = path / '.env.local'
123+ env_file = base / '.env.local'
74124 if env_file .exists ():
75125 self .__read_dotenv (env_file )
76126
77- def get_string (self , key : str ):
127+ def get_string (self , key : str ) -> str | None :
78128 return self .dotenv_vars .get (key )
79129
def get_all_prefixed(self, prefix: str) -> Mapping[str, str]:
    """Return a new dict of every loaded variable whose name starts with *prefix*.

    Keys are returned unchanged (the prefix is not removed).
    """
    matches: dict[str, str] = {}
    for name, val in self.dotenv_vars.items():
        if not name.startswith(prefix):
            continue
        matches[name] = val
    return matches
86132
def get_loglevel(self, key: str) -> int | None:
    """Look up *key* and translate its value into a numeric log level.

    The value is stripped and lower-cased before it is looked up in
    ``LOGLEVEL_MAP``; the first element of the matching tuple is returned.

    Returns:
        The numeric level, or None when *key* is not set.

    Raises:
        ValueError: If the value is not a key of ``LOGLEVEL_MAP``.
    """
    raw_value = self.get_string(key)
    if raw_value is None:
        return None

    entry = LOGLEVEL_MAP.get(raw_value.strip().lower())
    if entry is not None:
        return entry[0]

    # The message reports the value as written in the file, not the normalized form.
    raise ValueError(f'Environment variable {key} ({raw_value}) is not a valid log level.')
99144
100145
def read(workdir: str | Path, environment_name: str) -> DotEnv:
    """Create a ``DotEnv`` for *workdir* and the environment called *environment_name*."""
    dotenv = DotEnv(workdir, environment=environment_name)
    return dotenv
0 commit comments