1111of the HostsEntry class.
1212"""
1313
14+ from __future__ import unicode_literals
1415import sys
1516
1617try :
2122 dedupe_list )
2223from python_hosts .exception import (InvalidIPv6Address , InvalidIPv4Address ,
2324 UnableToWriteHosts )
25+ from python_hosts .unicode_utils import (ensure_text , ensure_binary , safe_open ,
26+ normalize_hostname , normalize_comment ,
27+ text_type , string_types )
2428
2529
2630class HostsEntry (object ):
@@ -61,10 +65,16 @@ def __init__(self,
6165 if not is_ipv6 (address ):
6266 raise InvalidIPv6Address ()
6367
64- self .entry_type = entry_type
65- self .address = address
66- self .comment = comment
67- self .names = names
68+ # Normalize all string inputs to Unicode
69+ self .entry_type = ensure_text (entry_type ) if entry_type else entry_type
70+ self .address = ensure_text (address ) if address else address
71+ self .comment = normalize_comment (comment ) if comment else comment
72+
73+ # Normalize hostnames to Unicode and handle IDN
74+ if names :
75+ self .names = [normalize_hostname (name ) for name in names ]
76+ else :
77+ self .names = names
6878
6979 def is_real_entry (self ):
7080 return self .entry_type in ('ipv4' , 'ipv6' )
@@ -98,16 +108,18 @@ def get_entry_type(hosts_entry=None):
98108 :param hosts_entry: A line from the hosts file
99109 :return: 'comment' | 'blank' | 'ipv4' | 'ipv6'
100110 """
101- if hosts_entry and isinstance (hosts_entry , str ):
111+ if hosts_entry and isinstance (hosts_entry , string_types ):
112+ # Ensure the entry is Unicode text
113+ hosts_entry = ensure_text (hosts_entry )
102114 entry = hosts_entry .strip ()
103115 if not entry or not entry [0 ] or entry [0 ] == "\n " :
104116 return 'blank'
105117 if entry [0 ] == "#" :
106118 return 'comment'
107119 entry_chunks = entry .split ()
108- if is_ipv6 (entry_chunks [0 ]):
120+ if entry_chunks and is_ipv6 (entry_chunks [0 ]):
109121 return 'ipv6'
110- if is_ipv4 (entry_chunks [0 ]):
122+ if entry_chunks and is_ipv4 (entry_chunks [0 ]):
111123 return 'ipv4'
112124
113125 @staticmethod
@@ -117,14 +129,17 @@ def str_to_hostentry(entry):
117129 :param entry: A line from the hosts file
118130 :return: An instance of HostsEntry
119131 """
132+ # Ensure the entry is Unicode text
133+ entry = ensure_text (entry )
134+
120135 split_line = entry .split ('#' , 1 )
121136 line = split_line [0 ].strip ().split ()
122137 inline_comment = split_line [1 ].strip () if len (split_line ) == 2 else None
123138
124- if is_ipv4 (line [0 ]) and valid_hostnames (line [1 :]):
139+ if line and is_ipv4 (line [0 ]) and valid_hostnames (line [1 :]):
125140 return HostsEntry ('ipv4' , address = line [0 ], names = line [1 :],
126141 comment = inline_comment )
127- if is_ipv6 (line [0 ]) and valid_hostnames (line [1 :]):
142+ if line and is_ipv6 (line [0 ]) and valid_hostnames (line [1 :]):
128143 return HostsEntry ('ipv6' , address = line [0 ], names = line [1 :],
129144 comment = inline_comment )
130145 return False
@@ -206,7 +221,7 @@ def write(self, path=None, mode='w'):
206221 }
207222 output_file_path = path if path else self .path
208223 try :
209- with open (output_file_path , mode ) as hosts_file :
224+ with safe_open (output_file_path , mode , encoding = 'utf-8' ) as hosts_file :
210225 for entry in self .entries :
211226 if entry .entry_type == 'comment' :
212227 hosts_file .write (entry .comment + "\n " )
@@ -303,13 +318,19 @@ def import_url(self, url=None, force=None):
303318 :param url: The URL of where to download a hosts file
304319 :return: Counts reflecting the attempted additions
305320 """
306- file_contents = self .get_hosts_by_url (url = url ).decode ('utf-8' )
321+ file_contents = self .get_hosts_by_url (url = url )
322+ # Handle both Python 2 and 3 URL content
323+ if hasattr (file_contents , 'decode' ):
324+ file_contents = file_contents .decode ('utf-8' )
325+ file_contents = ensure_text (file_contents )
307326 file_contents = file_contents .rstrip ().replace ('^M' , '\n ' )
308327 file_contents = file_contents .rstrip ().replace ('\r \n ' , '\n ' )
309328 lines = file_contents .split ('\n ' )
310329 skipped = 0
311330 import_entries = []
312331 for line in lines :
332+ # Ensure each line is Unicode text
333+ line = ensure_text (line )
313334 stripped_entry = line .strip ()
314335 if (not stripped_entry ) or (stripped_entry .startswith ('#' )):
315336 skipped += 1
@@ -337,8 +358,10 @@ def import_file(self, import_file_path=None):
337358 invalid_count = 0
338359 if is_readable (import_file_path ):
339360 import_entries = []
340- with open (import_file_path , 'r' ) as infile :
361+ with safe_open (import_file_path , 'r' , encoding = 'utf-8 ' ) as infile :
341362 for line in infile :
363+ # Ensure line is Unicode text
364+ line = ensure_text (line )
342365 stripped_entry = line .strip ()
343366 if (not stripped_entry ) or (stripped_entry .startswith ('#' )):
344367 skipped += 1
@@ -463,9 +486,11 @@ def populate_entries(self):
463486 :return: None
464487 """
465488 try :
466- with open (self .path , 'r' ) as hosts_file :
489+ with safe_open (self .path , 'r' , encoding = 'utf-8 ' ) as hosts_file :
467490 hosts_entries = [line for line in hosts_file ]
468491 for hosts_entry in hosts_entries :
492+ # Ensure line is Unicode text
493+ hosts_entry = ensure_text (hosts_entry )
469494 entry_type = HostsEntry .get_entry_type (hosts_entry )
470495 if entry_type == "comment" :
471496 hosts_entry = hosts_entry .replace ("\r " , "" )
0 commit comments