Skip to content

Commit e2195b7

Browse files
committed
Add py2/py3 compat methods for ensuring an encoded string
1 parent 37da3a4 commit e2195b7

File tree

1 file changed

+31
-3
lines changed

1 file changed

+31
-3
lines changed

AdvancedHTMLParser/compat.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@
1010

1111
import sys
1212

13-
__all__ = ('STRING_TYPES', 'RAW_STRING_TYPE', 'ALL_STRING_TYPES')
13+
__all__ = ('STRING_TYPES', 'RAW_STRING_TYPE', 'ALL_STRING_TYPES', 'ensureStringEncoded')
1414

1515
if sys.version_info.major < 3:
16-
16+
1717
# STRING_TYPES - Types that represent strings ("printable")
1818
STRING_TYPES = (str, unicode)
1919

@@ -23,8 +23,11 @@
2323
# ALL_STRING_TYPES - All string-like types, encoded or otherwise
2424
ALL_STRING_TYPES = (str, unicode)
2525

26+
# DECODED_STR_TYPE - String type that has been decoded
27+
DECODED_STR_TYPE = unicode
28+
2629
else:
27-
30+
2831
# STRING_TYPES - Types that represent strings ("printable")
2932
STRING_TYPES = (str, )
3033

@@ -34,4 +37,29 @@
3437
# ALL_STRING_TYPES - All string-like types, encoded or otherwise
3538
ALL_STRING_TYPES = (str, bytes)
3639

40+
# DECODED_STR_TYPE - String type that has been decoded
41+
DECODED_STR_TYPE = str
42+
43+
44+
def ensureStringEncoded(theString, encoding='utf-8'):
    '''
        ensureStringEncoded - Ensure we have the encoded type for a given string

            @param theString <str/unicode/bytes> - A string-like object

            @param encoding <str> Default 'utf-8' - The encoding to use when
              theString is a decoded string and has to be encoded

            NOTE: If theString is already encoded, it is returned unchanged; we do NOT
              check or convert whatever encoding it already has. "encoding" is only
              used when we have a decoded string, in order to encode it.

            @return (python3)<bytes> / (python2)<str> - theString encoded using "encoding"
              if it was a decoded string, otherwise theString itself
    '''
    # Only decoded (text) strings need work; everything else passes through untouched.
    if isinstance(theString, DECODED_STR_TYPE):
        # Honor the caller-supplied encoding instead of a hard-coded 'utf-8'
        return theString.encode(encoding)

    return theString
64+
3765
# vim: set ts=4 sw=4 sts=4 expandtab :

0 commit comments

Comments
 (0)