@@ -19,6 +19,15 @@ def u(s):
1919 imap = imap
2020 izip = izip
2121 import unicodecsv as csv
22+
def implements_to_string(cls):
    """Class decorator that adapts a text-returning ``__str__`` for Python 2.

    The class's current ``__str__`` is re-exposed as ``__unicode__`` and
    ``__str__`` is replaced by a wrapper that returns the ``__unicode__``
    result encoded as UTF-8.

    If ``cls.__str__`` is already such a wrapper (inherited from a decorated
    parent class without being overridden), the class is returned unchanged.
    Wrapping it a second time would call ``.encode('utf-8')`` on a value that
    has already been encoded.

    :param cls: class whose ``__str__`` returns text.
    :returns: the same class object, modified in place.
    """
    current_str = cls.__str__
    if getattr(current_str, '_implements_to_string', False):
        # Inherited from a decorated parent: __unicode__ and __str__ are
        # already in place, so there is nothing left to do.
        return cls

    def __str__(self):
        return self.__unicode__().encode('utf-8')

    # Marker consulted above when a subclass is decorated again.
    __str__._implements_to_string = True

    cls.__unicode__ = current_str
    cls.__str__ = __str__
    return cls
30+
2231else : # PY3
2332 def b (s ):
2433 return s .encode ("latin-1" )
@@ -35,6 +44,8 @@ def u(s):
3544 izip = zip
3645 import csv
3746
def implements_to_string(x):
    """Return the class ``x`` unchanged.

    Counterpart of the Python 2 decorator of the same name; on this branch
    the decorated class is handed back as-is.
    """
    return x
48+
3849
3950def add_metaclass (metaclass ):
4051 """Class decorator for creating a class with a metaclass.
@@ -48,122 +59,3 @@ def wrapper(cls):
4859 orig_vars .pop (slots_var )
4960 return metaclass (cls .__name__ , cls .__bases__ , orig_vars )
5061 return wrapper
51-
52- # ======= Compatibility layer for __str__ and __repr__ from NLTK ==========
53-
54- import unicodedata
55- import functools
56-
def remove_accents(text):
    """Return ``text`` with combining marks (Unicode category ``Mn``) removed.

    Byte strings are first decoded as ASCII. The text is NFKD-normalized so
    that accented characters split into a base character plus combining
    marks, and the marks are then dropped.
    """
    if isinstance(text, bytes):
        text = text.decode('ascii')

    decomposed = unicodedata.normalize('NFKD', text)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)
66-
# Pick the transliteration function, trying the candidates in order of
# preference: Unidecode, then text-unidecode, then the local fallback.
try:
    # Older versions of Unidecode are licensed under Artistic License;
    # assume an older version is installed.
    from unidecode import unidecode as transliterate
except ImportError:
    transliterate = None

if transliterate is None:
    try:
        # text-unidecode implementation is worse than Unidecode
        # implementation, which is why Unidecode is tried first.
        from text_unidecode import unidecode as transliterate
    except ImportError:
        # Accent stripping should be enough for many Western languages.
        transliterate = remove_accents
81-
82-
def python_2_unicode_compatible(klass):
    """
    Class decorator that adds ``__unicode__`` and ``unicode_repr`` and,
    under Python 2, replaces ``__str__`` and ``__repr__`` with wrapped
    versions.

    Intended use: define ``__str__`` and ``__repr__`` so that they return
    unicode text, then apply this decorator to the class.

    The original ``__str__`` and ``__repr__`` stay reachable as
    ``__unicode__`` and ``unicode_repr`` under both Python 2 and Python 3.

    Raises ``ValueError`` when ``klass`` is not a subclass of ``object``.
    """
    if not issubclass(klass, object):
        raise ValueError("This decorator doesn't work for old-style classes")

    # __unicode__ and unicode_repr are deliberately public: they may be
    # useful in a console under Python 2.x.
    #
    # When a subclass does not override __str__ or __repr__, the method it
    # inherits may already have been fixed by this decorator on a parent
    # class; such a method must not be fixed again.

    str_needs_fix = not _was_fixed(klass.__str__)
    if str_needs_fix:
        klass.__unicode__ = klass.__str__
        if PY2:
            transliterated_str = _transliterated(klass.__unicode__)
            klass.__str__ = _7bit(transliterated_str)

    repr_needs_fix = not _was_fixed(klass.__repr__)
    if repr_needs_fix:
        klass.unicode_repr = klass.__repr__
        if PY2:
            klass.__repr__ = _7bit(klass.unicode_repr)

    return klass
119-
120-
def unicode_repr(obj):
    """
    Return a representation of ``obj`` that reads the same under
    Python 2.x and Python 3.x.

    Under Python 3 this is plain ``repr(obj)``. Under Python 2:

    * objects exposing ``unicode_repr`` (for example classes fixed with
      @python_2_unicode_compatible) return ``obj.unicode_repr()``;
    * unicode strings return their ``repr`` without the leading "u";
    * everything else returns ``repr(obj)``.
    """
    if PY2:
        if hasattr(obj, 'unicode_repr'):
            return obj.unicode_repr()

        if isinstance(obj, unicode):
            # Drop the leading "u" from the Python 2 repr.
            return repr(obj)[1:]

    return repr(obj)
140-
141-
142- def _transliterated (method ):
143- def wrapper (self ):
144- return transliterate (method (self ))
145-
146- functools .update_wrapper (wrapper , method , ["__name__" , "__doc__" ])
147- if hasattr (method , "_nltk_compat_7bit" ):
148- wrapper ._nltk_compat_7bit = method ._nltk_compat_7bit
149-
150- wrapper ._nltk_compat_transliterated = True
151- return wrapper
152-
153-
154- def _7bit (method ):
155- def wrapper (self ):
156- return method (self ).encode ('ascii' , 'backslashreplace' )
157-
158- functools .update_wrapper (wrapper , method , ["__name__" , "__doc__" ])
159-
160- if hasattr (method , "_nltk_compat_transliterated" ):
161- wrapper ._nltk_compat_transliterated = method ._nltk_compat_transliterated
162-
163- wrapper ._nltk_compat_7bit = True
164- return wrapper
165-
166-
167- def _was_fixed (method ):
168- return (getattr (method , "_nltk_compat_7bit" , False ) or
169- getattr (method , "_nltk_compat_transliterated" , False ))
0 commit comments