Skip to content

Commit 84099ad

Browse files
committed
Fix __str__ behavior
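- Adds an implements_to_string class decorator: on Python 2 it exposes the unicode-returning __str__ as __unicode__ and makes __str__ return UTF-8 encoded bytes; on Python 3 it is a no-op. This fixes print statements so the output is as expected.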
1 parent 56ea4ec commit 84099ad

File tree

4 files changed

+18
-129
lines changed

4 files changed

+18
-129
lines changed

HISTORY.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ Changelog
88
- Basic extensions framework in place. TextBlob has been refactored to make it easier to develop extensions.
99
- Add ``text.classifiers.PositiveNaiveBayesClassifier``.
1010
- Update NLTK.
11+
- Fix ``__str__`` behavior. Printing a blob (``print blob`` in Python 2, ``print(blob)`` in Python 3) should now produce the expected output in both versions.
1112
- *Backwards-incompatible*: All abstract base classes have been moved to the ``text.base`` module.
1213
- *Backwards-incompatible*: ``PerceptronTagger`` will now be maintained as an extension, ``textblob-aptagger``. Instantiating a ``text.taggers.PerceptronTagger()`` will raise a ``DeprecationWarning``.
1314

text/blob.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from text.utils import lowerstrip, PUNCTUATION_REGEX
2828
from text.inflect import singularize as _singularize, pluralize as _pluralize
2929
from text.mixins import BlobComparableMixin, StringlikeMixin
30-
from text.compat import unicode, basestring, python_2_unicode_compatible
30+
from text.compat import unicode, basestring
3131
from text.base import (BaseNPExtractor, BaseTagger, BaseTokenizer,
3232
BaseSentimentAnalyzer, BaseParser)
3333
from text.np_extractors import FastNPExtractor
@@ -287,7 +287,6 @@ def _initialize_models(obj, tokenizer, pos_tagger,
287287
obj.classifier = classifier
288288

289289

290-
@python_2_unicode_compatible
291290
class BaseBlob(StringlikeMixin, BlobComparableMixin):
292291

293292
'''An abstract base class that all text.blob classes will inherit from.

text/compat.py

Lines changed: 11 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,15 @@ def u(s):
1919
imap = imap
2020
izip = izip
2121
import unicodecsv as csv
22+
23+
def implements_to_string(cls):
24+
'''Class decorator that renames __str__ to __unicode__ and
25+
redefines __str__ so that it returns UTF-8 encoded bytes.
26+
'''
27+
cls.__unicode__ = cls.__str__
28+
cls.__str__ = lambda x: x.__unicode__().encode('utf-8')
29+
return cls
30+
2231
else: # PY3
2332
def b(s):
2433
return s.encode("latin-1")
@@ -35,6 +44,8 @@ def u(s):
3544
izip = zip
3645
import csv
3746

47+
implements_to_string = lambda x: x
48+
3849

3950
def add_metaclass(metaclass):
4051
"""Class decorator for creating a class with a metaclass.
@@ -48,122 +59,3 @@ def wrapper(cls):
4859
orig_vars.pop(slots_var)
4960
return metaclass(cls.__name__, cls.__bases__, orig_vars)
5061
return wrapper
51-
52-
# ======= Compatibility layer for __str__ and __repr__ from NLTK ==========
53-
54-
import unicodedata
55-
import functools
56-
57-
def remove_accents(text):
58-
59-
if isinstance(text, bytes):
60-
text = text.decode('ascii')
61-
62-
category = unicodedata.category # this gives a small (~10%) speedup
63-
return ''.join(
64-
c for c in unicodedata.normalize('NFKD', text) if category(c) != 'Mn'
65-
)
66-
67-
# Select the best transliteration method:
68-
try:
69-
# Older versions of Unidecode are licensed under Artistic License;
70-
# assume an older version is installed.
71-
from unidecode import unidecode as transliterate
72-
except ImportError:
73-
try:
74-
# text-unidecode implementation is worse than Unidecode
75-
# implementation so Unidecode is preferred.
76-
from text_unidecode import unidecode as transliterate
77-
except ImportError:
78-
# This transliteration method should be enough
79-
# for many Western languages.
80-
transliterate = remove_accents
81-
82-
83-
def python_2_unicode_compatible(klass):
84-
"""
85-
This decorator defines __unicode__ method and fixes
86-
__repr__ and __str__ methods under Python 2.
87-
88-
To support Python 2 and 3 with a single code base,
89-
define __str__ and __repr__ methods returning unicode
90-
text and apply this decorator to the class.
91-
92-
Original __repr__ and __str__ would be available
93-
as unicode_repr and __unicode__ (under both Python 2
94-
and Python 3).
95-
"""
96-
97-
if not issubclass(klass, object):
98-
raise ValueError("This decorator doesn't work for old-style classes")
99-
100-
# both __unicode__ and unicode_repr are public because they
101-
# may be useful in console under Python 2.x
102-
103-
# if __str__ or __repr__ are not overridden in a subclass,
104-
# they may be already fixed by this decorator in a parent class
105-
# and we shouldn't fix them again
106-
107-
if not _was_fixed(klass.__str__):
108-
klass.__unicode__ = klass.__str__
109-
if PY2:
110-
klass.__str__ = _7bit(_transliterated(klass.__unicode__))
111-
112-
113-
if not _was_fixed(klass.__repr__):
114-
klass.unicode_repr = klass.__repr__
115-
if PY2:
116-
klass.__repr__ = _7bit(klass.unicode_repr)
117-
118-
return klass
119-
120-
121-
def unicode_repr(obj):
122-
"""
123-
For classes that were fixed with @python_2_unicode_compatible
124-
``unicode_repr`` returns ``obj.unicode_repr()``; for unicode strings
125-
the result is returned without "u" letter (to make output the
126-
same under Python 2.x and Python 3.x); for other variables
127-
it is the same as ``repr``.
128-
"""
129-
if not PY2:
130-
return repr(obj)
131-
132-
# Python 2.x
133-
if hasattr(obj, 'unicode_repr'):
134-
return obj.unicode_repr()
135-
136-
if isinstance(obj, unicode):
137-
return repr(obj)[1:] # strip "u" letter from output
138-
139-
return repr(obj)
140-
141-
142-
def _transliterated(method):
143-
def wrapper(self):
144-
return transliterate(method(self))
145-
146-
functools.update_wrapper(wrapper, method, ["__name__", "__doc__"])
147-
if hasattr(method, "_nltk_compat_7bit"):
148-
wrapper._nltk_compat_7bit = method._nltk_compat_7bit
149-
150-
wrapper._nltk_compat_transliterated = True
151-
return wrapper
152-
153-
154-
def _7bit(method):
155-
def wrapper(self):
156-
return method(self).encode('ascii', 'backslashreplace')
157-
158-
functools.update_wrapper(wrapper, method, ["__name__", "__doc__"])
159-
160-
if hasattr(method, "_nltk_compat_transliterated"):
161-
wrapper._nltk_compat_transliterated = method._nltk_compat_transliterated
162-
163-
wrapper._nltk_compat_7bit = True
164-
return wrapper
165-
166-
167-
def _was_fixed(method):
168-
return (getattr(method, "_nltk_compat_7bit", False) or
169-
getattr(method, "_nltk_compat_transliterated", False))

text/mixins.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# -*- coding: utf-8 -*-
22
from __future__ import absolute_import
33
import sys
4-
from text.compat import basestring, u
4+
from text.compat import basestring, implements_to_string
55

66

77
class ComparableMixin(object):
@@ -46,6 +46,7 @@ def _compare(self, other, method):
4646
return super(BlobComparableMixin, self)._compare(other, method)
4747

4848

49+
@implements_to_string
4950
class StringlikeMixin(object):
5051

5152
'''Make blob objects behave like Python strings.
@@ -61,18 +62,14 @@ def __repr__(self):
6162
return "{cls}({text})".format(cls=class_name,
6263
text=repr(self._strkey()))
6364

64-
def __len__(self):
65-
'''Returns the length of the raw text.'''
66-
return len(self._strkey())
67-
6865
def __str__(self):
6966
'''Returns a string representation used in print statements
7067
or str(my_blob).'''
7168
return self._strkey()
7269

73-
def __unicode__(self):
74-
'''Returns the unicode representation of the blob.'''
75-
return u(self._strkey())
70+
def __len__(self):
71+
'''Returns the length of the raw text.'''
72+
return len(self._strkey())
7673

7774
def __iter__(self):
7875
'''Makes the object iterable as if it were a string,

0 commit comments

Comments
 (0)