Skip to content

Commit 84c51aa

Browse files
author
Dean Malmgren
authored
Merge pull request #31 from davidread/py3
Python 3 compatibility
2 parents a718063 + 2c3d2ac commit 84c51aa

File tree

8 files changed

+30
-34
lines changed

8 files changed

+30
-34
lines changed

.travis.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
language: python
22
python:
33
- "2.7"
4+
- "3.5"
45

56
# install python dependencies including this package in the travis
67
# virtualenv

requirements/python-dev

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,7 @@ nose
1414
sphinx
1515
sphinx_rtd_theme
1616

17-
# for convenience
18-
ipdb
17+
# ipdb for convenience.
18+
# old versions needed for python 2.7 compatibility, despite what the docs say
19+
ipython==5.4
20+
ipdb==0.10.2

scrubadub/detectors/skype.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def iter_filth(self, text):
4444
for i in skype_indices:
4545
jmin = max(i-self.word_radius, 0)
4646
jmax = min(i+self.word_radius+1, len(tokens))
47-
for j in range(jmin, i) + range(i+1, jmax):
47+
for j in list(range(jmin, i)) + list(range(i+1, jmax)):
4848
token = tokens[j]
4949
if self.filth_cls.SKYPE_USERNAME.match(token):
5050

scrubadub/filth/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def iter_filths():
2626
"""Iterate over all instances of filth"""
2727
for filth_cls in iter_filth_clss():
2828
if issubclass(filth_cls, RegexFilth):
29-
m = re.finditer(r"\s+", "fake pattern string").next()
29+
m = next(re.finditer(r"\s+", "fake pattern string"))
3030
yield filth_cls(m)
3131
else:
3232
yield filth_cls()

scrubadub/scrubbers.py

Lines changed: 10 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,9 @@
1-
import re
21
import operator
3-
4-
import textblob
5-
import nltk
2+
import sys
63

74
from . import exceptions
85
from . import detectors
9-
from .filth import Filth, MergedFilth
6+
from .filth import Filth
107

118

129
class Scrubber(object):
@@ -50,8 +47,10 @@ def clean(self, text, **kwargs):
5047
through to the ``Filth.replace_with`` method to fine-tune how the
5148
``Filth`` is cleaned.
5249
"""
53-
if not isinstance(text, unicode):
54-
raise exceptions.UnicodeRequired
50+
if sys.version_info < (3, 0):
51+
# Only needed in Python 2. In Python 3 every str is already unicode text
52+
if not isinstance(text, unicode):
53+
raise exceptions.UnicodeRequired
5554

5655
clean_chunks = []
5756
filth = Filth()
@@ -73,14 +72,15 @@ def iter_filth(self, text):
7372
# over all detectors simultaneously. just trying to get something
7473
# working right now and we can worry about efficiency later
7574
all_filths = []
76-
for detector in self._detectors.itervalues():
75+
for detector in self._detectors.values():
7776
for filth in detector.iter_filth(text):
7877
if not isinstance(filth, Filth):
7978
raise TypeError('iter_filth must always yield Filth')
8079
all_filths.append(filth)
8180

82-
# Sort per start position and substrings
83-
all_filths = sorted(all_filths, cmp=_sort_filths)
81+
# Sort by start position. If two filths start in the same place then
82+
# return the longer one first
83+
all_filths.sort(key=lambda f: (f.beg, -f.end))
8484

8585
# this is where the Scrubber does its hard work and merges any
8686
# overlapping filths.
@@ -94,22 +94,3 @@ def iter_filth(self, text):
9494
else:
9595
filth = filth.merge(next_filth)
9696
yield filth
97-
98-
99-
def _sort_filths(a_filth, b_filth):
100-
"""Sort list of filths per starting position and substrings"""
101-
102-
# if a_filth starts first return a
103-
if a_filth.beg < b_filth.beg:
104-
return -1
105-
106-
# if b_filth starts first return b
107-
if a_filth.beg > b_filth.beg:
108-
return 1
109-
110-
# if both filths start in the same position
111-
# return the inclusive filth
112-
if a_filth.end > b_filth.end:
113-
return -1
114-
115-
return 1

scrubadub/utils.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
1+
try:
2+
unicode
3+
except NameError:
4+
basestring = str # Compatibility for Python 2 and 3
5+
16

27
class CanonicalStringSet(set):
38
"""Just like a set, except it makes sure that all elements are lower case
49
strings.
510
"""
611

712
def _cast_as_lower(self, x):
8-
if not isinstance(x, (str, unicode)):
13+
if not isinstance(x, basestring):
914
raise TypeError('CanonicalStringSet only works with strings')
1015
return x.lower()
1116

tests/base.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@
33
import scrubadub
44

55

6+
try:
7+
unicode
8+
except NameError:
9+
unicode = str # Python 2 and 3 compatibility
10+
611
# this is a mixin class to make it easy to centralize a lot of the core
712
# functionality of the test suite
813
class BaseTestCase(object):

tests/test_unicode.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1+
import sys
12
import unittest
23

34
import scrubadub
45

56
from base import BaseTestCase
67

8+
@unittest.skipIf(sys.version_info >= (3,0), "Test only needed in Python 2")
79
class UnicodeTestCase(unittest.TestCase, BaseTestCase):
810

911
def test_empty(self):

0 commit comments

Comments
 (0)