Skip to content

Commit 6346270

Browse files
authored
Merge pull request #25 from wuyue92tree/dev
Dev
2 parents e7addf1 + a0aab1d commit 6346270

File tree

9 files changed: 299 additions and 28 deletions.

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Crwy
22

33
[![PyPI Version](https://img.shields.io/pypi/v/Crwy.svg)](https://pypi.python.org/pypi/Crwy)
4-
[![Build Status](https://travis-ci.org/wuyue92tree/crwy.svg?branch=1.1.2)](https://travis-ci.org/wuyue92tree/crwy)
4+
[![Build Status](https://travis-ci.org/wuyue92tree/crwy.svg?branch=1.1.3)](https://travis-ci.org/wuyue92tree/crwy)
55

66
# 简介
77

@@ -34,7 +34,7 @@ pip install crwy
3434
```
3535

3636
or
37-
前往下载: https://pypi.python.org/pypi/Crwy/1.1.2/
37+
前往下载: https://pypi.python.org/pypi/Crwy/1.1.3/
3838

3939
# 使用手册
4040

crwy/VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.1.2
1+
1.1.3

crwy/utils/extend/xunma.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
"""
1212

1313

14-
from __future__ import print_function
14+
from __future__ import print_function, unicode_literals
1515

1616
from crwy.spider import Spider
1717
from crwy.exceptions import CrwyExtendException
@@ -38,7 +38,7 @@ def login(self):
3838
password=self.password)
3939
res = self.html_downloader.download(url)
4040

41-
return res.content.strip().split("&")[0]
41+
return res.text.strip().split("&")[0]
4242
except Exception as e:
4343
raise CrwyExtendException(e)
4444

@@ -58,11 +58,7 @@ def get_phone(self, token, phone_type='', phone=''):
5858
phone_type=phone_type, phone=phone)
5959

6060
res = self.html_downloader.download(url)
61-
# if 'success' not in res.content:
62-
# raise MfExtendException("[XunMa] get phone failed.")
63-
#
64-
# # print(res.content)
65-
return res.content.strip().split(';')[0]
61+
return res.text.strip().split(';')[0]
6662

6763
except Exception as e:
6864
raise CrwyExtendException(e)
@@ -82,7 +78,7 @@ def get_message(self, token, phone):
8278
item_id=self.item_id, phone=phone)
8379
res = self.html_downloader.download(url)
8480

85-
return res.content.strip().split('&')[-1]
81+
return res.text.strip().split('&')[-1]
8682

8783
except Exception as e:
8884
raise CrwyExtendException(e)

crwy/utils/extend/yima.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
这一行开始写关于本文件的说明与解释
1111
"""
1212

13-
from __future__ import print_function
13+
from __future__ import print_function, unicode_literals
1414

1515
from crwy.spider import Spider
1616
from crwy.exceptions import CrwyException
@@ -39,10 +39,10 @@ def login(self):
3939
password=self.password)
4040
res = self.html_downloader.download(url)
4141

42-
if 'success' not in res.content:
42+
if 'success' not in res.text:
4343
raise CrwyException("[YiMa] Login failed.")
4444

45-
return res.content.strip().split("|")[-1]
45+
return res.text.strip().split("|")[-1]
4646
except Exception as e:
4747
raise CrwyException(e)
4848

@@ -65,11 +65,11 @@ def get_phone(self, token, phone_type='',
6565
phone_type=phone_type, phone=phone)
6666

6767
res = self.html_downloader.download(url)
68-
if 'success' not in res.content:
68+
if 'success' not in res.text:
6969
raise CrwyException("[YiMa] get phone failed.")
7070

71-
# print(res.content)
72-
return res.content.strip().split('|')[-1]
71+
# print(res.text)
72+
return res.text.strip().split('|')[-1]
7373

7474
except Exception as e:
7575
raise CrwyException(e)
@@ -88,11 +88,11 @@ def get_message(self, token, phone):
8888
phone=phone)
8989
res = self.html_downloader.download(url)
9090

91-
if 'success' not in res.content:
91+
if 'success' not in res.text:
9292
raise CrwyException("[YiMa] get message failed.")
9393

9494
else:
95-
return res.content.strip().split('|')[-1]
95+
return res.text.strip().split('|')[-1]
9696

9797
except Exception as e:
9898
raise CrwyException(e)
@@ -105,7 +105,7 @@ def release_phone(self, token, phone):
105105
phone=phone)
106106
res = self.html_downloader.download(url)
107107

108-
if 'success' not in res.content:
108+
if 'success' not in res.text:
109109
raise CrwyException("[YiMa] release phone failed.")
110110

111111
except Exception as e:
@@ -119,7 +119,7 @@ def add_black(self, token, phone):
119119
phone=phone)
120120
res = self.html_downloader.download(url)
121121

122-
if 'success' not in res.content:
122+
if 'success' not in res.text:
123123
raise CrwyException("[YiMa] black phone failed.")
124124

125125
except Exception as e:

crwy/utils/html/font_analysis.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def analysis(self, is_clean=True, debug=False):
8686

8787
self.save_font()
8888
self.get_font_xml()
89-
with open(self.xml_path, 'r') as xml:
89+
with open(self.xml_path, 'rb') as xml:
9090
soups = self.html_parser.parser(xml.read())
9191
ttglyph_lst = soups.find('glyf').find_all('ttglyph')[1:]
9292
analysis_res = []

crwy/utils/no_sql/redis_m.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,8 @@
2424
class RedisDb(object):
2525
def __init__(self, **kwargs):
2626
if 'url' in kwargs.keys():
27-
url = kwargs.get('url')
28-
db = kwargs.get('db', 0)
29-
self.pool = redis.ConnectionPool.from_url(url, db=db, **kwargs)
27+
url = kwargs.pop('url')
28+
self.pool = redis.ConnectionPool.from_url(url, **kwargs)
3029
else:
3130
self.pool = redis.ConnectionPool(**kwargs)
3231
self.db = redis.StrictRedis(connection_pool=self.pool)

crwy/utils/scrapy_plugs/dupefilters.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,10 @@ def request_seen(self, request):
6969
if not request.meta.get('dupefilter_key', None):
7070
return False
7171

72+
if len(request.meta.get('redirect_urls', [])) > 0:
73+
# skip url from redirect
74+
return False
75+
7276
dupefilter_key = request.meta.get('dupefilter_key')
7377
dupefilter_key = hashlib.sha1(dupefilter_key).hexdigest() if \
7478
self.do_hash else dupefilter_key

crwy/utils/scrapy_plugs/middlewares.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,10 @@ def process_request(self, request, spider):
6565
'no user in cookie_pool:{}'.format(self.site))
6666
cookie = self.h.hget(user)
6767
if cookie:
68-
request.cookies = json.loads(cookie)
69-
spider.logger.debug('get_cookie_success: {}'.format(user))
68+
# 字典存入redis,取出时为string,通过eval进行还原
69+
request.cookies = eval(cookie)
70+
spider.logger.debug('get_cookie_success: {}'.format(
71+
user.decode('utf-8')))
7072
else:
71-
spider.logger.warning('get_cookie_failed: {}'.format(user))
73+
spider.logger.warning('get_cookie_failed: {}'.format(
74+
user.decode('utf-8')))

0 commit comments

Comments
 (0)