Skip to content

Commit e7b3704

Browse files
committed
first commit
0 parents  commit e7b3704

File tree

2,138 files changed

+356061
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

2,138 files changed

+356061
-0
lines changed

btgy.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
#!/usr/bin/python
2+
# -*- coding: utf-8 -*-
3+
# @Time : 6/19/19 10:38 AM
4+
# @Author : C0nners Chan
5+
# @File : btgy.py
6+
# @Software: PyCharm
7+
# 爬取补天公益src
8+
9+
# !/usr/bin/env python
10+
# -*- coding:utf-8 -*-
11+
# Auth: Conners Chan
12+
# TIME: 2018/9/8 14:33
13+
14+
import json
15+
import requests
16+
import time
17+
from bs4 import BeautifulSoup
18+
from requests.exceptions import ReadTimeout
19+
import lxml
20+
21+
22+
def spider(pcount):
    """Collect the submit URL of every public-welfare vendor listed on butian.net.

    Requests the listing pages ``1 .. pcount - 1`` one after another and, for
    every vendor in a page, prints its name, avatar and id and appends
    ``http://www.butian.net/Loo/submit?cid=<company_id>`` to ``gysrc_id.txt``.

    :param pcount: exclusive upper bound of the page numbers to request.
    :return: None
    """
    # NOTE(review): the Cookie below is a hard-coded browser session; it
    # presumably has to be replaced once the session is no longer valid.
    headers = {
        'Host': 'www.butian.net',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'Accept-Encoding': 'gzip, deflate',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'X-Requested-With': 'XMLHttpRequest',
        'Referer': 'http://www.butian.net/Reward/pub//Message/send',
        'Cookie': 'PHPSESSID=enqo72aue45h1hbg6u3cguor62; __q__=1544692443138; test_cookie_enable=null; __guid=138613664.4415430679088757000.1544692095368.716; __DC_monitor_count=4; __DC_sid=138613664.2410675486827421000.1544692095528.0479; __DC_gid=138613664.693680020.1544692095531.1544692445689.10',
        'Connection': 'keep-alive'
    }
    base = 'http://www.butian.net/Loo/submit?cid='
    # NOTE(review): range(1, pcount) never requests page ``pcount`` itself;
    # confirm whether callers pass "last page + 1" or the last page number.
    for i in range(1, pcount):
        data = {
            'p': i,
            'token': ''
        }
        time.sleep(3)  # pause before every page request
        res = requests.get('http://www.butian.net/Reward/pub/Message/send', params=data, headers=headers, timeout=(4, 20))
        allResult = json.loads(res.text)
        companies = allResult['data']['list']
        print('正在获取第' + str(allResult['data']['current']) + '页厂商数据')
        print('本页共有' + str(len(companies)) + '条厂商')
        # Open the output once per page instead of once per vendor.
        with open('gysrc_id.txt', 'a') as f:
            for company in companies:
                # str() keeps the concatenations working if the id arrives as a number.
                company_id = str(company['company_id'])
                print('厂商名字:' + company['company_name'] + '\t\tavatar:' + company['avatar']
                      + '\t\t厂商ID:' + company_id)
                f.write(base + company_id + '\n')
60+
61+
62+
def url():
    """Resolve every saved vendor submit URL to the vendor's host.

    Reads ``gysrc_id.txt`` line by line, fetches each URL, extracts the
    ``value`` of the ``host`` and ``company_name`` ``<input>`` elements from
    the returned HTML, prints both and appends the host to ``target.txt``.

    :return: None
    """
    # NOTE(review): the cookies below are a hard-coded browser session; they
    # presumably have to be replaced once the session is no longer valid.
    headers = {
        'Host': 'www.butian.net',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0 ',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate',
        'Referer': 'https://www.butian.net/Reward/plan',
        'Upgrade-Insecure-Requests': '1',
        'Cache-Control': 'max-age=0'
    }

    cookies = {
        '__DC_monitor_count': '29',
        '__guid': '66782632.2534021926694334500.1560835589122.8506',
        '__DC_gid': '66782632.611115740.1560835589122.1560922605361.59',
        'btlc_ab7a660c7e054d9e446e06f4571ebe41': '1c6d2efdb0d9a62b23b77264b6d5512a42744a1198d5633eb7973f752fc704fa',
        'PHPSESSID': 'igaes901lfpks8fm71lqsr49r3',
        '__q__': '1560923806772',
        '__DC_sid': '66782632.3322532378039549000.1560921346741.3696'
    }
    with open('gysrc_id.txt', 'r') as f:
        for target in f:
            target = target.strip()
            if not target:
                # A blank line is not a URL; requesting it would abort the whole run.
                continue
            try:
                get_url = requests.get(target, headers=headers, cookies=cookies, timeout=5)
                info = BeautifulSoup(get_url.text, 'lxml')
                f_url = info.find(name='input', attrs={"name": "host"})
                name = info.find(name='input', attrs={"name": "company_name"})
                last_url = f_url.attrs['value']
                print('厂商:' + name.attrs['value'] + '\t网址:' + last_url)
                with open('target.txt', 'a') as t:
                    t.write(last_url + '\n')
                time.sleep(1)
            except AttributeError as ae:
                # find() returned None: the page did not contain the expected
                # inputs. Presumably the site is throttling — back off for a
                # long time before the next request (TODO confirm).
                print(ae)
                time.sleep(181)
            except requests.exceptions.Timeout:
                # Covers both connect and read timeouts; move on to the next URL.
                print('timeout')
    print('The target is right!')
111+
112+
113+
if __name__ == '__main__':
    # Two-step workflow.
    #
    # Step 1 (disabled) rebuilds gysrc_id.txt: ask the listing endpoint for
    # data.count and hand that number to spider():
    #
    #     data = {'s': '1', 'p': '1', 'token': ''}
    #     res = requests.post('http://www.butian.net/Reward/pub/Message/send', data=data)
    #     allPages = json.loads(res.text)['data']['count']
    #     spider(int(allPages))
    #
    # Step 2 reads gysrc_id.txt and writes each vendor's host to target.txt.
    url()

btgyurls.tar.gz

106 KB
Binary file not shown.

btprivate.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#!/usr/bin/python
2+
# -*- coding: utf-8 -*-
3+
# @Time : 6/19/19 10:34 AM
4+
# @Author : C0nners Chan
5+
# @File : btprivate.py
6+
# @Software: PyCharm
7+
# 爬取补天私有src
8+
9+
import requests
10+
from bs4 import BeautifulSoup
11+
import sys
12+
import threading
13+
14+
threadNumber = 10 # Thread limit: the launcher starts the next worker only while fewer than this many threads are alive
15+
16+
def testOnline(number):
    """Fetch one company page from butian.360.cn and record its website.

    Downloads ``http://butian.360.cn/company/info/id/<number>``, reads the
    website from the ``<td width="460">`` cell and normalises it to start with
    a scheme and end with ``/``. The cells with classes
    ``color_td3 font_width`` and ``color_td2 font_width`` are printed as
    "Max" and "Total" when present. The URL is appended to
    ``websites_free.txt`` when both values are a single character long (the
    default for a missing cell is ``"0"``), otherwise to ``websites.txt``.

    Nothing is written when the website cell is missing or empty.

    :param number: numeric company id appended to the info URL.
    :return: None
    """
    print("############################")
    print("# " + str(number))
    url = "http://butian.360.cn/company/info/id/" + str(number)
    # NOTE(review): no timeout is passed, so a stalled connection blocks this
    # worker indefinitely.
    content = requests.get(url).text
    soup = BeautifulSoup(content, "html.parser")

    domain = soup.find("td", width="460")
    if domain is None:
        return
    temp = domain.text
    if temp == "":
        return

    # Accept either scheme as a prefix; only bare hosts get "http://" prepended.
    if not temp.startswith(("http://", "https://")):
        temp = "http://" + temp
    if not temp.endswith("/"):
        temp += "/"

    # Defaults apply independently, so a page with only one of the two cells
    # no longer leaves the other variable unbound.
    price = "0"
    priceTotal = "0"

    _price = soup.find("td", align="center", class_="color_td3 font_width")
    if _price is not None:
        price = _price.text
        print(u"# Max : " + price)

    _priceTotal = soup.find("td", align="center", class_="color_td2 font_width")
    if _priceTotal is not None:
        priceTotal = _priceTotal.text
        print(u"# Total : " + priceTotal)

    print(temp)
    if len(price) == 1 and len(priceTotal) == 1:
        out_name = "websites_free.txt"
    else:
        out_name = "websites.txt"
    with open(out_name, "a+") as out:
        out.write(temp + "\r\n")
51+
52+
class myThread(threading.Thread):
    """Worker thread that runs testOnline() for a single company id."""

    def __init__(self, number):
        """Store the company id this worker will look up.

        :param number: company id passed to testOnline() when the thread runs.
        """
        super(myThread, self).__init__()
        self.number = number

    def run(self):
        """Thread body: look up the stored company id."""
        testOnline(self.number)
59+
60+
import time  # imported here because this script's import header does not bring in time

# One worker per company id in [1247, 10000).
threads = [myThread(number) for number in range(1247, 10000)]

for t in threads:
    t.start()
    # Throttle: hold off on the next worker until fewer than threadNumber
    # threads are alive (threading.enumerate() also counts the main thread).
    # Sleep between checks instead of spinning on the condition.
    while len(threading.enumerate()) >= threadNumber:
        time.sleep(0.05)
70+

0 commit comments

Comments
 (0)