Skip to content

Commit 4a2d68d

Browse files
author
danbai225
committed
py上传
1 parent 7f1c95a commit 4a2d68d

File tree

3 files changed

+198
-48
lines changed

3 files changed

+198
-48
lines changed

.gitignore

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,5 +29,4 @@ rebel.xml
2929
build/
3030

3131
### VS Code ###
32-
.vscode/
33-
/py/gxpa.py
32+
.vscode/

.idea/workspace.xml

Lines changed: 44 additions & 46 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

py/gxpa.py

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
import json
2+
import random
3+
import requests
4+
from lxml import etree
5+
import re
6+
import pymysql
7+
from DBUtils.PooledDB import PooledDB
8+
from multiprocessing.pool import ThreadPool
9+
10+
# Shared MySQL connection pool; run() borrows one connection per scraped page.
# NOTE(review): host and credentials are hard-coded here; consider moving them
# to configuration before running this anywhere but a local machine.
POOL = PooledDB(
    creator=pymysql,  # DB-API module used to open the connections
    maxconnections=0,  # max connections the pool may open; 0 or None = unlimited
    mincached=20,  # idle connections opened when the pool is created; 0 = none
    # Max idle connections kept in the pool; 0 or None = unlimited.
    # This was 5, which is below mincached; DBUtils raises maxcached to
    # mincached in that case, so the effective value is written out explicitly.
    maxcached=20,
    # Max shared connections. Per the DBUtils documentation 0 or None means all
    # connections are dedicated. The setting has no effect with pymysql, whose
    # threadsafety level is 1 (connections are never shared between threads).
    maxshared=0,
    blocking=True,  # when no connection is free: True = wait, False = raise an error
    maxusage=None,  # max number of reuses of a single connection; None = unlimited
    setsession=[],  # SQL commands run at session start, e.g. ["set datestyle to ...", "set time zone ..."]
    # When to ping the MySQL server to check that the connection is alive:
    # 0 = None = never, 1 = default = whenever it is requested,
    # 2 = when a cursor is created, 4 = when a query is executed, 7 = always.
    ping=0,
    host='127.0.0.1',
    port=3306,
    user='123',
    password='123',
    database='ys',
    charset='utf8'
)
28+
class Ji:
    """A named link: pairs a display ``name`` with a ``url``.

    fenji() serialises instances through ``__dict__``, so the instance must
    carry exactly these two attributes, assigned in this order.
    """

    # Class-level defaults; __init__ always shadows both on the instance.
    name = ""
    url = ""

    def __init__(self, name, url):
        self.name, self.url = name, url
34+
35+
# Proxy URLs currently in rotation: filled by get_proxy(), pruned by getHtml().
proxys = []

# Request headers that getHtml() sends with each page fetch.
header = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'
    ),
}
37+
def get_proxy():
    """Fetch proxy URLs from ip.jiangxianli.com and add them to ``proxys``.

    The page is parsed for the ``data-url`` attribute of its copy buttons.
    Nothing is added when the server answers with a status other than 200.

    Raises:
        Whatever ``requests.get`` raises on connection failure or timeout.
    """
    global proxys
    # The timeout keeps a stalled proxy-list server from hanging the caller forever.
    r = requests.get("http://ip.jiangxianli.com/", timeout=10)
    if r.status_code == 200:
        selector = etree.HTML(r.text)
        found = selector.xpath("//button[@class=\"btn btn-sm btn-copy\"]/@data-url")
        # Skip proxies that are already listed so repeated refills do not pile
        # up duplicates (getHtml() removes only one occurrence on failure).
        proxys = proxys + [p for p in found if p not in proxys]
44+
def getHtml(url):
    """Download *url* through a randomly chosen proxy and return the body text.

    A proxy whose request fails is dropped from ``proxys`` and the fetch is
    retried with another one. The list is refilled through get_proxy() when
    fewer than two proxies remain.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body as text.

    Raises:
        RuntimeError: No proxy is available even after a refill.
    """
    # Retry in a loop rather than by recursion: a long run of dead proxies
    # would otherwise hit the interpreter's recursion limit.
    while True:
        try:
            p = random.choice(proxys)
        except IndexError:
            # The list is empty (never filled, or drained by failures).
            get_proxy()
            if not proxys:
                raise RuntimeError("no proxies available for " + url)
            continue
        try:
            # requests.get() never returns None, so no None check is needed.
            return requests.get(url, proxies={'http': p}, headers=header, timeout=5).text
        except Exception:
            # Deliberately broad: public proxies fail in many different ways
            # and every failure is handled the same way (drop it and retry).
            try:
                proxys.remove(p)
            except ValueError:
                # Another worker thread already removed this proxy.
                pass
            if len(proxys) < 2:
                get_proxy()
57+
def main():
    """Collect detail-page links from listing pages 1-4 and process them in a thread pool.

    Each link is handed to run() on a pool of 20 threads. The function returns
    after every task has finished.
    """
    get_proxy()
    detail_paths = []
    for i in range(1, 5):
        selector = etree.HTML(getHtml("http://www.zuidazy1.net/?m=vod-type-id-{}.html".format(i)))
        detail_paths += selector.xpath("//li/span/a/@href")
    # Create the thread pool.
    pool = ThreadPool(20)
    for path in detail_paths:
        url = "http://www.zuidazy1.net" + path
        try:
            # apply_async() does not re-raise errors from run(); without
            # error_callback they would be dropped silently, so print them.
            pool.apply_async(run, args=(url,), error_callback=print)
        except Exception as e:
            print(e)
    pool.close()
    pool.join()
    print("结束")
74+
def _update_column(conn, cursor, row_id, column, value):
    """Store *value* in *column* of the ``ysb`` row *row_id* and commit."""
    # Column names come from fixed literals in _fill_missing_columns(), never
    # from scraped text, so formatting them in is safe. The values are bound
    # as parameters so quotes in scraped text cannot break or alter the SQL.
    cursor.execute(
        "UPDATE `ysb` SET `{}` = %s WHERE `ysb`.`id` = %s".format(column),
        (str(value), row_id),
    )
    print("更新" + str(row_id) + column)
    conn.commit()


def _fill_missing_columns(conn, cursor, selector, row):
    """Fill each empty column of *row* from the parsed page, in a fixed order.

    *row* is ``(pf, dy, dq, yy, gkdz, xzdz, id, pctime)`` as selected by run().
    """
    pf, dy, dq, yy, gkdz, xzdz = (str(value) for value in row[:6])
    row_id = row[6]
    pctime = str(row[7])
    info_xpath = '//div[@class="vodinfobox"]/ul/li[{}]/span/text()'

    if pf in ("", "0", "0.0"):
        scraped = ifnull(selector.xpath('//div[@class="vodh"]/label/text()'))
        _update_column(conn, cursor, row_id, "pf", scraped)

    # Plain text columns, each read from one <li> of the info box.
    for column, current, li_index in (("dy", dy, 2), ("dq", dq, 5), ("yy", yy, 6)):
        if current == "":
            scraped = ifnull(selector.xpath(info_xpath.format(li_index)))
            _update_column(conn, cursor, row_id, column, scraped)

    # JSON list columns; they are written only when the page has entries.
    for column, current, div_id in (("gkdz", gkdz, "play_1"), ("xzdz", xzdz, "down_1")):
        if current != "[]":
            continue
        items = selector.xpath('//div[@id="{}"]/ul/li/text()'.format(div_id))
        if len(items) > 0:
            encoded = json.dumps(fenji(items), ensure_ascii=False)
            _update_column(conn, cursor, row_id, column, encoded)

    if pctime == "" or row[7] == '0':
        scraped = ifnull(selector.xpath(info_xpath.format(8)))
        _update_column(conn, cursor, row_id, "pctime", scraped)


def run(url):
    """Scrape one detail page and fill in empty columns of its ``ysb`` row.

    The page title is matched against ``ysb.pm`` with LIKE. Only the first
    matching row is updated; nothing happens when no row matches. Errors
    raised while updating are printed, not propagated.

    Args:
        url: Address of the detail page to scrape.
    """
    # Fetch the page data before borrowing a connection, so slow proxy
    # retries do not hold a database connection.
    selector = etree.HTML(getHtml(url))
    pm = ifnull(selector.xpath('//div[@class="vodh"]/h2/text()'))

    conn = POOL.connection()
    try:
        cursor = conn.cursor()
        matched = cursor.execute(
            "SELECT pf,dy,dq,yy,gkdz,xzdz,id,pctime FROM `ysb` WHERE `pm` LIKE %s",
            (str(pm),),
        )
        if matched > 0:
            try:
                _fill_missing_columns(conn, cursor, selector, cursor.fetchall()[0])
            except Exception as e:
                print(e)
    finally:
        # Always hand the connection back, even when the SELECT raises.
        conn.close()
142+
def fenji(jilist):
    """Convert ``name$url`` strings into a list of ``{"name", "url"}`` dicts.

    Args:
        jilist: Iterable of strings in which ``$`` separates name and URL.
            Anything after a second ``$`` is ignored.

    Returns:
        One dict per well-formed item, in input order. Items without a ``$``
        are skipped.
    """
    entries = []
    for item in jilist:
        parts = item.split("$")
        if len(parts) < 2:
            # No separator means no URL to store. Indexing parts[1] here would
            # raise IndexError and make the caller abandon the whole row.
            continue
        entries.append(Ji(parts[0], parts[1]).__dict__)
    return entries
148+
def ifnull(list):
    """Return the first element of *list*, or ``""`` when it is empty."""
    return list[0] if len(list) > 0 else ""
152+
if __name__ == "__main__":
    # Start the crawl only when executed as a script, not when imported.
    main()

0 commit comments

Comments
 (0)