Skip to content

Commit 2de2e29

Browse files
committed
3.93
1 parent 62dcbfb commit 2de2e29

File tree

19 files changed

+2192
-1266
lines changed

19 files changed

+2192
-1266
lines changed

AVDC_Main.py

Lines changed: 506 additions & 334 deletions
Large diffs are not rendered by default.

Function/Function.py

Lines changed: 65 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,10 @@
11
#!/usr/bin/env python3
22
# -*- coding: utf-8 -*-
3-
43
import re
54
import os
65
import json
76
from configparser import ConfigParser
8-
import requests
9-
from lxml import etree
10-
from Getter import avsox, javbus, javdb, fc2fans_club, mgstage, dmm
7+
from Getter import avsox, javbus, javdb, fc2fans_club, mgstage, dmm, jav321
118

129

1310
# ========================================================================获取config
@@ -22,6 +19,35 @@ def get_config():
2219
return config
2320

2421

22+
# ========================================================================是否为无码
23+
def is_uncensored(number):
    """Return True when ``number`` looks like an uncensored release number.

    Two checks are applied in order:

    1. Built-in patterns: four or more leading digits (``111111-111``),
       ``n`` followed by four digits (``n1111``), or any number containing
       ``HEYZO`` (case-insensitive).
    2. The ``|``-separated prefixes stored under
       ``[uncensored] uncensored_prefix`` in the config; a prefix matches
       when it occurs anywhere in ``number`` (case-insensitive).

    :param number: release number extracted from the file name.
    :return: ``True`` if any rule matches, otherwise ``False``.
    """
    if re.match(r'^\d{4,}', number) or re.match(r'n\d{4}', number) or 'HEYZO' in number.upper():
        return True
    config = get_config()
    try:
        raw_prefixes = str(config['uncensored']['uncensored_prefix'])
    except KeyError:
        # Section or option missing from the config: no user prefixes to test.
        return False
    number_upper = number.upper()
    for pre in raw_prefixes.split('|'):
        pre = pre.strip().upper()
        # Skip empty entries (empty option, trailing or doubled '|'): the
        # empty string is a substring of every number and would otherwise
        # classify everything as uncensored.
        if pre and pre in number_upper:
            return True
    return False
32+
33+
34+
# ========================================================================元数据获取失败检测
35+
def getDataState(json_data):
    """Report whether a scraper result contains usable metadata.

    A result is considered failed when its ``title`` is missing, ``None``
    (JSON ``null``), an empty string, or one of the placeholder strings
    ``'None'`` / ``'null'``.

    :param json_data: dict decoded from a scraper's JSON output.
    :return: ``0`` when the lookup failed, ``1`` when a title is present.
    """
    # .get() so that a result without a 'title' key counts as a failure
    # instead of raising KeyError in the middle of the fallback chain.
    title = json_data.get('title')
    if title is None or title in ('', 'None', 'null'):
        return 0
    return 1
40+
41+
42+
# ========================================================================去掉异常字符
43+
def escapePath(path, Config):  # Remove escape literals
    """Remove every backslash-escaped literal from ``path``.

    Each character of ``Config['escape']['literals']`` is treated as one
    literal; every occurrence of a backslash immediately followed by that
    character is deleted from the path.

    :param path: path string to clean.
    :param Config: mapping that provides ``['escape']['literals']``.
    :return: the path with all such two-character sequences removed.
    """
    cleaned = path
    for char in Config['escape']['literals']:
        cleaned = cleaned.replace('\\' + char, '')
    return cleaned
49+
50+
2551
# ========================================================================获取视频列表
2652
def movie_lists(escape_folder, movie_type, movie_path):
2753
if escape_folder != '':
@@ -99,31 +125,27 @@ def getNumber(filepath, escape_string):
99125
return os.path.splitext(filepath.split('/')[-1])[0]
100126

101127

102-
# ========================================================================去掉异常字符
103-
def escapePath(path, Config): # Remove escape literals
104-
escapeLiterals = Config['escape']['literals']
105-
backslash = '\\'
106-
for literal in escapeLiterals:
107-
path = path.replace(backslash + literal, '')
108-
return path
109-
110-
111128
# ========================================================================根据番号获取数据
112129
def getDataFromJSON(file_number, config, mode): # 从JSON返回元数据
113130
# ================================================网站规则添加开始================================================
131+
isuncensored = is_uncensored(file_number)
114132
json_data = {}
115133
if mode == 1: # 从全部网站刮削
116-
# =======================================================================无码抓取:111111-111,n1111,HEYZO-1111
117-
if re.match('^\d{4,}', file_number) or re.match('n\d{4}', file_number) or 'HEYZO' in file_number.upper():
134+
# =======================================================================无码抓取:111111-111,n1111,HEYZO-1111,SMD-115
135+
if isuncensored:
118136
json_data = json.loads(javbus.main_uncensored(file_number))
119137
if getDataState(json_data) == 0:
120-
json_data = json.loads(javdb.main(file_number))
138+
json_data = json.loads(javdb.main(file_number, True))
139+
if getDataState(json_data) == 0 and 'HEYZO' in file_number.upper():
140+
json_data = json.loads(jav321.main(file_number, True))
121141
if getDataState(json_data) == 0:
122142
json_data = json.loads(avsox.main(file_number))
123143
# =======================================================================259LUXU-1111
124144
elif re.match('\d+[a-zA-Z]+-\d+', file_number) or 'SIRO' in file_number.upper():
125145
json_data = json.loads(mgstage.main(file_number))
126146
file_number = re.search('[a-zA-Z]+-\d+', file_number).group()
147+
if getDataState(json_data) == 0:
148+
json_data = json.loads(jav321.main(file_number))
127149
if getDataState(json_data) == 0:
128150
json_data = json.loads(javdb.main(file_number))
129151
if getDataState(json_data) == 0:
@@ -144,6 +166,8 @@ def getDataFromJSON(file_number, config, mode): # 从JSON返回元数据
144166
# =======================================================================MIDE-139
145167
else:
146168
json_data = json.loads(javbus.main(file_number))
169+
if getDataState(json_data) == 0:
170+
json_data = json.loads(jav321.main(file_number))
147171
if getDataState(json_data) == 0:
148172
json_data = json.loads(javdb.main(file_number))
149173
if getDataState(json_data) == 0:
@@ -159,20 +183,22 @@ def getDataFromJSON(file_number, config, mode): # 从JSON返回元数据
159183
elif mode == 3: # 仅从fc2club
160184
json_data = json.loads(fc2fans_club.main(file_number))
161185
elif mode == 4: # 仅从javbus
162-
if re.match('^\d{5,}', file_number) or re.match('n\d{4}', file_number) or 'HEYZO' in file_number.upper():
186+
if isuncensored:
163187
json_data = json.loads(javbus.main_uncensored(file_number))
164188
elif re.search('\D+.\d{2}.\d{2}.\d{2}', file_number):
165189
json_data = json.loads(javbus.main_us(file_number))
166190
else:
167191
json_data = json.loads(javbus.main(file_number))
168-
elif mode == 5: # 仅从javdb
192+
elif mode == 5: # 仅从jav321
193+
json_data = json.loads(jav321.main(file_number, isuncensored))
194+
elif mode == 6: # 仅从javdb
169195
if re.search('\D+.\d{2}.\d{2}.\d{2}', file_number):
170196
json_data = json.loads(javdb.main_us(file_number))
171197
else:
172-
json_data = json.loads(javdb.main(file_number))
173-
elif mode == 6: # 仅从avsox
198+
json_data = json.loads(javdb.main(file_number, isuncensored))
199+
elif mode == 7: # 仅从avsox
174200
json_data = json.loads(avsox.main(file_number))
175-
elif mode == 7: # 仅从dmm
201+
elif mode == 8: # 仅从dmm
176202
json_data = json.loads(dmm.main(file_number))
177203

178204
# ================================================网站规则添加结束================================================
@@ -273,8 +299,9 @@ def save_config(json_config):
273299
print("success_output_folder = " + json_config['success_output_folder'], file=code)
274300
print("failed_file_move = " + str(json_config['failed_file_move']), file=code)
275301
print("soft_link = " + str(json_config['soft_link']), file=code)
302+
print("show_poster = " + str(json_config['show_poster']), file=code)
276303
print("website = " + json_config['website'], file=code)
277-
print("# all or mgstage or fc2club or javbus or javdb or avsox or dmm", file=code)
304+
print("# all or mgstage or fc2club or javbus or jav321 or javdb or avsox or dmm", file=code)
278305
print("", file=code)
279306
print("[proxy]", file=code)
280307
print("proxy = " + json_config['proxy'], file=code)
@@ -296,8 +323,6 @@ def save_config(json_config):
296323
print("media_type = " + json_config['media_type'], file=code)
297324
print("sub_type = " + json_config['sub_type'], file=code)
298325
print("media_path = " + json_config['media_path'], file=code)
299-
print("media_warehouse = " + json_config['media_warehouse'], file=code)
300-
print("# emby or plex or kodi ,emby = jellyfin", file=code)
301326
print("", file=code)
302327
print("[escape]", file=code)
303328
print("literals = " + json_config['literals'], file=code)
@@ -310,12 +335,20 @@ def save_config(json_config):
310335
print("[emby]", file=code)
311336
print("emby_url = " + json_config['emby_url'], file=code)
312337
print("api_key = " + json_config['api_key'], file=code)
313-
code.close()
314-
338+
print("", file=code)
339+
print("[mark]", file=code)
340+
print("poster_mark = " + str(json_config['poster_mark']), file=code)
341+
print("thumb_mark = " + str(json_config['thumb_mark']), file=code)
342+
print("mark_size = " + str(json_config['mark_size']), file=code)
343+
print("mark_type = " + json_config['mark_type'], file=code)
344+
print("mark_pos = " + json_config['mark_pos'], file=code)
345+
print("# mark_size : range 1-5", file=code)
346+
print("# mark_type : sub, leak, uncensored", file=code)
347+
print("# mark_pos : bottom_right or bottom_left or top_right or top_left", file=code)
348+
print("", file=code)
349+
print("[uncensored]", file=code)
350+
print("uncensored_prefix = " + str(json_config['uncensored_prefix']), file=code)
351+
print("uncensored_poster = " + str(json_config['uncensored_poster']), file=code)
352+
print("# 0 : official, 1 : cut", file=code)
315353

316-
# ========================================================================元数据获取失败检测
317-
def getDataState(json_data):
318-
if json_data['title'] == '' or json_data['title'] == 'None' or json_data['title'] == 'null':
319-
return 0
320-
else:
321-
return 1
354+
code.close()

Function/getHtml.py

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,30 @@
33
from configparser import ConfigParser
44

55

6-
# ========================================================================网页请求
7-
def get_html(url, cookies=None):
6+
# ========================================================================获取config
7+
def get_config():
    """Load the application's ``config.ini`` into a ``ConfigParser``.

    ``../config.ini`` is preferred; ``config.ini`` in the current directory
    is used otherwise. When neither exists the parser is asked to read an
    empty path and is returned as-is.

    :return: the ``ConfigParser`` instance after the read attempt.
    """
    candidates = ('../config.ini', 'config.ini')
    # First existing candidate wins; '' when none of them is present.
    config_file = next((path for path in candidates if os.path.exists(path)), '')
    parser = ConfigParser()
    parser.read(config_file, encoding='UTF-8')
    return parser
16+
17+
18+
# ========================================================================网页请求
19+
def get_html(url, cookies=None):
20+
config = get_config()
1521
retry_count = 0
1622
proxy = ''
1723
timeout = 0
1824
try:
1925
proxy = str(config['proxy']['proxy'])
2026
timeout = int(config['proxy']['timeout'])
2127
retry_count = int(config['proxy']['retry'])
22-
except:
28+
except Exception as error_info:
29+
print('Error in get_html :' + str(error_info))
2330
print('[-]Proxy config error! Please check the config.')
2431
i = 0
2532
while i < retry_count:
@@ -39,10 +46,40 @@ def get_html(url, cookies=None):
3946
getweb = requests.get(str(url), headers=headers, timeout=timeout, cookies=cookies)
4047
getweb.encoding = 'utf-8'
4148
return getweb.text
42-
except:
49+
except Exception as error_info:
4350
i += 1
51+
print('Error in get_html :' + str(error_info))
4452
print('[-]Connect retry ' + str(i) + '/' + str(retry_count))
4553
print('[-]Connect Failed! Please check your Proxy or Network!')
4654
return 'ProxyError'
4755

4856

57+
def post_html(url: str, query: dict):
    """POST ``query`` as form data to ``url`` and return the response text.

    Proxy address, timeout and retry count are read from the ``[proxy]``
    section of the config. If any of them cannot be read, an error is
    printed and the values not yet read keep their defaults (no proxy,
    10 second timeout, 3 attempts).

    :param url: target URL.
    :param query: form fields sent as the request body.
    :return: the response body decoded as UTF-8, or the string
        ``'ProxyError'`` once every attempt has failed.
    """
    config = get_config()
    retry_count = 3
    proxy = ''
    timeout = 10
    try:
        proxy = str(config['proxy']['proxy'])
        timeout = int(config['proxy']['timeout'])
        retry_count = int(config['proxy']['retry'])
    except Exception as error_info:
        print('Error in post_html :' + str(error_info))
        print('[-]Proxy config error! Please check the config.')
    if proxy:
        # The scheme names the protocol spoken *to the proxy*, not the one
        # of the target site. A plain "host:port" proxy is reached over
        # HTTP for both kinds of traffic; prefixing it with "https://"
        # makes requests attempt a TLS handshake with the proxy itself.
        # A value that already carries a scheme is used unchanged.
        proxy_url = proxy if '://' in proxy else 'http://' + proxy
        proxies = {"http": proxy_url, "https": proxy_url}
    else:
        proxies = {}
    for i in range(retry_count):
        try:
            result = requests.post(url, data=query, proxies=proxies, timeout=timeout)
            result.encoding = 'utf-8'
            return result.text
        except Exception as error_info:
            # Best effort: report the failure and move on to the next attempt.
            print('Error in post_html :' + str(error_info))
            print("[-]Connect retry {}/{}".format(i + 1, retry_count))
    print("[-]Connect Failed! Please check your Proxy or Network!")
    return 'ProxyError'
84+
85+

Getter/avsox.py

Lines changed: 49 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import json
22
import re
3-
43
from bs4 import BeautifulSoup
54
from lxml import etree
65
from Function.getHtml import get_html
@@ -22,7 +21,7 @@ def getTitle(a):
2221
try:
2322
html = etree.fromstring(a, etree.HTMLParser())
2423
result = str(html.xpath('/html/body/div[2]/h3/text()')).strip(" ['']") # [0]
25-
return result.replace('/', '').replace('_', '-')
24+
return result.replace('/', '')
2625
except:
2726
return ''
2827

@@ -57,7 +56,7 @@ def getSeries(a):
5756

5857
def getNum(a):
    """Extract the identification code shown on an avsox detail page.

    :param a: HTML source of the page.
    :return: text of the second ``span`` next to the "识别码:" label, or an
        empty string when the XPath matches nothing.
    """
    tree = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    matches = tree.xpath('//span[contains(text(),"识别码:")]/../span[2]/text()')
    # str() of the result list followed by strip() peels off the list
    # brackets and quotes, leaving the bare text.
    return str(matches).strip(" ['']")
6261

6362

@@ -81,17 +80,10 @@ def getCover(htmlcode):
8180
return result
8281

8382

84-
def getCover_small(htmlcode, number):
83+
def getCover_small(htmlcode, count):
    """Return the thumbnail URL of one entry on an avsox search-result page.

    :param htmlcode: HTML source of the search-result page.
    :param count: 1-based position of the entry inside the ``waterfall``
        container.
    :return: the ``src`` of the entry's ``photo-frame`` image, or ``''``
        when that entry has no such image.
    """
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    covers = html.xpath("//div[@id='waterfall']/div[" + str(count) + "]/a/div[@class='photo-frame']/img/@src")
    # Guard the lookup: indexing an empty result would raise IndexError and
    # abort the whole scrape just because the small cover is missing.
    return covers[0] if covers else ''
9587

9688

9789
def getTag(a): # 获取演员
@@ -103,54 +95,64 @@ def getTag(a): # 获取演员
10395
return d
10496

10597

98+
def getUrl(number):
    """Search avsox for ``number`` and locate the matching result entry.

    :param number: release number to search for.
    :return: a tuple ``(count, response, url)`` where ``count`` is the
        1-based position of the matching entry in the ``waterfall``
        container, ``response`` is the raw text returned for the search
        request and ``url`` is the entry's detail-page link. When no entry
        matches, ``count`` is ``0`` and ``url`` is ``''``.
    """
    response = get_html('https://avsox.host/cn/search/' + number)
    html = etree.fromstring(response, etree.HTMLParser())
    wanted = number.upper()
    # Walk the result entries and compare the number printed on each one.
    for position, entry in enumerate(html.xpath('//*[@id="waterfall"]/div'), start=1):
        labels = entry.xpath('a/div[@class="photo-info"]/span/date[1]/text()')
        number_get = str(labels[0]) if labels else ''
        if wanted == number_get.upper():
            links = entry.xpath('a/@href')
            return position, response, (str(links[0]) if links else '')
    # Always return three values so callers can unpack the result the same
    # way on the not-found path (a shorter tuple makes the unpack fail
    # before the caller can inspect ``response``).
    return 0, response, ''
108+
109+
106110
def main(number):
    """Scrape avsox for ``number`` and return the metadata as a JSON string.

    :param number: release number to look up.
    :return: JSON text. On success it holds the full metadata dict; when the
        search request came back as ``'ProxyError'`` it holds an empty
        ``title`` with ``website`` set to ``'timeout'``; on any other
        failure both ``title`` and ``website`` are empty.
    """
    try:
        lookup = getUrl(number)
        # getUrl may hand back only (response, '') when nothing matched;
        # pad it so the unpack below cannot fail before the checks run.
        if len(lookup) == 2:
            lookup = (0,) + tuple(lookup)
        count, response, url = lookup
        if str(response) == 'ProxyError':
            raise TimeoutError
        if url == '':
            raise Exception('Movie Data not found in avsox!')
        web = get_html(url)
        soup = BeautifulSoup(web, 'lxml')
        info = str(soup.find(attrs={'class': 'row movie'}))
        number = getNum(web)
        title = getTitle(web)
        # Drop the leading number from the title. str.strip(number) would
        # treat the number as a *set of characters* and also eat matching
        # characters from the end of the title.
        if number and title.startswith(number):
            title = title[len(number):]
        release = getRelease(info)
        dic = {
            'actor': getActor(web),
            'title': title.strip().replace(' ', '-'),
            'studio': getStudio(info),
            'runtime': getRuntime(info),
            'release': release,
            'number': getNum(info),
            'tag': getTag(web),
            'series': getSeries(info),
            'year': getYear(release),
            'actor_photo': getActorPhoto(web),
            'cover': getCover(web),
            'cover_small': getCover_small(response, count),
            'imagecut': 3,
            'director': '',
            'publisher': '',
            'outline': '',
            'score': '',
            'website': url,
            'source': 'avsox.py',
        }
    except TimeoutError:
        dic = {
            'title': '',
            'website': 'timeout',
        }
    except Exception as error_info:
        print('Error in avsox.main : ' + str(error_info))
        dic = {
            'title': '',
            'website': '',
        }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
    return js
155156

156157
# print(main('051119-917'))
158+
# print(main('032620_001'))

0 commit comments

Comments
 (0)