forked from John-Gee/HFRSteam
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhfrparser.py
More file actions
125 lines (95 loc) · 4.09 KB
/
hfrparser.py
File metadata and controls
125 lines (95 loc) · 4.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
from datetime import datetime
import re
import domparser
from game import Game
import stringutils
import web
def get_post(url, postid):
    """Return the ``div`` with id ``postid`` from the page at ``url``, as text.

    The page is downloaded with ``web.get_utf8_web_page`` (the first element
    of the pair it returns is ignored), parsed with ``domparser.load_html``
    and searched with ``domparser.get_element``.

    The match is always passed through ``str()``, so a string is returned
    even when nothing matched.
    NOTE(review): confirm what ``get_element`` yields for a missing id.
    """
    _, page = web.get_utf8_web_page(url)
    dom = domparser.load_html(page)
    return str(domparser.get_element(dom, 'div', id=postid))
def get_games(liste, requirements):
    """Turn the raw lines of one list into a ``{name: Game}`` dictionary.

    Args:
        liste: iterable of raw lines (HTML fragments), one game per line.
        requirements: label handed to ``Game`` for the entries of this list.
            When it equals ``"Standard"``, every entry placed before the
            first line starting with ``----`` is labelled ``"Nouveauté"``
            instead and gets ``hfr.gift_date`` set to the current time.

    Returns:
        A dict mapping the cleaned name (tags and the ``(+)`` marker removed)
        to its ``Game``. A name that appears twice keeps the last entry.

    A game is flagged unavailable when its line starts with ``<strike>`` or
    when it sits inside a ``<strike>`` opened on an earlier line and not
    closed yet.
    """
    BEGIN_STRIKED = '<strike>'
    END_STRIKED = '</strike>'
    END_NEW = '----'

    games = {}
    # True while a <strike> opened on a previous line is still unclosed.
    striked = False
    is_new = (requirements == "Standard")

    for name in liste:
        if not name:
            continue
        name = name.strip()

        # The dashed separator closes the "new games" section.
        if name.startswith(END_NEW):
            is_new = False
            continue

        # Compare the positions of the last tags rather than testing
        # endswith(): text trailing the closing tag (e.g. "</strike> (+)")
        # must not leave the strike state stuck on for the following lines.
        last_open = name.rfind(BEGIN_STRIKED)
        last_close = name.rfind(END_STRIKED)

        if name.startswith(BEGIN_STRIKED):
            is_available = False
            if last_open > last_close:
                # Opened here and not closed on this line.
                striked = True
        elif striked:
            is_available = False
            if last_close > last_open:
                # The pending strike is closed on this line.
                striked = False
        else:
            is_available = True

        cleanname = re.sub(r'<.*?>', '', name).replace('(+)', '').strip()
        if not cleanname:
            continue

        if is_new:
            game = Game(is_available, "Nouveauté")
            game.hfr.gift_date = datetime.now()
        else:
            game = Game(is_available, requirements)
        games[cleanname] = game

    return games
def get_names_from_post(post, start, end, is_std):
    """Extract the lines of a game list located between two markers.

    Args:
        post: HTML of the forum post, as a string.
        start: marker preceding the list; only what follows it is kept
            (``stringutils.substringafter``).
        end: marker following the list; only what precedes it is kept
            (``stringutils.substringbefore``).
        is_std: when false, extra clean-up is applied: lines mentioning a
            non-Steam platform in parentheses or an Origin Humble Bundle key
            are blanked, parenthesised text is removed, and quantity
            markers made of ``X`` plus one digit are removed.

    Returns:
        The cleaned text split with ``str.splitlines()``. Blanked lines are
        returned as empty strings.
    """
    subpost = stringutils.substringafter(post, start)
    subpost = stringutils.substringbefore(subpost, end)

    cleansubpost = subpost.replace('<br/>', '\r\n')
    # Decode the HTML entity for the ampersand so names read "A & B".
    cleansubpost = cleansubpost.replace('&amp;', '&')
    cleansubpost = cleansubpost.replace('"', '')

    if not is_std:
        # Blank every line that is tagged with another platform.
        cleansubpost = re.sub(
            r'.*\( *(Uplay|Rockstar Game Social club|GoG|GOG Galaxy|Battlenet|Android|clef Square Enix|Desura|Origin) *\).*',
            '', cleansubpost, flags=re.IGNORECASE)
        cleansubpost = re.sub(r'.*Clef Origin Humble Bundle.*', '',
                              cleansubpost, flags=re.IGNORECASE)
        # Greedy on purpose: removes from the first "(" to the last ")"
        # of a line.
        cleansubpost = re.sub(r'\(.+\)', '', cleansubpost)
        # Quantity markers, plain and struck through.
        cleansubpost = re.sub(r' *X[0-9] *', '', cleansubpost)
        cleansubpost = re.sub(r'<strike>X[0-9]</strike>', '', cleansubpost)

    cleansubpost = cleansubpost.replace('****', '')
    cleansubpost = cleansubpost.strip()

    # the separator is \x1c
    # (str.splitlines() treats \x1c as a line boundary too, not only \r\n)
    return cleansubpost.splitlines()
def parse_hfr_std():
    """Return the games of the standard Steam key list.

    Reads post ``para8945000`` of the forum thread through ``get_post``,
    keeps the text between the "Clefs ... Steam" heading and the long dashed
    rule, and hands the resulting lines to ``get_games`` with the
    ``"Standard"`` label.
    """
    thread_url = 'http://forum.hardware.fr/hfr/JeuxVideo/Achat-Ventes/gratuit-origin-download-sujet_171605_1.htm'
    list_begin = '<strong>Clefs <img alt="[:icon4]" src="http://forum-images.hardware.fr/images/perso/icon4.gif" title="[:icon4]"/> Steam <img alt="[:icon4]" src="http://forum-images.hardware.fr/images/perso/icon4.gif" title="[:icon4]"/> :</strong> <br/><strong> <br/>'
    list_end = '--------------------------------------------------------------------------'

    html = get_post(thread_url, 'para8945000')
    lines = get_names_from_post(html, list_begin, list_end, True)
    return get_games(lines, "Standard")
def parse_hfr_donateur():
    """Return the games of the donor list.

    Reads post ``para8952242`` of the forum thread through ``get_post``,
    keeps the text between the "Liste donateur:" heading and the next
    ``----`` rule, and hands the resulting lines to ``get_games`` with the
    ``"Donateur"`` label.
    """
    thread_url = 'http://forum.hardware.fr/hfr/JeuxVideo/Achat-Ventes/gratuit-origin-download-sujet_171605_1.htm'
    list_begin = '<strong>Liste donateur:</strong>'
    list_end = '----'

    html = get_post(thread_url, 'para8952242')
    lines = get_names_from_post(html, list_begin, list_end, False)
    return get_games(lines, "Donateur")
def parse_hfr_premium():
    """Return the games of the premium list.

    Reads post ``para8952242`` of the forum thread through ``get_post``,
    keeps the text between the "Liste Premium" heading and the next ``----``
    rule, and hands the resulting lines to ``get_games`` with the
    ``"Premium"`` label.
    """
    thread_url = 'http://forum.hardware.fr/hfr/JeuxVideo/Achat-Ventes/gratuit-origin-download-sujet_171605_1.htm'
    list_begin = '<strong>Liste Premium ( exclusivement réservée aux donateurs réguliers ):</strong>'
    list_end = '----'

    html = get_post(thread_url, 'para8952242')
    lines = get_names_from_post(html, list_begin, list_end, False)
    return get_games(lines, "Premium")
def parse_hfr():
    """Return every game from the standard, donor and premium lists.

    The three dictionaries are merged in that order with ``dict.update``,
    so a name present in several lists keeps the entry of the last list
    that contains it.
    """
    merged = parse_hfr_std()
    for parse_list in (parse_hfr_donateur, parse_hfr_premium):
        merged.update(parse_list())
    return merged
if __name__ == '__main__':
    # Manual check: print the name of every game found in the donor list.
    for title in parse_hfr_donateur():
        print(title)