imascg/param2csv.py at master · rinrinne/imascg · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim: ts=4:sw=4:sts=0
#

import sys
import urllib2
import optparse
import re
import unicodedata
import codecs
import StringIO
import time
import hashlib

# Number of attempts fetch_data() makes before giving up.
RETRY_FETCH = 3

# Wiki page to scrape for each attribute code accepted by the --attr option.
URL_LIST = {
	'cu': 'http://www18.atwiki.jp/imas_cg/pages/13.html',
	'co': 'http://www18.atwiki.jp/imas_cg/pages/14.html',
	'pa': 'http://www18.atwiki.jp/imas_cg/pages/15.html',
}

# Matches an HTML comment of the form <!--ROW-COL--> and captures the two
# numbers plus everything that follows the comment on the same line.
ITEM_PTN = re.compile(r'<!--([0-9]+)-([0-9]+)-->(.*)')

# Non-greedy match of a single <...> tag; used to strip markup from values.
TAG_PTN = re.compile(r'<.*?>')

def fetch_data(attr):
	"""Open the wiki page for *attr* and return it as a UTF-8 text reader.

	Up to RETRY_FETCH attempts are made. Every attempt sends a fresh
	User-Agent built from the MD5 of the current time (presumably so the
	wiki does not reject urllib2's default agent -- confirm).

	attr -- key into URL_LIST ('cu', 'co' or 'pa'); an unknown key raises
	        KeyError.

	Returns a codecs StreamReader wrapping the HTTP response; the caller is
	responsible for closing it. If every attempt fails the process exits
	via sys.exit() with an error message.
	"""
	for attempt in range(RETRY_FETCH):
		sys.stderr.write("%s: Fetch wiki page...\n" % attr)
		agent = 'imascg-' + hashlib.md5(str(time.time())).hexdigest()
		req = urllib2.Request(URL_LIST[attr], None, { 'User-Agent': agent })
		try:
			response = urllib2.urlopen(req)
		except urllib2.URLError as err:
			# urllib2.HTTPError is a subclass of URLError, so this single
			# clause covers both; report the failure instead of retrying
			# silently.
			sys.stderr.write("%s: Fetch failed (%s), attempt %d of %d\n"
				% (attr, err, attempt + 1, RETRY_FETCH))
			continue
		return codecs.getreader('utf-8')(response)

	sys.exit('Cannot fetch wiki page.')


def getdata(attr, file):
	"""Unfinished stub: compiles two patterns and returns None.

	Neither *attr* nor *file* is used yet, and nothing in this file calls
	this function.
	"""
	# "N/N" and "N-N-N" shaped values; compiled but not applied to anything.
	birth_ptn = re.compile('([0-9]+)/([0-9]+)')
	size_ptn = re.compile('([0-9]+)-([0-9]+)-([0-9]+)')

	# TODO: the original author left a disabled sketch here that fetched the
	# page with fetch_data(attr) and looped over f.readlines() doing nothing.

def print_header():
	"""Write the comma-separated header row to stdout.

	The row is encoded with sys.getfilesystemencoding() before printing.
	"""
	column_names = (
		u'属性', u'アイドル名', u'レアリティ', u'レベル上限', u'親愛度上限',
		u'攻', u'守', u'コスト', u'1コスト攻', u'1コスト守', u'特技',
	)
	header_line = ','.join(column_names)
	encoding = sys.getfilesystemencoding()
	# A single parenthesised argument prints identically under the
	# Python 2 print statement.
	print(header_line.encode(encoding))

def pretty_print(record):
	"""Print *record* as one comma-separated line, unless it is a non-data row.

	record -- sequence of strings; element 1 is checked against a list of
	          placeholder values (table headings, empty string, '??', ...)
	          and the record is silently skipped when it matches.

	The line is encoded with sys.getfilesystemencoding() before printing.
	Fields are joined as-is; no CSV quoting or escaping is applied.
	"""
	skipped_names = (u'アイドル名', u'名前', u'今日', u'[[]]', u'??', u'')
	if record[1] in skipped_names:
		return

	line = ','.join(record)
	# A single parenthesised argument prints identically under the
	# Python 2 print statement.
	print(line.encode(sys.getfilesystemencoding()))

def main():
	"""Command-line entry point: scrape the wiki tables and print them as CSV.

	Options:
	  -a/--attr    restrict output to one attribute (cu, co or pa); without
	               it every key of URL_LIST is processed.
	  --no-header  suppress the header row.

	For each attribute the page is fetched, every line carrying a
	<!--ROW-COL--> marker is parsed, and the cells of one table row are
	collected into an 11-slot record (slot 0 is the attribute code, slot
	COL+1 holds the cell text with tags removed and NFKC-normalised).
	Progress messages go to stderr, CSV rows to stdout.
	"""

	parser = optparse.OptionParser()
	parser.add_option('-a', '--attr', dest='attribute', action='store', choices=['cu', 'co', 'pa'],
		help='Choice attribute: cu/co/pa. all attributes are enabled if no option.')
	parser.add_option('--no-header', dest='header', action='store_false', default=True,
		help='Prevent to writer header.')

	(options, args) = parser.parse_args()

	if options.header:
		print_header()

	if options.attribute is None:
		attr_list = URL_LIST.keys()
	else:
		attr_list = [ options.attribute ]

	for attr in attr_list:
		f = fetch_data(attr)
		# Close the reader even if parsing or printing raises.
		try:
			# One slot per column emitted by print_header(). Slots are not
			# cleared between rows, so a cell missing from a row keeps the
			# previous row's value.
			record = [''] * 11
			record[0] = attr
			count = -1
			sys.stderr.write("%s: Processing...\n" % attr)
			for line in f.readlines():
				m = ITEM_PTN.search(line)
				if m is None:
					continue

				row = int(m.group(1))
				col = int(m.group(2))
				val = TAG_PTN.sub('', m.group(3))

				# Column 0 starts a new row: emit the row collected so far.
				if col == 0:
					pretty_print(record)

				record[col+1] = unicodedata.normalize('NFKC', val)

				# A row number lower than the last one seen means numbering
				# restarted; stop processing this page there.
				if count > row:
					pretty_print(record)
					break
				count = row
			else:
				# Input ended without a row-number reset: the row being
				# collected has not been printed yet, so flush it here
				# (pretty_print drops it if it is empty or a heading).
				pretty_print(record)
		finally:
			f.close()

# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':

	main()