-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathremove_entity_html.py
More file actions
115 lines (113 loc) · 6.51 KB
/
remove_entity_html.py
File metadata and controls
115 lines (113 loc) · 6.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import HTMLParser
# Decode named HTML character references (for example ``&pound;`` or
# ``&eacute;``) found in ``input.sql`` and write the result to ``output.sql``.
# Both files are read and written as UTF-8.
#
# The search keys have to be the literal ``&name;`` spellings.  Using the
# already-decoded character as the search string (replace(u'<', u'<')) does
# nothing, and a bare apostrophe key is not even valid syntax, so the table
# below is built from the standard entity list instead of being typed by hand.
import io
import re

try:  # Python 3
    from html.entities import name2codepoint
except ImportError:  # Python 2
    from htmlentitydefs import name2codepoint

try:
    _code_to_char = unichr  # Python 2: chr() only yields byte strings
except NameError:
    _code_to_char = chr  # Python 3

# Entity name -> replacement character.  Covers the five XML built-ins plus
# every ISO-8859-1 entity, i.e. ``&nbsp;`` (U+00A0) through ``&yuml;`` (U+00FF).
# Names are case-sensitive: ``&Agrave;`` and ``&agrave;`` are different keys.
# ``apos`` is listed explicitly because it is not part of the HTML 4 table.
# NOTE(review): decoding &apos; / &quot; inside a SQL dump can terminate a
# string literal early -- confirm how the dump quotes its values before
# loading output.sql into a database.
ENTITY_CHARS = {
    'quot': u'"',
    'apos': u"'",
    'amp': u'&',
    'lt': u'<',
    'gt': u'>',
}
ENTITY_CHARS.update(
    (name, _code_to_char(code))
    for name, code in name2codepoint.items()
    if 0xA0 <= code <= 0xFF
)

# A reference is only recognised with its terminating semicolon, so query
# strings such as ``a=1&copy=2`` are left alone.
_ENTITY_RE = re.compile(r'&([A-Za-z][A-Za-z0-9]*);')


def _decode_match(match):
    """Return the character for a known entity, or the matched text as-is."""
    return ENTITY_CHARS.get(match.group(1), match.group(0))


def unescape_entities(text):
    """Return *text* with the references listed in ENTITY_CHARS decoded.

    The text is scanned once, so every reference is decoded exactly one time:
    ``&amp;lt;`` becomes ``&lt;``, not ``<``.  Chained ``str.replace`` calls
    would decode such nested references a second time, depending on the order
    of the calls.  Unknown names, numeric references (``&#163;``) and
    references without a trailing semicolon are returned unchanged.

    *text* must be a text (unicode) string; the result is a text string.
    """
    return _ENTITY_RE.sub(_decode_match, text)


def main(src_path='input.sql', dst_path='output.sql'):
    """Copy *src_path* to *dst_path*, decoding HTML entities on the way.

    Lines are streamed, so the dump never has to fit in memory.
    ``newline=''`` disables newline translation in both directions, which
    keeps the original line endings byte-for-byte.

    Raises IOError/OSError if a file cannot be opened and UnicodeDecodeError
    if the input is not valid UTF-8.
    """
    with io.open(src_path, 'r', encoding='utf-8', newline='') as src, \
            io.open(dst_path, 'w', encoding='utf-8', newline='') as dst:
        for line in src:
            dst.write(unescape_entities(line))


if __name__ == '__main__':
    main()