Skip to content

Commit a802ffc

Browse files
committed
Fix encoding of html
1 parent 85738e5 commit a802ffc

File tree

2 files changed

+18
-2
lines changed

2 files changed

+18
-2
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
venv
1+
venv
2+
__pycache__

CampusNet.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,11 @@ def _get_semesters(self):
105105
:return: A list of semesters.
106106
"""
107107
response = self.session.get(self.create_url('COURSERESULTS'))
108+
# The web service does not send the header "Content-Type: text/html; charset=utf-8",
109+
# so requests falls back to ISO-8859-1, which is not the body's real encoding. Assigning
110+
# requests' own guess (apparent_encoding) makes response.text decode the body correctly.
111+
# See also https://stackoverflow.com/a/52615216
112+
response.encoding = response.apparent_encoding
108113
soup = BeautifulSoup(response.text, 'html.parser')
109114
semesters = {}
110115
for semester in soup.find_all('option'):
@@ -137,7 +142,12 @@ def _get_modules(self):
137142
'sessionno': self.session_number,
138143
'menuno': '000307'
139144
})
140-
145+
# The web service does not send the header "Content-Type: text/html; charset=utf-8",
146+
# so requests falls back to ISO-8859-1, which is not the body's real encoding. Assigning
147+
# requests' own guess (apparent_encoding) makes response.text decode the body correctly.
148+
# See also https://stackoverflow.com/a/52615216
149+
response.encoding = response.apparent_encoding
150+
print(response.text)
141151
soup = BeautifulSoup(response.text, 'html.parser')
142152
table = soup.find('table', {'class': 'nb list'})
143153
for row in table.find_all('tr')[1:]:
@@ -182,6 +192,11 @@ def get_exams_for_module(self, module: Module):
182192
:return: A list of exams.
183193
"""
184194
response = self.session.get(self.create_url('RESULTDETAILS', f",-N{module.id}"))
195+
# The web service does not send the header "Content-Type: text/html; charset=utf-8",
196+
# so requests falls back to ISO-8859-1, which is not the body's real encoding. Assigning
197+
# requests' own guess (apparent_encoding) makes response.text decode the body correctly.
198+
# See also https://stackoverflow.com/a/52615216
199+
response.encoding = response.apparent_encoding
185200
soup = BeautifulSoup(response.text, 'html.parser')
186201
exam_table = soup.find('table', {'class': 'tb'})
187202
exams = []

0 commit comments

Comments
 (0)