Semester Overview Table is now parsed correctly

JM-Lemmi · JM-Lemmi · commit ba76b5512c9a · 2022-06-30T22:28:56.000+02:00
diff --git a/campusnet.py b/campusnet.py
@@ -21,14 +21,15 @@
 response = requests.post('https://dualis.dhbw.de/scripts/mgrqispi.dll', headers=headers, data=data)
 
 cnsc = response.headers['Set-cookie'][0:38].replace(" ", "") #cookie in format "csnc =FA27B61020C03AA5A83046B13D6CC38D; HttpOnly; secure" broken down to "csnc=FA27B61020C03AA5A83046B13D6CC38D"
-
 # TODO: something is borken with that cnsc. It looks identical to a valid one, but only gives access denied.
-# debug:
 cnsc = 'cnsc=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
 
+# TODO: get the -N ARGUMENTS values from the response redirect instead of hard-coding them?
+
 ## Get all Semesters
 
 # Prüfungsergebnisse Tab: https://dualis.dhbw.de/scripts/mgrqispi.dll?APPNAME=CampusNet&PRGNAME=COURSERESULTS&ARGUMENTS=-N422875220398735,-N000307,
+# the ARGUMENTS values appear to be dynamic...
 # Semesterlist: <select id="semester" .*> </select>
 # then get value from option, display name
 
@@ -37,7 +38,7 @@
     'Cookie': cnsc,
 }
 
-response = requests.get('https://dualis.dhbw.de/scripts/mgrqispi.dll?APPNAME=CampusNet&PRGNAME=COURSERESULTS&ARGUMENTS=-N591469968597102,-N000307', data="", headers=headers)
+response = requests.get('https://dualis.dhbw.de/scripts/mgrqispi.dll?APPNAME=CampusNet&PRGNAME=COURSERESULTS&ARGUMENTS=-N518587923698845,-N000019', data="", headers=headers)
 
 semester = {}
 
@@ -54,7 +55,7 @@
         'Host': 'dualis.dhbw.de',
         'Cookie': cnsc,
     }
-    response = requests.get(f'https://dualis.dhbw.de/scripts/mgrqispi.dll?APPNAME=CampusNet&PRGNAME=COURSERESULTS&ARGUMENTS=-N591469968597102,-N000307,-N{semester[semestername]}', data="", headers=headers) #get page with Prüfungen Table
+    response = requests.get(f'https://dualis.dhbw.de/scripts/mgrqispi.dll?APPNAME=CampusNet&PRGNAME=COURSERESULTS&ARGUMENTS=-N518587923698845,-N000019,-N{semester[semestername]}', data="", headers=headers) #get page with Prüfungen Table
     temptable = re.findall('<table class="nb list">[\s\S]*</table>', response.text)[0] # extract table from html body
     temptable = re.findall('<tbody>[\s\S]*</tbody>', temptable)[0] # extract table body from table
     temprows = temptable.split('<tr')[1:-1] # extract all rows from table. [0] is just <tbody ...>
@@ -63,8 +64,13 @@
         tempcells = row.split('<td')[1:] # extract all columns from table
         currentrow = list()
         for cell in tempcells:
-            cell = cell.split('>', 1)[1].split('</td')[0] # extract content from cell
+            cell = cell.split('>', 1)[1].split('</td')[0].lstrip().rstrip() # extract content from cell, remove whitespaces from left and right
+            if cell.startswith("<a"): #cell with the link
+                cell = cell.split('href="', 1)[1].split('">')[0].replace("&amp;", "&") # keep only the href="..." value and decode the HTML entity &amp; back to a plain &
             currentrow.append(cell) # combine cells to row
+        currentrow = currentrow[:-1] # TODO: hack - drops a trailing extra cell of unknown origin; find out where it comes from and remove this
         table.append(currentrow) # combine rows to table
 
+print(table)
+
 ## Get Prüfungen Results