add Prüfungsloop

JM-Lemmi · JM-Lemmi · commit abe84a3d5277 · 2022-06-30T23:32:10.000+02:00
Not working yet.
Line 98 of campusnet.py (the re.findall over the <td> elements) currently returns one giant string instead of a list of strings.
diff --git a/campusnet.py b/campusnet.py
@@ -47,7 +47,9 @@
 
 ## Get all Prüfungen in Semester
 
-table=list() # table is cross semester, so initialized outside
+print("Getting Modules")
+
+overview=list() # table is cross semester, so initialized outside
 
 for semestername in semester:
     print(semestername)
@@ -56,9 +58,9 @@
         'Cookie': cnsc,
     }
     response = requests.get(f'https://dualis.dhbw.de/scripts/mgrqispi.dll?APPNAME=CampusNet&PRGNAME=COURSERESULTS&ARGUMENTS=-N518587923698845,-N000019,-N{semester[semestername]}', data="", headers=headers) #get page with Prüfungen Table
-    temptable = re.findall('<table class="nb list">[\s\S]*</table>', response.text)[0] # extract table from html body
-    temptable = re.findall('<tbody>[\s\S]*</tbody>', temptable)[0] # extract table body from table
-    temprows = temptable.split('<tr')[1:-1] # extract all rows from table. [0] is just <tbody ...>
+    tempoverview = re.findall('<table class="nb list">[\s\S]*</table>', response.text)[0] # extract table from html body
+    tempoverview = re.findall('<tbody>[\s\S]*</tbody>', tempoverview)[0] # extract table body from table
+    temprows = tempoverview.split('<tr')[1:-1] # extract all rows from table. [0] is just <tbody ...>
 
     for row in temprows:
         tempcells = row.split('<td')[1:] # extract all columns from table
@@ -69,8 +71,35 @@
                 cell = cell.split('href="', 1)[1].split('">')[0].replace("&amp;", "&") #only take content in a href="..." and convert url-encoding back to normal
             currentrow.append(cell) # combine cells to row
         currentrow = currentrow[:-1] # i know thats horrible coding, i dont know where that extra cell is from, please fix TODO
-        table.append(currentrow) # combine rows to table
+        overview.append(currentrow) # combine rows to table
+
+print(overview)
+
+## Get Prüfungen Results
 
-print(table)
+print("Getting Prüfungsresults")
 
-## Get Prüfungen Results
+detailview = list() # ['Modulenr', 'Name', 'Course', 'Semester', 'Prüfung', 'Date', 'Bewertung']
+
+for module in overview:  # module[0]/module[1] feed the Modulenr/Name columns; module[-1] is the details link path
+    print(module[1])
+    headers = {'Host': 'dualis.dhbw.de', 'Cookie': cnsc,}
+    response = requests.get(f'https://dualis.dhbw.de{module[-1]}', data="", headers=headers)
+    tempdetails = re.findall(r'<table class="tb"[\s\S]*</table>', response.text)[0] # extract first table from html body, as it contains the prüfungen
+    while '<td class="level02"' in tempdetails:
+        currentrow = [module[0], module[1]] # every result row starts with module number and name
+        # drop everything before the next "level02" heading cell, then separate its text from the rest of the table
+        heading, _, tempdetails = tempdetails.split('<td class="level02"', 1)[1].split('>', 1)[1].partition('</td')
+        currentrow.append(heading)
+        if '<tr>' not in tempdetails: detailview.append(currentrow); break
+        # the <tr> after the heading holds the result cells; whatever follows its </tr> is kept for the next pass
+        resultrow, _, tempdetails = tempdetails.split('<tr>', 1)[1].partition('</tr>')
+        # non-greedy match with a negated class yields one match per <td>; the greedy quantifiers swallowed the whole row as a single match
+        for cell in re.findall(r'<td\b[^>]*>[\s\S]*?</td>', resultrow):
+            currentrow.append(cell.split('>', 1)[1].split('<')[0]) # text between this cell's opening tag and the next tag
+        # TODO: the trailing cells (extern anerkannt and an empty one) may still need to be cut off
+
+        detailview.append(currentrow)
+        # tempdetails already holds the remainder after </tr>, so the while condition can look for the next heading
+
+print(detailview)