Skip to content

Commit 8402c6d

Browse files
Merge pull request #4 from zhackk/v2.0.0
V2.0.0
2 parents e961cc2 + 43771de commit 8402c6d

File tree

1 file changed

+80
-67
lines changed

1 file changed

+80
-67
lines changed

mscsploit.py

Lines changed: 80 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -2,43 +2,41 @@
22

33
from bs4 import BeautifulSoup
44
from colorama import Fore
5-
5+
import requests
66
import argparse
7-
import html
8-
import os
97
import re
10-
import requests
8+
import os
119

1210
# Command-line interface. Every option is optional: the rest of the script
# tests `if args.batch:` / `if args.course:` / the folder argument and falls
# back to an interactive prompt when an option was not supplied.
parser = argparse.ArgumentParser(
    description='API to download lectures off msc-mu.com',
)
parser.add_argument(
    '-b', '--batch',
    type=int,
    metavar='',  # empty metavar keeps the --help listing compact
    help='to specify batch number',
)
parser.add_argument(
    '-c', '--course',
    type=int,
    metavar='',
    help='to specify course number',
)
parser.add_argument(
    '-f', '--folder',
    type=str,
    metavar='',
    help='to specify destination folder',
)
# Parsed once at import time; the functions below read this module-level value.
args = parser.parse_args()
1715

18-
#FOLDER = '\\Documents\\Human Systems\\CVS\\' #Beggining with ~
19-
FOLDER = '/documents/med/' # For linux
16+
FOLDER = '/dox/med'
2017

2118
# HTTP headers sent with every request made by this script.
# The User-Agent mimics a desktop Chrome browser; the rendering-engine token
# must read "KHTML, like Gecko" (not "HTML, ...") to be a well-formed Chrome UA.
# The lower-case `headers` alias is kept because the original statement bound it too.
HEADERS = headers = {
    "User-Agent": (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    ),
}
2421

22+
2523
def choose_batch():
2624
batches = [
27-
[1, '2022', 'https://msc-mu.com/level/17'],
25+
[1, 'Athar', 'https://msc-mu.com/level/17'],
2826
[2, 'Rou7', 'https://msc-mu.com/level/16'],
2927
[3, 'Wateen', 'https://msc-mu.com/level/15'],
3028
[4, 'Nabed', 'https://msc-mu.com/level/14'],
3129
[5, 'Wareed', 'https://msc-mu.com/level/13'],
3230
[6, 'Minors', 'https://msc-mu.com/level/10'],
33-
[7, 'Majors', 'https://msc-mu.com/level/9' ]
31+
[7, 'Majors', 'https://msc-mu.com/level/9']
3432
]
3533
print('\n')
3634
if args.batch:
3735
batch_url = batches[args.batch - 1][2]
3836
print(Fore.GREEN + '\n[*] Searching', batches[args.batch - 1][1] + '\'s batch...\n')
3937
return batch_url
4038
for batch in batches:
41-
print(str(batch[0]) + ') ' + batch[1] )
39+
print(str(batch[0]) + ') ' + batch[1])
4240
selected_batch = input('\n[*] Which batch are you?\n\n>> ')
4341
try:
4442
selected_batch = int(selected_batch)
@@ -51,6 +49,7 @@ def choose_batch():
5149
print('\n[*]Invalid Input\n')
5250
return choose_batch()
5351

52+
5453
def find_courses(url):
5554
page = requests.get(url, headers=HEADERS)
5655
doc = BeautifulSoup(page.text, 'html.parser')
@@ -63,18 +62,6 @@ def find_courses(url):
6362
courses.append([x + 1, course_name, course_number])
6463
return courses
6564

66-
def find_subject_folder(name, doc):
67-
if ''' not in name:
68-
name = html.unescape(name)
69-
else:
70-
name = name.strip(''')
71-
name = html.unescape(name)
72-
folder_source = doc.find_all("a", string=name)[0].parent.parent.parent.parent.parent.parent.parent.parent.parent.parent.parent.parent.parent.parent
73-
folder = re.findall('''</i>
74-
(.*)
75-
76-
</h6>''', folder_source.decode())
77-
return folder[0]
7865

7966
def choose_course(courses):
8067
if args.course:
@@ -98,44 +85,6 @@ def choose_course(courses):
9885
print('\n[*]Invalid Input\n')
9986
return choose_course(courses)
10087

101-
def download_lectures(url, folder):
102-
course_page = requests.get(url, headers=HEADERS)
103-
extensions = ['.pdf', '.pptx']
104-
for extension in extensions:
105-
links = re.findall('<a href="(.*)">.*' + extension + '</a>', course_page.content.decode())
106-
names = re.findall('<a href=".*">(.*)' + extension + '</a>', course_page.content.decode())
107-
doc = BeautifulSoup(course_page.text, 'html.parser')
108-
y = 0
109-
prev_sub_folder = None
110-
subject_folders_list =[]
111-
for x, link in enumerate(links):
112-
link = link.strip() + extension
113-
subject_folder = find_subject_folder(names[x] + extension, doc)
114-
if subject_folder != prev_sub_folder:
115-
if subject_folder in subject_folders_list:
116-
subject_folder = subject_folder + '-extras'
117-
y = 0
118-
new_name = str(y + 1) + '. ' + names[x] + extension
119-
y += 1
120-
subject_folders_list.append(subject_folder)
121-
prev_sub_folder = subject_folder
122-
file_path = folder + subject_folder + '/' + new_name
123-
if os.path.isfile(file_path):
124-
if new_name.startswith('1.'):
125-
print('\n################ ' + subject_folder + ' ################\n')
126-
print( Fore.MAGENTA + new_name + ' <is already downloaded there XD>' + Fore.RESET)
127-
continue
128-
if not os.path.isdir(folder + subject_folder):
129-
os.makedirs(folder + subject_folder)
130-
print('\n################ ' + subject_folder + ' ################\n')
131-
132-
response = requests.get(link, headers=HEADERS)
133-
with open(file_path, 'wb') as file:
134-
file.write(response.content)
135-
print('[*] Downloaded ' + new_name)
136-
137-
138-
# If not specified, prompt the user to input a folder
13988

14089
def choose_folder():
14190
folder = os.path.expanduser("~") + FOLDER
@@ -144,15 +93,17 @@ def choose_folder():
14493
args.folder = os.path.expanduser(args.folder)
14594
if os.path.isdir(args.folder):
14695
folder = args.folder
96+
if not folder[-1] == os.path.sep:
97+
folder = folder + os.path.sep
14798
return folder
14899
else:
149100
print('\n[*] Folder Not found! ', end='')
150101
quit()
151102
else:
152-
answer = input('[*] Your default destination is ' + folder + "\n[*] Do you want to keep that (Y/n): ")
103+
answer = input('[*] Your default destination is ' + folder + "\n[*] Do you want to keep that (Y/n): ")
153104
if answer == 'n' or answer == 'no' or answer == 'N':
154105
valid_folder = False
155-
while valid_folder == False:
106+
while not valid_folder:
156107
selected_folder = input('\n[*] Enter the Folder you want to save material in.\n\n>> ')
157108
# Adds a separator at the end if the user didn't
158109
if not selected_folder.endswith(os.path.sep):
@@ -163,41 +114,103 @@ def choose_folder():
163114
valid_folder = True
164115
else:
165116
print('\n[*] Folder Not found! ', end='')
117+
if not folder[-1] == os.path.sep:
118+
folder = folder + os.path.sep
166119
return folder
167120

168-
# Gets the name of the course from the course number, and makes a folder with that name
121+
122+
def create_nav_links_dictionary(soup):
    """Map each navigation tab's pane id to its displayed title.

    Scans every ``<li class="nav-item">`` in ``soup``. For items that contain
    an ``<h5>`` heading, the heading text (stripped) is stored under the value
    of the ``aria-controls`` attribute of the item's first ``<a>``.

    Args:
        soup: Parsed course page (anything exposing ``find_all`` the way a
            BeautifulSoup document does).

    Returns:
        dict: ``{aria-controls value: heading text}``. Items lacking a
        heading, a link, or an ``aria-controls`` value are skipped.
    """
    navigate_dict = {}
    for nav_item in soup.find_all('li', attrs={"class": "nav-item"}):
        heading = nav_item.h5
        anchor = nav_item.a
        # An item without both a heading and a link cannot name a tab pane.
        if heading is None or anchor is None:
            continue
        pane_id = anchor.get('aria-controls')
        # Without aria-controls there is no pane id to key the title on.
        if not pane_id:
            continue
        navigate_dict[pane_id] = heading.text.strip()
    return navigate_dict
131+
169132

170133
def make_course_folder(courses, index, folder):
    """Create (if needed) and return the sub-folder named after a course.

    Args:
        courses: Sequence of ``[position, course_name, course_number]``
            entries, as built by ``find_courses``.
        index: Course number to look up; compared with ``==`` against the
            third element of each entry.
        folder: Parent directory. A trailing path separator is added when
            missing.

    Returns:
        str: ``<folder><course_name><sep>``, guaranteed to exist on return.

    Raises:
        ValueError: If no entry in ``courses`` has the requested number.
    """
    course_name = next(
        (course[1] for course in courses if course[2] == index),
        None,
    )
    if course_name is None:
        # Previously this surfaced as an opaque TypeError on string concatenation.
        raise ValueError('no course with number %r' % (index,))

    # Tolerate a parent path without a trailing separator; an empty parent
    # is left alone so the result stays relative.
    if folder and not folder.endswith(os.path.sep):
        folder = folder + os.path.sep
    new_folder = folder + course_name + os.path.sep

    # makedirs also creates missing parents and is a no-op when the folder exists.
    os.makedirs(new_folder, exist_ok=True)
    return new_folder
181144

145+
146+
def find_files_paths_and_links(navigation_dict, soup):
    """Find every PDF/PowerPoint link on a course page and its target folder.

    For each ``<a>`` whose text contains ``.pdf`` or ``.ppt``, the ancestors
    are walked upward. Every ``<div class="mb-3">`` contributes its ``<h6>``
    text as a folder level, and the enclosing ``<div class="tab-pane">``
    supplies the id used to look up the top-level folder name.

    Args:
        navigation_dict: Mapping of tab-pane id to tab title, as produced by
            ``create_nav_links_dictionary``.
        soup: Parsed course page.

    Returns:
        list: ``[relative_dir, href, link_text]`` entries in document order.
        ``relative_dir`` ends with a path separator, or is ``''`` when no
        folder level could be determined. An empty list is returned, after
        printing a notice, when the page has no matching links.
    """
    def _is_lecture_file(text):
        # bs4 passes None for tags without a single string child.
        return bool(text) and ('.pdf' in text or '.ppt' in text)

    # One pass with a combined predicate, so a link matching both patterns
    # is listed only once.
    file_tags = soup.find_all('a', string=_is_lecture_file)
    if not file_tags:
        print('no pdf or pptx files!')
        return []

    files_list = []
    for file_tag in file_tags:
        file_link = file_tag.get('href')
        if not file_link:
            # Nothing to download for an anchor without a target.
            continue

        sections = []       # folder levels, innermost first
        nav_link_id = None  # reset per file so ids never leak between files
        ancestor = file_tag.parent
        while ancestor is not None:
            if ancestor.name == 'div':
                classes = ancestor.get('class') or []
                if 'mb-3' in classes and ancestor.h6 is not None:
                    title = ancestor.h6.text.strip()
                    if title:
                        sections.append(title)
                if 'tab-pane' in classes:
                    nav_link_id = ancestor.get('id')
            ancestor = ancestor.parent

        # The tab title becomes the outermost folder; unknown ids are skipped
        # rather than raising KeyError.
        nav_name = navigation_dict.get(nav_link_id)
        if nav_name:
            sections.append(nav_name)
        sections.reverse()

        file_path = os.path.join(*sections) + os.path.sep if sections else ''
        files_list.append([file_path, file_link, file_tag.text])
    return files_list
173+
174+
175+
def download_from_dict(path_link_dict, folder):
    """Download each listed file under ``folder``, skipping files already there.

    Args:
        path_link_dict: Iterable of ``(path, link, name)`` triples (despite
            the parameter name, not a dict): the relative directory, the
            download URL and the file name.
        folder: Destination root. It is concatenated directly with ``path``,
            so it must already end with a path separator.

    A failed download is reported and skipped so that one bad link does not
    abort the rest of the batch.
    """
    for path, link, name in path_link_dict:
        target_dir = folder + path
        target_file = target_dir + name

        if os.path.isfile(target_file):
            print(Fore.MAGENTA + path + name + ' <is already downloaded there XD>' + Fore.RESET)
            continue

        try:
            response = requests.get(link, headers=HEADERS, timeout=60)
            # Without this, an HTTP error page would be saved as the lecture.
            response.raise_for_status()
        except requests.RequestException as error:
            print(Fore.RED + '[!] Failed to download ' + name + ': ' + str(error) + Fore.RESET)
            continue

        # Create the directory only once there is something to put in it.
        os.makedirs(target_dir, exist_ok=True)
        with open(target_file, 'wb') as file:
            file.write(response.content)
        print('[*] Downloaded ' + name)
189+
190+
182191
def main():
    """Run the full workflow: pick folder, batch and course, then download.

    The steps, in order: choose the destination folder, choose a batch,
    list that batch's courses, choose one, create its folder, fetch the
    course page and download every lecture file found on it.

    Raises:
        requests.HTTPError: If the course page responds with an error status.
    """
    folder = choose_folder()
    batch_url = choose_batch()
    courses = find_courses(batch_url)
    course_number = choose_course(courses)
    folder = make_course_folder(courses, course_number, folder)

    download_url = 'https://msc-mu.com/courses/' + str(course_number)
    course_page = requests.get(download_url, headers=HEADERS, timeout=60)
    # Stop here on an HTTP error instead of parsing the error page and
    # reporting an empty course.
    course_page.raise_for_status()
    soup = BeautifulSoup(course_page.text, 'html.parser')

    nav_dict = create_nav_links_dictionary(soup)
    files = find_files_paths_and_links(nav_dict, soup)
    download_from_dict(files, folder)
204+
190205

191206
# Script entry point: banner, run the workflow, then a farewell prompt.
if __name__ == '__main__':
    banner = '#' * 54
    print(Fore.CYAN + banner + Fore.RESET)
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl+C: say goodbye and leave without the "Done" messages.
        print(Fore.RED + '\n[*] KeyboardInterrupt')
        print(Fore.GREEN + '[*] Good bye!')
        quit()
    else:
        # Reached only when main() finished normally.
        print(Fore.GREEN + '\n\n[*] Done...')
        print('[*] Goodbye!')
        input('[*] Press anything to' + Fore.RED + ' exit')

0 commit comments

Comments
 (0)