Skip to content

Commit f8a19a2

Browse files
Add EXTENSIONS list to config.py and use it in scraper.py's file-link search
1 parent 46e9775 commit f8a19a2

File tree

2 files changed

+4
-2
lines changed

2 files changed

+4
-2
lines changed

config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
FOLDER = '/dox/med'
2-
32
HEADERS = headers = {
43
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (HTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
54
}
65
DECOR = ' \033[34;1m::\033[0m '
6+
EXTENSIONS = ["pdf", "ppt", "m4a"]

scraper.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,9 @@ def make_course_folder(self, courses, index, folder):
144144

145145

146146
def find_files_paths_and_links(self, navigation_dict, soup):
147-
file_tags = soup.find_all('a', string=lambda text: text and '.pdf' in text) + soup.find_all('a', string=lambda text: text and '.ppt' in text) + soup.find_all('a', string=lambda text: text and '.m4a' in text)
147+
file_tags = []
148+
for extension in EXTENSIONS:
149+
file_tags += soup.find_all('a', string=lambda text: text and f'.{extension}' in text)
148150
files_list = []
149151
path = []
150152
associated_nav_link_id = ''

0 commit comments

Comments
 (0)