Skip to content

Commit ac38a1f

Browse files
Merge pull request #2326 from Juhibhojani/master
GeeksforGeeks Scrapper
2 parents d496483 + 11e3dac commit ac38a1f

File tree

3 files changed

+219
-0
lines changed

3 files changed

+219
-0
lines changed

GeeksforGeeks-Scrapper/gfg.py

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
from bs4 import BeautifulSoup
2+
from selenium import webdriver
3+
from selenium.common.exceptions import WebDriverException, NoSuchElementException
4+
from webdriver_manager.chrome import ChromeDriverManager
5+
from selenium.webdriver.chrome.service import Service
6+
from selenium.webdriver.support.ui import WebDriverWait
7+
8+
class geeksforgeeks:
    """Scrape course listings from the GeeksforGeeks courses page.

    The page is fetched once, while the class body executes: a Chrome
    driver is started, the courses URL is loaded, and the resulting HTML is
    parsed into ``soup``.  The three public methods only read that parsed
    snapshot; they never touch the browser again.

    NOTE(review): ``wait`` is created but never used in this class, so
    ``page_source`` is read immediately after ``driver.get`` returns.  If
    the course cards are rendered late by JavaScript they may be missing
    from ``soup`` -- confirm against the live page.
    NOTE(review): the driver is never quit anywhere in this class, so the
    browser stays open for the lifetime of the process.
    """

    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service)
    wait = WebDriverWait(driver, 100)
    # Use Selenium to load the page, then hand the HTML to BeautifulSoup.
    url = "https://practice.geeksforgeeks.org/courses?utm_source=geeksforgeeks&utm_medium=main_header&utm_campaign=courses"
    driver.get(url)
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")

    # Placeholder stored when a card lacks a piece of information.
    _MISSING = "Information not available"

    # CSS class selectors shared by every course section.
    _CARD_ATTRS = {"class": "ui card courseListingPage_courseCardContainer__lLZiS"}
    _NAME_ATTRS = {
        "class": "ui left aligned header courseListingPage_myAuto__i6GdI sofia-pro course_heading"
    }
    _INTERESTED_ATTRS = {
        "class": "courseListingPage_descriptionText__zN_K1 sofia-pro g-opacity-50 g-mb-0 grid_with__meta"
    }
    _PRICE_ATTRS = {"class": "sofia-pro g-mb-0 courseListingPage_batchFee__0NlbJ"}

    @staticmethod
    def _text_of(element, default):
        """Return ``element.text``, or ``default`` when the lookup found nothing."""
        return default if element is None else element.text

    @staticmethod
    def _popular_rating(card):
        """Locate the rating element of a "popular now" card (may be None)."""
        return card.find("span", {"class": "urw-din"})

    @staticmethod
    def _self_paced_rating(card):
        """Locate the rating element of a self-paced card (may be None)."""
        grid = card.find(
            "div", {"class": "courseListingPage_courseCardContentsGrid__jk3VM"}
        )
        # Guard the chained lookup: a card without the grid has no rating.
        if grid is None:
            return None
        return grid.find("span", {"class": "urw-din"})

    @staticmethod
    def _live_rating(card):
        """Locate the rating element of a live-course card (may be None)."""
        return card.find("div", {"class": "meta"})

    @classmethod
    def _collect_courses(cls, container, find_rating, section, missing_price):
        """Extract name, rating, interest count and price from every card.

        :param container: element holding the course cards of one section
        :param find_rating: callable taking a card and returning its rating
            element or None
        :param section: human-readable section name used in error messages
        :param missing_price: value stored when a card has no price element
        :return: dictionary with the keys Name, Rating, Interested and Price,
            each mapping to a list with one entry per card
        :raises Exception: if the section container is not a searchable element
        """
        # find() yields None for a missing section, and sibling navigation can
        # land on a text node; neither can be searched for cards.
        if container is None or not hasattr(container, "find_all"):
            raise Exception(
                f"An error occurred while scraping {section} courses: "
                "course section not found on the page"
            )

        name = []
        rating = []
        interested = []
        price = []
        for card in container.find_all("a", cls._CARD_ATTRS):
            name.append(cls._text_of(card.find("h4", cls._NAME_ATTRS), cls._MISSING))
            rating.append(cls._text_of(find_rating(card), cls._MISSING))

            interest = card.find("div", cls._INTERESTED_ATTRS)
            if interest is None:
                interested.append(cls._MISSING)
            else:
                # Only the first space-separated token (the count) is kept.
                interested.append(interest.text.split(" ")[0])

            price.append(cls._text_of(card.find("p", cls._PRICE_ATTRS), missing_price))

        return {
            "Name": name,
            "Rating": rating,
            "Interested": interested,
            "Price": price,
        }

    def get_popular_now(self):
        """
        Fetches popular now courses and related information from gfg portal

        :return: datatype : dictionary with keys "data" and "message", where
        "data" contains:
        -> Name : Name of courses
        -> Rating : Rating of courses
        -> Interested : Number of people interested
        -> Price : Price of given course
        :raises Exception: if the popular-now section cannot be scraped
        """
        try:
            popular_now = geeksforgeeks.soup.find(
                "div",
                {
                    "class": "ui cards courseListingPage_cardLayout__multW courseListingPage_toggleCourseCards__pWBVA"
                },
            )
            course_popular_now = geeksforgeeks._collect_courses(
                popular_now,
                geeksforgeeks._popular_rating,
                "popular",
                geeksforgeeks._MISSING,
            )
        except (WebDriverException, NoSuchElementException, AttributeError) as e:
            # BeautifulSoup signals a missing element with None, which shows
            # up as AttributeError rather than a Selenium exception.
            raise Exception(
                f"An error occurred while scraping popular courses: {str(e)}"
            ) from e
        return {
            "data": course_popular_now,
            "message": "Popular Courses are now fetched",
        }

    def get_self_paced(self):
        """
        Fetches self-paced courses and related information from gfg portal

        :return: datatype : dictionary with keys "data" and "message", where
        "data" contains:
        -> Name : Name of courses
        -> Rating : Rating of courses
        -> Interested : Number of people interested
        -> Price : Price of given course
        :raises Exception: if the self-paced section cannot be scraped
        """
        try:
            self_paced = geeksforgeeks.soup.find(
                "div",
                {
                    "class": "ui cards courseListingPage_cardLayout__multW courseListingPage_courseCardsGrid__VYBzZ"
                },
            )
            course_self_paced = geeksforgeeks._collect_courses(
                self_paced,
                geeksforgeeks._self_paced_rating,
                "self-paced",
                geeksforgeeks._MISSING,
            )
        except (WebDriverException, NoSuchElementException, AttributeError) as e:
            raise Exception(
                f"An error occurred while scraping self-paced courses: {str(e)}"
            ) from e
        return {
            "data": course_self_paced,
            "message": "Self paced Courses are now fetched",
        }

    def get_live_course(self):
        """
        Fetches live courses and related information from gfg portal

        :return: datatype : dictionary with keys "data" and "message", where
        "data" contains:
        -> Name : Name of courses
        -> Rating : Rating of courses
        -> Interested : Number of people interested
        -> Price : Price of given course ("0" when no price is shown)
        :raises Exception: if the live-course section cannot be scraped
        """
        try:
            # The live section is located relative to the "g-mt-8" div: it is
            # the third sibling after it.  Walk one hop at a time so a missing
            # anchor or sibling yields None instead of an AttributeError.
            live = geeksforgeeks.soup.find("div", {"class": "g-mt-8"})
            for _ in range(3):
                live = getattr(live, "next_sibling", None)
            course_live = geeksforgeeks._collect_courses(
                live,
                geeksforgeeks._live_rating,
                "live",
                "0",
            )
        except (WebDriverException, NoSuchElementException, AttributeError) as e:
            raise Exception(
                f"An error occurred while scraping live courses: {str(e)}"
            ) from e
        return {
            "data": course_live,
            "message": "Live Courses are now fetched",
        }
211+

GeeksforGeeks-Scrapper/readme.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# GeeksforGeeks Scraper
2+
3+
Scrapes the courses in three different categories (popular now, self-paced, and live) when the corresponding method is called. Returns the data in JSON-like form (a Python dictionary).
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
requests==2.28.2
2+
beautifulsoup4==4.11.1
3+
selenium==4.9.1
4+
undetected-chromedriver==3.5.0
5+
webdriver_manager

0 commit comments

Comments
 (0)