Skip to content

Commit c509819

Browse files
Merge pull request #2319 from Swapnil-Singh-99/youtube-scraper
added a youtube channel details scrapper
2 parents fae4035 + dbd6280 commit c509819

File tree

3 files changed

+105
-0
lines changed

3 files changed

+105
-0
lines changed

Youtube_Scraper/README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# YouTube Scraper
2+
A script that scrapes a channel's details from YouTube and displays them to the user in the terminal.
3+
4+
# Installation & Run
5+
`pip install -r requirements.txt`
6+
7+
`python main.py`
8+
9+
# Screenshots
10+
![Screenshot-from-2023-07-22-02-01-53.png](https://i.postimg.cc/cLT8hnCC/Screenshot-from-2023-07-22-02-01-53.png)
11+
![Screenshot-from-2023-07-22-02-03-04.png](https://i.postimg.cc/wT8Jjznq/Screenshot-from-2023-07-22-02-03-04.png)

Youtube_Scraper/main.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
import requests
2+
from bs4 import BeautifulSoup
3+
import json
4+
5+
def getAbout(channel_username, timeout=10):
    """Scrape the public "About" details of a YouTube channel.

    Downloads ``https://www.youtube.com/@<channel_username>/about``, pulls the
    JSON blob embedded in one of the page's ``<script>`` tags and picks the
    channel fields out of it.

    Args:
        channel_username: Channel handle without the leading ``@``.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        ``None`` when the page cannot be fetched, the embedded JSON cannot be
        found/parsed, or it carries no channel metadata. Otherwise a dict:

        ```
        {
            "name": Name of the channel
            "description": Description of the channel
            "channel_url": Link to the channel
            "channel_avatar": Channel avatar
            "channel_banner": Channel banner
            "subscriber_count": No. of subscribers of the channel
            "toal_videos": Total videos uploaded in the channel
            "total_views": Total views till date of the channel
            "join_date": Date the channel joined YouTube
            "country": Country of origin of the channel
            "links": {"link_data": [{"link_url", "link_name", "link_icon"}, ...]}
        }
        ```

        Any individual field the page does not provide is ``None`` (the
        link list is simply empty) instead of discarding the whole result.
        The key ``"toal_videos"`` is misspelled on purpose: it is kept
        as-is so existing callers keep working.
    """
    url = f"https://www.youtube.com/@{channel_username}/about"
    try:
        # A timeout keeps a stalled connection from hanging the caller.
        res = requests.get(url, timeout=timeout)
        res.raise_for_status()
    except requests.RequestException:
        return None

    soup = BeautifulSoup(res.text, "html.parser")
    data = _find_initial_data(soup.find_all("script"))
    if data is None:
        return None

    metadata = _dig(data, "metadata", "channelMetadataRenderer")
    if not isinstance(metadata, dict):
        # Without the metadata block there is nothing meaningful to report.
        return None

    header = _dig(data, "header", "c4TabbedHeaderRenderer")
    about = _find_about_section(data)

    # Prefer the same banner variant as before (index 5); fall back to the
    # last thumbnail when fewer variants are listed.
    banners = _dig(header, "banner", "thumbnails")
    channel_banner = _dig(banners, 5, "url")
    if channel_banner is None:
        channel_banner = _dig(banners, -1, "url")

    link_data = {
        "link_data": [
            {
                "link_url": _dig(link, "navigationEndpoint", "urlEndpoint", "url"),
                "link_name": _dig(link, "title", "simpleText"),
                "link_icon": _dig(link, "icon", "thumbnails", 0, "url"),
            }
            for link in (_dig(about, "primaryLinks") or [])
        ]
    }

    return {
        "name": _dig(metadata, "title"),
        "description": _dig(metadata, "description"),
        "channel_url": _dig(metadata, "vanityChannelUrl"),
        "channel_avatar": _dig(metadata, "avatar", "thumbnails", 0, "url"),
        "channel_banner": channel_banner,
        "subscriber_count": _dig(header, "subscriberCountText", "simpleText"),
        "toal_videos": _dig(header, "videosCountText", "runs", 0, "text"),
        "total_views": _dig(about, "viewCountText", "simpleText"),
        "join_date": _dig(about, "joinedDateText", "runs", 1, "text"),
        "country": _dig(about, "country", "simpleText"),
        "links": link_data,
    }


def _dig(obj, *path):
    """Follow *path* (dict keys / list indexes) into *obj*; None if any step is missing."""
    for step in path:
        try:
            obj = obj[step]
        except (KeyError, IndexError, TypeError):
            return None
    return obj


def _find_initial_data(scripts):
    """Return the page's embedded JSON decoded from *scripts*, or None if absent."""
    decoder = json.JSONDecoder()
    for script in scripts:
        text = script.text.strip()
        # NOTE(review): assumes the blob is assigned as
        # ``var ytInitialData = {...};`` -- that prefix is exactly the 20
        # characters the previous positional slice skipped; confirm against
        # a live page.
        if not text.startswith("var ytInitialData"):
            continue
        try:
            # raw_decode stops at the end of the JSON value, so a trailing
            # ";" (or anything after it) does not matter.
            data, _ = decoder.raw_decode(text, text.index("{"))
        except ValueError:
            continue
        return data

    # Fallback: the original fixed-position lookup (36th script, minus a
    # 20-character prefix and the final character).
    try:
        return json.loads(scripts[35].text.strip()[20:-1])
    except (IndexError, ValueError):
        return None


def _find_about_section(data):
    """Return the first tab's ``channelAboutFullMetadataRenderer`` dict, or None."""
    tabs = _dig(data, "contents", "twoColumnBrowseResultsRenderer", "tabs") or []
    for tab in tabs:
        section = _dig(
            tab,
            "tabRenderer",
            "content",
            "sectionListRenderer",
            "contents",
            0,
            "itemSectionRenderer",
            "contents",
            0,
            "channelAboutFullMetadataRenderer",
        )
        if section is not None:
            return section
    return None
90+
91+
if __name__ == "__main__":
    # Demo run: look up one sample channel and print whatever getAbout returns.
    about = getAbout("BeaBetterDev")
    print(about)

Youtube_Scraper/requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
beautifulsoup4==4.11.1
2+
Requests==2.31.0

0 commit comments

Comments
 (0)