
Commit 23a0ceb

Merge pull request #2669 from Mihan786Chistie/youtube
added Youtube Video Scraper
2 parents 27f24a8 + 4c7399e commit 23a0ceb

3 files changed: +124 -0 lines changed

YouTube-Scraper/README.md

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
## YouTube

### Scrape Video Details

Create an instance of the `Video` class.

```python
video = Video(video_url="video_url")
```

| Methods         | Details                  |
| --------------- | ------------------------ |
| `.getDetails()` | Return the video details |
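
A minimal usage sketch, assuming `Video` is imported from `video.py` in this folder and the watch URL is publicly reachable; the sample URL is the one used in the module's docstring:

```python
from video import Video

video = Video(video_url="https://www.youtube.com/watch?v=pBy1zgt0XPc")
details = video.getDetails()

if details is not None:
    info = details["video_data"][0]
    print(info["title"], info["views_count"], info["channel_name"])
else:
    # getDetails() returns None when the page could not be fetched or parsed
    print("Could not fetch video details")
```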

YouTube-Scraper/requirements.txt

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
beautifulsoup4
requests
# json is part of the Python standard library, so no separate install is needed

YouTube-Scraper/video.py

Lines changed: 108 additions & 0 deletions
@@ -0,0 +1,108 @@
import requests
from bs4 import BeautifulSoup
import json


class Video:
    """
    Create an instance of the `Video` class.
    ```python
    video = Video(video_url="video_url")
    ```

    | Methods         | Details                  |
    | --------------- | ------------------------ |
    | `.getDetails()` | Return the video details |
    """

    def __init__(self, video_url):
        self.video_url = video_url

    def getDetails(self):
        """
        Class - `Video`
        Example:
        ```
        video_data = Video("https://www.youtube.com/watch?v=pBy1zgt0XPc")
        video_data.getDetails()
        ```
        Returns:
        {
            "title": Title of the video,
            "description": Description of the video,
            "views_count": No. of views of the video,
            "upload_date": Upload date of the video,
            "comment_count": No. of comments,
            "channel_name": Name of the channel,
            "channel_avatar": Channel avatar,
            "subscriber_count": No. of channel subscribers,
            "channel_url": Link to the channel
        }
        """
        url = self.video_url
        try:
            res = requests.get(url)
            soup = BeautifulSoup(res.text, "html.parser")
            video_data = {"video_data": []}

            # The watch page embeds its data as "var ytInitialData = {...};" in a
            # <script> tag; strip the 20-character assignment prefix and the
            # trailing semicolon, then parse the remaining JSON.
            scripts = soup.find_all("script")
            req_script = scripts[44].text.strip()
            script = req_script[20:-1]
            data = json.loads(script)

            base = data["contents"]["twoColumnWatchNextResults"]["results"]["results"][
                "contents"
            ]

            # Primary info renderer: title, view count and upload date.
            first = base[0]["videoPrimaryInfoRenderer"]
            title = (
                first["title"]["runs"][0]["text"]
                .strip()
                .encode("ascii", "ignore")
                .decode()
            )
            views = first["viewCount"]["videoViewCountRenderer"]["viewCount"][
                "simpleText"
            ]
            date = first["dateText"]["simpleText"]

            # Secondary info renderer: channel metadata and description.
            channel_data = base[1]["videoSecondaryInfoRenderer"]["owner"][
                "videoOwnerRenderer"
            ]
            avatar = channel_data["thumbnail"]["thumbnails"][2]["url"]
            name = channel_data["title"]["runs"][0]["text"]
            channel_url = channel_data["title"]["runs"][0]["navigationEndpoint"][
                "commandMetadata"
            ]["webCommandMetadata"]["url"]
            subs = channel_data["subscriberCountText"]["accessibility"][
                "accessibilityData"
            ]["label"]

            desc = (
                base[1]["videoSecondaryInfoRenderer"]["attributedDescription"][
                    "content"
                ]
                .strip()
                .encode("ascii", "ignore")
                .decode()
            )
            comment_count = base[2]["itemSectionRenderer"]["contents"][0][
                "commentsEntryPointHeaderRenderer"
            ]["commentCount"]["simpleText"]

            video_data["video_data"].append(
                {
                    "title": title,
                    "description": desc[:200] + "...",
                    "views_count": views,
                    "upload_date": date,
                    "comment_count": comment_count,
                    "channel_name": name,
                    "channel_avatar": avatar,
                    "subscriber_count": subs,
                    "channel_url": "https://youtube.com" + channel_url,
                }
            )
            return video_data
        except Exception:
            return None
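
The parsing above assumes the `ytInitialData` payload always sits in `scripts[44]` and always carries the fixed 20-character `var ytInitialData = ` prefix, which can shift when YouTube changes its markup. As an illustration only, not part of this commit, a lookup that searches by content rather than position could look like the sketch below; the helper name `extract_initial_data` and the `timeout` value are assumptions.

```python
import json

import requests
from bs4 import BeautifulSoup


def extract_initial_data(video_url):
    """Return the parsed ytInitialData dict from a watch page, or None."""
    res = requests.get(video_url, timeout=10)
    soup = BeautifulSoup(res.text, "html.parser")
    prefix = "var ytInitialData ="
    for script in soup.find_all("script"):
        text = script.text.strip()
        if text.startswith(prefix):
            # Drop the assignment prefix and the trailing semicolon, then parse.
            return json.loads(text[len(prefix):].strip().rstrip(";"))
    return None
```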
