Skip to content

Commit 720b959

Browse files
authored
Uploading Python file for scraping
1 parent 417a4ba commit 720b959

File tree

1 file changed

+28
-0
lines changed

1 file changed

+28
-0
lines changed

TOI_Scrapper/TOI_Scrapper.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import pandas as pd
2+
import requests
3+
from bs4 import BeautifulSoup
4+
5+
6+
def scrapper(url='https://timesofindia.indiatimes.com/', timeout=10):
    """Scrape headline/link pairs from the Times of India front page.

    The page is fetched with a browser-like User-Agent header, parsed with
    the ``lxml`` parser, and every ``<div class="col_l_6">`` that contains
    both a ``<figcaption>`` and an ``<a>`` contributes one row.

    Args:
        url: Page to fetch. Defaults to the Times of India home page.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``News_Headline`` (the
        stripped caption text) and ``News_Link`` (the ``href`` of the first
        anchor in the same div, or ``None`` if that anchor has no ``href``).
        The frame is empty when no matching div is found.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response (via ``raise_for_status``).
    """
    # Well-formed desktop Chrome User-Agent so the request looks like it
    # comes from a browser.
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 6.3; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/80.0.3987.162 Safari/537.36'
        )
    }

    # A timeout keeps a stalled connection from blocking forever, and
    # raise_for_status stops an error page from being parsed as if it were
    # the real front page.
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    news = []
    link_list = []

    for card in soup.find_all('div', class_='col_l_6'):
        figcaption = card.find('figcaption')
        anchor = card.find('a')
        # Skip cards missing either the caption or the link; calling
        # .get() on a missing anchor would raise AttributeError.
        if figcaption is None or anchor is None:
            continue
        news.append(figcaption.text.strip())
        link_list.append(anchor.get('href'))

    return pd.DataFrame({'News_Headline': news, 'News_Link': link_list})
26+
27+
# Script entry: scrape the front page once and print the resulting
# headline/link table to stdout.
TOI_headline = scrapper()
print(TOI_headline)

0 commit comments

Comments
 (0)