File tree Expand file tree Collapse file tree 3 files changed +32
-0
lines changed Expand file tree Collapse file tree 3 files changed +32
-0
lines changed Original file line number Diff line number Diff line change
1
+ # Times of India Scraper
2
+ The news headlines and their corresponding links are scraped from the Times of India home page, and the output is presented as a pandas DataFrame.
Original file line number Diff line number Diff line change
1
+ import pandas as pd
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+
5
+
6
def scrapper(timeout=10):
    """Scrape headlines and their links from the Times of India home page.

    The page at https://timesofindia.indiatimes.com/ is requested with a
    browser-like ``User-Agent`` header and parsed with BeautifulSoup's
    ``lxml`` parser. One row is produced for every ``<div class="col_l_6">``
    element that contains a ``<figcaption>``.

    Args:
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without a timeout the request
            could block forever on an unresponsive server.

    Returns:
        A ``pandas.DataFrame`` with two columns: ``News_Headline`` (the
        stripped figcaption text) and ``News_Link`` (the ``href`` of the
        first ``<a>`` inside the same div, or ``None`` when the div has no
        anchor or the anchor has no ``href``).

    Raises:
        requests.exceptions.RequestException: if the request fails, times
            out, or the server answers with a 4xx/5xx status.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36'}
    # Access the TOI web page while presenting a browser-like User-Agent.
    response = requests.get(
        'https://timesofindia.indiatimes.com/',
        headers=headers,
        timeout=timeout,
    )
    # Fail loudly on an HTTP error instead of parsing an error page into an
    # empty (and misleading) table.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    news = []
    link_list = []

    for card in soup.find_all('div', class_='col_l_6'):
        figcaption = card.find('figcaption')
        if figcaption is None:
            continue

        # Headline text and its corresponding link live in the same div.
        anchor = card.find('a')
        # A div without any <a> would otherwise raise AttributeError; keep
        # the headline and record the link as missing.
        link_news = anchor.get("href") if anchor is not None else None

        news.append(figcaption.text.strip())
        link_list.append(link_news)

    return pd.DataFrame({'News_Headline': news, 'News_Link': link_list})
26
+
27
# Script entry: run the scraper once, keep the resulting DataFrame in a
# module-level name, and print it to standard output.
TOI_headline = scrapper()
print(TOI_headline)
Original file line number Diff line number Diff line change
1
+ requests
2
+ beautifulsoup4
You can’t perform that action at this time.
0 commit comments