1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ import json
4
+
5
+
6
class TechCrunch:
    """
    Scrape article listings from TechCrunch.

    Example:
    ```
    articles = TechCrunch()
    ```

    Methods:
    1. `.getArticles(category)` | Response - JSON string of the articles in a
       category, with title, description, image, author, date and link.
    2. `.search(topic)` | Response - dict of the articles matching a search
       topic, with the same fields.
    """

    # Seconds to wait for TechCrunch to respond; without a timeout a stalled
    # connection would block the caller forever.
    REQUEST_TIMEOUT = 10

    # Raised when the page does not contain the expected markup: a failed
    # BeautifulSoup lookup returns None, so the follow-up attribute access or
    # subscript fails. ValueError is kept from the original handler (it also
    # covers the URL errors that requests derives from ValueError).
    _PARSE_ERRORS = (ValueError, AttributeError, IndexError, KeyError, TypeError)

    @staticmethod
    def _clean_text(node):
        """Return the stripped text of `node` with non-ASCII characters dropped."""
        return node.getText().strip().encode("ascii", "ignore").decode()

    @staticmethod
    def _error_json():
        """Return the JSON-encoded error message shared by both public methods."""
        error_message = {
            "message": "Can't fetch any articles from the topic provided."
        }
        return json.dumps(error_message)

    def getArticles(self, category):
        """
        Fetch the articles listed under a TechCrunch category.

        Example:
        ```
        articles = TechCrunch()
        articles.getArticles("artificial-intelligence")
        ```

        Args:
            category: Category name. Spaces are replaced with "-" and the
                name is lower-cased to build the category URL. The value is
                also stored on `self.category`.

        Returns:
            A JSON string of the form `{"articles": [...]}` where each entry is
            {
                "title": Title of the article
                "description": Description of the article
                "image": Image of the article
                "author": Author of the article
                "date": Date the article was posted
                "link": Link to the article
            }
            If the page cannot be parsed, a JSON string of the form
            `{"message": ...}` is returned instead.
        """
        self.category = category
        url = (
            "https://techcrunch.com/category/"
            + self.category.replace(" ", "-").lower()
        )
        try:
            res = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            soup = BeautifulSoup(res.text, "html.parser")

            articles_data = {"articles": []}

            posts = soup.find_all(
                "div", class_="post-block post-block--image post-block--unread"
            )
            for post in posts:
                byline = post.find("div", class_="river-byline")
                title_anchor = post.find(
                    "a", class_="post-block__title__link", href=True
                )
                articles_data["articles"].append(
                    {
                        "title": self._clean_text(
                            post.select_one(".post-block__title__link")
                        ),
                        "description": self._clean_text(
                            post.select_one(".post-block__content")
                        ),
                        "image": post.find("img", src=True)["src"],
                        "author": self._clean_text(
                            post.select_one(".river-byline__authors")
                        ),
                        "date": self._clean_text(
                            byline.select_one(".river-byline__time")
                        ),
                        "link": title_anchor["href"],
                    }
                )
            return json.dumps(articles_data)
        except self._PARSE_ERRORS:
            return self._error_json()

    def search(self, topic):
        """
        Search TechCrunch for articles about a topic.

        Example:
        ```
        articles = TechCrunch()
        articles.search("github")
        ```

        Args:
            topic: Search phrase. It is sent as the `p` query parameter and
                is also stored on `self.topic`.

        Returns:
            A dict of the form `{"articles": [...]}` where each entry is
            {
                "title": Title of the article
                "description": Description of the article
                "image": Image of the article
                "author": Author of the article
                "date": Date the article was posted
                "link": Link to the article
            }
            If the page cannot be parsed, a JSON *string* of the form
            `{"message": ...}` is returned instead. NOTE: unlike
            `getArticles`, the success value is a dict rather than a JSON
            string; this is kept as-is for backward compatibility.
        """
        self.topic = topic
        url = "https://search.techcrunch.com/search"
        # Let requests build the query string so characters such as "&", "#"
        # or "+" in the topic are percent-encoded instead of corrupting it.
        query = {"p": self.topic, "fr": "techcrunch"}
        try:
            res = requests.get(url, params=query, timeout=self.REQUEST_TIMEOUT)
            soup = BeautifulSoup(res.text, "html.parser")

            articles_data = {"articles": []}

            results = soup.find_all("li", class_="ov-a mt-0 pt-26 pb-26 bt-dbdbdb")
            for result in results:
                title_anchor = result.find("a", class_="fz-20 lh-22 fw-b", href=True)
                thumbnail = result.find(
                    "img", class_="s-img mr-10 s-img-errchk", src=True
                )
                articles_data["articles"].append(
                    {
                        "title": result.find(
                            "a", class_="fz-20 lh-22 fw-b"
                        ).getText(),
                        "description": result.find(
                            "p", class_="fz-14 lh-20 c-777"
                        ).getText(),
                        "image": thumbnail["src"],
                        "author": result.find("span", class_="mr-15").getText(),
                        "date": result.find(
                            "span", class_="pl-15 bl-1-666"
                        ).getText(),
                        "link": title_anchor["href"],
                    }
                )
            return articles_data
        except self._PARSE_ERRORS:
            return self._error_json()
0 commit comments