1
+ #===================================================================
2
+ #Import Libraries
3
+ #===================================================================
4
+
5
+ import re
6
+ import requests
7
+ from bs4 import BeautifulSoup
8
+ import pprint
9
+ from requests import get
10
+
11
+
12
+
13
+ # https://www.redhat.com/en/contact #email page to scrape
14
+
15
+ #===================================================================
16
+ #Collect HTML w/ Beautiful Soup
17
+ #===================================================================
18
# Ask the user which page to scrape; strip stray whitespace from a pasted URL.
target_webpage = input("Type a target url to scrape the emails from that page: ").strip()

# Download the page. The timeout keeps the script from hanging forever on an
# unresponsive server, and raise_for_status() turns HTTP error responses
# (4xx/5xx) into exceptions so an error page is not scraped by mistake.
try:
    response = requests.get(target_webpage, timeout=10)
    response.raise_for_status()
except requests.RequestException as error:
    # Covers malformed URLs, connection failures, timeouts and HTTP errors;
    # exit with a readable message instead of a traceback.
    raise SystemExit("Could not fetch {!r}: {}".format(target_webpage, error))
target_html = response.text

# Parse the HTML and collect every <a> (anchor) tag for the email search.
target_html_parsed = BeautifulSoup(target_html, "lxml")
a_tags = target_html_parsed.find_all("a")
26
+ # print(target_html_parsed)
27
+ # print(a_tags)
28
+
29
+ # print(type(a_tags))
30
+
31
+
32
+ # pprint.pprint((target_html_parsed.prettify()))
33
+ # print(target_html_parsed.title.text)
34
+ # print(target_html_parsed.get_text())
35
+
36
+
37
+ #===================================================================
38
+ #Extract Emails w/ Regex
39
+ #===================================================================
40
+
41
# Loose email matcher: a run of non-whitespace characters on each side of "@".
# Written as a raw string so "\S" reaches the regex engine unchanged instead of
# being read as an (invalid) string escape sequence.
regex_pattern = r'\S+@\S+'
email_regex = re.compile(regex_pattern)  # compiled once, reused for every tag

# Search the visible text of each anchor tag and print any matches found.
for tag in a_tags:
    a_tag_text = tag.get_text()
    email = email_regex.findall(a_tag_text)
    # Most links contain no address; skip them rather than printing "[]".
    if email:
        print(email)
50
+
51
+
52
+
53
+ #===================================================================
54
+ #Regex Examples
55
+ #===================================================================
56
+
57
+ # email = input("Type email: ")
58
+
59
+ # regex = re.compile(r'([A-Za-z0-9]+[.-_])*[A-Za-z0-9]+@[A-Za-z0-9-]+(\.[A-Z|a-z]{2,})+')
60
+
61
+
62
+ # def isValid(email):
63
+ # if re.fullmatch(regex, email):
64
+ # print("Valid email")
65
+ # else:
66
+ # print("Invalid email")
67
+
68
+ # isValid(email)
69
+
70
+ # (separator=" ")
0 commit comments