Skip to content

Commit ef75414

Browse files
Add files via upload
1 parent 6ba45ff commit ef75414

File tree

5 files changed

+90
-0
lines changed

5 files changed

+90
-0
lines changed

email_filter.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
def filter_emails(emails, domain):
    """Return the subset of *emails* whose address ends with *domain*.

    The comparison is case-insensitive: the domain part of an e-mail
    address is case-insensitive (RFC 5321), so 'A@X.BR' must match '.br'.

    Args:
        emails: iterable of e-mail address strings.
        domain: suffix to match, e.g. '.br' or 'gmail.com'.

    Returns:
        set[str]: matching addresses with their original casing preserved.
    """
    suffix = domain.lower()
    return {email for email in emails if email.lower().endswith(suffix)}

email_saver.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
import csv
2+
3+
def save_emails_to_csv(emails, filename):
    """Write one e-mail address per row to *filename* as a CSV file.

    Args:
        emails: iterable of e-mail address strings.
        filename: path of the CSV file to create (overwritten if present).
    """
    # newline='' is required by the csv module so it controls line endings
    # itself (avoids blank rows on Windows); utf-8 keeps non-ASCII
    # addresses from depending on the platform's default encoding.
    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        for email in emails:
            writer.writerow([email])
    # The original message had a broken placeholder and never showed the
    # path; include the actual filename in the confirmation.
    print(f"E-mails salvos com sucesso em {filename}")

email_scraper.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import requests
2+
from bs4 import BeautifulSoup
3+
import re
4+
import time
5+
6+
class EmailScraper:
    """Scrapes e-mail addresses from web pages.

    Given a text file containing one URL per line, fetches each page and
    extracts anything that looks like an e-mail address from its raw HTML.
    """

    def __init__(self, urls_file):
        """
        Args:
            urls_file: path to a text file with one URL per line.
        """
        self.urls_file = urls_file
        # Hyphens are legal in both the local and the domain part of an
        # address (e.g. foo-bar@my-host.com); the original character
        # classes omitted '-' and silently missed such addresses.
        self.email_regex = re.compile(r'''
            ([a-zA-Z0-9_.+-]+      # local part
            @
            [a-zA-Z0-9_.+-]+)      # domain part
        ''', re.VERBOSE)

    def scrape_emails_from_url(self, url):
        """Fetch *url* and return the set of e-mail addresses found in it.

        Returns an empty set on any network error (best-effort behaviour,
        the error is reported but not raised).
        """
        try:
            headers = {'User-Agent': 'Mozilla/5.0'}
            # Without a timeout, a dead host would hang the scraper forever.
            response = requests.get(url, headers=headers, timeout=15)
            return set(self.email_regex.findall(response.text))
        except requests.exceptions.RequestException as e:
            print(f"Erro ao acessar {url}: {e}")
            return set()

    def run(self, output_file):
        """Scrape every URL in self.urls_file, appending the e-mails found
        to *output_file* (one address per line) and logging progress."""
        with open(self.urls_file, 'r') as urls, open(output_file, 'a') as email_file:
            for i, url in enumerate(urls, 1):
                url = url.strip()
                emails = self.scrape_emails_from_url(url)
                for email in emails:
                    email_file.write(f"{email}\n")
                print(f"{i}. Processed {url} with {len(emails)} emails found.")

# Usage:
# scraper = EmailScraper('urls.txt')
# scraper.run('emails.txt')

main.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
from email_scraper import EmailScraper
2+
from email_filter import filter_emails
3+
from email_saver import save_emails_to_csv
4+
5+
def print_banner():
    """Print the application's ASCII-art banner in green."""
    banner = """
██████╗ ███████╗███████╗██████╗ ███╗ ██╗████████╗ █████╗ ██╗ ██╗███████╗
██╔══██╗██╔════╝██╔════╝██╔══██╗████╗ ██║╚══██╔══╝██╔══██╗██║ ██╔╝██╔════╝
██████╔╝█████╗ █████╗ ██████╔╝██╔██╗ ██║ ██║ ███████║█████╔╝ █████╗
██╔═══╝ ██╔══╝ ██╔══╝ ██╔══██╗██║╚██╗██║ ██║ ██╔══██║██╔═██╗ ██╔══╝
██║ ███████╗███████╗██║ ██║██║ ╚████║ ██║ ██║ ██║██║ ██╗███████╗
╚═╝ ╚══════╝╚══════╝╚═╝ ╚═╝╚═╝ ╚═══╝ ╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝
"""
    # ANSI escape codes: 92 selects bright green, 0 resets the terminal.
    green, reset = "\033[92m", "\033[0m"
    print(f"{green}{banner}{reset}")
15+
16+
def main_menu():
    """Display the available scraping modes and return the user's choice.

    Returns:
        str: the raw text the user typed at the prompt.
    """
    options = (
        "1. Raspagem de e-mails brasileiros",
        "2. Raspagem de e-mails do Gmail",
        "3. Raspagem de e-mails em massa de arquivo de URLs",
    )
    for option in options:
        print(option)
    return input("Escolha uma opção: ")
22+
23+
def main():
    """Entry point: show the banner, ask for a scraping mode and run it.

    Modes: '1' scrapes one URL and keeps only '.br' addresses, '2' keeps
    only 'gmail.com' addresses, '3' bulk-scrapes every URL in a file.
    """
    print_banner()
    choice = main_menu()
    if choice == '3':
        # Bulk mode: read URLs from a file, append results to a text file.
        urls_file = input("Digite o caminho do arquivo com as URLs: ")
        output_file = input("Digite o nome do arquivo de saída para os e-mails: ")
        scraper = EmailScraper(urls_file)
        scraper.run(output_file)
    else:
        url = input("Digite a URL para fazer o scraping de e-mails: ")
        scraper = EmailScraper(url)
        emails = scraper.scrape_emails_from_url(url)
        if choice == '1':
            filtered_emails = filter_emails(emails, '.br')
        elif choice == '2':
            filtered_emails = filter_emails(emails, 'gmail.com')
        else:
            # Any other option previously left filtered_emails unbound and
            # crashed with NameError below; save everything scraped instead.
            filtered_emails = emails
        save_emails_to_csv(filtered_emails, f'emails_{choice}.csv')

if __name__ == "__main__":
    main()

requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
requests
2+
beautifulsoup4

0 commit comments

Comments
 (0)