-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmtg.py
More file actions
90 lines (76 loc) · 3.29 KB
/
mtg.py
File metadata and controls
90 lines (76 loc) · 3.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import requests
from bs4 import BeautifulSoup
from discordFunc import process_sets
import os
try:
import cloudscraper
USE_CLOUDSCRAPER = True
except ImportError:
USE_CLOUDSCRAPER = False
# Discover newly spoiled MTG sets and hand them to the Discord announcer.
# Flow: load persisted set history -> scrape magicspoiler.com for upcoming
# sets -> diff against what was already announced -> persist -> announce.

newSets = []     # Set IDs discovered this run that have not been announced yet
oldSets = set()  # Set IDs already announced (loaded from old_sets.txt); set for O(1) lookups
allSets = set()  # Every unique set ID ever observed (loaded from all_sets.txt)

# Resolve data files relative to this script so it works from any CWD.
script_dir = os.path.dirname(os.path.abspath(__file__))

# Load the full history of sets we have ever seen.
with open(os.path.join(script_dir, "all_sets.txt"), "r", encoding="utf-8") as f:
    allSets = {line.strip() for line in f}

# Load sets that were already announced in a previous run.
with open(os.path.join(script_dir, "old_sets.txt"), "r", encoding="utf-8") as f:
    oldSets = {line.strip() for line in f}

# Scrape the spoiler page to find upcoming sets.
url = "https://www.magicspoiler.com/mtg-spoilers/"

if USE_CLOUDSCRAPER:
    # cloudscraper transparently solves Cloudflare's anti-bot challenge.
    scraper = cloudscraper.create_scraper(
        browser={
            'browser': 'firefox',
            'platform': 'windows',
            'desktop': True
        }
    )
    response = scraper.get(url)
else:
    # Fallback: plain requests with browser-like headers to reduce the
    # chance of being blocked. (May still fail behind Cloudflare.)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:144.0) Gecko/20100101 Firefox/144.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br, zstd',
        'Sec-GPC': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'Priority': 'u=0, i',
        'TE': 'trailers',
    }
    response = requests.get(url, headers=headers)

response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

# An anchor wrapping a div.upcoming-set links to a set page; the LAST
# path segment of its URL is the set ID (e.g. ".../some-set/" -> "some-set").
for a_tag in soup.find_all('a', href=True):
    if a_tag.find('div', class_='upcoming-set'):
        set_id = a_tag['href'].strip('/').split('/')[-1]
        # Record only sets that have never been announced.
        if set_id not in oldSets:
            allSets.add(set_id)

# Anything in the history that was never announced is considered new.
newSets = [set_id for set_id in allSets if set_id not in oldSets]

# Persist the updated history so future runs can diff against it.
with open(os.path.join(script_dir, "all_sets.txt"), "w", encoding="utf-8") as f:
    f.writelines(set_id + '\n' for set_id in allSets)

# Announce the newly found sets on Discord (process_sets is a coroutine).
import asyncio
asyncio.run(process_sets(newSets))