-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathmain.py
More file actions
212 lines (182 loc) · 10.2 KB
/
main.py
File metadata and controls
212 lines (182 loc) · 10.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
from bs4 import BeautifulSoup
import requests
import json
from datetime import datetime
import re
def _url_from_wire_json(raw_json, key_path):
    """Return the string stored at *key_path* inside a JSON attribute value.

    Args:
        raw_json: JSON text taken from a ``wire:*`` button attribute.
        key_path: sequence of object keys to follow, outermost first.

    Returns:
        The non-empty string found at the end of *key_path*, or None when a
        level along the path is missing, is not a JSON object, or the final
        value is not a non-empty string.

    Raises:
        json.JSONDecodeError: if *raw_json* is not valid JSON.
    """
    node = json.loads(raw_json)
    for key in key_path:
        # A list/null/scalar where an object was expected means "not found",
        # not an error: the caller still has link-based fallbacks to try.
        if not isinstance(node, dict):
            return None
        node = node.get(key)
    return node if isinstance(node, str) and node else None


def get_testflight_data(url_testflight):
    """Fetch an app page and extract its name, TestFlight URL and hashtags.

    The TestFlight URL is searched for in this order, stopping at the first hit:
      1. the ``url`` entry inside a button's ``wire:snapshot`` JSON,
      2. the ``url`` entry inside a button's ``wire:initial-data`` JSON,
      3. the first ``<a>`` whose href contains ``testflight.apple.com``,
      4. the first absolute ``<a>`` href mentioning ``testflight`` or ``apple.com``.

    Args:
        url_testflight: URL of the app page to download and parse.

    Returns:
        A dict with keys ``'name'``, ``'url'`` and ``'hashtags'``, or None when
        the page has no usable name tag, no TestFlight URL could be found, or
        any error occurred while fetching/parsing (the error is printed).
    """
    try:
        # Without a timeout a single stalled connection blocks the run forever.
        r = requests.get(url_testflight, timeout=30)
        soup = BeautifulSoup(r.text, 'html.parser')

        # Prefer <h3> for the app name, then fall back to other heading tags.
        name_tag = (
            soup.find('h3')
            or soup.find('h1')
            or soup.find('h2')
            or soup.find('title')
        )
        if not name_tag:
            print(f"Name tag not found for {url_testflight}")
            return None
        name_testflight = name_tag.text.strip()

        testflight_url = None

        # Methods 1 and 2: JSON embedded in a button attribute (new structure
        # first, then the old one).
        wire_sources = (
            ('wire:snapshot', ('data', 'url')),
            ('wire:initial-data', ('serverMemo', 'data', 'url')),
        )
        for attr_name, key_path in wire_sources:
            if testflight_url:
                break
            button = soup.find('button', {attr_name: True})
            raw_json = button.attrs.get(attr_name) if button else None
            if not raw_json:
                continue
            try:
                testflight_url = _url_from_wire_json(raw_json, key_path)
            except json.JSONDecodeError:
                print(f"Failed to parse {attr_name} JSON data for {url_testflight}")

        # Method 3: a direct TestFlight link.
        if not testflight_url:
            direct_link = soup.find('a', href=lambda x: x and 'testflight.apple.com' in x)
            if direct_link:
                testflight_url = direct_link['href']

        # Method 4: any absolute link that looks TestFlight/Apple related.
        if not testflight_url:
            for link in soup.find_all('a', href=lambda x: x and x.startswith('http')):
                href = link['href']
                if 'testflight' in href.lower() or 'apple.com' in href:
                    testflight_url = href
                    break

        if not testflight_url:
            print(f"No TestFlight URL found for {url_testflight}")
            return None

        # '|' would break the markdown table cells the name is later written to.
        name_testflight = name_testflight.replace('|', '-')
        hashtag_testflights = " ".join(
            "#" + word.upper() for word in re.findall(r"\b\w+\b", name_testflight)
        )
        return {
            'name': name_testflight,
            'url': testflight_url,
            'hashtags': hashtag_testflights
        }
    except Exception as e:
        # Deliberate best-effort boundary: one bad page must not stop the
        # whole scrape, so report it and let the caller skip this app.
        print(f"Error processing {url_testflight}: {e}")
        return None
def _record_apps_with_status(a_tag, status_class, status_word, dot_class, label, bucket):
    """Fetch and store the app behind *a_tag* for every matching status badge.

    A badge matches when it is a ``<p>`` with class *status_class* whose text
    contains *status_word* (case-insensitive) and which holds a ``<span>`` with
    class *dot_class*.

    Args:
        a_tag: the app's ``<a>`` element from a listing page.
        status_class: class string of the status ``<p>`` to look for.
        status_word: lowercase word expected in the badge text.
        dot_class: class string of the coloured dot ``<span>``.
        label: status name shown in the progress message.
        bucket: set that receives ``(url, name, hashtags)`` tuples.

    Returns:
        The number of successful lookups added to *bucket*.
    """
    recorded = 0
    for p_tag in a_tag.find_all('p', {"class": status_class}):
        if status_word not in p_tag.get_text().lower():
            continue
        if not p_tag.find('span', class_=dot_class):
            continue
        url_testflight = a_tag["href"]
        print(f" Processing {label} app: {url_testflight}")
        result = get_testflight_data(url_testflight)
        if result:
            # The TestFlight URL leads the tuple so it acts as the identifier.
            bucket.add((result['url'], result['name'], result['hashtags']))
            recorded += 1
    return recorded


def main():
    """Crawl every tag page on departures.to and write the app lists to disk.

    Writes ``Result_BetaAppsAvailable.md`` (header plus one row per category)
    and, unless the tag grid cannot be found, ``open_apps.md``,
    ``full_apps.md`` and ``all_apps.md`` via ``write_sorted_apps``. Progress
    and a final summary are printed to stdout.
    """
    txtResult_AvailableTestflight = "Result_BetaAppsAvailable.md"
    url = 'https://departures.to/tags'
    betaapps_open = "mt-1 text-xs font-medium uppercase text-gray-500 dark:text-green-500"
    betaapps_full = "mt-1 text-xs font-medium uppercase text-gray-500 dark:text-red-500"
    green_dot_class = 'w-2 h-2 inline-block bg-green-400 rounded-full'
    red_dot_class = 'w-2 h-2 inline-block bg-red-400 rounded-full'
    # Without a timeout a single stalled connection blocks the run forever.
    http_timeout = 30

    # Store apps in sets to avoid duplicates
    open_apps = set()
    full_apps = set()

    # The context manager closes the session's pooled connections on exit.
    with requests.Session() as r:
        response = r.get(url, timeout=http_timeout)
        with open(txtResult_AvailableTestflight, 'w', encoding='utf-8') as txtResult_AvailableTestflight_file:
            nowTime = datetime.now().strftime("%d/%m/%Y %I:%M %p")
            txtResult_AvailableTestflight_file.write(f"# Beta Apps is available\t[{nowTime}]\n")
            txtResult_AvailableTestflight_file.write('| Name | #HASHTAG | \n | --- | --- | \n')

            if response.status_code == 200:
                soup = BeautifulSoup(response.text, "html.parser")
                div_with_class = soup.find("div", class_="grid sm:grid-cols-2 md:grid-cols-3 lg:grid-cols-4 gap-4 sm:gap-6")
                if not div_with_class:
                    print("Could not find the main grid div. The website structure might have changed.")
                    return

                for categories in div_with_class.find_all("a", href=True):
                    linkCategory = categories['href']
                    category = linkCategory.split('/')[-1].upper()
                    print(f"==> {category} <==")
                    txtResult_AvailableTestflight_file.write(f"| => **[{category}]({linkCategory})** <= ||\n")

                    # Pagination restarts at 1 for each category.
                    page_number = 1
                    apps_found_in_category = 0
                    previous_hrefs = None
                    while True:
                        urlPage = f'{linkCategory}?page={page_number}'
                        print(f" Processing page {page_number}: {urlPage}")
                        page_response = r.get(urlPage, timeout=http_timeout)
                        page_soup = BeautifulSoup(page_response.text, 'html.parser')
                        a_tags = page_soup.find_all('a', href=lambda x: x and 'departures.to/apps/' in x)
                        if not a_tags:
                            print(f" No more app links found on page {page_number}")
                            break

                        # Guard against an endless loop if the site serves the
                        # same listing again for out-of-range page numbers.
                        page_hrefs = [a_tag['href'] for a_tag in a_tags]
                        if page_hrefs == previous_hrefs:
                            print(f" Page {page_number} repeats the previous page; stopping")
                            break
                        previous_hrefs = page_hrefs

                        print(f" Found {len(a_tags)} app links on page {page_number}")
                        for a_tag in a_tags:
                            # "open" status (green dot)
                            apps_found_in_category += _record_apps_with_status(
                                a_tag, betaapps_open, 'open', green_dot_class, 'OPEN', open_apps
                            )
                            # "full" status (red dot)
                            apps_found_in_category += _record_apps_with_status(
                                a_tag, betaapps_full, 'full', red_dot_class, 'FULL', full_apps
                            )
                        page_number += 1

                    # Per-category summary. It runs after the page loop because
                    # that loop only ever exits through `break`.
                    if apps_found_in_category == 0:
                        print(f" No available apps found in {category}")
                    else:
                        print(f" Found {apps_found_in_category} available apps in {category}")
            else:
                print("Failed to retrieve the web page. Status code:", response.status_code)

    all_apps = open_apps.union(full_apps)

    # Write separate files for open and full apps
    write_sorted_apps("open_apps.md", open_apps, "Open Apps")
    write_sorted_apps("full_apps.md", full_apps, "Full Apps")
    # Write combined file
    write_sorted_apps("all_apps.md", all_apps, "All Available Apps")

    print("\nSummary:")
    print(f"Open apps found: {len(open_apps)}")
    print(f"Full apps found: {len(full_apps)}")
    print(f"Total unique apps: {len(all_apps)}")
def write_sorted_apps(filename, apps_set, title):
    """Write apps to a markdown table sorted by app name.

    Args:
        filename: path of the markdown file to create or overwrite.
        apps_set: iterable of ``(url, name, hashtags)`` string tuples.
        title: heading text placed on the first line, followed by a timestamp.

    The file starts with ``# <title>`` and the current local time, then a
    three-column table with one row per app.
    """
    nowTime = datetime.now().strftime("%d/%m/%Y %I:%M %p")

    # Sort by name case-insensitively, then break ties on the exact name, URL
    # and hashtags. Without the tie-breakers, equal names keep the input order,
    # which for a set differs between runs and reshuffles the generated file.
    sorted_apps = sorted(
        apps_set,
        key=lambda app: (app[1].lower(), app[1], app[0], app[2]),
    )

    lines = [
        f"# {title}\t[{nowTime}]\n",
        '| Name | TestFlight URL | Hashtags |\n',
        '| --- | --- | --- |\n',
    ]
    lines.extend(
        f"| {name} | [{url}]({url}) | {hashtags} |\n"
        for url, name, hashtags in sorted_apps
    )

    with open(filename, 'w', encoding='utf-8') as f:
        f.writelines(lines)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()