Skip to content

Commit 3841e0e

Browse files
committed
misc update and tidelift scraping
1 parent 634a083 commit 3841e0e

File tree

2 files changed

+114
-3
lines changed

2 files changed

+114
-3
lines changed

tools/private_sec_report.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,18 @@ async def get_private_report(session, org, repo):
7272
) as repo_response:
7373
repo_info = await repo_response.json()
7474
archived = repo_info.get("archived", False)
75+
private = repo_info.get("private", False)
7576
async with session.get(private_report_url, headers=headers) as response:
7677
if response.status == 200:
77-
return org, repo, (await response.json()).get("enabled", False), archived
78+
return (
79+
org,
80+
repo,
81+
(await response.json()).get("enabled", False),
82+
archived,
83+
private,
84+
)
7885
else:
79-
return org, repo, False, archived
86+
return org, repo, False, archived, private
8087

8188

8289
async def main():
@@ -90,14 +97,16 @@ async def main():
9097

9198
results = await asyncio.gather(*tasks)
9299
prev_org = None
93-
for org, repo, enabled, archived in results:
100+
for org, repo, enabled, archived, private in results:
94101
if org != prev_org:
95102
print()
96103
print(f"[bold]{org}[/bold]")
97104
prev_org = org
98105
if enabled:
99106
print(f" [green]{repo}: {enabled}[/green]")
100107
else:
108+
if private:
109+
print(f" [yellow]{org}/{repo}: {enabled} (private)[/yellow]")
101110
if archived:
102111
print(f" [yellow]{org}/{repo}: {enabled} (archived)[/yellow]")
103112
elif f"{org}/{repo}" in ignore_repos:

tools/tide.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
import requests
2+
from rich import print
3+
from bs4 import BeautifulSoup
4+
import sys
5+
6+
7+
def get_packages(url, *, timeout=30):
    """Scrape a web page and return the sorted text of its ``<h3>`` headings.

    The caller passes a PyPI organisation or user page; each ``<h3>`` on
    such a page is presumably a package name (NOTE(review): this depends on
    PyPI's current markup -- confirm if the page layout changes).

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error, so a stalled connection cannot hang the script.

    Returns:
        Alphabetically sorted list of the stripped ``<h3>`` texts.

    Raises:
        SystemExit: With status 1 when the response status is not 200 or
            when the page contains no ``<h3>`` element.
    """
    # Send a GET request to the webpage with a custom user agent
    headers = {"User-Agent": "python/request/jupyter"}
    response = requests.get(
        url, headers=headers, allow_redirects=True, timeout=timeout
    )

    if response.status_code != 200:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        # sys.exit rather than the site-provided exit(), which is only
        # meant for interactive sessions and is not always available.
        sys.exit(1)

    # Parse the HTML content
    soup = BeautifulSoup(response.content, "html.parser")

    # Collect the text of every <h3> tag, already sorted
    h3_tags = sorted(h3.get_text(strip=True) for h3 in soup.find_all("h3"))

    if not h3_tags:
        print("No packages found")
        sys.exit(1)
    return h3_tags
29+
30+
31+
# Print the sorted list
32+
33+
34+
def get_tidelift_data(packages, *, timeout=30):
    """Query Tidelift's bulk-estimate endpoint and print a summary table.

    Each name in *packages* is sent as a ``pypi`` package. Names that the
    response does not mention are still listed, with ``None`` for both the
    lifted flag and the estimate.

    Args:
        packages: Iterable of package names to look up.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error, so a stalled connection cannot hang the script.

    Returns:
        None. The table is printed with ``rich``.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status
            (via ``raise_for_status``).
    """
    # Local import, as in the original; the previously imported
    # ``rich.columns.Columns`` was never used and has been dropped.
    from rich.table import Table

    payload = {"packages": [{"platform": "pypi", "name": name} for name in packages]}
    res = requests.post(
        "https://tidelift.com/api/depci/estimate/bulk_estimates",
        json=payload,
        timeout=timeout,
    )
    res.raise_for_status()
    response_data = res.json()

    # One (name, lifted, estimated_money) row per package in the response.
    package_data = [
        (package["name"], package["lifted"], package["estimated_money"])
        for package in response_data
    ]

    # Requested packages missing from the response get placeholder rows.
    package_names = {p["name"] for p in response_data}
    package_data.extend(
        (package, None, None)
        for package in packages
        if package not in package_names
    )

    def maybefloat(x):
        """Return *x* as a float, or 0 when it is missing or not numeric."""
        if x is None:
            return 0
        try:
            return float(x)
        except (TypeError, ValueError):
            # ValueError covers non-numeric strings, which would otherwise
            # abort the whole sort.
            return 0

    # Rows with a known lifted flag come first in ascending order (False
    # before True), rows whose flag is None come last; within a group the
    # larger estimate sorts first, then the name. The leading
    # ``x[1] is None`` element keeps None from ever being compared to a bool.
    # NOTE(review): the original comment claimed "True first, then None,
    # then False", which this key never did -- confirm the desired order.
    package_data.sort(
        key=lambda x: (x[1] is None, x[1], -maybefloat(x[2]), x[0])
    )

    # Create a table for aligned output
    table = Table(show_header=True, header_style="bold magenta")
    table.add_column("Package Name")
    table.add_column("Estimated Money")
    table.add_column("Lifted")

    for name, lifted, estimated_money in package_data:
        if lifted:
            # The estimate is replaced by a placeholder for lifted packages.
            table.add_row(name, "-- need login ––", f"[green]{lifted}[/green]")
        else:
            table.add_row(name, str(estimated_money), f"[red]{lifted}[/red]")

    print(table)
87+
88+
if __name__ == "__main__":
    # Usage:
    #   tide.py --org ORG            scrape https://pypi.org/org/ORG/
    #   tide.py --user USER          scrape https://pypi.org/user/USER/
    #   tide.py --packages PKG ...   use the package names given directly
    usage = (
        "Invalid argument. Please use either --org ORG, --user USER "
        "or --packages PKG1 PKG2 ..."
    )

    # Every mode needs the flag plus at least one value; checking up front
    # avoids an IndexError traceback when arguments are missing.
    if len(sys.argv) < 3:
        print(usage)
        sys.exit(1)

    mode = sys.argv[1]
    if mode == "--org":
        packages = get_packages(f"https://pypi.org/org/{sys.argv[2]}/")
    elif mode == "--user":
        packages = get_packages(f"https://pypi.org/user/{sys.argv[2]}/")
    elif mode == "--packages":
        packages = sys.argv[2:]
    else:
        print(usage)
        sys.exit(1)

    get_tidelift_data(packages)

0 commit comments

Comments
 (0)