-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathroles.py
More file actions
117 lines (90 loc) · 3.51 KB
/
roles.py
File metadata and controls
117 lines (90 loc) · 3.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
from job import JobPostingSimplify
import base64
import requests
import shutil
from loguru import logger
import os
import json
from dotenv import load_dotenv
# Load environment variables
_ = load_dotenv()
# Constants
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
REPO_ROUTE = "SimplifyJobs/Summer2025-Internships"
LISTING_URL = f"https://api.github.com/repos/{REPO_ROUTE}/contents/.github/scripts/listings.json"
LISTING_PATH = "listings.json"
PREVIOUS_LISTING_PATH = "previous_listings.json"
# By default, we ignore existing job posts that
# are made active after a period of inactivity
#
# Only fresh posts are included
INCLUDE_REPOSTS = False
def parse_file(path: str) -> list[JobPostingSimplify]:
"""
Parse JSON file to `list[JobPostingSimplify]`
"""
with open(path, "r") as f:
data = list([JobPostingSimplify(**x) for x in json.load(f)])
return data
def pull_data() -> list[JobPostingSimplify]:
"""
Retrieve updated `JobPostingSimplify` list from GitHub,
fully authenticated and without using raw.githubusercontent.com,
so we get the higher API rate limits.
"""
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN") or "YOUR_PERSONAL_ACCESS_TOKEN"
if not GITHUB_TOKEN:
raise RuntimeError("No GitHub token found in environment or code.")
# 1) GET file metadata via the 'contents' endpoint
# This remains on api.github.com, so we see X-RateLimit-* headers and stay authenticated.
headers = {
"Authorization": f"Bearer {GITHUB_TOKEN}",
"Accept": "application/vnd.github.v3+json",
}
# Optional: If we want to handle ETags (conditional requests),
# we could add:
# headers["If-None-Match"] = 'W/"some-previous-etag"'
# or handle them after this request.
resp = requests.get(LISTING_URL, headers=headers)
resp.raise_for_status()
info = resp.json()
file_sha = info["sha"]
blob_url = f"https://api.github.com/repos/{REPO_ROUTE}/git/blobs/{file_sha}"
blob_resp = requests.get(blob_url, headers=headers)
blob_resp.raise_for_status()
blob_info = blob_resp.json()
decoded_bytes = base64.b64decode(blob_info["content"])
# Write the JSON to a local file
with open(LISTING_PATH, "wb") as f:
_ = f.write(decoded_bytes)
# Now parse the local JSON
return parse_file(LISTING_PATH)
def get_new_roles() -> list[JobPostingSimplify]:
"""
Retrieve new or newly active roles
"""
# Pull new data
new_data: list[JobPostingSimplify] = pull_data()
new_roles: list[JobPostingSimplify] = []
# Check if PREVIOUS_LISTING_PATH exists
# If not, initialize and return empty
if not os.path.exists(PREVIOUS_LISTING_PATH):
logger.info(f"File {PREVIOUS_LISTING_PATH} not found. Initializing file..")
shutil.copy(LISTING_PATH, PREVIOUS_LISTING_PATH)
return new_roles
# Build a dictionary of old roles keyed by their ID
old_dict = {job.id: job for job in parse_file(PREVIOUS_LISTING_PATH)}
# Iterate over new postings
for new_post in new_data:
old_post = old_dict.get(new_post.id)
# If job didn't exist before, it's new
if not old_post:
new_roles.append(new_post)
# Otherwise, check if it just became active
else:
# If old was inactive, but new is active, it's newly active
if INCLUDE_REPOSTS and (not old_post.active and new_post.active):
new_roles.append(new_post)
# Update previous roles file
shutil.copy(LISTING_PATH, PREVIOUS_LISTING_PATH)
return new_roles