Skip to content

Commit b08a24b

Browse files
author
Jeremy Dai
authored
return warning for owner or repo name mismatch (#9)
* return warning * fix
1 parent 1154a5f commit b08a24b

File tree

1 file changed

+83
-56
lines changed

1 file changed

+83
-56
lines changed

scripts/prepare.py

Lines changed: 83 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -14,15 +14,18 @@
1414
# Number of repositories sent per GraphQL request; kept small to avoid rate limits.
BATCH_SIZE = 50
# Location of the JSON schema used to validate server manifests.
SCHEMA_PATH = Path("mcp-registry/schema/server-schema.json")
1616

17+
1718
def error_exit(message: str) -> None:
    """Print an error message and terminate the process.

    Args:
        message: Human-readable description of the failure.

    Raises:
        SystemExit: Always, with exit status 1.
    """
    print(f"❌ {message}")
    sys.exit(1)
2122

23+
2224
def status_message(message: str) -> None:
    """Print a progress/status line to standard output.

    Args:
        message: Text to show after the status marker.
    """
    print(f"🔄 {message}")
2527

28+
2629
def load_schema() -> Dict[str, Any]:
2730
"""Load the JSON schema for validation"""
2831
try:
@@ -35,77 +38,82 @@ def load_schema() -> Dict[str, Any]:
3538
except Exception as e:
3639
error_exit(f"Error reading schema file: {e}")
3740

41+
3842
def load_manifest(manifest_path: Path) -> Dict[str, Any]:
    """Load a manifest file, validate it against the schema, and return it.

    A manifest that fails schema validation is still returned; only a warning
    is printed so that the site can build despite minor schema issues.

    Args:
        manifest_path: Path of the JSON manifest to read.

    Returns:
        The parsed manifest.

    Raises:
        SystemExit: Via ``error_exit`` when the file cannot be read or does
            not contain valid JSON.
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the platform's
        # default encoding when reading it.
        with open(manifest_path, 'r', encoding='utf-8') as f:
            manifest = json.load(f)

        # Get the schema
        schema = load_schema()

        # Validate against schema (will raise exception if invalid)
        try:
            jsonschema.validate(instance=manifest, schema=schema)
        except jsonschema.exceptions.ValidationError:
            # If validation fails, we continue but log a warning
            # This allows the site to build even with some schema issues
            print(
                f"⚠️ Warning: {manifest_path} does not fully conform to the schema")

        return manifest
    except json.JSONDecodeError as e:
        error_exit(f"Invalid JSON in {manifest_path}: {e}")
    except Exception as e:
        error_exit(f"Error reading manifest file {manifest_path}: {e}")
6065

66+
6167
def find_server_manifests(servers_dir: Path) -> List[Path]:
    """Find all server manifest files in the servers directory.

    Args:
        servers_dir: Directory expected to contain ``*.json`` manifests.

    Returns:
        The regular files matching ``*.json`` directly inside ``servers_dir``,
        sorted so that downstream output is deterministic across runs and
        filesystems.

    Raises:
        SystemExit: Via ``error_exit`` when ``servers_dir`` is missing or is
            not a directory.
    """
    # is_dir() is already False for a non-existent path.
    if not servers_dir.is_dir():
        error_exit(f"Servers directory not found: {servers_dir}")

    return sorted(
        candidate
        for candidate in servers_dir.glob('*.json')
        if candidate.is_file()
    )
7278

79+
7380
def extract_github_repos(server_manifests: List[Path]) -> Dict[str, str]:
    """Extract GitHub repository URLs from server manifests.

    The ``repository`` field of a manifest may be either a URL string or a
    dictionary carrying the URL under ``url``. Only URLs that start with
    ``https://github.com/`` are collected.

    Args:
        server_manifests: Manifest files to inspect.

    Returns:
        Mapping of server name (manifest filename without extension) to its
        GitHub repository URL. Servers without a GitHub URL are omitted.
    """
    github_prefix = 'https://github.com/'
    github_repos = {}

    for manifest_path in server_manifests:
        server_name = manifest_path.stem  # Get filename without extension
        manifest = load_manifest(manifest_path)

        if 'repository' not in manifest:
            continue

        # Handle both string and dictionary repository formats
        repository = manifest['repository']
        if isinstance(repository, dict):
            repository = repository.get('url')

        if isinstance(repository, str) and repository.startswith(github_prefix):
            github_repos[server_name] = repository

    return github_repos
94101

102+
95103
def fetch_github_stars_batch(repo_urls: List[str]) -> Dict[str, int]:
96104
"""Fetch GitHub stars for multiple repositories using GraphQL API"""
97105
# Get GitHub token from environment variable
98106
github_token = os.environ.get('GITHUB_TOKEN')
99-
107+
100108
# Prepare headers
101109
headers = {
102110
'Content-Type': 'application/json',
103111
}
104-
112+
105113
# Add authorization if token is provided
106114
if github_token:
107115
headers['Authorization'] = f"Bearer {github_token}"
108-
116+
109117
# Extract owner and repo from URLs
110118
repos = []
111119
for url in repo_urls:
@@ -114,20 +122,20 @@ def fetch_github_stars_batch(repo_urls: List[str]) -> Dict[str, int]:
114122
if len(parts) >= 2:
115123
owner, repo = parts[0], parts[1]
116124
repos.append((owner, repo))
117-
125+
118126
if not repos:
119127
return {}
120-
128+
121129
stars = {}
122-
130+
123131
# Process repositories in batches
124132
for batch_start in range(0, len(repos), BATCH_SIZE):
125133
batch = repos[batch_start:batch_start + BATCH_SIZE]
126-
134+
127135
# Construct GraphQL query
128136
query_parts = []
129137
variables = {}
130-
138+
131139
for i, (owner, repo) in enumerate(batch):
132140
query_parts.append(
133141
f"""repo{i}: repository(owner: $owner{i}, name: $repo{i}) {{
@@ -137,142 +145,161 @@ def fetch_github_stars_batch(repo_urls: List[str]) -> Dict[str, int]:
137145
)
138146
variables[f"owner{i}"] = owner
139147
variables[f"repo{i}"] = repo
140-
148+
141149
# Join the query parts with proper line length
142-
variable_defs = ', '.join(f'$owner{i}: String!, $repo{i}: String!'
143-
for i in range(len(batch)))
150+
variable_defs = ', '.join(f'$owner{i}: String!, $repo{i}: String!'
151+
for i in range(len(batch)))
144152
query_body = ' '.join(query_parts)
145-
153+
146154
query = f"""query ({variable_defs}) {{
147155
{query_body}
148156
}}"""
149-
150-
157+
151158
# Send GraphQL request
152159
try:
153160
response = requests.post(
154161
GITHUB_API_URL,
155162
headers=headers,
156163
json={'query': query, 'variables': variables}
157164
)
158-
165+
159166
# Check for errors
160167
if response.status_code != 200:
161168
if response.status_code == 401:
162-
print("⚠️ GitHub API authentication failed. Set GITHUB_TOKEN for higher rate limits.")
169+
print(
170+
"⚠️ GitHub API authentication failed. Set GITHUB_TOKEN for higher rate limits.")
163171
elif response.status_code == 403:
164-
print("⚠️ GitHub API rate limit exceeded. Set GITHUB_TOKEN for higher rate limits.")
172+
print(
173+
"⚠️ GitHub API rate limit exceeded. Set GITHUB_TOKEN for higher rate limits.")
165174
else:
166-
print(f"⚠️ GitHub API request failed: status {response.status_code}")
175+
print(
176+
f"⚠️ GitHub API request failed: status {response.status_code}")
167177
continue
168-
178+
169179
data = response.json()
170-
180+
171181
# Check for GraphQL errors
172182
if 'errors' in data:
173183
print(f"⚠️ GraphQL errors: {data['errors']}")
174184
continue
175-
185+
176186
# Extract star counts
177187
for i, (owner, repo) in enumerate(batch):
178188
repo_key = f"repo{i}"
179189
if repo_key in data['data'] and data['data'][repo_key]:
180190
url = data['data'][repo_key]['url']
181191
star_count = data['data'][repo_key]['stargazerCount']
182192
stars[url] = star_count
183-
193+
if url.startswith('https://github.com/'):
194+
returned_parts = url.replace(
195+
'https://github.com/', '').split('/')
196+
if len(returned_parts) >= 2:
197+
returned_owner, returned_repo = returned_parts[0], returned_parts[1]
198+
if owner != returned_owner:
199+
print(
200+
f"⚠️owner mismatch:: {owner} != {returned_owner}")
201+
if repo != returned_repo:
202+
print(
203+
f"⚠️repo mismatch:: {repo} != {returned_repo}")
204+
184205
except Exception as e:
185206
print(f"⚠️ Error fetching GitHub stars for batch: {e}")
186-
207+
187208
return stars
188209

210+
189211
def _canonical_repo_key(url: str) -> str:
    """Reduce a GitHub URL to a lower-cased ``owner/repo`` key for matching."""
    prefix = 'https://github.com/'
    if not url.startswith(prefix):
        return url
    parts = url[len(prefix):].split('/')
    if len(parts) < 2:
        return url
    return f"{parts[0]}/{parts[1]}".lower()


def get_github_stars(github_repos: Dict[str, str]) -> Dict[str, int]:
    """Fetch GitHub stars for all repositories.

    Args:
        github_repos: Mapping of server name to GitHub repository URL.

    Returns:
        Mapping of server name to star count. Servers whose repository could
        not be resolved are omitted.
    """
    if not github_repos:
        return {}

    repo_count = len(github_repos)
    status_message(f"Fetching GitHub stars for {repo_count} repositories...")

    # Fetch stars for every repository URL in one batched call
    url_to_stars = fetch_github_stars_batch(list(github_repos.values()))

    # fetch_github_stars_batch keys its result by the URL reported by the
    # API, which can differ from the manifest URL (letter case, trailing
    # slash, extra path segments). Index by owner/repo as a fallback so such
    # servers do not silently lose their star count.
    stars_by_key = {
        _canonical_repo_key(url): count for url, count in url_to_stars.items()
    }

    # Map server names to star counts
    server_stars = {}
    for server_name, repo_url in github_repos.items():
        if repo_url in url_to_stars:
            server_stars[server_name] = url_to_stars[repo_url]
            continue
        key = _canonical_repo_key(repo_url)
        if key in stars_by_key:
            server_stars[server_name] = stars_by_key[key]

    return server_stars
210232

233+
211234
def generate_servers_json(server_manifests: List[Path], output_path: Path) -> Dict[str, Dict[str, Any]]:
    """Generate the servers.json file with server metadata.

    Each manifest is written out unchanged, except that a missing ``name``
    field is filled in from the manifest's filename.

    Args:
        server_manifests: Manifest files to include.
        output_path: Destination of the generated JSON file.

    Returns:
        Mapping of server name to manifest content, as written to disk.
    """
    status_message("Generating servers.json...")

    servers_data = {}

    for manifest_path in server_manifests:
        server_name = manifest_path.stem  # Get filename without extension
        manifest = load_manifest(manifest_path)

        # Use the entire manifest as is, preserving all fields
        # Ensure the name field at minimum is present
        if 'name' not in manifest:
            manifest['name'] = server_name

        servers_data[server_name] = manifest

    # The output lives in a sub-directory of the target; create it if needed
    # so a fresh target directory does not make the write fail.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Write servers.json
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(servers_data, f, indent=2)

    return servers_data
233256

257+
234258
def generate_stars_json(stars: Dict[str, int], output_path: Path) -> None:
    """Generate the stars.json file with GitHub star counts.

    Args:
        stars: Mapping of server name to star count.
        output_path: Destination of the generated JSON file.
    """
    status_message("Generating stars.json...")

    # Create the destination directory if needed so a fresh target directory
    # does not make the write fail.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Write stars.json
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(stars, f, indent=2)
241265

266+
242267
def main() -> None:
    """Prepare site data: write servers.json and, unless skipped, stars.json.

    Command line: ``prepare.py <source_dir> <target_dir> [--skip-stars]``.
    The ``--skip-stars`` flag may appear anywhere among the arguments.
    """
    skip_flag = "--skip-stars"
    # Separate the flag from the positional arguments so that its position on
    # the command line cannot cause it to be mistaken for a directory.
    positional = [arg for arg in sys.argv[1:] if arg != skip_flag]
    skip_stars = skip_flag in sys.argv[1:]

    if len(positional) < 2:
        error_exit(
            "Usage: prepare.py <source_dir> <target_dir> [--skip-stars]")

    source_dir = Path(positional[0])
    target_dir = Path(positional[1])

    # Find server manifests
    servers_dir = source_dir / "servers"
    server_manifests = find_server_manifests(servers_dir)

    if not server_manifests:
        error_exit(f"No server manifests found in {servers_dir}")

    # Generate servers.json
    servers_json_path = target_dir / "api" / "servers.json"
    generate_servers_json(server_manifests, servers_json_path)

    # Extract GitHub repositories
    github_repos = extract_github_repos(server_manifests)

    # Generate stars.json (if not skipped)
    stars_json_path = target_dir / "api" / "stars.json"

    # Skipping is honoured only when a previous stars.json exists; otherwise
    # the file is still generated so the output directory is complete.
    if skip_stars and stars_json_path.exists():
        status_message("Skipping GitHub stars fetch as requested.")
    else:
        # Fetch GitHub stars
        stars = get_github_stars(github_repos)
        generate_stars_json(stars, stars_json_path)

    print("✅ Site preparation completed successfully!")
276302

303+
277304
# Run the preparation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)