# Number of repositories sent per GitHub request; fetch_github_stars_batch
# slices its repository list into chunks of this size.
BATCH_SIZE = 50  # Process repositories in batches of 50 to avoid rate limits
# Relative path to the server-manifest JSON schema, so it resolves against the
# current working directory. Presumably read by load_schema() -- confirm there.
SCHEMA_PATH = Path("mcp-registry/schema/server-schema.json")
1616
17+
def error_exit(message: str) -> None:
    """Report a fatal error and terminate the process.

    The message is written to stdout prefixed with a cross mark, after which
    the interpreter exits with status code 1 (via ``SystemExit``), so this
    function never returns normally.
    """
    formatted = f"❌ {message}"
    print(formatted)
    # Equivalent to sys.exit(1): both raise SystemExit with code 1.
    raise SystemExit(1)
2122
23+
def status_message(message: str) -> None:
    """Write a progress line to stdout, prefixed with the 🔄 marker."""
    line = f"🔄 {message}"
    print(line)
2527
28+
2629def load_schema () -> Dict [str , Any ]:
2730 """Load the JSON schema for validation"""
2831 try :
@@ -35,77 +38,82 @@ def load_schema() -> Dict[str, Any]:
3538 except Exception as e :
3639 error_exit (f"Error reading schema file: { e } " )
3740
41+
def load_manifest(manifest_path: Path) -> Dict[str, Any]:
    """Load a server manifest from disk and check it against the JSON schema.

    Args:
        manifest_path: Path of the manifest JSON file to read.

    Returns:
        The parsed manifest. A manifest that fails schema validation is still
        returned; only a warning is printed, so the site can build even when
        some manifests have schema issues.

    The process is terminated through ``error_exit`` when the file contains
    invalid JSON or any other error occurs while reading or validating it.
    The schema is obtained from ``load_schema()`` on every call.
    """
    try:
        # JSON files are UTF-8; do not depend on the platform default encoding.
        with open(manifest_path, 'r', encoding='utf-8') as f:
            manifest = json.load(f)

        # Get the schema
        schema = load_schema()

        # Validate against schema (raises ValidationError if invalid)
        try:
            jsonschema.validate(instance=manifest, schema=schema)
        except jsonschema.exceptions.ValidationError as e:
            # Deliberately non-fatal: warn, include the reason so the manifest
            # can be fixed, and carry on with the manifest as loaded.
            print(
                f"⚠️ Warning: {manifest_path} does not fully conform to the schema: "
                f"{e.message}")

        return manifest
    except json.JSONDecodeError as e:
        error_exit(f"Invalid JSON in {manifest_path}: {e}")
    except Exception as e:
        # Top-level boundary for this script: any other failure (I/O, decoding,
        # a broken schema) aborts the build with a readable message.
        error_exit(f"Error reading manifest file {manifest_path}: {e}")
6065
66+
def find_server_manifests(servers_dir: Path) -> List[Path]:
    """Return the server manifest files found directly inside *servers_dir*.

    Args:
        servers_dir: Directory expected to contain one ``*.json`` manifest per
            server. Sub-directories are not searched.

    Returns:
        Paths of all regular files matching ``*.json``, sorted so that the
        result (and everything generated from it) is the same on every run and
        platform; ``Path.glob`` itself yields entries in arbitrary order.

    The process is terminated through ``error_exit`` when *servers_dir* does
    not exist or is not a directory.
    """
    # is_dir() is already False for a missing path, so one check covers both.
    if not servers_dir.is_dir():
        error_exit(f"Servers directory not found: {servers_dir}")

    # is_file() filters out directories that happen to be named "*.json".
    return sorted(
        candidate
        for candidate in servers_dir.glob('*.json')
        if candidate.is_file()
    )
7278
79+
def extract_github_repos(server_manifests: List[Path]) -> Dict[str, str]:
    """Map each server name to the GitHub repository URL in its manifest.

    The server name is the manifest file name without its extension. The
    manifest's ``repository`` entry may be either a URL string or a dictionary
    holding the URL under ``url``. Manifests without a repository, or whose
    URL does not start with ``https://github.com/``, are left out.
    """
    github_prefix = 'https://github.com/'
    found = {}

    for path in server_manifests:
        name = path.stem  # file name without the .json extension
        manifest = load_manifest(path)

        if 'repository' not in manifest:
            continue

        candidate = manifest['repository']
        # Dictionary form: the URL lives under the 'url' key (None if absent).
        if isinstance(candidate, dict):
            candidate = candidate.get('url')

        if isinstance(candidate, str) and candidate.startswith(github_prefix):
            found[name] = candidate

    return found
94101
102+
95103def fetch_github_stars_batch (repo_urls : List [str ]) -> Dict [str , int ]:
96104 """Fetch GitHub stars for multiple repositories using GraphQL API"""
97105 # Get GitHub token from environment variable
98106 github_token = os .environ .get ('GITHUB_TOKEN' )
99-
107+
100108 # Prepare headers
101109 headers = {
102110 'Content-Type' : 'application/json' ,
103111 }
104-
112+
105113 # Add authorization if token is provided
106114 if github_token :
107115 headers ['Authorization' ] = f"Bearer { github_token } "
108-
116+
109117 # Extract owner and repo from URLs
110118 repos = []
111119 for url in repo_urls :
@@ -114,20 +122,20 @@ def fetch_github_stars_batch(repo_urls: List[str]) -> Dict[str, int]:
114122 if len (parts ) >= 2 :
115123 owner , repo = parts [0 ], parts [1 ]
116124 repos .append ((owner , repo ))
117-
125+
118126 if not repos :
119127 return {}
120-
128+
121129 stars = {}
122-
130+
123131 # Process repositories in batches
124132 for batch_start in range (0 , len (repos ), BATCH_SIZE ):
125133 batch = repos [batch_start :batch_start + BATCH_SIZE ]
126-
134+
127135 # Construct GraphQL query
128136 query_parts = []
129137 variables = {}
130-
138+
131139 for i , (owner , repo ) in enumerate (batch ):
132140 query_parts .append (
133141 f"""repo{ i } : repository(owner: $owner{ i } , name: $repo{ i } ) {{
@@ -137,142 +145,161 @@ def fetch_github_stars_batch(repo_urls: List[str]) -> Dict[str, int]:
137145 )
138146 variables [f"owner{ i } " ] = owner
139147 variables [f"repo{ i } " ] = repo
140-
148+
141149 # Join the query parts with proper line length
142- variable_defs = ', ' .join (f'$owner{ i } : String!, $repo{ i } : String!'
143- for i in range (len (batch )))
150+ variable_defs = ', ' .join (f'$owner{ i } : String!, $repo{ i } : String!'
151+ for i in range (len (batch )))
144152 query_body = ' ' .join (query_parts )
145-
153+
146154 query = f"""query ({ variable_defs } ) {{
147155 { query_body }
148156 }}"""
149-
150-
157+
151158 # Send GraphQL request
152159 try :
153160 response = requests .post (
154161 GITHUB_API_URL ,
155162 headers = headers ,
156163 json = {'query' : query , 'variables' : variables }
157164 )
158-
165+
159166 # Check for errors
160167 if response .status_code != 200 :
161168 if response .status_code == 401 :
162- print ("⚠️ GitHub API authentication failed. Set GITHUB_TOKEN for higher rate limits." )
169+ print (
170+ "⚠️ GitHub API authentication failed. Set GITHUB_TOKEN for higher rate limits." )
163171 elif response .status_code == 403 :
164- print ("⚠️ GitHub API rate limit exceeded. Set GITHUB_TOKEN for higher rate limits." )
172+ print (
173+ "⚠️ GitHub API rate limit exceeded. Set GITHUB_TOKEN for higher rate limits." )
165174 else :
166- print (f"⚠️ GitHub API request failed: status { response .status_code } " )
175+ print (
176+ f"⚠️ GitHub API request failed: status { response .status_code } " )
167177 continue
168-
178+
169179 data = response .json ()
170-
180+
171181 # Check for GraphQL errors
172182 if 'errors' in data :
173183 print (f"⚠️ GraphQL errors: { data ['errors' ]} " )
174184 continue
175-
185+
176186 # Extract star counts
177187 for i , (owner , repo ) in enumerate (batch ):
178188 repo_key = f"repo{ i } "
179189 if repo_key in data ['data' ] and data ['data' ][repo_key ]:
180190 url = data ['data' ][repo_key ]['url' ]
181191 star_count = data ['data' ][repo_key ]['stargazerCount' ]
182192 stars [url ] = star_count
183-
193+ if url .startswith ('https://github.com/' ):
194+ returned_parts = url .replace (
195+ 'https://github.com/' , '' ).split ('/' )
196+ if len (returned_parts ) >= 2 :
197+ returned_owner , returned_repo = returned_parts [0 ], returned_parts [1 ]
198+ if owner != returned_owner :
199+ print (
200+ f"⚠️owner mismatch:: { owner } != { returned_owner } " )
201+ if repo != returned_repo :
202+ print (
203+ f"⚠️repo mismatch:: { repo } != { returned_repo } " )
204+
184205 except Exception as e :
185206 print (f"⚠️ Error fetching GitHub stars for batch: { e } " )
186-
207+
187208 return stars
188209
210+
def _github_repo_key(url: str) -> str:
    """Return a lower-cased ``owner/repo`` key for a GitHub URL, or '' if none."""
    prefix = 'https://github.com/'
    if not isinstance(url, str) or not url.startswith(prefix):
        return ''
    segments = [segment for segment in url[len(prefix):].split('/') if segment]
    if len(segments) < 2:
        return ''
    return f"{segments[0]}/{segments[1]}".lower()


def get_github_stars(github_repos: Dict[str, str]) -> Dict[str, int]:
    """Fetch GitHub star counts for every server that has a repository.

    Args:
        github_repos: Mapping of server name to GitHub repository URL.

    Returns:
        Mapping of server name to star count. Servers whose repository could
        not be resolved are omitted. An empty input returns an empty dict
        without any network access.

    ``fetch_github_stars_batch`` keys its result by the URL reported in the
    API response, and only ever queries the first two path segments
    (owner/repo) of each manifest URL. A manifest URL that differs from the
    reported one in letter case, or that points below the repository root,
    therefore never matches exactly. Such URLs are matched on their
    case-insensitive ``owner/repo`` instead.
    """
    if not github_repos:
        return {}

    repo_count = len(github_repos)
    status_message(f"Fetching GitHub stars for {repo_count} repositories...")

    # Convert dict values to list for batch processing
    repo_urls = list(github_repos.values())

    # Fetch stars
    url_to_stars = fetch_github_stars_batch(repo_urls)

    # Secondary index for URLs that do not match character for character.
    stars_by_repo = {}
    for returned_url, star_count in url_to_stars.items():
        repo_key = _github_repo_key(returned_url)
        if repo_key:
            stars_by_repo.setdefault(repo_key, star_count)

    # Map server names to star counts; exact URL match wins over the fallback.
    server_stars = {}
    for server_name, repo_url in github_repos.items():
        if repo_url in url_to_stars:
            server_stars[server_name] = url_to_stars[repo_url]
            continue
        repo_key = _github_repo_key(repo_url)
        if repo_key in stars_by_repo:
            server_stars[server_name] = stars_by_repo[repo_key]

    return server_stars
210232
233+
def generate_servers_json(server_manifests: List[Path], output_path: Path) -> Dict[str, Dict[str, Any]]:
    """Write servers.json, the combined metadata of all server manifests.

    Args:
        server_manifests: Manifest files to include.
        output_path: Destination file. Missing parent directories are created.

    Returns:
        The data written: a mapping from server name (the manifest file name
        without extension) to the full manifest, with a ``name`` field added
        when the manifest does not define one.
    """
    status_message("Generating servers.json...")

    servers_data = {}

    for manifest_path in server_manifests:
        server_name = manifest_path.stem  # Get filename without extension
        manifest = load_manifest(manifest_path)

        # Use the entire manifest as is, preserving all fields;
        # ensure the name field at minimum is present.
        manifest.setdefault('name', server_name)

        servers_data[server_name] = manifest

    # A fresh target directory has no "api" sub-directory yet; create it
    # instead of failing with FileNotFoundError on open().
    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)

    # Write servers.json
    with open(destination, 'w', encoding='utf-8') as f:
        json.dump(servers_data, f, indent=2)

    return servers_data
233256
257+
def generate_stars_json(stars: Dict[str, int], output_path: Path) -> None:
    """Write stars.json, the mapping of server name to GitHub star count.

    Args:
        stars: Star counts keyed by server name.
        output_path: Destination file. Missing parent directories are created.
    """
    status_message("Generating stars.json...")

    # Create the output directory if needed instead of failing with
    # FileNotFoundError on open().
    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)

    # Write stars.json
    with open(destination, 'w', encoding='utf-8') as f:
        json.dump(stars, f, indent=2)
241265
266+
def main() -> None:
    """Prepare the site data files from the command-line arguments.

    Usage: ``prepare.py <source_dir> <target_dir> [--skip-stars]``

    Reads the manifests in ``<source_dir>/servers`` and writes
    ``<target_dir>/api/servers.json`` and ``<target_dir>/api/stars.json``.
    ``--skip-stars`` only takes effect when stars.json already exists;
    otherwise the star counts are fetched and the file is written anyway.
    """
    argv = sys.argv
    if len(argv) < 3:
        error_exit(
            "Usage: prepare.py <source_dir> <target_dir> [--skip-stars]")

    source_dir, target_dir = Path(argv[1]), Path(argv[2])
    skip_stars = "--skip-stars" in argv

    # Locate the manifests; an empty set is treated as a fatal error.
    servers_dir = source_dir / "servers"
    server_manifests = find_server_manifests(servers_dir)
    if not server_manifests:
        error_exit(f"No server manifests found in {servers_dir}")

    api_dir = target_dir / "api"

    # servers.json is always regenerated.
    generate_servers_json(server_manifests, api_dir / "servers.json")

    github_repos = extract_github_repos(server_manifests)

    # stars.json is kept as is only when skipping was requested AND a
    # previous file is available.
    stars_json_path = api_dir / "stars.json"
    keep_existing_stars = skip_stars and stars_json_path.exists()
    if keep_existing_stars:
        status_message("Skipping GitHub stars fetch as requested.")
    else:
        generate_stars_json(get_github_stars(github_repos), stars_json_path)

    print("✅ Site preparation completed successfully!")
276302
303+
# Run the site preparation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
0 commit comments