Skip to content

Commit 77dcbdb

Browse files
committed
Add initial implementation of status aggregator
This combines Maven and GitHub data into a single enriched JSON dataset.
1 parent 547ade7 commit 77dcbdb

File tree

1 file changed

+133
-0
lines changed

1 file changed

+133
-0
lines changed

status.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
#!/usr/bin/env python
2+
#
3+
# This is free and unencumbered software released into the public domain.
4+
# See the UNLICENSE file for details.
5+
#
6+
# ------------------------------------------------------------------------
7+
# status.py
8+
# ------------------------------------------------------------------------
9+
# Aggregates information for components of the SciJava component collection,
10+
# using multiple sources, including Maven repositories and GitHub.
11+
12+
import json, logging, re
13+
from collections import Counter
14+
from pathlib import Path
15+
16+
import github, maven
17+
18+
# -- Constants --
19+
20+
# Directory (relative to the working directory) holding cached JSON files.
cache_dir = Path(".cache")
21+
22+
# -- Functions --
23+
24+
def issues_repo(pom):
    """
    If this POM record declares GitHub Issues for its issue management,
    return the GitHub (org, repo) pair. Otherwise, return (None, None).

    :param pom: Mapping describing a POM; its 'issues' entry, when present
                and not None, is the issue management URL.
    :return: Tuple (org, repo) of strings parsed from a URL of the form
             http(s)://github.com/<org>/<repo>/issues..., or (None, None)
             when the entry is absent, empty, or not such a URL.
    """
    url = pom.get('issues')
    if not url:
        return None, None  # no issue management declared

    # Raw string with an escaped dot, so only the literal host "github.com"
    # matches (an unescaped '.' would accept any character in that spot).
    m = re.match(r'https?://github\.com/([^/]+)/([^/]+)/issues', url)
    if not m:
        return None, None  # does not use GitHub Issues
    return m.group(1), m.group(2)
33+
34+
def fetch_issues(orgs):
    """
    Download issues for the given organizations.

    Builds a query of "user:<org>" terms joined by "+", passes it to
    GitHubIssues.download, and returns the GitHubIssues object.

    :param orgs: Iterable of organization names.
    :return: The github.GitHubIssues instance the download was invoked on.
    """
    downloader = github.GitHubIssues()
    terms = ["user:{}".format(org) for org in orgs]
    downloader.download("+".join(terms))
    return downloader
39+
40+
def run():
    """
    Build the enriched component dataset and print it to stdout as JSON.

    Steps performed:
      1. Obtain the Maven metadata (the "BOM" records): read it from
         maven.json in the cache directory when that file exists; otherwise
         call maven.process() and, if the cache directory exists, write the
         result there.
      2. Add a "team" dict to every record, mapping each developer role to
         the list of developer ids holding that role.
      3. Add an "issues" entry to every record: {"org", "repo"} when the
         POM points at GitHub Issues, else None.
      4. Load the issues (from issues.json in the cache directory when it
         exists, otherwise via fetch_issues) for every org having a
         component that uses GitHub Issues and has a "reviewer" or
         "support" role.
      5. Extend each record's "issues" entry with per-repository statistics.

    Exits the process with status 1 (SystemExit) when no Maven metadata
    could be obtained.
    """
    # sys is not among this module's top-level imports; bring it into
    # scope here so the sys.exit call below cannot raise NameError.
    import sys

    # Get all the juicy details from the Maven metadata.
    bom_file = cache_dir / 'maven.json'
    if bom_file.is_file():
        logging.info(f"Reading Maven metadata from {bom_file}...")
        with open(bom_file) as f:
            bom = json.load(f)
    else:
        logging.info("Reading Maven metadata from local repository storage...")
        bom = maven.process()
        # Only cache when there is something to cache and somewhere to put it.
        if bom and cache_dir.is_dir():
            logging.info(f"Writing Maven metadata to {bom_file}...")
            with open(bom_file, "w") as f:
                json.dump(bom, f, sort_keys=True, indent=4)
    if not bom:
        logging.error("This script must be run from the SciJava Maven server,\n"
                      f"or you must have a {bom_file} with cached metadata.")
        sys.exit(1)

    # Augment the BOM records with team information.
    logging.info("Augmenting BOM with team info...")
    for c in bom:
        c["team"] = {}
        if not c["pom"]:
            continue

        # Populate the team section: map developer roles to list of developer ids.
        for dev in c["pom"]["developers"]:
            if "roles" not in dev:
                continue  # developer has no roles
            if "id" not in dev:
                continue  # developer has no id
            for role in dev["roles"]:
                c["team"].setdefault(role, []).append(dev["id"])

    # Record which components use GitHub Issues.
    logging.info("Cataloging usages of GitHub issues...")
    for c in bom:
        c["issues"] = None
        if not c["pom"]:
            continue

        # Populate a barebones issues section, if component uses GitHub Issues.
        org, repo = issues_repo(c["pom"])
        if org and repo:
            c["issues"] = {"org": org, "repo": repo}

    # Compile a sorted list of orgs containing any repository that:
    # 1. Uses GitHub Issues; and
    # 2. Has any developer with reviewer or support role.
    orgs = sorted({
        c["issues"]["org"] for c in bom
        if c["issues"] and any(role in c["team"] for role in ["reviewer", "support"])
    })

    # Retrieve all the open issues for those orgs.
    logging.info(f"Loading issues for orgs: {orgs}")
    issues_file = cache_dir / 'issues.json'
    if issues_file.is_file():
        logging.info(f"Reading GitHub issues from {issues_file}...")
        ghi = github.GitHubIssues()
        ghi.load(issues_file)
    else:
        logging.info("Fetching issues from GitHub...")
        ghi = fetch_issues(orgs)
        if cache_dir.is_dir():
            logging.info(f"Writing GitHub issues to {issues_file}...")
            ghi.save(issues_file)
    logging.info(f"Retrieved {len(ghi.issues())} issues")

    # Augment the BOM records with statistics about issues.
    logging.info("Augmenting BOM with issues info...")
    for c in bom:
        if not c["issues"]:
            continue  # Component does not use GitHub Issues.

        issues = ghi.repo(c["issues"]["org"], c["issues"]["repo"]).issues()

        c["issues"].update({
            "count": len(issues),
            "prs": sum(1 for issue in issues if issue.is_pr),
            "drafts": sum(1 for issue in issues if issue.is_draft),
            "unscheduled": sum(1 for issue in issues if issue.milestone == 'unscheduled'),
            "labels": Counter(label for issue in issues for label in issue.labels),
            # min()/max() raise on an empty sequence, hence the guards.
            "oldest": str(min(issue.created_at for issue in issues)) if issues else None,
            "updated": str(max(issue.updated_at for issue in issues)) if issues else None,
            "assignees": Counter(assignee for issue in issues for assignee in issue.assignees),
        })

    print(json.dumps(bom, sort_keys=True, indent=4))
128+
129+
# -- Main --
130+
131+
# Script entry point: show INFO-level log messages, then run the aggregation.
if __name__ == "__main__":
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)
    run()

0 commit comments

Comments
 (0)