Skip to content

Commit 00c1297

Browse files
authored
Support non GitHub repo hosts, ignore metrics (#325)
* Support non GitHub repo hosts, ignore metrics * Fix parsing * Update changelog * lint
1 parent 3d296b2 commit 00c1297

File tree

8 files changed

+157
-14
lines changed

8 files changed

+157
-14
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ See [GitHub releases](https://github.com/pyOpenSci/pyosMeta/releases) page for a
66

77
## [Unreleased]
88

9+
* Feature: allow non-GitHub repository hosts and establish a pattern for collecting repository metrics (@banesullivan, #325)
10+
911
## [v1.7.6] - 2025-10-10
1012
* Bug fix: handle markdown styling in issue template for package submission
1113
* Feature: allow empty reviewers in issue template for package submission

src/pyosmeta/cli/process_reviews.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def main():
5353
# Contrib count is only available via rest api
5454
logger.info("Getting GitHub metrics for all packages...")
5555
repo_paths = process_review.get_repo_paths(accepted_reviews)
56-
all_reviews = github_api.get_gh_metrics(repo_paths, accepted_reviews)
56+
all_reviews = github_api.get_metrics(repo_paths, accepted_reviews)
5757

5858
with open("all_reviews.pickle", "wb") as f:
5959
pickle.dump(all_reviews, f)

src/pyosmeta/github_api.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from dotenv import load_dotenv
1919

2020
from pyosmeta.models import ReviewModel
21+
from pyosmeta.models.base import RepositoryHost
2122

2223
from .logging import logger
2324

@@ -181,7 +182,7 @@ def _get_response_rest(self, url: str) -> list[dict[str, Any]]:
181182

182183
return results
183184

184-
def get_gh_metrics(
185+
def get_metrics(
185186
self,
186187
endpoints: dict[dict[str, str]],
187188
reviews: dict[str, ReviewModel],
@@ -204,7 +205,15 @@ def get_gh_metrics(
204205
"""
205206

206207
for pkg_name, owner_repo in endpoints.items():
207-
reviews[pkg_name].gh_meta = self.get_repo_meta(owner_repo)
208+
review = reviews[pkg_name]
209+
if review.repository_host == RepositoryHost.github:
210+
reviews[pkg_name].gh_meta = self.get_repo_meta_github(
211+
owner_repo
212+
)
213+
else:
214+
logger.warning(
215+
f"Unsupported repository host for {pkg_name}: {review.repository_host}"
216+
)
208217

209218
return reviews
210219

@@ -363,7 +372,7 @@ def _get_metrics_graphql(
363372
)
364373
return None
365374

366-
def get_repo_meta(
375+
def get_repo_meta_github(
367376
self, repo_info: dict[str, str]
368377
) -> dict[str, Any] | None:
369378
"""Get GitHub metrics from the GitHub GraphQL API for a repository.
@@ -392,6 +401,11 @@ def get_repo_meta(
392401

393402
return metrics
394403

404+
def get_repo_meta_gitlab(
405+
self, repo_info: dict[str, str]
406+
) -> dict[str, Any] | None:
407+
raise NotImplementedError
408+
395409
def get_user_info(
396410
self, gh_handle: str, name: Optional[str] = None
397411
) -> dict[str, Union[str, Any]]:

src/pyosmeta/models/base.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
Field,
1616
field_serializer,
1717
field_validator,
18+
model_validator,
1819
)
1920

2021
from pyosmeta.logging import logger
@@ -32,6 +33,56 @@ class Partnerships(str, Enum):
3233
pangeo = "pangeo"
3334

3435

36+
class RepositoryHost(str, Enum):
    """Code-hosting services recognised for a package repository.

    Members subclass ``str``, so they compare equal to their plain string
    values (for example ``RepositoryHost.github == "github"``).
    """

    other = "other"
    github = "github"
    gitlab = "gitlab"
    codeberg = "codeberg"

    @classmethod
    def from_url(cls, url: str) -> "RepositoryHost":
        """Determine the repository host from a given URL.

        The decision is based on the URL's hostname rather than on a
        substring of the whole URL, so a path segment such as
        ``github.com-mirror`` or a look-alike domain such as
        ``github.community`` is not mistaken for GitHub. Matching is
        case-insensitive and subdomains (``www.github.com``) are accepted.

        Parameters
        ----------
        url : str
            The URL of the repository. An ``http(s)`` URL embedded in
            surrounding text (e.g. a markdown link) and scheme-less
            values such as ``github.com/owner/repo`` are also handled.

        Returns
        -------
        RepositoryHost
            The corresponding RepositoryHost enum value, or
            ``RepositoryHost.other`` when the host is empty or unknown.
        """
        text = url.strip() if url else ""
        # Prefer an explicit http(s) URL anywhere in the text; otherwise
        # treat the start of the text as a scheme-less "host/path" value.
        # The optional "...@" part skips userinfo such as "git@".
        found = re.search(
            r"https?://(?:[^/\s@?#]*@)?([a-z0-9.-]+)", text, re.IGNORECASE
        ) or re.match(r"(?:[^/\s@?#]*@)?([a-z0-9.-]+)", text, re.IGNORECASE)
        if found is None:
            return cls.other

        hostname = found.group(1).lower().rstrip(".")
        known_domains = {
            "github.com": cls.github,
            "gitlab.com": cls.gitlab,
            "codeberg.org": cls.codeberg,
        }
        for domain, host in known_domains.items():
            # Exact domain or any subdomain of it, never a mere substring.
            if hostname == domain or hostname.endswith("." + domain):
                return host
        return cls.other

    def parse_url(self, url: str) -> tuple[str, str]:
        """Extract the repository owner and name from a repository URL.

        Only the first two path segments after the host are used; any
        deeper path, trailing slash, query string, fragment, surrounding
        whitespace, or ``.git`` suffix is ignored.

        Parameters
        ----------
        url : str
            The URL of the repository; it must start with ``http://`` or
            ``https://``.

        Returns
        -------
        tuple[str, str]
            The owner and repository name.

        Raises
        ------
        ValueError
            If the URL does not contain both an owner and a repository
            segment.
        """
        match = re.match(
            r"https?://[^/]+/([^/?#]+)/([^/?#]+)", url.strip(), re.IGNORECASE
        )
        if match is None:
            raise ValueError(f"Could not parse owner/repo from URL: {url}")

        owner, repo = match.groups()
        # Clone URLs end in ".git"; that suffix is not part of the name.
        repo = repo.removesuffix(".git")
        if not repo:
            raise ValueError(f"Could not parse owner/repo from URL: {url}")
        return owner, repo
84+
85+
3586
class UrlValidatorMixin:
3687
"""A mixin to validate classes that are of the same type across
3788
several models.
@@ -252,6 +303,7 @@ class ReviewModel(BaseModel):
252303
all_current_maintainers: list[ReviewUser] = Field(default_factory=list)
253304
# Support presubmissions with an alias
254305
repository_link: str = Field(..., alias="repository_link_(if_existing)")
306+
repository_host: RepositoryHost = Field(default=None)
255307
version_submitted: Optional[str] = None
256308
categories: Optional[list[str]] = None
257309
editor: ReviewUser | list[ReviewUser] | None = None
@@ -273,6 +325,15 @@ class ReviewModel(BaseModel):
273325
labels: list[str] = Field(default_factory=list)
274326
active: bool = True # To indicate if package is maintained or archived
275327

328+
@model_validator(mode="after")
def set_repository_host_from_link(self):
    """Fill in ``repository_host`` from ``repository_link`` when unset.

    An already populated ``repository_host`` is left untouched, and
    nothing is derived when ``repository_link`` is empty.
    """
    host_is_missing = self.repository_host is None
    # Guard clause: nothing to infer when a host is already set or
    # there is no link to infer it from.
    if not (host_is_missing and self.repository_link):
        return self

    self.repository_host = RepositoryHost.from_url(self.repository_link)
    return self
336+
276337
@field_validator(
277338
"date_accepted",
278339
mode="before",
@@ -388,6 +449,7 @@ def listify(cls, item: Any):
388449
return item
389450

390451
@field_validator("labels", mode="before")
452+
@classmethod
391453
def extract_label(cls, labels: list[str | Labels]) -> list[str]:
392454
"""
393455
Get just the ``name`` from the Labels model, if given

src/pyosmeta/parse_issues.py

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from tqdm.contrib.logging import logging_redirect_tqdm
99

1010
from pyosmeta.models import ReviewModel, ReviewUser
11+
from pyosmeta.models.base import RepositoryHost
1112
from pyosmeta.models.github import Issue, Labels, LabelType
1213

1314
from .github_api import GitHubAPI
@@ -388,16 +389,8 @@ def get_repo_paths(
388389
all_repos = {}
389390
for a_package in review_issues.keys():
390391
repo_url = review_issues[a_package].repository_link
391-
# for now skip if it's a gitlab repo
392-
if "gitlab" in repo_url:
393-
continue
394-
owner, repo = (
395-
repo_url.replace("https://github.com/", "")
396-
.replace("https://www.github.com/", "")
397-
.rstrip("/")
398-
.split("/", 1)
399-
)
400-
392+
host = RepositoryHost(review_issues[a_package].repository_host)
393+
owner, repo = host.parse_url(repo_url)
401394
all_repos[a_package] = {"owner": owner, "repo_name": repo}
402395
return all_repos
403396

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
Submitting Author: Fakename (@fakeauthor)
2+
All current maintainers: (@fakeauthor1, @fakeauthor2)
3+
Package Name: fake_package
4+
One-Line Description of Package: A fake python package
5+
Repository Link: https://github.com/fakeauthor1/fake_package
6+
Version submitted: v1.0.0
7+
EiC: @fakeeic1
8+
Editor: @fakeeditor1
9+
Reviewers: @fakereviewer1 , @fakereviewer2, @fakereviewer3
10+
Reviews Expected By: fake date
11+
Archive: [![DOI](https://example.com/fakearchive)](https://zenodo.org/records/8415866)
12+
JOSS DOI: [![DOI](https://example.com/fakearchive)](https://doi.org/10.21105/joss.01450)
13+
Version accepted: 2.0.0 ([repo](https://example.com/fakeauthor1/fake_package/releases/tag/v2.0.0), [pypi](https://pypi.org/project/fake_project/2.0.0), [archive](https://example.com/fakearchive))
14+
Date accepted (month/day/year): 06/29/2024
15+
16+
---
17+
18+
## Scope
19+
20+
- [x] I agree to abide by [pyOpenSci's Code of Conduct][PyOpenSciCodeOfConduct] during the review process and in maintaining my package after should it be accepted.
21+
- [x] I have read and will commit to package maintenance after the review as per the [pyOpenSci Policies Guidelines][Commitment].
22+
(etc)
23+
24+
## Community Partnerships
25+
26+
- [ ] etc
27+
- [ ] aaaaaa
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
Submitting Author: Fakename (@fakeauthor)
2+
All current maintainers: (@fakeauthor1, @fakeauthor2)
3+
Package Name: fake_package
4+
One-Line Description of Package: A fake python package
5+
Repository Link: https://gitlab.com/fakeauthor1/fake_package
6+
Version submitted: v1.0.0
7+
EiC: @fakeeic1
8+
Editor: @fakeeditor1
9+
Reviewers: @fakereviewer1 , @fakereviewer2, @fakereviewer3
10+
Reviews Expected By: fake date
11+
Archive: [![DOI](https://example.com/fakearchive)](https://zenodo.org/records/8415866)
12+
JOSS DOI: [![DOI](https://example.com/fakearchive)](https://doi.org/10.21105/joss.01450)
13+
Version accepted: 2.0.0 ([repo](https://example.com/fakeauthor1/fake_package/releases/tag/v2.0.0), [pypi](https://pypi.org/project/fake_project/2.0.0), [archive](https://example.com/fakearchive))
14+
Date accepted (month/day/year): 06/29/2024
15+
16+
---
17+
18+
## Scope
19+
20+
- [x] I agree to abide by [pyOpenSci's Code of Conduct][PyOpenSciCodeOfConduct] during the review process and in maintaining my package after should it be accepted.
21+
- [x] I have read and will commit to package maintenance after the review as per the [pyOpenSci Policies Guidelines][Commitment].
22+
(etc)
23+
24+
## Community Partnerships
25+
26+
- [ ] etc
27+
- [ ] aaaaaa

tests/integration/test_parse_issues.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,3 +143,21 @@ def test_multiple_editors_and_eic(process_issues, data_file):
143143
review = data_file("reviews/multiple_editors.txt", True)
144144
review = process_issues.parse_issue(review)
145145
assert review.package_name == "fake_package"
146+
147+
148+
def test_repository_host_github(process_issues, data_file):
    """A submission with a GitHub repository link is parsed as host ``github``."""
    raw_issue = data_file("reviews/github_submission.txt", True)
    parsed_review = process_issues.parse_issue(raw_issue)
    assert parsed_review.repository_host == "github"
155+
156+
157+
def test_repository_host_gitlab(process_issues, data_file):
    """A submission with a GitLab repository link is parsed as host ``gitlab``."""
    raw_issue = data_file("reviews/gitlab_submission.txt", True)
    parsed_review = process_issues.parse_issue(raw_issue)
    assert parsed_review.repository_host == "gitlab"

0 commit comments

Comments
 (0)