Skip to content

Commit 11a9aa0

Browse files
authored
Merge to 14 (#15)
* fix: handle missing key more elegantly (#11) * fix: handle missing key more elegantly * fix: simplify type * Update version * Allow skipping the extraction of organization members and/or repositories. (#13) * feat: allow skipping org repos or members * chore: refactor args * fix: user-repo gathering should only import owned (#9) * fix: user-repo gathering should only import owned * Upgrade to nodestream 0.14.0 (#10) * chore: upgrade to newest nodestream * chore: bump version * chore: merge/rebase * fix: tests * chore: poetry update
1 parent 3975d0b commit 11a9aa0

File tree

14 files changed

+570
-283
lines changed

14 files changed

+570
-283
lines changed

nodestream_github/client/githubclient.py

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424
import nodestream_github.types as types
2525
from nodestream_github.logging import get_plugin_logger
26-
from nodestream_github.types.enums import CollaboratorAffiliation
26+
from nodestream_github.types import enums
2727

2828
DEFAULT_REQUEST_RATE_LIMIT_PER_MINUTE = int(13000 / 60)
2929
DEFAULT_MAX_RETRIES = 20
@@ -261,7 +261,10 @@ async def _get_item(
261261
return {}
262262

263263
async def fetch_repos_for_org(
264-
self, org_login: str, repo_type: str | None = None
264+
self,
265+
*,
266+
org_login: str,
267+
repo_type: enums.OrgRepoType | None = None,
265268
) -> AsyncGenerator[types.GithubRepo]:
266269
"""Fetches repositories for the specified organization.
267270
@@ -287,7 +290,10 @@ async def fetch_repos_for_org(
287290
_fetch_problem(f"repos for org {org_login}", e)
288291

289292
async def fetch_members_for_org(
290-
self, org_login: str, role: str | None = None
293+
self,
294+
*,
295+
org_login: str,
296+
role: enums.OrgMemberRole | None = None,
291297
) -> AsyncGenerator[types.GithubUser]:
292298
"""Fetch all users who are members of an organization.
293299
@@ -339,8 +345,9 @@ async def fetch_full_org(self, org_login: str) -> types.GithubOrg | None:
339345

340346
async def fetch_repos_for_user(
341347
self,
348+
*,
342349
user_login: str,
343-
repo_type: str | None = None,
350+
repo_type: enums.UserRepoType | None = None,
344351
) -> AsyncGenerator[types.GithubRepo]:
345352
"""Fetches repositories for a user.
346353
@@ -361,7 +368,10 @@ async def fetch_repos_for_user(
361368
_fetch_problem(f"repos for user {user_login}", e)
362369

363370
async def fetch_languages_for_repo(
364-
self, owner_login: str, repo_name: str
371+
self,
372+
*,
373+
owner_login: str,
374+
repo_name: str,
365375
) -> AsyncGenerator[str]:
366376
"""Fetch languages for the specified repository.
367377
@@ -380,7 +390,10 @@ async def fetch_languages_for_repo(
380390
_fetch_problem(f"languages for repo {owner_login}/{repo_name}", e)
381391

382392
async def fetch_webhooks_for_repo(
383-
self, owner_login: str, repo_name: str
393+
self,
394+
*,
395+
owner_login: str,
396+
repo_name: str,
384397
) -> AsyncGenerator[types.Webhook]:
385398
"""Try to get types.webhook data for this repo.
386399
@@ -402,7 +415,7 @@ async def fetch_collaborators_for_repo(
402415
*,
403416
owner_login: str,
404417
repo_name: str,
405-
affiliation: CollaboratorAffiliation,
418+
affiliation: enums.CollaboratorAffiliation,
406419
) -> AsyncGenerator[types.GithubUser]:
407420
"""Try to get collaborator data for this repo.
408421
@@ -469,7 +482,9 @@ async def fetch_all_users(self) -> AsyncGenerator[types.GithubUser]:
469482
_fetch_problem("all users", e)
470483

471484
async def fetch_teams_for_org(
472-
self, org_login: str
485+
self,
486+
*,
487+
org_login: str,
473488
) -> AsyncGenerator[types.GithubTeamSummary]:
474489
"""Fetch all teams in an organization visible to the authenticated user.
475490
@@ -487,7 +502,7 @@ async def fetch_teams_for_org(
487502
except httpx.HTTPError as e:
488503
_fetch_problem(f"teams for org {org_login}", e)
489504

490-
async def fetch_team(self, org_login: str, slug: str) -> types.GithubTeam | None:
505+
async def fetch_team(self, *, org_login: str, slug: str) -> types.GithubTeam | None:
491506
"""Fetches a single team for an org by the team slug.
492507
493508
https://docs.github.com/en/[email protected]/rest/teams/teams?apiVersion=2022-11-28#get-a-team-by-name
@@ -498,7 +513,10 @@ async def fetch_team(self, org_login: str, slug: str) -> types.GithubTeam | None
498513
_fetch_problem(f"full team info for {org_login}/{slug}", e)
499514

500515
async def fetch_members_for_team(
501-
self, team_id: int, role: str | None = None
516+
self,
517+
*,
518+
team_id: int,
519+
role: enums.TeamMemberRole | None = None,
502520
) -> AsyncGenerator[types.GithubUser]:
503521
"""Fetch all users that have a given role for a specified team.
504522
@@ -523,7 +541,10 @@ async def fetch_members_for_team(
523541
_fetch_problem(f"members for team {team_id}", e)
524542

525543
async def fetch_repos_for_team(
526-
self, org_login: str, slug: str
544+
self,
545+
*,
546+
org_login: str,
547+
slug: str,
527548
) -> AsyncGenerator[types.GithubRepo]:
528549
"""Fetch all repos for a specified team visible to the authenticated user.
529550

nodestream_github/orgs.py

Lines changed: 37 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,24 @@
1414
from .interpretations.relationship.user import simplify_user
1515
from .logging import get_plugin_logger
1616
from .types import OrgRecord, SimplifiedUser
17+
from .types.enums import OrgMemberRole
1718

1819
logger = get_plugin_logger(__name__)
1920

2021

2122
class GithubOrganizationsExtractor(Extractor):
22-
def __init__(self, **github_client_kwargs: any):
23-
self.client = GithubRestApiClient(**github_client_kwargs)
23+
def __init__(
    self,
    *,
    include_members: bool | None = True,
    include_repositories: bool | None = True,
    **kwargs: any,
):
    """Configure which organization sections are extracted and build the client.

    Args:
        include_members: Members are fetched only when this is exactly
            ``True``; ``None`` or any other value turns the section off.
        include_repositories: Same rule as ``include_members``, applied to
            the organization's repositories.
        **kwargs: Forwarded unchanged to ``GithubRestApiClient``.
    """
    # Identity comparison on purpose: only a literal ``True`` enables a
    # section, so an explicit ``None`` behaves like "disabled".
    members_enabled = include_members is True
    repositories_enabled = include_repositories is True

    self.include_members = members_enabled
    self.include_repositories = repositories_enabled

    self.client = GithubRestApiClient(**kwargs)
2435

2536
async def extract_records(self) -> AsyncGenerator[OrgRecord]:
2637
async for org in self.client.fetch_all_organizations():
@@ -34,19 +45,34 @@ async def _extract_organization(self, login: str) -> OrgRecord | None:
3445
if not full_org:
3546
return None
3647

37-
full_org["members"] = [user async for user in self._fetch_all_members(login)]
48+
if self.include_members:
49+
full_org["members"] = [
50+
user async for user in self._fetch_all_members(login)
51+
]
52+
else:
53+
full_org["members"] = []
54+
55+
if self.include_repositories:
56+
full_org["repositories"] = [
57+
simplify_repo(
58+
repo, permission=full_org.get("default_repository_permission")
59+
)
60+
async for repo in self.client.fetch_repos_for_org(org_login=login)
61+
]
62+
else:
63+
full_org["repositories"] = []
3864

39-
full_org["repositories"] = [
40-
simplify_repo(
41-
repo, permission=full_org.get("default_repository_permission")
42-
)
43-
async for repo in self.client.fetch_repos_for_org(login)
44-
]
4565
return full_org
4666

4767
async def _fetch_all_members(self, login: str) -> AsyncGenerator[SimplifiedUser]:
    """Yield every admin, then every member, of an organization.

    Each user is passed through ``simplify_user`` and tagged with a
    ``"role"`` entry of ``"admin"`` or ``"member"``.

    Args:
        login: Login of the organization whose members are listed.
    """
    # Admins are queried before plain members; the label is kept as a
    # literal string so the emitted records do not depend on the enum type.
    role_queries = (
        (OrgMemberRole.ADMIN, "admin"),
        (OrgMemberRole.MEMBER, "member"),
    )
    for role_filter, role_label in role_queries:
        async for user in self.client.fetch_members_for_org(
            org_login=login,
            role=role_filter,
        ):
            yield simplify_user(user) | {"role": role_label}

nodestream_github/repos.py

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from .interpretations.relationship.user import simplify_user
1515
from .logging import get_plugin_logger
1616
from .types import GithubRepo, RepositoryRecord
17-
from .types.enums import CollaboratorAffiliation
17+
from .types.enums import CollaboratorAffiliation, OrgRepoType, UserRepoType
1818

1919
logger = get_plugin_logger(__name__)
2020

@@ -95,13 +95,15 @@ async def _extract_repo(self, repo: GithubRepo) -> RepositoryRecord:
9595
repo["languages"] = [
9696
{"name": lang}
9797
async for lang in self.client.fetch_languages_for_repo(
98-
owner["login"], repo["name"]
98+
owner_login=owner["login"],
99+
repo_name=repo["name"],
99100
)
100101
]
101102
repo["webhooks"] = [
102103
hook
103104
async for hook in self.client.fetch_webhooks_for_repo(
104-
owner["login"], repo["name"]
105+
owner_login=owner["login"],
106+
repo_name=repo["name"],
105107
)
106108
]
107109
repo["collaborators"] = []
@@ -126,12 +128,14 @@ async def _fetch_repos_by_org(self) -> AsyncGenerator[GithubRepo]:
126128
async for org in self.client.fetch_all_organizations():
127129
if self.collecting.org_public:
128130
async for repo in self.client.fetch_repos_for_org(
129-
org["login"], "public"
131+
org_login=org["login"],
132+
repo_type=OrgRepoType.PUBLIC,
130133
):
131134
yield repo
132135
if self.collecting.org_private:
133136
async for repo in self.client.fetch_repos_for_org(
134-
org["login"], "private"
137+
org_login=org["login"],
138+
repo_type=OrgRepoType.PRIVATE,
135139
):
136140
yield repo
137141

@@ -144,13 +148,8 @@ async def _fetch_repos_by_user(self) -> AsyncGenerator[GithubRepo]:
144148
repository permissions (read)
145149
"""
146150
async for user in self.client.fetch_all_users():
147-
if self.collecting.user_public:
148-
async for repo in self.client.fetch_repos_for_user(
149-
user["login"], "public"
150-
):
151-
yield repo
152-
if self.collecting.user_private:
153-
async for repo in self.client.fetch_repos_for_user(
154-
user["login"], "private"
155-
):
156-
yield repo
151+
async for repo in self.client.fetch_repos_for_user(
152+
user_login=user["login"],
153+
repo_type=UserRepoType.OWNER,
154+
):
155+
yield repo

nodestream_github/teams.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from .interpretations.relationship.user import simplify_user
1515
from .logging import get_plugin_logger
1616
from .types import GithubTeam, GithubTeamSummary, SimplifiedUser, TeamRecord
17+
from .types.enums import TeamMemberRole
1718

1819
logger = get_plugin_logger(__name__)
1920

@@ -25,7 +26,7 @@ def __init__(self, **github_client_kwargs: any):
2526
async def extract_records(self) -> AsyncGenerator[TeamRecord]:
2627
async for page in self.client.fetch_all_organizations():
2728
login = page["login"]
28-
async for team in self.client.fetch_teams_for_org(login):
29+
async for team in self.client.fetch_teams_for_org(org_login=login):
2930
team_record = await self._fetch_team(login, team)
3031
if team_record:
3132
logger.debug(
@@ -42,24 +43,31 @@ async def _fetch_members(self, team: GithubTeam) -> AsyncGenerator[SimplifiedUse
4243
team["slug"],
4344
)
4445

45-
async for member in self.client.fetch_members_for_team(team["id"], "member"):
46+
async for member in self.client.fetch_members_for_team(
47+
team_id=team["id"],
48+
role=TeamMemberRole.MEMBER,
49+
):
4650
yield member | {"role": "member"}
4751
async for member in self.client.fetch_members_for_team(
48-
team["id"], "maintainer"
52+
team_id=team["id"],
53+
role=TeamMemberRole.MAINTAINER,
4954
):
5055
yield member | {"role": "maintainer"}
5156

5257
async def _fetch_team(
    self, login: str, team_summary: GithubTeamSummary
) -> GithubTeam | None:
    """Fetch the full team for a summary and attach its members and repos.

    Args:
        login: Login of the organization that owns the team.
        team_summary: Team summary; only its ``"slug"`` is read here.

    Returns:
        The team mapping with ``"members"`` and ``"repos"`` populated, or
        ``None`` when the client returns nothing for the team.
    """
    team = await self.client.fetch_team(org_login=login, slug=team_summary["slug"])
    if not team:
        # Nothing usable came back; the caller skips falsy results.
        return None

    team["members"] = [
        simplify_user(member) async for member in self._fetch_members(team)
    ]
    # Every repo is recorded with the team's own permission value.
    team["repos"] = [
        simplify_repo(repo, permission=team.get("permission"))
        async for repo in self.client.fetch_repos_for_team(
            org_login=login,
            slug=team["slug"],
        )
    ]
    return team

nodestream_github/transformer/repo.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,33 @@ def __init__(
1919
full_name_key: str = "full_name",
2020
**kwargs: any,
2121
):
22-
2322
self.client = GithubRestApiClient(**kwargs)
2423
self.full_name_key = full_name_key
2524

2625
async def transform_record(
    self,
    record: types.GithubRepo,
) -> AsyncGenerator[types.GithubUser]:
    """Yield the users produced by ``_transform`` for one repository record.

    The repository's full name is read from ``record`` under
    ``self.full_name_key``. When that key is absent (or maps to ``None``)
    nothing is yielded and an info message is logged instead.

    Args:
        record: Repository record to transform.
    """
    logging.debug("Attempting to transform %s", record)

    repo_full_name = record.get(self.full_name_key)
    # Simplified before the presence check, matching the established order.
    simplified = simplify_repo(record)

    if repo_full_name is None:
        logging.info("No full_name key found in record %s", record)
        return

    async for collaborator in self._transform(repo_full_name, simplified):
        yield collaborator
39+
40+
async def _transform(
41+
self,
42+
full_name: str,
43+
simplified_repo: types.SimplifiedRepo,
44+
) -> AsyncGenerator[types.GithubUser]:
45+
(repo_owner, repo_name) = full_name.split("/")
46+
47+
logging.debug("Transforming repo %s/%s", repo_owner, repo_name)
48+
3549
async for collaborator in self.client.fetch_collaborators_for_repo(
3650
owner_login=repo_owner,
3751
repo_name=repo_name,

nodestream_github/types/enums.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,31 @@ class CollaboratorAffiliation(StrEnum):
55
ALL = "all"
66
OUTSIDE = "outside"
77
DIRECT = "direct"
8+
9+
10+
class OrgRepoType(StrEnum):
    """Repository-type filter used as ``repo_type`` in ``fetch_repos_for_org``.

    Members are strings, so each one compares equal to its lowercase value.
    """

    # NOTE(review): values look like the ``type`` options of GitHub's
    # "list organization repositories" endpoint — confirm against the API docs.
    ALL = "all"
    PUBLIC = "public"
    PRIVATE = "private"
    FORKS = "forks"
    SOURCES = "sources"
    MEMBER = "member"
    INTERNAL = "internal"
18+
19+
20+
class UserRepoType(StrEnum):
    """Repository-type filter used as ``repo_type`` in ``fetch_repos_for_user``.

    Members are strings, so each one compares equal to its lowercase value.
    """

    # NOTE(review): values look like the ``type`` options of GitHub's
    # "list repositories for a user" endpoint — confirm against the API docs.
    ALL = "all"
    OWNER = "owner"
    MEMBER = "member"
24+
25+
26+
class OrgMemberRole:
27+
ALL = "all"
28+
ADMIN = "admin"
29+
MEMBER = "member"
30+
31+
32+
class TeamMemberRole:
33+
ALL = "all"
34+
MAINTAINER = "maintainer"
35+
MEMBER = "member"

nodestream_github/users.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from .interpretations.relationship.repository import simplify_repo
1414
from .logging import get_plugin_logger
1515
from .types import UserRecord
16+
from .types.enums import UserRepoType
1617

1718
logger = get_plugin_logger(__name__)
1819

@@ -27,7 +28,10 @@ async def extract_records(self) -> AsyncGenerator[UserRecord]:
2728
login = user["login"]
2829
user["repositories"] = [
2930
simplify_repo(repo)
30-
async for repo in self.client.fetch_repos_for_user(login, "all")
31+
async for repo in self.client.fetch_repos_for_user(
32+
user_login=login,
33+
repo_type=UserRepoType.OWNER,
34+
)
3135
]
3236
logger.debug("yielded GithubUser{login=%s}", login)
3337
yield user

0 commit comments

Comments
 (0)