Skip to content

Commit 121d978

Browse files
authored
feat[extractors-users]: add personal repo download toggle (#22)
* feat[extractors-users]: allow toggling downloading personal repos
* fix[extractors-users]: download full user data
* chore: update version
* fix: import formatting
1 parent 75bf7dd commit 121d978

File tree

15 files changed

+606
-590
lines changed

15 files changed

+606
-590
lines changed

.github/workflows/release.yaml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ name: Release
22

33
# Trigger the workflow on a release event.
44
# This will trigger the workflow when a release is published - so draft releases will not trigger the workflow.
5-
on:
5+
on:
66
release:
7-
types: [published]
7+
types: [ published ]
88

99
jobs:
1010
publish:
@@ -18,8 +18,8 @@ jobs:
1818
permissions:
1919
id-token: write
2020
attestations: write
21-
22-
# Use the Github Actions Environment to isolate the workflow from the rest of the repository.
21+
22+
# Use the GitHub Actions Environment to isolate the workflow from the rest of the repository.
2323
# See here: https://docs.github.com/en/actions/deployment/targeting-different-environments/using-environments-for-deployment
2424
environment:
2525
name: pypi
@@ -35,9 +35,9 @@ jobs:
3535

3636
# Set up Python 3.12 to build the package.
3737
# Python version here does not really matter as long as it works with
38-
# poetry because its simply building the package. We've confirmed functionality
38+
# poetry because it's simply building the package. We've confirmed functionality
3939
# with CI testing before this step.
40-
- name: Set up Python 3.12
40+
- name: Set up Python 3.12
4141
uses: actions/setup-python@v5
4242
with:
4343
python-version: 3.12
@@ -68,7 +68,7 @@ jobs:
6868
name: python-package-distribution
6969
path: dist/
7070

71-
# We are then going to sign the package using Github's Attest Build Provenance action.
71+
# We are then going to sign the package using GitHub's Attest Build Provenance action.
7272
# This action will sign the package and upload the signature to the GitHub release.
7373
# This is to ensure that the package is verified and trusted by the user.
7474
- uses: actions/attest-build-provenance@v2

nodestream_github/client/githubclient.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,7 @@ async def fetch_full_org(self, org_login: str) -> types.GithubOrg | None:
373373
return await self._get_item(f"orgs/{org_login}")
374374
except httpx.HTTPError as e:
375375
_fetch_problem(f"full organization info for {org_login}", e)
376+
return None
376377

377378
async def fetch_repos_for_user(
378379
self,
@@ -542,6 +543,7 @@ async def fetch_team(self, *, org_login: str, slug: str) -> types.GithubTeam | N
542543
return await self._get_item(f"orgs/{org_login}/teams/{slug}")
543544
except httpx.HTTPError as e:
544545
_fetch_problem(f"full team info for {org_login}/{slug}", e)
546+
return None
545547

546548
async def fetch_members_for_team(
547549
self,
@@ -591,3 +593,15 @@ async def fetch_repos_for_team(
591593
yield repo
592594
except httpx.HTTPError as e:
593595
_fetch_problem(f"repos for team {org_login}/{slug}", e)
596+
597+
async def fetch_user(self, *, username: str) -> types.GithubUser | None:
598+
"""
599+
Provides publicly available information about someone with a GitHub account.
600+
601+
https://docs.github.com/en/enterprise-cloud@latest/rest/users/users?apiVersion=2022-11-28#get-a-user
602+
"""
603+
try:
604+
return await self._get_item(f"users/{username}")
605+
except httpx.HTTPError as e:
606+
_fetch_problem(f"full user info for {username}", e)
607+
return None

nodestream_github/teams.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ async def _fetch_members(self, team: GithubTeam) -> AsyncGenerator[SimplifiedUse
5757

5858
async def _fetch_team(
5959
self, login: str, team_summary: GithubTeamSummary
60-
) -> GithubTeam:
60+
) -> GithubTeam | None:
6161
team = await self.client.fetch_team(org_login=login, slug=team_summary["slug"])
6262
if not team:
6363
return None

nodestream_github/types/github.py

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,22 @@
33
JSONType: TypeAlias = (
44
dict[str, "JSONType"] | list["JSONType"] | str | int | float | bool | None
55
)
6+
JSONObject: TypeAlias = dict[str, JSONType]
67

7-
GithubOrgSummary: TypeAlias = JSONType
8-
GithubOrg: TypeAlias = JSONType
9-
GithubRepo: TypeAlias = JSONType
10-
GithubUser: TypeAlias = JSONType
11-
Webhook: TypeAlias = JSONType
12-
GithubTeam: TypeAlias = JSONType
13-
GithubTeamSummary: TypeAlias = JSONType
14-
GithubAuditLog: TypeAlias = JSONType
8+
GithubOrgSummary: TypeAlias = JSONObject
9+
GithubOrg: TypeAlias = JSONObject
10+
GithubRepo: TypeAlias = JSONObject
11+
GithubUser: TypeAlias = JSONObject
12+
Webhook: TypeAlias = JSONObject
13+
GithubTeam: TypeAlias = JSONObject
14+
GithubTeamSummary: TypeAlias = JSONObject
15+
GithubAuditLog: TypeAlias = JSONObject
1516

16-
LanguageRecord: TypeAlias = JSONType
17-
OrgRecord: TypeAlias = JSONType
18-
RepositoryRecord: TypeAlias = JSONType
19-
TeamRecord: TypeAlias = JSONType
20-
UserRecord: TypeAlias = JSONType
17+
LanguageRecord: TypeAlias = JSONObject
18+
OrgRecord: TypeAlias = JSONObject
19+
RepositoryRecord: TypeAlias = JSONObject
20+
TeamRecord: TypeAlias = JSONObject
21+
UserRecord: TypeAlias = JSONObject
2122

22-
SimplifiedRepo: TypeAlias = JSONType
23-
SimplifiedUser: TypeAlias = JSONType
23+
SimplifiedRepo: TypeAlias = JSONObject
24+
SimplifiedUser: TypeAlias = JSONObject

nodestream_github/users.py

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,26 +13,35 @@
1313
from .client import GithubRestApiClient
1414
from .interpretations.relationship.repository import simplify_repo
1515
from .logging import get_plugin_logger
16-
from .types import UserRecord
16+
from .types import SimplifiedUser, UserRecord
1717
from .types.enums import UserRepoType
1818

1919
logger = get_plugin_logger(__name__)
2020

2121

2222
class GithubUserExtractor(Extractor):
23-
def __init__(self, **github_client_kwargs: Any):
23+
def __init__(self, *, include_repos: bool = True, **github_client_kwargs: Any):
24+
self.include_repos = include_repos is True # handle None
2425
self.client = GithubRestApiClient(**github_client_kwargs)
2526

2627
async def extract_records(self) -> AsyncGenerator[UserRecord]:
2728
"""Scrapes the GitHub REST api for all users and converts them to records."""
28-
async for user in self.client.fetch_all_users():
29-
login = user["login"]
30-
user["repositories"] = [
31-
simplify_repo(repo)
32-
async for repo in self.client.fetch_repos_for_user(
33-
user_login=login,
34-
repo_type=UserRepoType.OWNER,
35-
)
36-
]
29+
async for user_short in self.client.fetch_all_users():
30+
login = user_short["login"]
31+
user = await self.client.fetch_user(username=login)
32+
if user is None:
33+
continue
34+
if self.include_repos:
35+
logger.debug("including repos for %s", user)
36+
user["repositories"] = await self._user_repos(login=login)
3737
logger.debug("yielded GithubUser{login=%s}", login)
3838
yield user
39+
40+
async def _user_repos(self, *, login: str) -> list[SimplifiedUser]:
41+
return [
42+
simplify_repo(repo)
43+
async for repo in self.client.fetch_repos_for_user(
44+
user_login=login,
45+
repo_type=UserRepoType.OWNER,
46+
)
47+
]

0 commit comments

Comments
 (0)