Skip to content

Commit 2ee2817

Browse files
authored
Merge pull request #23 from nodestream-proj/audit-log-actor-filter
Actor-based Filtering for GH AuditLogExtractor
2 parents 121d978 + 0f9a5d0 commit 2ee2817

File tree

7 files changed

+316
-30
lines changed

7 files changed

+316
-30
lines changed

nodestream_github/audit.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,29 +20,38 @@
2020
class GithubAuditLogExtractor(Extractor):
2121
"""
2222
Extracts audit logs from the GitHub REST API.
23-
You can pass the enterprise_name, actions and lookback_period to the extractor
24-
along with the regular GitHub parameters.
23+
You can pass the enterprise_name, actions, actors, exclude_actors
24+
and lookback_period to the extractor along with the regular
25+
GitHub parameters.
2526
2627
lookback_period can contain keys for days, months, and/or years as ints
27-
actions can be found in the GitHub documentation
28+
actions, and actors/exclude_actors can be found in the GitHub documentation
2829
https://docs.github.com/en/enterprise-cloud@latest/admin/monitoring-activity-in-your-enterprise/reviewing-audit-logs-for-your-enterprise/searching-the-audit-log-for-your-enterprise#search-based-on-the-action-performed
2930
"""
3031

3132
def __init__(
3233
self,
3334
enterprise_name: str,
3435
actions: list[str] | None = None,
36+
actors: list[str] | None = None,
37+
exclude_actors: list[str] | None = None,
3538
lookback_period: dict[str, int] | None = None,
3639
**github_client_kwargs: Any | None,
3740
):
3841
self.enterprise_name = enterprise_name
3942
self.client = GithubRestApiClient(**github_client_kwargs)
4043
self.lookback_period = lookback_period
4144
self.actions = actions
45+
self.actors = actors
46+
self.exclude_actors = exclude_actors
4247

4348
async def extract_records(self) -> AsyncGenerator[GithubAuditLog]:
4449
async for audit in self.client.fetch_enterprise_audit_log(
45-
self.enterprise_name, self.actions, self.lookback_period
50+
self.enterprise_name,
51+
self.actions,
52+
self.actors,
53+
self.exclude_actors,
54+
self.lookback_period,
4655
):
4756
audit["timestamp"] = audit.pop("@timestamp")
4857
yield audit

nodestream_github/client/githubclient.py

Lines changed: 80 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,68 @@ def _fetch_problem(title: str, e: httpx.HTTPError):
7474
logger.warning("Problem fetching %s", title, exc_info=e, stacklevel=2)
7575

7676

77+
def validate_lookback_period(lookback_period: dict[str, int]) -> dict[str, int]:
    """Coerce every lookback period value to a strictly positive int.

    Keys are passed through untouched; only the values are checked. Values
    may be anything ``int()`` accepts (for example the string ``"3"``).

    Args:
        lookback_period: Mapping of period name to amount.

    Returns:
        A new dict with the same keys and every value converted to ``int``.

    Raises:
        ValueError: If a value cannot be converted to an int, is zero or
            negative, or if ``lookback_period`` is not a mapping. The
            original error is chained as ``__cause__``.
    """

    def validate_positive_int(value: int) -> int:
        converted = int(value)
        if converted <= 0:
            negative_value_exception_msg = (
                f"Lookback period values must be positive: {value}"
            )
            raise ValueError(negative_value_exception_msg)
        return converted

    try:
        return {k: validate_positive_int(v) for k, v in lookback_period.items()}
    except (TypeError, ValueError, OverflowError, AttributeError) as e:
        # Only the failures int() / .items() can produce are wrapped, so an
        # unrelated programming error is not disguised as a config problem.
        exception_msg = "Formatting lookback period failed"
        raise ValueError(exception_msg) from e
94+
95+
96+
def build_search_phrase(
97+
actions: list[str],
98+
actors: list[str],
99+
exclude_actors: list[str],
100+
lookback_period: dict[str, int],
101+
) -> str:
102+
# adding action-based filtering
103+
actions_phrase = ""
104+
if actions:
105+
actions_phrase = " ".join(f"action:{action}" for action in actions)
106+
107+
# adding lookback_period based filtering
108+
date_filter = ""
109+
if lookback_period:
110+
lookback_period = validate_lookback_period(lookback_period)
111+
date_filter = (
112+
f"created:>={(datetime.now(tz=UTC) - relativedelta(**lookback_period))
113+
.strftime('%Y-%m-%d')}"
114+
if lookback_period
115+
else ""
116+
)
117+
118+
# adding actor-based filtering
119+
actors_phrase = ""
120+
if actors:
121+
actors_phrase = " ".join(f"actor:{actor}" for actor in actors)
122+
123+
# adding exclude_actors based filtering
124+
exclude_actors_phrase = ""
125+
if exclude_actors:
126+
exclude_actors_phrase = " ".join(f"-actor:{actor}" for actor in exclude_actors)
127+
return " ".join(
128+
section
129+
for section in [
130+
actions_phrase,
131+
date_filter,
132+
actors_phrase,
133+
exclude_actors_phrase,
134+
]
135+
if section
136+
).strip()
137+
138+
77139
class GithubRestApiClient:
78140
def __init__(
79141
self,
@@ -240,6 +302,13 @@ async def _get_paginated(
240302
query_params.update(params)
241303

242304
while url is not None:
305+
if "&page=100" in url:
306+
logger.warning(
307+
"The GithubAPI has reached the maximum page size "
308+
"of 100. The returned data may be incomplete for request: %s",
309+
url,
310+
)
311+
243312
response = await self._get_retrying(
244313
url, headers=headers, params=query_params
245314
)
@@ -331,23 +400,24 @@ async def fetch_all_organizations(self) -> AsyncGenerator[types.GithubOrg]:
331400
_fetch_problem("all organizations", e)
332401

333402
async def fetch_enterprise_audit_log(
334-
self, enterprise_name: str, actions: list[str], lookback_period: dict[str, int]
403+
self,
404+
enterprise_name: str,
405+
actions: list[str],
406+
actors: list[str],
407+
exclude_actors: list[str],
408+
lookback_period: dict[str, int],
335409
) -> AsyncGenerator[types.GithubAuditLog]:
336410
"""Fetches enterprise-wide audit log data
337411
338412
https://docs.github.com/en/enterprise-cloud@latest/rest/enterprise-admin/audit-log?apiVersion=2022-11-28#get-the-audit-log-for-an-enterprise
339413
"""
340414
try:
341-
# adding action-based filtering
342-
actions_phrase = " ".join(f"action:{action}" for action in actions)
343-
# adding lookback_period based filtering
344-
date_filter = (
345-
f" created:>={(datetime.now(tz=UTC) - relativedelta(**lookback_period))
346-
.strftime('%Y-%m-%d')}"
347-
if lookback_period
348-
else ""
415+
search_phrase = build_search_phrase(
416+
actions=actions,
417+
actors=actors,
418+
exclude_actors=exclude_actors,
419+
lookback_period=lookback_period,
349420
)
350-
search_phrase = f"{actions_phrase}{date_filter}"
351421

352422
params = {"phrase": search_phrase} if search_phrase else {}
353423

poetry.lock

Lines changed: 16 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "nodestream-plugin-github"
3-
version = "0.14.1-beta.3"
3+
version = "0.14.1-beta.4"
44
description = ""
55
authors = [
66
"Jon Bristow <[email protected]>",
@@ -17,6 +17,7 @@ nodestream = "^0.14"
1717
limits = "5.2.0"
1818
tenacity = "^9.0.0"
1919
httpx = ">=0.27,<0.28"
20+
freezegun = "^1.5.4"
2021

2122
[tool.poetry.group.dev.dependencies]
2223
ruff = "^0.11.0"

tests/client/test_githubclient.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,45 @@ async def test_pagination(httpx_mock: HTTPXMock):
8585
assert items == ["a", "b", "c", "d"]
8686

8787

88+
@pytest.mark.asyncio
async def test_pagination_truncate_warning(
    httpx_mock: HTTPXMock, caplog: pytest.LogCaptureFixture
):
    """Check that following a ``page=100`` link logs the truncation warning.

    The first mocked response advertises ``page=100`` as its ``next`` link.
    The test asserts that the items of both pages are still yielded and that
    the "may be incomplete" warning appears in the captured log.
    """
    client = GithubRestApiClient(
        auth_token="test-auth-token",
        github_hostname=DEFAULT_HOSTNAME,
        user_agent="test-user-agent",
        max_retries=0,
        per_page=2,
    )

    next_page = f'<{DEFAULT_BASE_URL}/example?per_page=2&page=100>; rel="next"'
    # No "$" before the brace: that is JavaScript template syntax and would
    # put a literal "$" in front of the URL inside the Link header.
    first_page = f'<{DEFAULT_BASE_URL}/example?per_page=2&page=99>; rel="first"'
    httpx_mock.add_response(
        url=f"{DEFAULT_BASE_URL}/example?per_page=2",
        json=["a", "b"],
        is_reusable=False,
        headers={"link": f"{next_page}, {first_page}"},
    )
    httpx_mock.add_response(
        url=f"{DEFAULT_BASE_URL}/example?per_page=2&page=100",
        json=["c", "d"],
        is_reusable=False,
    )

    with caplog.at_level("WARNING"):
        items = [item async for item in client._get_paginated("example")]

    assert items == ["a", "b", "c", "d"]

    # Test that the warning message was logged
    expected_warning = (
        "The GithubAPI has reached the maximum page size of 100. "
        "The returned data may be incomplete"
    )
    assert expected_warning in caplog.text
125+
126+
88127
def test_all_null_args():
89128
# noinspection PyTypeChecker
90129
assert GithubRestApiClient(auth_token=None, github_hostname=None)

tests/mocks/githubrest.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# noinspection PyProtectedMember
2-
from typing import Any
2+
from typing import Any, Optional
33

44
from pytest_httpx import HTTPXMock
55

@@ -192,9 +192,9 @@ def get_repos_for_user(
192192
**kwargs,
193193
)
194194

195-
def get_enterprise_audit_logs(self, **kwargs: Any):
196-
url = (
197-
f"{self.base_url}/enterprises/test-enterprise"
198-
f"/audit-log?per_page=100&phrase=action:protected_branch.create"
199-
)
195+
def get_enterprise_audit_logs(
    self, *, search_phrase: Optional[str] = None, **kwargs: Any
):
    """Add a response for the ``test-enterprise`` audit-log URL.

    The URL always carries ``per_page=<self.per_page>``; ``&phrase=...`` is
    appended only when ``search_phrase`` is non-empty.

    Args:
        search_phrase: Search phrase expected in the request. ``None`` (the
            default) or an empty string means no ``phrase`` parameter.
        **kwargs: Forwarded unchanged to ``self.add_response``.
    """
    url = f"{self.base_url}/enterprises/test-enterprise/audit-log"
    url += f"?per_page={self.per_page}"
    if search_phrase:
        url += f"&phrase={search_phrase}"
    self.add_response(url=url, **kwargs)

0 commit comments

Comments
 (0)