Skip to content

Commit 81b330d

Browse files
rreddy2 and rreddy15 authored
Adding GitHub Audit Log Extractor (#16)
* Adding Audit Log Extractor * updating events -> actions * formatting changes * formatting changes * updating tests * adding cov to makefile and splitting url to 2 lines * adding support for lookback_period and changing timestamp key * updating code to resolve tests * updating formatting * format * more formatting * updating pipeline * making changes based on comments * keeping everything consistent --------- Co-authored-by: rreddy15 <[email protected]>
1 parent 206acdb commit 81b330d

File tree

14 files changed

+241
-28
lines changed

14 files changed

+241
-28
lines changed

Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,6 @@ lint: fmt
2828
test:
2929
poetry run pytest
3030

31+
.PHONY: coverage
32+
coverage:
33+
poetry run coverage html

nodestream_github/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from .audit import GithubAuditLogExtractor
12
from .interpretations import (
23
RepositoryRelationshipInterpretation,
34
UserRelationshipInterpretation,
@@ -9,6 +10,7 @@
910
from .users import GithubUserExtractor
1011

1112
__all__ = (
13+
"GithubAuditLogExtractor",
1214
"GithubOrganizationsExtractor",
1315
"GithubPlugin",
1416
"GithubReposExtractor",

nodestream_github/audit.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
"""
2+
Nodestream Extractor that extracts audit logs from the GitHub REST API.
3+
4+
Developed using Enterprise Server 3.12
5+
https://docs.github.com/en/enterprise-server@3.12/rest?apiVersion=2022-11-28
6+
"""
7+
8+
from collections.abc import AsyncGenerator
9+
from typing import Any
10+
11+
from nodestream.pipeline import Extractor
12+
13+
from .client import GithubRestApiClient
14+
from .logging import get_plugin_logger
15+
from .types import GithubAuditLog
16+
17+
logger = get_plugin_logger(__name__)
18+
19+
20+
class GithubAuditLogExtractor(Extractor):
    """Extracts enterprise audit log entries from the GitHub REST API.

    In addition to the regular GitHub client parameters, the extractor
    accepts:

    enterprise_name: the enterprise whose audit log is read.
    actions: optional list of audit log action names used to filter the
        entries. The available actions are listed in the GitHub documentation:
        https://docs.github.com/en/enterprise-server@3.12/admin/monitoring-activity-in-your-enterprise/reviewing-audit-logs-for-your-enterprise/searching-the-audit-log-for-your-enterprise#search-based-on-the-action-performed
    lookback_period: optional mapping that can contain keys for days, months,
        and/or years as ints; it limits the entries to that period before now.

    When neither ``actions`` nor ``lookback_period`` is given, no filter is
    requested.
    """

    def __init__(
        self,
        enterprise_name: str,
        actions: list[str] | None = None,
        lookback_period: dict[str, int] | None = None,
        **github_client_kwargs: Any,
    ):
        self.enterprise_name = enterprise_name
        self.client = GithubRestApiClient(**github_client_kwargs)
        self.lookback_period = lookback_period
        # The client iterates over the actions, so "no filter" is stored as an
        # empty list instead of forwarding None.
        self.actions = list(actions) if actions else []

    async def extract_records(self) -> AsyncGenerator[GithubAuditLog]:
        """Yield each audit log entry with ``@timestamp`` renamed to ``timestamp``.

        Entries are produced in the order the client returns them. An entry
        that has no ``@timestamp`` key is yielded unchanged.
        """
        async for audit in self.client.fetch_enterprise_audit_log(
            self.enterprise_name, self.actions, self.lookback_period
        ):
            # Expose the value under a plain key so pipeline expressions can
            # refer to it as `timestamp`; skip the rename rather than fail
            # when the field is absent.
            if "@timestamp" in audit:
                audit["timestamp"] = audit.pop("@timestamp")
            yield audit

nodestream_github/client/githubclient.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,12 @@
66
import json
77
import logging
88
from collections.abc import AsyncGenerator
9+
from datetime import UTC, datetime
910
from enum import Enum
1011
from typing import Any
1112

1213
import httpx
14+
from dateutil.relativedelta import relativedelta
1315
from limits import RateLimitItem, RateLimitItemPerMinute
1416
from limits.aio.storage import MemoryStorage
1517
from limits.aio.strategies import MovingWindowRateLimiter, RateLimiter
@@ -328,6 +330,34 @@ async def fetch_all_organizations(self) -> AsyncGenerator[types.GithubOrg]:
328330
except httpx.HTTPError as e:
329331
_fetch_problem("all organizations", e)
330332

333+
async def fetch_enterprise_audit_log(
334+
self, enterprise_name: str, actions: list[str], lookback_period: dict[str, int]
335+
) -> AsyncGenerator[types.GithubAuditLog]:
336+
"""Fetches enterprise-wide audit log data
337+
338+
https://docs.github.com/en/enterprise-cloud@latest/rest/enterprise-admin/audit-log?apiVersion=2022-11-28#get-the-audit-log-for-an-enterprise
339+
"""
340+
try:
341+
# adding action-based filtering
342+
actions_phrase = " ".join(f"action:{action}" for action in actions)
343+
# adding lookback_period based filtering
344+
date_filter = (
345+
f" created:>={(datetime.now(tz=UTC) - relativedelta(**lookback_period))
346+
.strftime('%Y-%m-%d')}"
347+
if lookback_period
348+
else ""
349+
)
350+
search_phrase = f"{actions_phrase}{date_filter}"
351+
352+
params = {"phrase": search_phrase} if search_phrase else {}
353+
354+
async for audit in self._get_paginated(
355+
f"enterprises/{enterprise_name}/audit-log", params=params
356+
):
357+
yield audit
358+
except httpx.HTTPError as e:
359+
_fetch_problem("audit log", e)
360+
331361
async def fetch_full_org(self, org_login: str) -> types.GithubOrg | None:
332362
"""Fetches the complete org record.
333363
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
- implementation: nodestream_github:GithubAuditLogExtractor
2+
arguments:
3+
github_hostname: !config 'github_hostname'
4+
auth_token: !config 'auth_token'
5+
user_agent: !config 'user_agent'
6+
enterprise_name: 'test-enterprise'
7+
actions:
8+
- protected_branch.create
9+
- repo.download_zip
10+
lookback_period:
11+
days: 1
12+
13+
- implementation: nodestream.interpreting:Interpreter
14+
arguments:
15+
interpretations:
16+
- type: source_node
17+
node_type: BranchProtectionPolicyChange
18+
key:
19+
timestamp: !jmespath 'timestamp'
20+
actor: !jmespath 'actor'
21+
action: !jmespath 'action'
22+
- type: properties
23+
properties:
24+
org: !jmespath 'org'
25+
repo: !jmespath 'repo'
26+
created_at: !jmespath 'created_at'

nodestream_github/types/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from .github import (
2+
GithubAuditLog,
23
GithubOrg,
34
GithubOrgSummary,
45
GithubRepo,
@@ -23,6 +24,7 @@
2324
"GithubRepo",
2425
"GithubTeam",
2526
"GithubTeamSummary",
27+
"GithubAuditLog",
2628
"GithubUser",
2729
"HeaderTypes",
2830
"JSONType",

nodestream_github/types/github.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
Webhook: TypeAlias = JSONType
1212
GithubTeam: TypeAlias = JSONType
1313
GithubTeamSummary: TypeAlias = JSONType
14+
GithubAuditLog: TypeAlias = JSONType
1415

1516
LanguageRecord: TypeAlias = JSONType
1617
OrgRecord: TypeAlias = JSONType

tests/data/audit.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# Raw audit log entries in the shape used as the mocked API response body:
# the event time is carried under the "@timestamp" key.
GITHUB_AUDIT = [
    # A user being added to a team.
    {
        "@timestamp": 1606929874512,
        "action": "team.add_member",
        "actor": "octocat",
        "created_at": 1606929874512,
        "_document_id": "xJJFlFOhQ6b-5vaAFy9Rjw",
        "org": "octo-corp",
        "team": "octo-corp/example-team",
        "user": "monalisa",
    },
    # An organization being created.
    {
        "@timestamp": 1606507117008,
        "action": "org.create",
        "actor": "octocat",
        "created_at": 1606507117008,
        "_document_id": "Vqvg6kZ4MYqwWRKFDzlMoQ",
        "org": "octocat-test-org",
    },
    # A private repository being destroyed.
    {
        "@timestamp": 1605719148837,
        "action": "repo.destroy",
        "actor": "monalisa",
        "created_at": 1605719148837,
        "_document_id": "LwW2vpJZCDS-WUmo9Z-ifw",
        "org": "mona-org",
        "repo": "mona-org/mona-test-repo",
        "visibility": "private",
    },
]
31+
32+
33+
# Records the extractor is expected to yield for the raw audit fixture:
# identical entries, except the event time is keyed "timestamp" rather than
# "@timestamp".
GITHUB_EXPECTED_OUTPUT = [
    # A user being added to a team.
    {
        "timestamp": 1606929874512,
        "action": "team.add_member",
        "actor": "octocat",
        "created_at": 1606929874512,
        "_document_id": "xJJFlFOhQ6b-5vaAFy9Rjw",
        "org": "octo-corp",
        "team": "octo-corp/example-team",
        "user": "monalisa",
    },
    # An organization being created.
    {
        "timestamp": 1606507117008,
        "action": "org.create",
        "actor": "octocat",
        "created_at": 1606507117008,
        "_document_id": "Vqvg6kZ4MYqwWRKFDzlMoQ",
        "org": "octocat-test-org",
    },
    # A private repository being destroyed.
    {
        "timestamp": 1605719148837,
        "action": "repo.destroy",
        "actor": "monalisa",
        "created_at": 1605719148837,
        "_document_id": "LwW2vpJZCDS-WUmo9Z-ifw",
        "org": "mona-org",
        "repo": "mona-org/mona-test-repo",
        "visibility": "private",
    },
]

tests/mocks/githubrest.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,3 +189,10 @@ def get_repos_for_user(
189189
url=f"{self.base_url}/users/{user_login}/repos?per_page=100&{type_param}",
190190
**kwargs,
191191
)
192+
193+
def get_enterprise_audit_logs(self, **kwargs: dict[str, Any]):
    """Register a mocked response for the enterprise audit log endpoint.

    The mocked URL is fixed: enterprise ``test-enterprise``, 100 entries per
    page, and a search phrase for the ``protected_branch.create`` action.
    All keyword arguments are forwarded to ``add_response``.
    """
    endpoint = f"{self.base_url}/enterprises/test-enterprise/audit-log"
    query = "per_page=100&phrase=action:protected_branch.create"
    self.add_response(url=f"{endpoint}?{query}", **kwargs)

tests/test_audit.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import pytest
2+
3+
from nodestream_github import GithubAuditLogExtractor
4+
from tests.data.audit import GITHUB_AUDIT, GITHUB_EXPECTED_OUTPUT
5+
from tests.mocks.githubrest import (
6+
DEFAULT_HOSTNAME,
7+
DEFAULT_PER_PAGE,
8+
GithubHttpxMock,
9+
)
10+
11+
12+
@pytest.fixture
def audit_extractor() -> GithubAuditLogExtractor:
    """Build an audit log extractor for the test enterprise.

    The extractor targets the mock hostname, performs no retries, and filters
    on the single ``protected_branch.create`` action.
    """
    extractor_arguments = {
        "auth_token": "test-token",
        "github_hostname": DEFAULT_HOSTNAME,
        "user_agent": "test-agent",
        "max_retries": 0,
        "per_page": DEFAULT_PER_PAGE,
        "enterprise_name": "test-enterprise",
        "actions": ["protected_branch.create"],
    }
    return GithubAuditLogExtractor(**extractor_arguments)
23+
24+
25+
@pytest.mark.asyncio
async def test_get_audit(
    audit_extractor: GithubAuditLogExtractor, gh_rest_mock: GithubHttpxMock
):
    """The extractor yields every mocked entry with the timestamp key renamed."""
    # Serve the raw audit fixture from the mocked audit log endpoint.
    gh_rest_mock.get_enterprise_audit_logs(status_code=200, json=GITHUB_AUDIT)

    extracted = []
    async for record in audit_extractor.extract_records():
        extracted.append(record)

    assert extracted == GITHUB_EXPECTED_OUTPUT

0 commit comments

Comments
 (0)