Skip to content

Commit 3dc674e

Browse files
authored
Merge pull request #25 from nodestream-proj/audit-log-bucketing
Adding functionality to bucket the audit log requests
2 parents 2ee2817 + 06be85f commit 3dc674e

File tree

5 files changed

+251
-105
lines changed

5 files changed

+251
-105
lines changed

nodestream_github/audit.py

Lines changed: 87 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@
66
"""
77

88
from collections.abc import AsyncGenerator
9+
from datetime import UTC, datetime, timedelta
910
from typing import Any
1011

12+
from dateutil.relativedelta import relativedelta
1113
from nodestream.pipeline import Extractor
1214

1315
from .client import GithubRestApiClient
@@ -17,6 +19,77 @@
1719
logger = get_plugin_logger(__name__)
1820

1921

22+
def generate_date_range(lookback_period: dict[str, int]) -> list[str]:
23+
"""
24+
Generate a list of date strings in YYYY-MM-DD format for
25+
the given lookback period.
26+
"""
27+
if not lookback_period:
28+
return []
29+
30+
end_date = datetime.now(tz=UTC).date()
31+
start_date = (datetime.now(tz=UTC) - relativedelta(**lookback_period)).date()
32+
33+
delta_days = (end_date - start_date).days + 1
34+
return [
35+
(start_date + timedelta(days=i)).strftime("%Y-%m-%d") for i in range(delta_days)
36+
]
37+
38+
39+
def build_search_phrase(
40+
actions: list[str],
41+
actors: list[str],
42+
exclude_actors: list[str],
43+
target_date: str | None = None,
44+
) -> str:
45+
# adding action-based filtering
46+
actions_phrase = ""
47+
if actions:
48+
actions_phrase = " ".join(f"action:{action}" for action in actions)
49+
50+
# adding date-based filtering for a specific date
51+
date_filter = f"created:{target_date}" if target_date else ""
52+
53+
# adding actor-based filtering
54+
actors_phrase = ""
55+
if actors:
56+
actors_phrase = " ".join(f"actor:{actor}" for actor in actors)
57+
58+
# adding exclude_actors based filtering
59+
exclude_actors_phrase = ""
60+
if exclude_actors:
61+
exclude_actors_phrase = " ".join(f"-actor:{actor}" for actor in exclude_actors)
62+
return " ".join(
63+
section
64+
for section in [
65+
actions_phrase,
66+
date_filter,
67+
actors_phrase,
68+
exclude_actors_phrase,
69+
]
70+
if section
71+
).strip()
72+
73+
74+
def validate_lookback_period(lookback_period: dict[str, int]) -> dict[str, int]:
    """
    Return a copy of ``lookback_period`` with every value coerced to a
    positive ``int``.

    Keys are passed through unchanged. Any failure while processing the
    mapping (a value that ``int()`` rejects, a value that is zero or
    negative, an argument without ``.items()``) is re-raised as a
    ``ValueError`` with the original exception attached as its cause.
    """
    cleaned: dict[str, int] = {}
    try:
        for unit, raw in lookback_period.items():
            amount = int(raw)
            if amount <= 0:
                negative_value_exception_msg = (
                    f"Lookback period values must be positive: {raw}"
                )
                raise ValueError(negative_value_exception_msg)
            cleaned[unit] = amount
    except Exception as e:
        # Callers see a single, uniform error; the detail lives in __cause__.
        exception_msg = "Formatting lookback period failed"
        raise ValueError(exception_msg) from e
    return cleaned
91+
92+
2093
class GithubAuditLogExtractor(Extractor):
2194
"""
2295
Extracts audit logs from the GitHub REST API.
@@ -46,12 +119,17 @@ def __init__(
46119
self.exclude_actors = exclude_actors
47120

48121
async def extract_records(self) -> AsyncGenerator[GithubAuditLog]:
    """
    Yield enterprise audit-log records, issuing one query per date bucket.

    ``generate_date_range`` turns ``self.lookback_period`` into a list of
    dates; a separate search phrase is built and fetched for each one.
    When that list is empty, a single query without a date filter is made.
    Every record has its ``"@timestamp"`` key renamed to ``"timestamp"``
    before it is yielded.
    """
    # An empty range falls back to one pass with no date restriction.
    target_dates = generate_date_range(self.lookback_period) or [None]
    for day in target_dates:
        phrase = build_search_phrase(
            actions=self.actions,
            actors=self.actors,
            exclude_actors=self.exclude_actors,
            target_date=day,
        )
        records = self.client.fetch_enterprise_audit_log(
            self.enterprise_name,
            phrase,
        )
        async for record in records:
            record["timestamp"] = record.pop("@timestamp")
            yield record

nodestream_github/client/githubclient.py

Lines changed: 2 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,10 @@
66
import json
77
import logging
88
from collections.abc import AsyncGenerator
9-
from datetime import UTC, datetime
109
from enum import Enum
1110
from typing import Any
1211

1312
import httpx
14-
from dateutil.relativedelta import relativedelta
1513
from limits import RateLimitItem, RateLimitItemPerMinute
1614
from limits.aio.storage import MemoryStorage
1715
from limits.aio.strategies import MovingWindowRateLimiter, RateLimiter
@@ -74,68 +72,6 @@ def _fetch_problem(title: str, e: httpx.HTTPError):
7472
logger.warning("Problem fetching %s", title, exc_info=e, stacklevel=2)
7573

7674

77-
def validate_lookback_period(lookback_period: dict[str, int]) -> dict[str, int]:
    """
    Coerce each lookback value to ``int`` and require it to be positive.

    Returns a new dict with the same keys and the converted values. Any
    exception raised while converting or checking the values is wrapped
    in a ``ValueError`` whose cause is the original exception.
    """
    try:
        result: dict[str, int] = {}
        for key, original in lookback_period.items():
            number = int(original)
            if number <= 0:
                negative_value_exception_msg = (
                    f"Lookback period values must be positive: {original}"
                )
                raise ValueError(negative_value_exception_msg)
            result[key] = number
        return result
    except Exception as e:
        # Collapse every failure mode into one ValueError for callers.
        exception_msg = "Formatting lookback period failed"
        raise ValueError(exception_msg) from e
94-
95-
96-
def build_search_phrase(
97-
actions: list[str],
98-
actors: list[str],
99-
exclude_actors: list[str],
100-
lookback_period: dict[str, int],
101-
) -> str:
102-
# adding action-based filtering
103-
actions_phrase = ""
104-
if actions:
105-
actions_phrase = " ".join(f"action:{action}" for action in actions)
106-
107-
# adding lookback_period based filtering
108-
date_filter = ""
109-
if lookback_period:
110-
lookback_period = validate_lookback_period(lookback_period)
111-
date_filter = (
112-
f"created:>={(datetime.now(tz=UTC) - relativedelta(**lookback_period))
113-
.strftime('%Y-%m-%d')}"
114-
if lookback_period
115-
else ""
116-
)
117-
118-
# adding actor-based filtering
119-
actors_phrase = ""
120-
if actors:
121-
actors_phrase = " ".join(f"actor:{actor}" for actor in actors)
122-
123-
# adding exclude_actors based filtering
124-
exclude_actors_phrase = ""
125-
if exclude_actors:
126-
exclude_actors_phrase = " ".join(f"-actor:{actor}" for actor in exclude_actors)
127-
return " ".join(
128-
section
129-
for section in [
130-
actions_phrase,
131-
date_filter,
132-
actors_phrase,
133-
exclude_actors_phrase,
134-
]
135-
if section
136-
).strip()
137-
138-
13975
class GithubRestApiClient:
14076
def __init__(
14177
self,
@@ -402,25 +338,13 @@ async def fetch_all_organizations(self) -> AsyncGenerator[types.GithubOrg]:
402338
async def fetch_enterprise_audit_log(
403339
self,
404340
enterprise_name: str,
405-
actions: list[str],
406-
actors: list[str],
407-
exclude_actors: list[str],
408-
lookback_period: dict[str, int],
341+
search_phrase: str | None = None,
409342
) -> AsyncGenerator[types.GithubAuditLog]:
410343
"""Fetches enterprise-wide audit log data
411-
412-
https://docs.github.com/en/enterprise-cloud@latest/rest/enterprise-admin/audit-log?apiVersion=2022-11-28#get-the-audit-log-for-an-enterprise
344+
https://docs.github.com/en/enterprise-cloud@latest/rest/enterprise-admin/audit-log?apiVersion=2022-11-28#get-the-audit-log-for-an-enterprise
413345
"""
414346
try:
415-
search_phrase = build_search_phrase(
416-
actions=actions,
417-
actors=actors,
418-
exclude_actors=exclude_actors,
419-
lookback_period=lookback_period,
420-
)
421-
422347
params = {"phrase": search_phrase} if search_phrase else {}
423-
424348
async for audit in self._get_paginated(
425349
f"enterprises/{enterprise_name}/audit-log", params=params
426350
):

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "nodestream-plugin-github"
3-
version = "0.14.1-beta.4"
3+
version = "0.14.1-beta.6"
44
description = ""
55
authors = [
66
"Jon Bristow <[email protected]>",

tests/client/test_githubclient.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22
import pytest
33
from pytest_httpx import HTTPXMock
44

5-
from nodestream_github.client.githubclient import GithubRestApiClient, RateLimitedError
5+
from nodestream_github.client.githubclient import (
6+
GithubRestApiClient,
7+
RateLimitedError,
8+
)
69
from tests.mocks.githubrest import DEFAULT_BASE_URL, DEFAULT_HOSTNAME
710

811

0 commit comments

Comments
 (0)