1
- """
2
- This tools find all users in multiple organizations and print their last activity date.
1
+ """GitHub Organization Activity Tracker
2
+
3
+ This module tracks and reports the last activity of members across GitHub organizations.
4
+ It implements disk-based caching to minimize API requests and respect rate limits.
3
5
"""
4
6
5
7
import os
13
15
import diskcache
14
16
import json
15
17
import pathlib
16
- from typing import Optional
18
+ from typing import Optional , List , Dict
19
+ import argparse
17
20
18
21
orgs = [
19
22
"binder-examples" ,
48
51
CACHE_DIR = "github_cache"
49
52
cache = diskcache .Cache (CACHE_DIR )
50
53
51
- async def get_org_members (session : aiohttp .ClientSession , org : str ) -> list [dict ]:
52
- """Get all members for an organization with persistent caching"""
54
+ async def get_org_members (session : aiohttp .ClientSession , org : str ) -> List [Dict ]:
55
+ """Fetch all members of a GitHub organization with caching.
56
+
57
+ Parameters
58
+ ----------
59
+ session : aiohttp.ClientSession
60
+ The HTTP session to use for requests
61
+ org : str
62
+ The name of the GitHub organization
63
+
64
+ Returns
65
+ -------
66
+ List[Dict]
67
+ A list of dictionaries containing member information.
68
+ Each dictionary contains at least:
69
+ - 'login': str, the username
70
+ - 'id': int, the user ID
71
+ - 'type': str, usually 'User'
72
+
73
+ Notes
74
+ -----
75
+ Results are cached for 24 hours to minimize API requests.
76
+ Pagination is handled automatically (100 items per page).
77
+ """
53
78
cache_key = f"org_members_{ org } "
54
79
if cache_key in cache :
55
80
return cache [cache_key ]
@@ -68,11 +93,30 @@ async def get_org_members(session: aiohttp.ClientSession, org: str) -> list[dict
68
93
69
94
members .extend (page_members )
70
95
71
- cache .set (cache_key , members , expire = 3600 * 24 ) # Cache for 24 hours
96
+ cache .set (cache_key , members , expire = 3600 * 24 )
72
97
return members
73
98
74
99
async def get_user_activity (session : aiohttp .ClientSession , username : str ) -> Optional [datetime ]:
75
- """Get the last activity date for a user with persistent caching"""
100
+ """Fetch the last public activity date for a GitHub user.
101
+
102
+ Parameters
103
+ ----------
104
+ session : aiohttp.ClientSession
105
+ The HTTP session to use for requests
106
+ username : str
107
+ The GitHub username to check
108
+
109
+ Returns
110
+ -------
111
+ Optional[datetime]
112
+ The datetime of the user's last public activity,
113
+ or None if no activity was found or an error occurred
114
+
115
+ Notes
116
+ -----
117
+ Successful lookups are cached for 24 hours to minimize API requests; a None result is not cached and is fetched again on the next call.
118
+ Only public events are considered for activity tracking.
119
+ """
76
120
cache_key = f"user_activity_{ username } "
77
121
if cache_key in cache :
78
122
return cache [cache_key ]
@@ -83,14 +127,42 @@ async def get_user_activity(session: aiohttp.ClientSession, username: str) -> Op
83
127
events = await response .json ()
84
128
if events :
85
129
last_activity = datetime .fromisoformat (events [0 ]["created_at" ].replace ('Z' , '+00:00' ))
86
- cache .set (cache_key , last_activity , expire = 3600 * 24 ) # Cache for 24 hours
130
+ cache .set (cache_key , last_activity , expire = 3600 * 24 )
87
131
return last_activity
88
132
return None
89
133
134
+ def clear_cache () -> None :
135
+ """Clear the disk cache.
136
+
137
+ Removes all cached data, forcing fresh API requests on next run.
138
+
139
+ Notes
140
+ -----
141
+ This is useful when you want to ensure you're getting the latest data
142
+ or if the cache becomes corrupted.
143
+ """
144
+ if pathlib .Path (CACHE_DIR ).exists ():
145
+ cache .clear ()
146
+ print ("[green]Cache cleared successfully[/green]" )
147
+ else :
148
+ print ("[yellow]No cache directory found[/yellow]" )
149
+
90
150
async def main ():
151
+ """Main execution function.
152
+
153
+ Fetches and displays the last activity for all members across specified organizations.
154
+ Uses disk caching to minimize API requests and handles GitHub API rate limits.
155
+
156
+ Notes
157
+ -----
158
+ Each member is printed on one line with their last activity and the organizations
159
+ they belong to, with members sorted by their last activity date (most recent first).
160
+ """
91
161
# Add cache info at start
92
- if pathlib .Path (CACHE_DIR ).exists ():
93
- print (f"[blue]Using cache directory: { CACHE_DIR } [/blue]" )
162
+ cache_path = pathlib .Path (CACHE_DIR )
163
+ if cache_path .exists ():
164
+ cache_size = sum (f .stat ().st_size for f in cache_path .rglob ('*' ) if f .is_file ())
165
+ print (f"[blue]Using cache directory: { CACHE_DIR } ({ cache_size / 1024 / 1024 :.1f} MB)[/blue]" )
94
166
else :
95
167
print ("[yellow]Creating new cache directory[/yellow]" )
96
168
@@ -139,4 +211,11 @@ async def main():
139
211
print (f"{ username :<20} : Last activity { last_activity_ago } in orgs: { orgs_str } " )
140
212
141
213
if __name__ == "__main__" :
214
+ parser = argparse .ArgumentParser (description = "GitHub Organization Activity Tracker" )
215
+ parser .add_argument ('--clear-cache' , action = 'store_true' , help = 'Clear the cache before running' )
216
+ args = parser .parse_args ()
217
+
218
+ if args .clear_cache :
219
+ clear_cache ()
220
+
142
221
asyncio .run (main ())
0 commit comments