# This Python script accesses the GitHub API to extract the
# number of views and unique visitors for all public repos.
# Any public repo can be excluded by adding its name to the
# exclude_repos list in the script. Once the data has been
# extracted it will be appended to the traffic.csv file that is
# located in the data directory of the main branch. This file
# is scheduled to run once a day at 12:45am using cron. The
# scheduling code is located in .github/workflows/traffic.yml
| 10 | +import os |
| 11 | +import requests |
| 12 | +import csv |
| 13 | +from datetime import datetime, timedelta |
| 14 | +from github import Github |
| 15 | + |
def fetch_visitor_stats(repo_url, headers):
    """Return the daily view statistics for one repository.

    Calls the GitHub traffic API ``/traffic/views`` endpoint and returns
    the ``views`` list from the JSON payload — one entry per day, each a
    dict with ``timestamp``, ``count`` and ``uniques`` keys.

    Raises:
        Exception: if the API responds with a non-200 status code.
    """
    response = requests.get(f"{repo_url}/traffic/views", headers=headers)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch Visitor stats for repository. Error: {response.status_code}")
    return response.json()['views']
| 24 | + |
def fetch_git_clones_stats(repo_url, headers):
    """Return the daily clone statistics for one repository.

    Calls the GitHub traffic API ``/traffic/clones`` endpoint and returns
    the ``clones`` list from the JSON payload — one entry per day, each a
    dict with ``timestamp``, ``count`` and ``uniques`` keys.

    Raises:
        Exception: if the API responds with a non-200 status code.
    """
    response = requests.get(f"{repo_url}/traffic/clones", headers=headers)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch Git Clones stats for repository. Error: {response.status_code}")
    return response.json()['clones']
| 33 | + |
def update_csv_file(repo, file_path, data):
    """Append rows to a CSV file stored in a GitHub repository.

    Reads the current contents of *file_path* from *repo* (a PyGithub
    Repository object), appends each string in *data* as a new line, and
    commits the result back via the contents API.
    """
    current = repo.get_contents(file_path)
    lines = current.decoded_content.decode().splitlines()
    lines.extend(data)
    repo.update_file(file_path, "Appending data to CSV", '\n'.join(lines).encode(), current.sha)
| 42 | + |
def _daily_traffic(stats, target_date):
    """Return (count, uniques) for *target_date* from a traffic stats list.

    *stats* is the list returned by the traffic/views or traffic/clones
    endpoint: dicts with 'timestamp' (ISO-8601 Zulu), 'count' and
    'uniques' keys. Days with no traffic are simply absent from the list,
    so (0, 0) is returned when the date is not found.
    """
    for entry in stats:
        entry_date = datetime.strptime(entry['timestamp'], '%Y-%m-%dT%H:%M:%SZ').date()
        if entry_date == target_date:
            return entry['count'], entry['uniques']
    return 0, 0


def main():
    """Collect yesterday's traffic stats for every public repo in the
    organization and append one CSV row per repo to data/traffic.csv in
    the github-stats repository.

    Row format: date,repo,views,unique_visitors,clones,unique_cloners.
    """
    organization = "analyticsinmotion"
    token = os.environ.get('TOKEN')
    base_url = "https://api.github.com"
    # per_page=100 so orgs with more than GitHub's default page size of 30
    # repos are still fully covered in a single request. (Full pagination
    # would require following the Link response header.)
    repos_url = f"{base_url}/orgs/{organization}/repos?per_page=100"

    headers = {
        "Authorization": f"Token {token}",
        "Accept": "application/vnd.github.v3+json"
    }

    response = requests.get(repos_url, headers=headers)
    if response.status_code == 200:
        output = response.json()
    else:
        print(f"Failed to fetch organization repositories. Error: {response.status_code}")
        return

    data = []
    exclude_repos = ["discussions", ".github"]

    # Traffic endpoints report complete days, so yesterday is the most
    # recent day with final numbers.
    yesterday = datetime.now().date() - timedelta(days=1)

    for repo_info in output:
        repo_name = repo_info["name"]
        if repo_name in exclude_repos or repo_info["private"]:
            continue

        try:
            visitor_stats = fetch_visitor_stats(repo_info["url"], headers)
            git_clones_stats = fetch_git_clones_stats(repo_info["url"], headers)

            views, unique_visitors = _daily_traffic(visitor_stats, yesterday)
            clones, unique_cloners = _daily_traffic(git_clones_stats, yesterday)

            data.append(f"{yesterday},{repo_name},{views},{unique_visitors},{clones},{unique_cloners}")
        except Exception as e:
            # Best-effort per repo: log and continue so one failing repo
            # does not block the rest of the run.
            print(f"Failed to fetch traffic stats for repository '{repo_name}': {str(e)}")

    print(data)

    # Reuse the token read above (the original code read the same env var
    # twice via os.environ.get and os.getenv).
    g = Github(token)
    repo = g.get_repo('analyticsinmotion/github-stats')
    file_path = 'data/traffic.csv'

    try:
        update_csv_file(repo, file_path, data)
        print("Data appended to CSV file successfully.")
    except Exception as e:
        print(f"Failed to update CSV file: {str(e)}")


if __name__ == "__main__":
    main()
0 commit comments