Skip to content

Commit 06824e4

Browse files
committed
init referrer and path stats
1 parent 5f21b69 commit 06824e4

File tree

1 file changed

+179
-0
lines changed

1 file changed

+179
-0
lines changed

src/writegithubstat/githubstat.py

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
from abc import ABC, abstractmethod
2+
import os
3+
import logging
4+
import requests
5+
from datetime import date
6+
7+
import pandas as pd
8+
9+
10+
class _GithubAuth:
    """Hold the request headers and the repository id for one GitHub repo.

    The repository id is resolved once, at construction time, by querying
    ``https://api.github.com/repos/{owner}/{repo}``.
    """

    # Seconds to wait for GitHub before giving up. ``requests`` applies no
    # timeout by default, so a stalled connection would otherwise block
    # the caller indefinitely.
    _TIMEOUT_SECONDS = 30

    def __init__(self, owner: str, repo: str, token: str) -> None:
        """Build the auth header from *token* and look up ``owner/repo``.

        Raises:
            requests.HTTPError: If the repository lookup does not answer
                with status 200.
            requests.Timeout: If GitHub does not answer in time.
        """
        self._header = _GithubAuth._get_auth_header(token)
        self._repo_id = _GithubAuth._get_repo_id(owner, repo, self._header)

    @staticmethod
    def _get_auth_header(token: str) -> dict:
        """Return the HTTP headers sent with every GitHub request."""
        return {
            "Authorization": f"token {token}",
            "Accept": "application/vnd.github.spiderman-preview+json",
        }

    @staticmethod
    def _get_repo_id(owner: str, repo: str, auth_header: dict) -> int:
        """Return the ``id`` field of the repository ``owner/repo``.

        Args:
            owner: Account or organisation that owns the repository.
            repo: Repository name.
            auth_header: Headers to send, as built by ``_get_auth_header``.

        Returns:
            The ``id`` value of the JSON response (GitHub's REST API
            documents it as an integer).

        Raises:
            requests.HTTPError: If the response status is not 200.
            requests.Timeout: If GitHub does not answer in time.
        """
        url = f"https://api.github.com/repos/{owner}/{repo}"
        response = requests.get(
            url, headers=auth_header, timeout=_GithubAuth._TIMEOUT_SECONDS
        )
        if response.status_code != 200:
            raise requests.HTTPError(
                f"Request failed with status code {response.status_code}"
            )
        return response.json()["id"]

    @property
    def header(self) -> dict:
        """HTTP headers to send with authenticated requests."""
        return self._header

    @property
    def repo_id(self) -> int:
        """Repository id resolved at construction time."""
        return self._repo_id
42+
43+
44+
class _GithubStat:
    """Download one repository statistic from GitHub as a DataFrame."""

    # Seconds to wait for GitHub before giving up. ``requests`` applies no
    # timeout by default, so a stalled connection would otherwise block
    # the caller indefinitely.
    _TIMEOUT_SECONDS = 30

    @staticmethod
    def _dict_to_dataframe(data) -> pd.DataFrame:
        """Wrap the decoded JSON payload *data* in a ``pandas.DataFrame``."""
        return pd.DataFrame(data)

    @staticmethod
    def _get_stat(auth: "_GithubAuth", path: str) -> pd.DataFrame:
        """Fetch ``repositories/{repo_id}/{path}`` and return it as a frame.

        Args:
            auth: Supplies the repository id and the request headers.
            path: Endpoint path appended after the repository id, e.g.
                ``"traffic/popular/referrers"``.

        Returns:
            The decoded JSON response converted to a DataFrame.

        Raises:
            requests.HTTPError: If the response status is not 200.
            requests.Timeout: If GitHub does not answer in time.
        """
        url = f"https://api.github.com/repositories/{auth.repo_id}/{path}"
        response = requests.get(
            url, headers=auth.header, timeout=_GithubStat._TIMEOUT_SECONDS
        )
        if response.status_code != 200:
            raise requests.HTTPError(
                f"Request failed with status code {response.status_code}"
            )
        return _GithubStat._dict_to_dataframe(response.json())
62+
63+
64+
class _GithubStatType(ABC):
    """Interface describing one kind of GitHub statistic.

    Concrete subclasses supply four read-only properties; all of them are
    abstract here, so this class cannot be instantiated directly.
    """

    @property
    @abstractmethod
    def path(self):
        """Endpoint path requested after the repository id."""

    @property
    @abstractmethod
    def dimensions(self):
        """Names of the descriptive (non-numeric) columns."""

    @property
    @abstractmethod
    def measures(self):
        """Names of the numeric columns."""

    @property
    @abstractmethod
    def name(self):
        """Short label identifying the statistic."""
84+
85+
86+
class Referrers(_GithubStatType):
    """Statistic type for the ``traffic/popular/referrers`` endpoint."""

    @property
    def path(self) -> str:
        """Endpoint path requested after the repository id."""
        return "traffic/popular/referrers"

    @property
    def dimensions(self) -> list:
        """Descriptive columns; a fresh list on every access."""
        return ["referrer"]

    @property
    def measures(self) -> list:
        """Numeric columns; a fresh list on every access."""
        return ["count", "uniques"]

    @property
    def name(self) -> str:
        """Short label identifying this statistic."""
        return "referrers"
102+
103+
104+
class Paths(_GithubStatType):
    """Statistic type for the ``traffic/popular/paths`` endpoint."""

    @property
    def path(self) -> str:
        """Endpoint path requested after the repository id."""
        return "traffic/popular/paths"

    @property
    def dimensions(self) -> list:
        """Descriptive columns; a fresh list on every access."""
        return ["path", "title"]

    @property
    def measures(self) -> list:
        """Numeric columns; a fresh list on every access."""
        return ["count", "uniques"]

    @property
    def name(self) -> str:
        """Short label identifying this statistic."""
        return "paths"
120+
121+
122+
class WriteGithubStat:
    """Fetch GitHub statistics for one repository and store them as CSV.

    Each statistic type is written to its own per-year CSV file. Rows that
    were already stored for the same date, owner and repository are
    replaced, so running the export several times a day does not create
    duplicates.
    """

    def __init__(self, owner: str, repo: str, token: str) -> None:
        """Record today's date and authenticate against ``owner/repo``.

        Raises:
            requests.HTTPError: If the repository lookup performed by
                ``_GithubAuth`` fails.
        """
        self._date = date.today().strftime("%Y-%m-%d")
        self._owner = owner
        self._repo = repo
        self._auth = _GithubAuth(self._owner, self._repo, token)

    def write_stats(self, outdir: str, prefix: str) -> None:
        """Write referrer and path statistics into *outdir*.

        Args:
            outdir: Target directory; created if it does not exist.
            prefix: Text placed between the year and ``githubstat`` in
                each file name.
        """
        os.makedirs(outdir, exist_ok=True)
        for stat_type in (Referrers(), Paths()):
            self._write_stat(stat_type, outdir, prefix)

    def _write_stat(
        self, stat_type: "_GithubStatType", outdir: str, prefix: str
    ) -> None:
        """Fetch one statistic, merge it with the stored CSV and save it."""
        year = self._date[0:4]
        filename = f"{year}_{prefix}_githubstat_{stat_type.name}.csv"
        csv = os.path.join(outdir, filename)
        stats = self._get_stats(stat_type)
        logging.info(stats)
        stored_stats = self._get_stored_stats(csv)
        merged_stats = self._merge_stats(stored_stats, stats)
        merged_stats.to_csv(csv, index=False)

    def _get_stats(self, stat_type: "_GithubStatType") -> pd.DataFrame:
        """Download *stat_type* and prepend the date/owner/repo columns.

        When GitHub returns no rows, a single placeholder row is used
        instead: ``"-"`` for every dimension and ``0`` for every measure,
        so the run is still recorded in the CSV.
        """
        stat = _GithubStat._get_stat(self._auth, stat_type.path)
        if stat.empty:
            placeholder = {
                **{col: "-" for col in stat_type.dimensions},
                **{col: 0 for col in stat_type.measures},
            }
            stat = pd.DataFrame([placeholder])
        return self._insert_metadata(stat)

    def _insert_metadata(self, df: pd.DataFrame) -> pd.DataFrame:
        """Insert ``date``, ``owner`` and ``repo`` as the first columns.

        *df* is modified in place and also returned.
        """
        df.insert(0, "date", self._date)
        df.insert(1, "owner", self._owner)
        df.insert(2, "repo", self._repo)
        return df

    def _get_stored_stats(self, path: str) -> pd.DataFrame:
        """Read the CSV at *path*; return an empty frame if there is none.

        A zero-byte file is treated like a missing one instead of letting
        ``pandas.errors.EmptyDataError`` abort the export.
        """
        try:
            return pd.read_csv(path)
        except (FileNotFoundError, pd.errors.EmptyDataError):
            return pd.DataFrame()

    def _merge_stats(
        self, stored_stats: pd.DataFrame, stats: pd.DataFrame
    ) -> pd.DataFrame:
        """Replace today's stored rows for this repository with *stats*.

        Args:
            stored_stats: Rows previously read from the CSV (may be empty).
            stats: Freshly downloaded rows for today.

        Returns:
            The stored rows that belong to another date, owner or
            repository, followed by *stats*.
        """
        if stored_stats.empty:
            # Nothing to merge; skipping concat also avoids pandas'
            # deprecated dtype handling of empty frames.
            return stats

        # ``read_csv`` turns numeric-looking names (e.g. a repository
        # called "2048") into integers, which never compare equal to the
        # string attributes. Compare as text so such rows are replaced too.
        is_current_run = (
            (stored_stats["date"].astype(str) == self._date)
            & (stored_stats["owner"].astype(str) == self._owner)
            & (stored_stats["repo"].astype(str) == self._repo)
        )
        kept = stored_stats.loc[~is_current_run]
        if kept.empty:
            return stats
        return pd.concat([kept, stats])

0 commit comments

Comments
 (0)