Skip to content

Commit 690695d

Browse files
Graphql integration (concept) (#152)
* add log_repositories_pr_by_graphql logic * update graphql arg-flag * increase sleep delta for graphql * handle GraphQL errors (add retrying)
1 parent 8d04875 commit 690695d

File tree

4 files changed

+254
-37
lines changed

4 files changed

+254
-37
lines changed

main.py

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ def parse_args():
2121
parser.add_argument(
2222
"-p", "--pull_requests", help="log pull requests", action="store_true"
2323
)
24+
parser.add_argument(
25+
"--graphql", help="use graphql for requesting data (work only with --pull_requests) ", action="store_true"
26+
)
2427
parser.add_argument("-i", "--issues", help="log issues", action="store_true")
2528
parser.add_argument("-w", "--wikis", help="log wikis", action="store_true")
2629
parser.add_argument("--contributors", help="log contributors", action="store_true")
@@ -150,14 +153,20 @@ def run(args, binded_repos, repos_for_wiki=None):
150153
binded_repos, args.out, start, finish, args.branch, args.forks_include
151154
)
152155
if args.pull_requests:
153-
pull_requests_parser.log_pull_requests(
154-
binded_repos,
155-
args.out,
156-
start,
157-
finish,
158-
args.forks_include,
159-
args.pr_comments,
160-
)
156+
if args.graphql:
157+
pull_requests_parser.log_pull_requests_by_graphql(
158+
binded_repos=binded_repos,
159+
csv_name=args.out
160+
)
161+
else:
162+
pull_requests_parser.log_pull_requests(
163+
binded_repos,
164+
args.out,
165+
start,
166+
finish,
167+
args.forks_include,
168+
args.pr_comments,
169+
)
161170
if args.issues:
162171
issues_parser.log_issues(
163172
binded_repos, args.out, start, finish, args.forks_include, args.base_url,

src/graphql/pull_request_parser.py

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
from dataclasses import asdict
2+
from typing import Generator
3+
from time import sleep
4+
5+
import requests
6+
7+
from src.constants import TIMEDELTA
8+
from src.repo_dataclasses import PullRequestData
9+
from src.interface_wrapper import IRepositoryAPI, Repository
10+
from src.utils import logger
11+
12+
13+
# -----------GraphQLAPI block--------------
14+
15+
_GITHUB_GRAPHQL_URL = "https://api.github.com/graphql"

# One page of pull requests (newest first) with the fields needed for a CSV row.
_PULL_REQUESTS_QUERY = """
query GetPRData($owner: String!, $repo: String!, $first: Int!, $after: String) {
  repository(owner: $owner, name: $repo) {
    nameWithOwner
    pullRequests(first: $first, after: $after, states: [OPEN, CLOSED, MERGED], orderBy: {field: CREATED_AT, direction: DESC}) {
      totalCount
      pageInfo {
        hasNextPage
        endCursor
      }
      nodes {
        title
        number
        state
        createdAt

        author {
          login
          ... on User {
            name
            email
          }
        }

        baseRef {
          name
          target {
            oid
          }
        }

        headRef {
          name
          target {
            oid
          }
        }

        changedFiles
        additions
        deletions

        mergedAt
        mergedBy {
          login
          ... on User {
            name
            email
          }
        }

        assignees(first: 10) {
          nodes {
            login
            name
          }
        }

        labels(first: 20) {
          nodes {
            name
            color
          }
        }
      }
    }
  }
}
"""


def _field(node, key):
    """Return ``node[key]``, or None when ``node`` is null/empty or lacks ``key``."""
    return node.get(key) if node else None


def _fetch_pull_requests_page(headers, variables, timeout):
    """Request one page and return its ``repository`` payload, or None on failure.

    Every failure (network error, non-200 status, invalid JSON, GraphQL
    ``errors`` entry, missing repository) is logged here so the caller only
    has to decide whether to retry.
    """
    try:
        response = requests.post(
            _GITHUB_GRAPHQL_URL,
            headers=headers,
            json={"query": _PULL_REQUESTS_QUERY, "variables": variables},
            timeout=timeout,
        )
    except requests.RequestException as error:
        logger.log_error(f"GraphQL request failed: {error}")
        return None

    if response.status_code != 200:
        logger.log_error(f"GraphQL request failed: {response.status_code} - {response.text}")
        return None

    try:
        graphql_data = response.json()
    except ValueError as error:
        logger.log_error(f"GraphQL request failed: invalid JSON in response - {error}")
        return None

    if "errors" in graphql_data:
        logger.log_error(f"GraphQL errors: {graphql_data['errors']}")
        return None

    # "data" or "repository" may be null; indexing into it would raise TypeError.
    repo_data = (graphql_data.get("data") or {}).get("repository")
    if repo_data is None:
        logger.log_error("GraphQL errors: response contains no repository data")
        return None

    return repo_data


def _build_pull_request_data(repo_full_name, pr):
    """Convert one GraphQL pull request node into a ``PullRequestData`` row."""
    author = pr["author"]
    merged_by = pr["mergedBy"]
    base_ref = pr["baseRef"]
    head_ref = pr["headRef"]

    return PullRequestData(
        repository_name=repo_full_name,
        title=pr["title"],
        id=pr["number"],
        state=str(pr["state"]).lower(),
        # Refs (and their targets) can be null in the response.
        commit_into=_field(_field(base_ref, "target"), "oid"),
        commit_from=_field(_field(head_ref, "target"), "oid"),
        created_at=pr["createdAt"],
        # "name"/"email" are only requested through the `... on User` fragment,
        # so they may be absent from the author/merger object.
        creator_name=_field(author, "name"),
        creator_login=_field(author, "login"),
        creator_email=_field(author, "email"),
        changed_files=pr["changedFiles"],
        # Comments, assignee story, related issues and milestone are not
        # requested by the query above.
        comment_body=None,
        comment_created_at=None,
        comment_author_name=None,
        comment_author_login=None,
        comment_author_email=None,
        merger_name=_field(merged_by, "name"),
        merger_login=_field(merged_by, "login"),
        merger_email=_field(merged_by, "email"),
        source_branch=_field(head_ref, "name"),
        target_branch=_field(base_ref, "name"),
        assignee_story=None,
        related_issues=None,
        labels=", ".join(label["name"] for label in pr["labels"]["nodes"]),
        milestone=None,
    )


def log_repositories_pr_by_graphql(
    owner, repo_name, token, csv_name, first_n=100, max_retries=10, timeout=60
):
    """Log every pull request of one repository using the GitHub GraphQL API.

    Pages through ``repository.pullRequests`` (open, closed and merged, newest
    first), converts each node to ``PullRequestData`` and passes it to
    ``logger.log_to_csv`` and ``logger.log_to_stdout``.

    Args:
        owner: Login of the repository owner.
        repo_name: Repository name (without the owner part).
        token: Token sent as ``Authorization: Bearer <token>``.
        csv_name: CSV target handed to ``logger.log_to_csv``.
        first_n: Page size sent as the ``first`` query variable.
        max_retries: Number of consecutive failed requests for the same page
            that are retried before the repository is abandoned. Without this
            limit a permanent failure (invalid token, unknown repository)
            would sleep and retry forever.
        timeout: Seconds ``requests`` waits for the server before the attempt
            counts as failed.

    Returns:
        None. When the retry limit is exceeded an error is logged and the
        function returns early, leaving already written rows in place.
    """
    headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}

    has_next_page = True
    after_cursor = None
    processed_count = 0
    failed_attempts = 0

    while has_next_page:
        variables = {
            "owner": owner,
            "repo": repo_name,
            "first": first_n,
            "after": after_cursor,
        }

        repo_data = _fetch_pull_requests_page(headers, variables, timeout)
        if repo_data is None:
            failed_attempts += 1
            if failed_attempts > max_retries:
                logger.log_error(
                    f"GraphQL request failed: giving up on {owner}/{repo_name} "
                    f"after {max_retries} retries"
                )
                return
            logger.log_to_stdout(f"Sleep to {100*TIMEDELTA} and retry")
            sleep(100*TIMEDELTA)
            continue

        # Only consecutive failures count towards the limit.
        failed_attempts = 0

        pull_requests = repo_data["pullRequests"]
        page_info = pull_requests["pageInfo"]
        has_next_page = page_info["hasNextPage"]
        after_cursor = page_info["endCursor"]

        prs = pull_requests["nodes"]
        processed_count += len(prs)
        # Single quotes inside the f-string: reusing the outer quote character
        # is a SyntaxError before Python 3.12.
        logger.log_to_stdout(f"Processing {processed_count} / {pull_requests['totalCount']}")

        for pr in prs:
            pr_info = asdict(_build_pull_request_data(repo_data["nameWithOwner"], pr))
            logger.log_to_csv(csv_name, list(pr_info.keys()), pr_info)
            logger.log_to_stdout(pr_info)
188+
189+
190+
def log_pull_requests_by_graphql(
    binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None, None],
    csv_name: str,
):
    """Log pull requests of every bound repository through the GraphQL API.

    First calls ``logger.log_to_csv`` with only the ``PullRequestData`` field
    names (presumably writing the CSV header - confirm against the logger),
    then delegates each repository to ``log_repositories_pr_by_graphql`` and
    sleeps ``100*TIMEDELTA`` after each one.

    Args:
        binded_repos: Yields ``(client, repository, token)`` tuples; the
            client element is not used here.
        csv_name: CSV target handed to ``logger.log_to_csv``.
    """
    column_names = list(asdict(PullRequestData()).keys())
    logger.log_to_csv(csv_name, column_names)

    for _client, repository, access_token in binded_repos:
        logger.log_title(repository.name)
        log_repositories_pr_by_graphql(
            owner=repository.owner.login,
            repo_name=repository.name,
            csv_name=csv_name,
            token=access_token,
        )
        # Pause between repositories before issuing the next batch of requests.
        sleep(100*TIMEDELTA)

src/pull_requests_parser.py

Lines changed: 5 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -11,35 +11,8 @@
1111
from src.git_logger import get_assignee_story
1212
from src.interface_wrapper import IRepositoryAPI, Repository
1313
from src.utils import logger
14-
15-
16-
@dataclass(kw_only=True, frozen=True)
17-
class PullRequestData:
18-
repository_name: str = ''
19-
title: str = ''
20-
id: int = 0
21-
state: str = ''
22-
commit_into: str = ''
23-
commit_from: str = ''
24-
created_at: str = ''
25-
creator_name: str = ''
26-
creator_login: str = ''
27-
creator_email: str = ''
28-
changed_files: str = ''
29-
comment_body: str = ''
30-
comment_created_at: str = ''
31-
comment_author_name: str = ''
32-
comment_author_login: str = ''
33-
comment_author_email: str = ''
34-
merger_name: str | None = None
35-
merger_login: str | None = None
36-
merger_email: str | None = None
37-
source_branch: str = ''
38-
target_branch: str = ''
39-
assignee_story: str = ''
40-
related_issues: str = ''
41-
labels: str = ''
42-
milestone: str = ''
14+
from src.graphql.pull_request_parser import log_pull_requests_by_graphql # for using in main.py
15+
from src.repo_dataclasses import PullRequestData
4316

4417

4518
def get_related_issues(pull_request_number, repo_owner, repo_name, token):
@@ -106,6 +79,9 @@ def get_info(obj, attr):
10679
return EMPTY_FIELD if obj is None else getattr(obj, attr)
10780

10881

82+
# -----------GithubAPI block--------------
83+
84+
10985
def log_repositories_pr(
11086
client: IRepositoryAPI,
11187
repository: Repository,

src/repo_dataclasses.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
from dataclasses import dataclass
2+
3+
4+
@dataclass(kw_only=True, frozen=True)
5+
class PullRequestData:
6+
repository_name: str = ''
7+
title: str = ''
8+
id: int = 0
9+
state: str = ''
10+
commit_into: str = ''
11+
commit_from: str = ''
12+
created_at: str = ''
13+
creator_name: str = ''
14+
creator_login: str = ''
15+
creator_email: str = ''
16+
changed_files: str = ''
17+
comment_body: str = ''
18+
comment_created_at: str = ''
19+
comment_author_name: str = ''
20+
comment_author_login: str = ''
21+
comment_author_email: str = ''
22+
merger_name: str | None = None
23+
merger_login: str | None = None
24+
merger_email: str | None = None
25+
source_branch: str = ''
26+
target_branch: str = ''
27+
assignee_story: str = ''
28+
related_issues: str = ''
29+
labels: str = ''
30+
milestone: str = ''

0 commit comments

Comments
 (0)