 from elasticsearch import Elasticsearch
 from elasticsearch.helpers import bulk
 from datetime import timedelta
+import pprint


 date_format = '%Y-%m-%d %H:%M:%S'
@@ -21,8 +22,11 @@ def parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser(
         formatter_class=argparse.RawDescriptionHelpFormatter, allow_abbrev=False)

-    parser.add_argument('--pull-request', required=True, help='pull request number', type=int)
-    parser.add_argument('--repo', required=True, help='github repo')
+    parser.add_argument('--pull-request', help='pull request number', type=int)
+    parser.add_argument('--range', help='execute based on a date range, for example 2023-01-01..2023-01-05')
+    parser.add_argument('--repo', help='github repo', default='zephyrproject-rtos/zephyr')
+    parser.add_argument('--es-index', help='Elasticsearch index')
+    parser.add_argument('-y', '--dry-run', action="store_true", help='dry run, do not upload data')

     return parser.parse_args()

@@ -33,6 +37,90 @@ def gendata(data, index):
             "_source": t
         }

+def process_pr(pr):
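+    """Collect review-rule and time-rule compliance plus basic metrics for a merged PR."""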
+    reviews = pr.get_reviews()
+    print(f'#{pr.number}: {pr.title} - {pr.comments} Comments, reviews: {reviews.totalCount}, {len(pr.assignees)} Assignees (Updated {pr.updated_at})')
+    assignee_reviews = 0
+    prj = {}
+
+    assignees = []
+    labels = []
+    for label in pr.labels:
+        labels.append(label.name)
+
+    reviewers = set()
+    for review in reviews:
+        # get list of all approved reviews
+        if review.user and review.state == 'APPROVED':
+            reviewers.add(review.user.login)
+
+    for assignee in pr.assignees:
+        # list assignees for later checks
+        assignees.append(assignee.login)
+        if assignee.login in reviewers:
+            assignee_reviews += 1
+
+    if assignee_reviews > 0 or pr.merged_by.login in assignees:
+        # in case of assignee reviews or if PR was merged by an assignee
+        prj['review_rule'] = "yes"
+    elif not pr.assignees or \
+            (pr.user.login in assignees and len(assignees) == 1) or \
+            ('Trivial' in labels or 'Hotfix' in labels):
+        # in case no assignees are set, the submitter is the only assignee,
+        # or the PR is labeled Trivial or Hotfix
+        prj['review_rule'] = "na"
+    else:
+        # everything else
+        prj['review_rule'] = "no"
+
+
+    # calculate how long the PR was in review, in hours and in business days.
+    delta = pr.closed_at - pr.created_at
+    deltah = delta.total_seconds() / 3600
+    prj['hours_open'] = deltah
+
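+    # Enumerate each whole day between creation and close; the bare argument to
+    # timedelta() is a number of days.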
+    dates = (pr.created_at + timedelta(idx + 1) for idx in range((pr.closed_at - pr.created_at).days))
+
+    # Get number of business days per the guidelines, we need at least 2.
+    business_days = sum(1 for day in dates if day.weekday() < 5)
+    prj['business_days_open'] = business_days
+
+    # less than 2 business days (or, for Trivial PRs, less than 4 hours) violates the time rule
+    if business_days < 2 and not ('Trivial' in labels or 'Hotfix' in labels) or \
+            deltah < 4 and 'Trivial' in labels:
+        prj['time_rule'] = "no"
+    else:
+        prj['time_rule'] = "yes"
+
+    # This is all data we get easily through the GitHub API and serves as the basis
+    # for displaying some trends and metrics.
+    # Data can be extended in the future if we find more information that
+    # is useful through the API.
+
+    prj['nr'] = pr.number
+    prj['url'] = pr.url
+    prj['title'] = pr.title
+    prj['comments'] = pr.comments
+    prj['reviews'] = reviews.totalCount
+    prj['assignees'] = assignees
+    prj['updated'] = pr.updated_at.strftime("%Y-%m-%d %H:%M:%S")
+    prj['created'] = pr.created_at.strftime("%Y-%m-%d %H:%M:%S")
+    prj['closed'] = pr.closed_at.strftime("%Y-%m-%d %H:%M:%S")
+    prj['merged_by'] = pr.merged_by.login
+    prj['submitted_by'] = pr.user.login
+    prj['changed_files'] = pr.changed_files
+    prj['additions'] = pr.additions
+    prj['deletions'] = pr.deletions
+    prj['commits'] = pr.commits
+    # The branch we are targeting: main vs. release branches.
+    prj['base'] = pr.base.ref
+
+    # list all reviewers
+    prj['reviewers'] = list(reviewers)
+    prj['labels'] = labels
+
+    return prj
+
 def main():
     args = parse_args()
     token = os.environ.get('GITHUB_TOKEN')
@@ -46,112 +134,35 @@ def main():

     if args.pull_request:
         pr = gh_repo.get_pull(args.pull_request)
-
-        reviews = pr.get_reviews()
-        print(f'#{pr.number}: {pr.title} - {pr.comments} Comments, reviews: {reviews.totalCount}, {len(pr.assignees)} Assignees (Updated {pr.updated_at})')
-        assignee_reviews = 0
-        reviewers = set()
-        prj = {}
-        for r in reviews:
-            if r.user and r.state == 'APPROVED':
-                reviewers.add(r.user.login)
-            if pr.assignees and r.user:
-                for assignee in pr.assignees:
-                    if r.user.login == assignee.login:
-                        assignee_reviews = assignee_reviews + 1
-                        # was reviewed at least by one assignee
-                        prj['reviewed_by_assignee'] = "yes"
-
-        # This is all data we get easily though the Github API and serves as the basis
-        # for displaying some trends and metrics.
-        # Data can be extended in the future if we find more information that
-        # is useful through the API
-
-        prj['nr'] = pr.number
-        prj['url'] = pr.url
-        prj['title'] = pr.title
-        prj['comments'] = pr.comments
-        prj['reviews'] = reviews.totalCount
-        prj['assignees'] = len(pr.assignees)
-        prj['updated'] = pr.updated_at.strftime("%Y-%m-%d %H:%M:%S")
-        prj['created'] = pr.created_at.strftime("%Y-%m-%d %H:%M:%S")
-        prj['closed'] = pr.closed_at.strftime("%Y-%m-%d %H:%M:%S")
-        prj['merged_by'] = pr.merged_by.login
-        prj['submitted_by'] = pr.user.login
-        prj['changed_files'] = pr.changed_files
-        prj['additions'] = pr.additions
-        prj['deletions'] = pr.deletions
-        prj['commits'] = pr.commits
-        # The branch we are targeting. main vs release branches.
-        prj['base'] = pr.base.ref
-
-        ll = []
-        for l in pr.labels:
-            ll.append(l.name)
-        prj['labels'] = ll
-
-        # take first assignee, otherwise we have no assignees and this rule is not applicable
-        if pr.assignee:
-            prj['assignee'] = pr.assignee.login
-        else:
-            prj['assignee'] = "none"
-            prj['reviewed_by_assignee'] = "na"
-            prj['review_rule'] = "na"
-
-        # go through all assignees and check if anyone has approved and reset assignee to the one who approved
-        for assignee in pr.assignees:
-            if assignee.login in reviewers:
-                prj['assignee'] = assignee.login
-            elif assignee.login == pr.user.login:
-                prj['reviewed_by_assignee'] = "yes"
-
-
-        # list assignees for later checks
-        assignees = [a.login for a in pr.assignees]
-
-        # Deal with exceptions when assignee approval is not needed.
-        if 'Trivial' in ll or 'Hotfix' in ll:
-            prj['review_rule'] = "yes"
-        elif pr.merged_by.login in assignees:
-            prj['review_rule'] = "yes"
-        else:
-            prj['review_rule'] = "no"
-
-        prj['assignee_reviews'] = assignee_reviews
-
-        delta = pr.closed_at - pr.created_at
-        deltah = delta.total_seconds() / 3600
-        prj['hours_open'] = deltah
-
-        dates = (pr.created_at + timedelta(idx + 1) for idx in range((pr.closed_at - pr.created_at).days))
-
-        # Get number of business days per the guidelines, we need at least 2.
-        res = sum(1 for day in dates if day.weekday() < 5)
-
-        if res < 2 and not ('Trivial' in ll or 'Hotfix' in ll):
-            prj['time_rule'] = False
-        elif deltah < 4 and 'Trivial' in ll:
-            prj['time_rule'] = False
-        else:
-            prj['time_rule'] = True
-        prj['reviewers'] = list(reviewers)
-
+        prj = process_pr(pr)
         json_list.append(prj)
-
-
-    # Send data over to elasticsearch.
-    es = Elasticsearch(
-        [os.environ['ELASTICSEARCH_SERVER']],
-        api_key=os.environ['ELASTICSEARCH_KEY'],
-        verify_certs=False
-    )
-
-    try:
-        index = os.environ['PR_STAT_ES_INDEX']
-        bulk(es, gendata(json_list, index))
-    except KeyError as e:
-        print(f"Error: {e} not set.")
-        print(json_list)
+    elif args.range:
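+        # Search the GitHub API for PRs merged against main in the given date range,
+        # then fetch each hit as a full pull request to get reviews and merge details.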
+        query = f'repo:{args.repo} merged:{args.range} is:pr is:closed sort:updated-desc base:main'
+        prs = gh.search_issues(query=f'{query}')
+        for _pr in prs:
+            pr = gh_repo.get_pull(_pr.number)
+            prj = process_pr(pr)
+            json_list.append(prj)
+
+    if json_list and not args.dry_run:
+        # Send data over to elasticsearch.
+        es = Elasticsearch(
+            [os.environ['ELASTICSEARCH_SERVER']],
+            api_key=os.environ['ELASTICSEARCH_KEY'],
+            verify_certs=False
+        )
+
+        try:
+            if args.es_index:
+                index = args.es_index
+            else:
+                index = os.environ['PR_STAT_ES_INDEX']
+            bulk(es, gendata(json_list, index))
+        except KeyError as e:
+            print(f"Error: {e} not set.")
+            print(json_list)
+    if args.dry_run:
+        pprint.pprint(json_list)

 if __name__ == "__main__":
     main()