-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathanalytics.py
More file actions
81 lines (60 loc) · 2.98 KB
/
analytics.py
File metadata and controls
81 lines (60 loc) · 2.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
"""
This module produces insights from the data. It is not related to any simulation task.
"""
import simdata
import datetime
import pandas as pd
def get_bucket_definition(reporting_start, reporting_end, bucket_days):
    """
    Divides a date range in consecutive buckets of a fixed size.

    Buckets are generated starting at reporting_start, each one beginning where the previous one ended,
    until the end of the last generated bucket is later than reporting_end. Because of that, the last
    bucket always ends after reporting_end. When reporting_start is later than reporting_end no bucket
    is generated and both lists are empty.

    :param reporting_start: Start of the date range.
    :param reporting_end: End of the date range.
    :param bucket_days: Size of the bucket in days. Must be greater than zero.
    :return: Two lists: One for bucket start and the other for bucket end.
    :raises ValueError: If bucket_days is not greater than zero.
    """
    if bucket_days <= 0:
        # With a zero or negative size the bucket end never moves past reporting_end,
        # so the loop below would never finish.
        raise ValueError("bucket_days must be greater than zero, got %r" % (bucket_days,))

    bucket_start = reporting_start
    bucket_end = reporting_start

    bucket_start_list = []
    bucket_end_list = []

    while bucket_end <= reporting_end:
        bucket_start_list.append(bucket_start)

        bucket_end = bucket_start + datetime.timedelta(days=bucket_days)
        bucket_end_list.append(bucket_end)

        # The next bucket begins exactly where this one finished.
        bucket_start = bucket_end

    return bucket_start_list, bucket_end_list
def get_project_history(bucket_start_list, bucket_end_list, project_issues):
    """
    According to a bucket definition, it counts the issues that the creation-date filter returns
    for each specific bucket.

    :param bucket_start_list: List containing the start dates per bucket.
    :param bucket_end_list: List containing the end dates per bucket.
    :param project_issues: Project issues, in the form accepted by simdata.filter_by_create_date.
    :return: List with one issue count per bucket, in the same order as the bucket lists.
    """
    issues_per_bucket = []

    for window_start, window_end in zip(bucket_start_list, bucket_end_list):
        # NOTE(review): the meaning of the trailing True flag is defined in simdata.filter_by_create_date -- confirm there.
        issues_in_window = simdata.filter_by_create_date(project_issues, window_start, window_end, True)
        issues_per_bucket.append(len(issues_in_window))

    return issues_per_bucket
def _print_items(*items):
    """
    Prints the items on a single line, separated by single spaces.

    :param items: Values to print. Each one is converted to text with %s formatting.
    :return: None
    """
    # A print call with one single argument is valid on both Python 2 and Python 3, unlike the print statement.
    print(" ".join("%s" % (item,) for item in items))


def run_project_analysis(project_keys, issues_in_range, bucket_days=30):
    """
    Gathers project-related metrics: it prints a summary per project and saves to a CSV file the number
    of issues per bucket for each of the projects.

    :param project_keys: List of keys of the projects to analyse. Each key becomes a column of the report and
        the keys are joined with underscores to build the name of the output file.
    :param issues_in_range: Dataframe with the issues.
    :param bucket_days: Size in days of the buckets of the reporting history. Defaults to 30.
    :return: None
    """
    import os

    # The bucket definition is shared by all the projects, so it is built from the complete dataframe.
    created_dates = issues_in_range[simdata.CREATED_DATE_COLUMN]
    bucket_start_list, bucket_end_list = get_bucket_definition(created_dates.min(), created_dates.max(),
                                                               bucket_days)

    history_dataframe = pd.DataFrame({'bucket_start_list': bucket_start_list,
                                      'bucket_end_list': bucket_end_list})

    for project_key in project_keys:
        project_issues = simdata.filter_by_project(issues_in_range, [project_key])

        issues = len(project_issues.index)
        reporting_start = project_issues[simdata.CREATED_DATE_COLUMN].min()
        reporting_end = project_issues[simdata.CREATED_DATE_COLUMN].max()

        _print_items("Project ", project_key, ": Issues ", issues, " Reporting Start: ", reporting_start,
                     " Reporting End: ", reporting_end)

        history_dataframe[project_key] = get_project_history(bucket_start_list, bucket_end_list, project_issues)

    _print_items("Total issues: ", len(issues_in_range))

    history_file = "csv/" + "_".join(project_keys) + "_project_report_history.csv"

    # to_csv does not create missing folders, so the output folder is created first to avoid
    # losing the report after all the work is done.
    history_directory = os.path.dirname(history_file)
    if not os.path.isdir(history_directory):
        os.makedirs(history_directory)

    _print_items("Saving reporting history to ", history_file)
    history_dataframe.to_csv(history_file)