Skip to content

Commit 4f9dbdb

Browse files
authored
Merge pull request #3840 from amvanbaren/fix-reports-issues
Reports improvements
2 parents aa34166 + 3111c60 commit 4f9dbdb

File tree

5 files changed

+102
-101
lines changed

5 files changed

+102
-101
lines changed

reports/get_all_extensions.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,9 @@ def get_extension(extension):
4040
while retry_count > 0:
4141
try:
4242
response = requests.get(extension_url)
43-
if response.status_code == 200:
44-
return response.json()
45-
else:
46-
raise Exception('%s: HTTP %s Error retrieving %s' % (datetime.now(), response.status_code,extension['url']))
47-
except Exception as e:
43+
response.raise_for_status()
44+
return response.json()
45+
except requests.exceptions.RequestException as e:
4846
print("%s: %s" % (datetime.now(), e))
4947
retry_count -= 1
5048
time.sleep(2)
@@ -74,11 +72,11 @@ def get_all_by_license():
7472
all_extensions = get_all_extensions()
7573
count = 1
7674
for extension in all_extensions:
77-
license = extension.get('license', 'None')
78-
if license in extensions_by_license:
79-
extensions_by_license[license].append(extension)
75+
license_name = extension.get('license', 'None')
76+
if license_name in extensions_by_license:
77+
extensions_by_license[license_name].append(extension)
8078
else:
81-
extensions_by_license[license] = [extension]
79+
extensions_by_license[license_name] = [extension]
8280
if int(count/100) == count/100:
8381
print('Processed %s of %s.' % (count, len(all_extensions)))
8482
count += 1
@@ -89,7 +87,6 @@ def write_json_file(extensions):
8987
f = open(JSON_FILENAME, 'w')
9088
f.write(json.dumps(extensions, indent=4))
9189
f.close()
92-
return
9390

9491
def write_tsv_file(extensions):
9592
f = open(TSV_FILENAME, 'w')

reports/get_availability_data.py

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -22,18 +22,15 @@ def make_api_call(url):
2222
while not done:
2323
try:
2424
response = requests.get(url, headers=HEADERS)
25-
if response.status_code != 200:
26-
raise Exception('%s HTTP error %s' % (url, response.status_code))
27-
else:
28-
done = True
29-
except Exception as e:
30-
print(" %s, retrying..." % e)
25+
response.raise_for_status()
26+
done = True
27+
except requests.exceptions.RequestException as e:
3128
if retry_count > 0:
29+
print(" %s, retrying..." % e)
3230
time.sleep((6 - retry_count) * 5)
3331
retry_count = retry_count - 1
3432
else:
35-
done = True
36-
raise Exception('Failing call to %s after multiple retries' % url)
33+
raise
3734

3835
return response.json()
3936

@@ -58,7 +55,7 @@ def get_monitor_url(id, start_date, end_date):
5855
return '%s/monitors/%s/sla?from=%s&to=%s' % (API_URL, id, start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d'))
5956

6057
def get_monitor_data(monitor, time_span):
61-
id = monitor['id']
58+
monitor_id = monitor['id']
6259
name = monitor['attributes']['pronounceable_name']
6360
date_str = monitor['attributes']['created_at']
6461
start_date = datetime.strptime(date_str[0:10], '%Y-%m-%d')
@@ -69,11 +66,11 @@ def get_monitor_data(monitor, time_span):
6966
downtime_data = []
7067
print('processing %s' % name)
7168
while end_date <= today:
72-
availability_url = get_monitor_url(id, start_date, end_date)
69+
availability_url = get_monitor_url(monitor_id, start_date, end_date)
7370
json_results = make_api_call(availability_url)
7471
dates.append(np.datetime64(end_date.strftime('%Y-%m-%d')))
7572
sla_data.append(json_results['data']['attributes']['availability'])
76-
downtime_url = get_monitor_url(id, start_date, start_date)
73+
downtime_url = get_monitor_url(monitor_id, start_date, start_date)
7774
json_results = make_api_call(downtime_url)
7875
downtime_data.append(json_results['data']['attributes']['total_downtime']/60)
7976
start_date = start_date + timedelta(days=1)
@@ -85,15 +82,15 @@ def get_continuous_data(time_span=30):
8582
monitors = get_all_monitors()
8683
results = []
8784
for monitor in monitors:
88-
name, dates, sla_data, downtime_data = get_monitor_data(monitor, time_span=30)
85+
name, dates, sla_data, downtime_data = get_monitor_data(monitor, time_span)
8986
results.append({'name': name,
9087
'dates': dates,
9188
'sla_data': sla_data,
9289
'downtime_data': downtime_data})
9390
return results
9491

9592
def get_monthly_monitor_data(monitor):
96-
id = monitor['id']
93+
monitor_id = monitor['id']
9794
name = monitor['attributes']['pronounceable_name']
9895
date_str = monitor['attributes']['created_at']
9996
interval_start_date = datetime.strptime(date_str[0:10], '%Y-%m-%d')
@@ -105,12 +102,12 @@ def get_monthly_monitor_data(monitor):
105102
while interval_start_date < end_date:
106103
interval_days_in_month = calendar.monthrange(interval_start_date.year, interval_start_date.month)[1]
107104
interval_end_date = interval_start_date + timedelta(days=interval_days_in_month - interval_start_date.day)
108-
availability_url = get_monitor_url(id, interval_start_date, interval_end_date)
105+
availability_url = get_monitor_url(monitor_id, interval_start_date, interval_end_date)
109106
json_results = make_api_call(availability_url)
110107
dt = interval_start_date.strftime('%Y-%m')
111108
dates.append(np.datetime64(dt))
112109
sla_data.append(json_results['data']['attributes']['availability'])
113-
downtime_url = get_monitor_url(id, interval_start_date, interval_end_date)
110+
downtime_url = get_monitor_url(monitor_id, interval_start_date, interval_end_date)
114111
json_results = make_api_call(downtime_url)
115112
downtime_data.append(json_results['data']['attributes']['total_downtime']/60)
116113
interval_start_date = interval_end_date + timedelta(days=1)

reports/get_open_vsx_data.py

Lines changed: 70 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -65,31 +65,63 @@ def get_publishing_data(starting_year, starting_month):
6565
df = pd.DataFrame(data,columns=HEADERS)
6666
return df
6767

68-
def get_most_active_data(starting_year, starting_month):
68+
def process_most_active_data(most_active):
69+
resulting_dfs = {}
70+
for key in most_active.keys():
71+
if key == 'dates':
72+
dates = most_active['dates']
73+
else:
74+
data = {'dates': dates}
75+
for entry in most_active[key]['unique']:
76+
data[entry] = [None] * len(most_active['dates'])
77+
for i in range(len(dates)):
78+
date = dates[i]
79+
for entry in most_active[key][date]:
80+
item = list(entry.values())[0]
81+
value = list(entry.values())[1]
82+
data[item][i] = value
83+
df = pd.DataFrame(data, columns=most_active[key]['unique'])
84+
df['date'] = dates
85+
resulting_dfs[key] = df
86+
return resulting_dfs
87+
88+
def most_active_data_append_unique(most_active, json_results, top_prop, key):
89+
for item in json_results[top_prop]:
90+
if item[key] not in most_active[top_prop]['unique']:
91+
most_active[top_prop]['unique'].append(item[key])
92+
93+
def extract_most_active_data_from_json(most_active, json_results, year, month):
94+
top_publishers = 'topMostActivePublishingUsers'
95+
top_extensions = 'topNamespaceExtensions'
96+
top_extension_versions = 'topNamespaceExtensionVersions'
97+
top_downloads = 'topMostDownloadedExtensions'
98+
99+
no_data = True
100+
top_props = [top_publishers, top_extensions, top_extension_versions, top_downloads]
101+
for top_prop in top_props:
102+
if len(json_results[top_prop]) > 0:
103+
no_data = False
104+
break
105+
106+
if no_data:
107+
return
69108

70-
def process_data(most_active):
71-
resulting_dfs = {}
72-
for key in most_active.keys():
73-
if key == 'dates':
74-
dates = most_active['dates']
75-
else:
76-
data = {'dates': dates}
77-
for entry in most_active[key]['unique']:
78-
data[entry] = [None] * len(most_active['dates'])
79-
for i in range(len(dates)):
80-
date = dates[i]
81-
for entry in most_active[key][date]:
82-
item = list(entry.values())[0]
83-
value = list(entry.values())[1]
84-
data[item][i] = value
85-
df = pd.DataFrame(data, columns=most_active[key]['unique'])
86-
df['date'] = dates
87-
resulting_dfs[key] = df
88-
return resulting_dfs
89-
90-
current_year = date.today().year
91-
current_month = date.today().month
109+
year_month = '%s/%s' % (month, str(year)[2:])
110+
most_active['dates'].append(year_month)
92111

112+
most_active[top_publishers][year_month] = json_results[top_publishers]
113+
most_active_data_append_unique(most_active, json_results, top_publishers, 'userLoginName')
114+
115+
most_active[top_extensions][year_month] = json_results[top_extensions]
116+
most_active_data_append_unique(most_active, json_results, top_extensions, 'namespace')
117+
118+
most_active[top_extension_versions][year_month] = json_results[top_extension_versions]
119+
most_active_data_append_unique(most_active, json_results, top_extension_versions, 'namespace')
120+
121+
most_active[top_downloads][year_month] = json_results[top_downloads]
122+
most_active_data_append_unique(most_active, json_results, top_downloads, 'extensionIdentifier')
123+
124+
def get_most_active_data(starting_year, starting_month):
93125
most_active = {
94126
'dates': [],
95127
'topMostActivePublishingUsers': {
@@ -105,44 +137,22 @@ def process_data(most_active):
105137
'unique': []
106138
}
107139
}
108-
for year in range(starting_year, current_year + 1):
109-
for month in range(1, 13):
110-
if year == starting_year and month >= starting_month or \
111-
starting_year < year < current_year or year == current_year and month <= current_month:
112-
url = '%sadmin/report?year=%s&month=%s&token=%s' % (API_ENDPOINT, year, month, ACCESS_TOKEN)
113-
response = requests.get(url)
114-
if response.status_code == 200:
115-
try:
116-
json_results = response.json()
117-
if len(json_results['topMostActivePublishingUsers']) > 0 or len(json_results['topNamespaceExtensions']) > 0 or \
118-
len(json_results['topNamespaceExtensionVersions']) > 0 or len(json_results['topMostDownloadedExtensions']) > 0:
119-
year_month = '%s/%s' % (month, str(year)[2:])
120-
most_active['dates'].append(year_month)
121-
most_active['topMostActivePublishingUsers'][year_month] = json_results['topMostActivePublishingUsers']
122-
for item in json_results['topMostActivePublishingUsers']:
123-
if item['userLoginName'] not in most_active['topMostActivePublishingUsers']['unique']:
124-
most_active['topMostActivePublishingUsers']['unique'].append(item['userLoginName'])
125-
most_active['topNamespaceExtensions'][year_month] = json_results['topNamespaceExtensions']
126-
for item in json_results['topNamespaceExtensions']:
127-
if item['namespace'] not in most_active['topNamespaceExtensions']['unique']:
128-
most_active['topNamespaceExtensions']['unique'].append(item['namespace'])
129-
most_active['topNamespaceExtensionVersions'][year_month] = json_results['topNamespaceExtensionVersions']
130-
for item in json_results['topNamespaceExtensionVersions']:
131-
if item['namespace'] not in most_active['topNamespaceExtensionVersions']['unique']:
132-
most_active['topNamespaceExtensionVersions']['unique'].append(item['namespace'])
133-
most_active['topMostDownloadedExtensions'][year_month] = json_results['topMostDownloadedExtensions']
134-
for item in json_results['topMostDownloadedExtensions']:
135-
if item['extensionIdentifier'] not in most_active['topMostDownloadedExtensions']['unique']:
136-
most_active['topMostDownloadedExtensions']['unique'].append(item['extensionIdentifier'])
137-
except JSONDecodeError:
138-
json_results = None
139-
print("Error decoding JSON results for %s" % url)
140-
else:
141-
print("%s error processing results for %s" % (response.status_code, url))
142-
143-
resulting_dfs = process_data(most_active)
144140

145-
return resulting_dfs
141+
today = date.today()
142+
start_date = date(starting_year, starting_month, 1)
143+
while start_date.year < today.year or (start_date.year == today.year and start_date.month < today.month):
144+
url = '%sadmin/report?year=%s&month=%s&token=%s' % (API_ENDPOINT, start_date.year, start_date.month, ACCESS_TOKEN)
145+
response = requests.get(url)
146+
if response.status_code == 200:
147+
try:
148+
extract_most_active_data_from_json(most_active, response.json(), start_date.year, start_date.month)
149+
except JSONDecodeError:
150+
print("Error decoding JSON results for %s" % url)
151+
else:
152+
print("%s error processing results for %s" % (response.status_code, url))
153+
start_date = start_date + relativedelta(months=1)
154+
155+
return process_most_active_data(most_active)
146156

147157
if __name__ == '__main__':
148158
reports = get_available_reports()

reports/get_vs_marketplace_data.py

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,9 @@
1010
from datetime import datetime
1111

1212
def get_ms_info(ext):
13-
extensionName = ext['extensionName']
14-
publisherName = ext['publisher']['publisherName']
15-
displayName = ext['displayName']
16-
displayName = displayName.replace(',', ' ')
13+
extension_name = ext['extensionName']
14+
publisher_name = ext['publisher']['publisherName']
15+
display_name = ext['displayName'].replace(',', ' ')
1716
latest_version = ext['versions'][0]['version']
1817
last_updated = ext['versions'][0]['lastUpdated']
1918

@@ -27,7 +26,7 @@ def get_ms_info(ext):
2726
except Exception:
2827
pricing = None
2928

30-
return extensionName, publisherName, displayName, latest_version, last_updated, repo, pricing
29+
return extension_name, publisher_name, display_name, latest_version, last_updated, repo, pricing
3130

3231
def convert_date_str(input_str):
3332
date_str = input_str[0:input_str.find('T')]
@@ -107,15 +106,15 @@ def convert_date_str(input_str):
107106
csv_file.write("MS Publisher (Namespace), MS Extension, MS DisplayName, MS Pricing, MS Version, MS Date, VSX Version, VSX Date, VSX Publisher, VSX License, Repo\n")
108107
for ext in extensions:
109108
print("%s.%s" % (ext['publisher']['publisherName'], ext['extensionName']))
110-
ms_extensionName, ms_publisherName, ms_displayname, ms_latest_version, ms_last_updated, ms_repo, ms_pricing = get_ms_info(ext)
111-
vsx_extension_url = '%s/%s/%s' % (VSX_API, ms_publisherName, ms_extensionName)
109+
ms_extension_name, ms_publisher_name, ms_display_name, ms_latest_version, ms_last_updated, ms_repo, ms_pricing = get_ms_info(ext)
110+
vsx_extension_url = '%s/%s/%s' % (VSX_API, ms_publisher_name, ms_extension_name)
112111
response = requests.get(vsx_extension_url)
113112
if response.status_code == 200:
114113
vsx_results = response.json()
115114
csv_file.write("%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s\n" % (
116-
ms_publisherName,
117-
ms_extensionName,
118-
ms_displayname,
115+
ms_publisher_name,
116+
ms_extension_name,
117+
ms_display_name,
119118
ms_pricing,
120119
ms_latest_version,
121120
convert_date_str(ms_last_updated),
@@ -127,9 +126,9 @@ def convert_date_str(input_str):
127126
))
128127
elif response.status_code == 404:
129128
csv_file.write("%s, %s, %s, %s, %s, %s, , , , %s, %s\n" % (
130-
ms_publisherName,
131-
ms_extensionName,
132-
ms_displayname,
129+
ms_publisher_name,
130+
ms_extension_name,
131+
ms_display_name,
133132
ms_pricing,
134133
ms_latest_version,
135134
convert_date_str(ms_last_updated),
@@ -140,7 +139,7 @@ def convert_date_str(input_str):
140139
else:
141140
print(response.status_code)
142141
print(response.content)
143-
raise Exception('HTTP %s error' % response.status_code)
142+
response.raise_for_status()
144143

145144
# Output JSON File
146145
json_file = open(JSON_FILE_NAME, 'w')

reports/graph_availability_trends.ipynb

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,6 @@
131131
},
132132
"outputs": [],
133133
"source": [
134-
"# plt.rcParams['figure.figsize'] = (15,9)\n",
135134
"fig, ax = plt.subplots()\n",
136135
"plt.ylim(97.0, 100.1)\n",
137136
"plt.title('Open-vsx.org Monthly Site Availability', fontsize=14)\n",
@@ -189,7 +188,6 @@
189188
},
190189
"outputs": [],
191190
"source": [
192-
"# plt.rcParams['figure.figsize'] = (15,9)\n",
193191
"fig, ax = plt.subplots()\n",
194192
"plt.title('Open-vsx.org Monthly Site Downtime', fontsize=14)\n",
195193
"\n",

0 commit comments

Comments
 (0)