Skip to content

Commit 837737f

Browse files
authored
Merge pull request #3409 from kineticsquid/Add-reports
Adding reports
2 parents f565a66 + 29ff2ef commit 837737f

10 files changed

+1252
-0
lines changed

.DS_Store

6 KB
Binary file not shown.

reports/get_all_extensions.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
"""
2+
Script to collect metadata on all published extensions. Used by Jupyter notebooks.
3+
"""
4+
import requests
5+
import os
6+
import json
7+
import time
8+
from datetime import datetime
9+
10+
API_ENDPOINT = "https://open-vsx.org/"
11+
12+
JSON_FILENAME = 'extensions.json'
13+
TSV_FILENAME = 'extensions.tsv'
14+
15+
url = API_ENDPOINT + 'api/-/search?size=100'
16+
17+
def get_all_extensions():
    """Return full metadata records for every published extension.

    First pages through the search API (100 results per page) to collect the
    list of published extensions, then fetches each extension's full metadata
    record individually, retrying transient failures up to 5 times.

    Returns:
        list[dict]: one decoded JSON metadata record per extension that was
        retrieved successfully; extensions that fail all retries are skipped.
    """
    extensions = []
    done = False
    offset = 0
    while not done:
        search_url = url + '&offset=%s' % offset
        try:
            response = requests.get(search_url)
            results = response.json()
            extensions = extensions + results['extensions']
            # Next page starts where the accumulated results end.
            offset = len(extensions)
            print('Retrieved %s extensions' % len(extensions))
            if len(extensions) == results['totalSize']:
                done = True
        except Exception as e:
            # Best effort: log and stop paging rather than abort the run.
            print("%s: %s" % (datetime.now(), e))
            done = True

    count = 1
    all_extensions = []
    print("\n\nStarting: %s" % datetime.now())
    for extension in extensions:
        namespace_url = API_ENDPOINT + 'api/%s/%s' % (extension['namespace'], extension['name'])
        retry_count = 5
        while retry_count > 0:
            try:
                response = requests.get(namespace_url)
                if response.status_code == 200:
                    break
                # Fall back to namespace_url in the message: search results may
                # not always carry a 'url' key, and a KeyError here would mask
                # the real HTTP failure.
                raise Exception('%s: HTTP %s Error retrieving %s'
                                % (datetime.now(), response.status_code,
                                   extension.get('url', namespace_url)))
            except Exception as e:
                print("%s: %s" % (datetime.now(), e))
                retry_count -= 1
                time.sleep(2)
        if retry_count == 0:
            # All retries exhausted; skip this extension and keep going.
            print('Error retrieving %s' % extension.get('url', namespace_url))
        else:
            all_extensions.append(response.json())
        if count % 100 == 0:  # progress marker every 100 extensions
            print('Processed %s of %s.' % (count, len(extensions)))
        count += 1
    # count was incremented past the last call, so report count - 1.
    print("\n\nFinished %s API Calls: %s" % (count - 1, datetime.now()))

    return all_extensions
63+
64+
def get_all_by_license():
    """Group all published extensions by license identifier.

    Returns:
        dict[str, list[dict]]: extension metadata records keyed by license id
        ('None' for extensions without one), with keys sorted alphabetically.
    """
    extensions_by_license = {}
    all_extensions = get_all_extensions()
    count = 1
    for extension in all_extensions:
        # license_id, not `license`, to avoid shadowing the builtin module name.
        license_id = extension.get('license', 'None')
        extensions_by_license.setdefault(license_id, []).append(extension)
        if count % 100 == 0:  # progress marker every 100 extensions
            print('Processed %s of %s.' % (count, len(all_extensions)))
        count += 1

    return dict(sorted(extensions_by_license.items()))
79+
80+
def write_json_file(extensions):
    """Serialize *extensions* to JSON_FILENAME as indented JSON."""
    # Context manager guarantees the handle is closed even if writing raises.
    with open(JSON_FILENAME, 'w') as f:
        json.dump(extensions, f, indent=4)
85+
86+
def write_tsv_file(extensions):
    """Write one tab-separated row per extension record to TSV_FILENAME.

    Missing optional fields (full name, license, homepage, repository, bugs)
    are emitted as the literal string 'None', matching the JSON report.
    """
    header = ("Name\tNamespace\tVersions\tLogin Name\t"
              "Full Name\tLicense\tTimestamp\tDownloads\tReviews\tFiles\t"
              "PreRelease\tVerified\tUnrelated Publisher\tNamespace Access\tPreview\t"
              "Homepage\tRepo\tBugs\tBundled Extensions\n")
    # Context manager guarantees the handle is closed even if a record is
    # malformed and raises mid-loop.
    with open(TSV_FILENAME, 'w') as f:
        f.write(header)
        for e in extensions:
            fields = [
                e['name'], e['namespace'], len(e['allVersions']),
                e['publishedBy']['loginName'],
                e['publishedBy'].get('fullName', 'None'),
                e.get('license', 'None'), e['timestamp'],
                e['downloadCount'], e['reviewCount'], len(e['files']),
                e['preRelease'], e['verified'], e['unrelatedPublisher'],
                e['namespaceAccess'], e['preview'],
                e.get('homepage', 'None'), e.get('repository', 'None'),
                e.get('bugs', 'None'), len(e['dependencies']),
            ]
            # str(v) matches the original %s formatting exactly.
            f.write('\t'.join(str(v) for v in fields) + '\n')
100+
101+
if __name__ == '__main__':
    # Fetch metadata for every published extension and persist it in both
    # report formats.
    all_published = get_all_extensions()
    write_json_file(all_published)
    write_tsv_file(all_published)
105+
106+
107+
108+
109+

reports/get_availability_data.py

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
"""
2+
Script to collect availability data from open-vsx endpoints monitored by
3+
betteruptime. Used by graph_availability_trends Jupyter Notebook. Requires
4+
an access token from IT team.
5+
"""
6+
import requests
7+
from datetime import datetime, timedelta
8+
import numpy as np
9+
import os
10+
import calendar
11+
import time
12+
13+
API_URL = 'https://betteruptime.com/api/v2'
14+
TOKEN = os.getenv('TOKEN')
15+
HEADERS = {'Authorization': 'Bearer %s' % TOKEN}
16+
17+
def make_api_call(url):
    """GET *url* with the auth HEADERS, retrying failures with growing backoff.

    Retries up to 5 times, sleeping 5, 10, 15, 20, 25 seconds between
    attempts. A non-200 status is treated the same as a connection error.

    Returns:
        dict: the decoded JSON response body.

    Raises:
        Exception: when the call still fails after all retries.
    """
    retry_count = 5
    while True:
        try:
            response = requests.get(url, headers=HEADERS)
            if response.status_code != 200:
                raise Exception('%s HTTP error %s' % (url, response.status_code))
            break
        except Exception as e:
            print("  %s, retrying..." % e)
            if retry_count <= 0:
                # Chain the last underlying failure for easier debugging.
                raise Exception('Failing call to %s after multiple retries' % url) from e
            # Linear backoff: first retry waits 5s, the last waits 25s.
            time.sleep((6 - retry_count) * 5)
            retry_count -= 1

    return response.json()
38+
39+
def get_all_monitors():
    """Return every betteruptime monitor whose URL points at open-vsx.org.

    Follows the API's pagination links until there is no next page.
    """
    openvsx_monitors = []
    page_url = '%s/monitors' % API_URL
    while page_url is not None:
        payload = make_api_call(page_url)
        for entry in payload['data']:
            if 'https://open-vsx.org' in entry['attributes']['url']:
                openvsx_monitors.append(entry)
        # None when the last page has been consumed, ending the loop.
        page_url = payload['pagination'].get('next')
    return openvsx_monitors
54+
55+
def get_monitor_data(monitor, time_span):
    """Collect rolling-window availability and downtime series for *monitor*.

    Slides a window of *time_span* days, one day at a time, from the
    monitor's creation date up to today, querying the SLA endpoint for each
    window position. Makes two API calls per day of history, so this can be
    slow for long-lived monitors.

    Returns a 4-tuple: (monitor name, list of np.datetime64 window-end dates,
    list of availability percentages, list of downtime values in minutes).
    """
    id = monitor['id']
    name = monitor['attributes']['pronounceable_name']
    date_str = monitor['attributes']['created_at']
    # created_at is an ISO timestamp; keep only the YYYY-MM-DD prefix.
    start_date = datetime.strptime(date_str[0:10], '%Y-%m-%d')
    end_date = start_date + timedelta(days=time_span)
    today = datetime.now()
    dates = []
    sla_data = []
    downtime_data = []
    print('processing %s' % name)
    while end_date <= today:
        # Availability over the full [start_date, end_date] window.
        availability_url = '%s/monitors/%s/sla?from=%s&to=%s' % (API_URL, id, start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d'))
        json_results = make_api_call(availability_url)
        dates.append(np.datetime64(end_date.strftime('%Y-%m-%d')))
        sla_data.append(json_results['data']['attributes']['availability'])
        # NOTE(review): this queries from=start_date&to=start_date (a single
        # day), unlike the availability call above and unlike
        # get_monthly_monitor_data, which uses the full interval for both
        # metrics. Confirm whether per-day downtime is intended here.
        downtime_url = '%s/monitors/%s/sla?from=%s&to=%s' % (API_URL, id, start_date.strftime('%Y-%m-%d'), start_date.strftime('%Y-%m-%d'))
        json_results = make_api_call(downtime_url)
        # total_downtime is reported in seconds; convert to minutes.
        downtime_data.append(json_results['data']['attributes']['total_downtime']/60)
        # Slide the window forward one day.
        start_date = start_date + timedelta(days=1)
        end_date = end_date + timedelta(days=1)
    print('finished processing')
    return name, dates, sla_data, downtime_data
78+
79+
def get_continuous_data(time_span=30):
    """Collect rolling-window availability data for all open-vsx monitors.

    Args:
        time_span: width of the rolling window in days (default 30).

    Returns:
        list[dict]: one dict per monitor with keys 'name', 'dates',
        'sla_data', and 'downtime_data'.
    """
    monitors = get_all_monitors()
    results = []
    for monitor in monitors:
        # Bug fix: pass the caller's time_span through instead of the
        # hard-coded 30 the original used, which silently ignored the
        # parameter.
        name, dates, sla_data, downtime_data = get_monitor_data(monitor, time_span=time_span)
        results.append({'name': name,
                        'dates': dates,
                        'sla_data': sla_data,
                        'downtime_data': downtime_data})
    return results
89+
90+
def get_monthly_monitor_data(monitor):
    """Collect calendar-month availability and downtime series for *monitor*.

    Walks month-sized intervals from the monitor's creation date up to now.
    The first interval runs from the creation date to the end of that month;
    subsequent intervals cover whole calendar months.

    Returns a 4-tuple: (monitor name, list of np.datetime64 'YYYY-MM' dates,
    list of availability percentages, list of downtime values in minutes).
    """
    id = monitor['id']
    name = monitor['attributes']['pronounceable_name']
    date_str = monitor['attributes']['created_at']
    # created_at is an ISO timestamp; keep only the YYYY-MM-DD prefix.
    interval_start_date = datetime.strptime(date_str[0:10], '%Y-%m-%d')
    end_date = datetime.now()
    dates = []
    sla_data = []
    downtime_data = []
    print('processing %s' % name)
    while interval_start_date < end_date:
        # monthrange()[1] is the number of days in this month; stepping by
        # (days_in_month - day) lands exactly on the month's last day.
        interval_days_in_month = calendar.monthrange(interval_start_date.year, interval_start_date.month)[1]
        interval_end_date = interval_start_date + timedelta(days=interval_days_in_month - interval_start_date.day)
        availability_url = '%s/monitors/%s/sla?from=%s&to=%s' % (API_URL, id, interval_start_date.strftime('%Y-%m-%d'), interval_end_date.strftime('%Y-%m-%d'))
        json_results = make_api_call(availability_url)
        # Label the data point by month only.
        dt = interval_start_date.strftime('%Y-%m')
        dates.append(np.datetime64(dt))
        sla_data.append(json_results['data']['attributes']['availability'])
        # Same interval as the availability query above.
        downtime_url = '%s/monitors/%s/sla?from=%s&to=%s' % (API_URL, id, interval_start_date.strftime('%Y-%m-%d'), interval_end_date.strftime('%Y-%m-%d'))
        json_results = make_api_call(downtime_url)
        # total_downtime is reported in seconds; convert to minutes.
        downtime_data.append(json_results['data']['attributes']['total_downtime']/60)
        # Advance to the first day of the next month.
        interval_start_date = interval_end_date + timedelta(days=1)

    print('finished processing')
    return name, dates, sla_data, downtime_data
115+
116+
def get_monthly_data():
    """Collect month-by-month SLA and downtime series for every open-vsx monitor.

    Returns:
        list[dict]: one dict per monitor with keys 'name', 'dates',
        'sla_data', and 'downtime_data'.
    """
    collected = []
    for monitor in get_all_monitors():
        name, dates, sla_data, downtime_data = get_monthly_monitor_data(monitor)
        collected.append({
            'name': name,
            'dates': dates,
            'sla_data': sla_data,
            'downtime_data': downtime_data,
        })
    return collected
126+
127+
128+
if __name__ == '__main__':
    # Pull the monthly availability series for all monitors and dump them.
    monthly_results = get_monthly_data()
    print(monthly_results)
131+

0 commit comments

Comments
 (0)