Skip to content

Commit d0f3727

Browse files
author
Tim Davies
committed
Update the application to load the registry from the separate org-id/register GitHub repository, and to support previewing arbitrary branches
1 parent e41e637 commit d0f3727

File tree

4 files changed

+60
-34
lines changed

4 files changed

+60
-34
lines changed

prefix_finder/frontend/templates/base.html

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@
4343
</div>
4444
</nav>
4545

46+
{% if branch and branch != 'master' %}
47+
<div class="alert alert-warning" role="alert">You are currently previewing branch {{ branch }}. <a href="/_preview_branch/master">Return to master</a> or <a href="https://github.com/OpenDataServices/org-ids/compare/{{ branch }}">view on GitHub</a></div>
48+
{% endif %}
49+
50+
4651
{% block main %}
4752
<div class="main-block">
4853
<div class="container">

prefix_finder/frontend/tests.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def test_xml_codelists():
1212
schema_file_parsed = etree.parse(schema_file)
1313

1414
xmlschema = etree.XMLSchema(schema_file_parsed)
15-
created_xml_codelist = make_xml_codelist()
15+
created_xml_codelist = make_xml_codelist('master')
1616
created_xml_codelist_file = io.StringIO(created_xml_codelist)
1717
xml_codelist_etree = etree.parse(created_xml_codelist_file)
1818

prefix_finder/frontend/urls.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
urlpatterns = [
77
url(r'^$', views.home, name='home'),
88
url(r'^_update_lists$', views.update_lists, name='update_lists'),
9+
url(r'^_preview_branch/(.+)$', views.preview_branch, name='preview_branch'),
910
url(r'^terms', TemplateView.as_view(template_name='terms.html'), name='terms'),
1011
url(r'^about', TemplateView.as_view(template_name='about.html'), name='about'),
1112
url(r'^list/(.+)$', views.list_details, name='list'),

prefix_finder/frontend/views.py

Lines changed: 53 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
##ToDo:
2+
# * Use auth token throughout the application to avoid getting throttled by GitHub
3+
# * Continue work on edit view to fetch updated list
4+
15
import os
26
import json
37
import glob
@@ -6,9 +10,10 @@
610
import csv
711
from collections import OrderedDict
812

9-
from django.shortcuts import render
13+
from django.shortcuts import render, redirect
1014
from django.http import HttpResponse, Http404
1115
from django.conf import settings
16+
1217
import requests
1318
import datetime
1419

@@ -25,19 +30,20 @@
2530

2631
##globals
2732
lookups = None
28-
org_id_dict = None
29-
git_commit_ref = ''
30-
33+
org_id_dict = {}
34+
git_commit_ref = {'master':''}
35+
branch = 'master'
3136

3237
def load_schemas_from_github(branch="master"):
3338
schemas = {}
34-
response = requests.get("https://github.com/OpenDataServices/org-ids/archive/"+branch+".zip")
39+
response = requests.get("https://github.com/org-id/register/archive/"+branch+".zip")
3540
with zipfile.ZipFile(io.BytesIO(response.content)) as ziped_repo:
3641
for filename in ziped_repo.namelist():
3742
filename_split = filename.split("/")[1:]
3843
if len(filename_split) == 2 and filename_split[0] == "schema" and filename_split[-1].endswith(".json"):
3944
with ziped_repo.open(filename) as schema_file:
4045
schemas[filename_split[-1].split(".")[0]] = json.loads(schema_file.read().decode('utf-8'))
46+
print("Loaded schemas from GitHub")
4147
return schemas
4248

4349

@@ -79,20 +85,20 @@ def create_codelist_lookups(schemas):
7985
return lookups
8086

8187

82-
def load_org_id_lists_from_github():
88+
def load_org_id_lists_from_github(branch="master"):
8389
org_id_lists = []
84-
response = requests.get("https://github.com/OpenDataServices/org-ids/archive/master.zip")
90+
response = requests.get("https://github.com/org-id/register/archive/"+branch+".zip")
8591
with zipfile.ZipFile(io.BytesIO(response.content)) as ziped_repo:
8692
for filename in ziped_repo.namelist():
8793
filename_split = filename.split("/")[1:]
88-
if len(filename_split) == 3 and filename_split[0] == "codes" and filename_split[-1].endswith(".json"):
94+
if len(filename_split) == 3 and filename_split[0] == "lists" and filename_split[-1].endswith(".json"):
8995
with ziped_repo.open(filename) as schema_file:
9096
org_id_lists.append(json.loads(schema_file.read().decode('utf-8')))
9197
return org_id_lists
9298

9399

94100
def load_org_id_lists_from_disk():
95-
codes_dir = os.path.join(current_dir, '../../codes')
101+
codes_dir = os.path.join(current_dir, '../../lists')
96102
org_id_lists = []
97103
for org_id_list_file in glob.glob(codes_dir + '/*/*.json'):
98104
with open(org_id_list_file) as org_id_list:
@@ -148,7 +154,7 @@ def augment_structure(org_id_lists):
148154
if split[0] not in prefix['structure']:
149155
prefix['structure'].append(split[0])
150156

151-
157+
152158
def add_titles(org_list):
153159
'''Add coverage_titles and subnationalCoverage_titles to organisation lists'''
154160
coverage_codes = org_list.get('coverage')
@@ -170,17 +176,17 @@ def add_titles(org_list):
170176
org_list['sector_titles'] = [tup[1] for tup in lookups['sector'] if tup[0] in sector_codes]
171177

172178

173-
def refresh_data():
179+
def refresh_data(branch="master"):
174180
global lookups
175181
global org_id_dict
176182
global git_commit_ref
177183

178184
try:
179185
sha = requests.get(
180-
'https://api.github.com/repos/opendataservices/org-ids/branches/master'
186+
'https://api.github.com/repos/org-id/register/branches/'+branch
181187
).json()['commit']['sha']
182188
using_github = True
183-
if sha == git_commit_ref:
189+
if sha == git_commit_ref.get(branch,''):
184190
return "Not updating as sha has not changed: {}".format(sha)
185191
except Exception:
186192
using_github = False
@@ -190,19 +196,21 @@ def refresh_data():
190196

191197
if using_github:
192198
try:
193-
schemas = load_schemas_from_github()
199+
print("Starting schema load from GitHub")
200+
schemas = load_schemas_from_github(branch)
194201
except Exception:
195202
raise
196203
using_github = False
197204
schemas = load_schemas_from_disk()
198205
else:
206+
print("Loading from disk")
199207
schemas = load_schemas_from_disk()
200208

201209
lookups = create_codelist_lookups(schemas)
202-
210+
203211
if using_github:
204212
try:
205-
org_id_lists = load_org_id_lists_from_github()
213+
org_id_lists = load_org_id_lists_from_github(branch)
206214
except:
207215
raise
208216
using_github = False
@@ -213,20 +221,19 @@ def refresh_data():
213221
augment_quality(schemas, org_id_lists)
214222
augment_structure(org_id_lists)
215223

216-
org_id_dict = {org_id_list['code']: org_id_list for org_id_list in org_id_lists if org_id_list.get('confirmed')}
224+
org_id_dict[branch] = {org_id_list['code']: org_id_list for org_id_list in org_id_lists if org_id_list.get('confirmed')}
217225

218226
if using_github:
219-
git_commit_ref = sha
227+
git_commit_ref[branch] = sha
220228
return "Loaded from github: {}".format(sha)
221229
else:
222230
return "Loaded from disk"
223231

232+
print(refresh_data())
224233

225-
refresh_data()
226234

227-
228-
def filter_and_score_results(query):
229-
indexed = {key: value.copy() for key, value in org_id_dict.items()}
235+
def filter_and_score_results(query,use_branch="master"):
236+
indexed = {key: value.copy() for key, value in org_id_dict[use_branch].items()}
230237
for prefix in list(indexed.values()):
231238
prefix['relevance'] = 0
232239
prefix['relevance_debug'] = []
@@ -318,7 +325,7 @@ def filter_and_score_results(query):
318325

319326
for num, value in enumerate(sorted(indexed.values(), key=lambda k: -(k['relevance'] * 100 + k['quality']))):
320327
add_titles(value)
321-
328+
322329
if (value['relevance'] >= RELEVANCE["SUGGESTED_RELEVANCE_THRESHOLD"]
323330
and value['quality'] > RELEVANCE["SUGGESTED_QUALITY_THRESHOLD"]
324331
and not all_results['suggested'] or (all_results['suggested'] and value['relevance'] == all_results['suggested'][0]['relevance'])):
@@ -441,8 +448,16 @@ def get_lookups(query_dict):
441448
def update_lists(request):
442449
return HttpResponse(refresh_data())
443450

451+
def preview_branch(request,branch_name):
452+
print("Loading branch "+ branch_name)
453+
refresh_data(branch_name)
454+
request.session['branch'] = branch_name
455+
return redirect('home')
456+
444457

445458
def home(request):
459+
use_branch = request.session.get('branch', 'master')
460+
446461
query = {key: value for key, value in request.GET.items() if value}
447462
context = {
448463
"lookups": {
@@ -453,7 +468,7 @@ def home(request):
453468
}
454469
if query:
455470
context['lookups'] = get_lookups(query)
456-
context['all_results'] = filter_and_score_results(query)
471+
context['all_results'] = filter_and_score_results(query,use_branch)
457472
context['query'] = query
458473
else:
459474
query = {'coverage': '', 'structure': '', 'sector': ''}
@@ -462,20 +477,21 @@ def home(request):
462477

463478

464479
context['local'] = settings.LOCAL_DATA
465-
480+
context['branch'] = use_branch
466481

467482
return render(request, "home.html", context=context)
468483

469484

470485
def list_details(request, prefix):
486+
use_branch = request.session.get('branch', 'master')
487+
471488
try:
472-
org_list = org_id_dict[prefix].copy()
489+
org_list = org_id_dict[use_branch][prefix].copy()
473490
add_titles(org_list)
474491

475492
except KeyError:
476493
raise Http404('Organisation list {} does not exist'.format(prefix))
477-
return render(request, 'list.html', context={'org_list': org_list})
478-
494+
return render(request, 'list.html', context={'org_list': org_list, 'branch':use_branch})
479495

480496
def _get_filename():
481497
if git_commit_ref:
@@ -485,7 +501,8 @@ def _get_filename():
485501

486502

487503
def json_download(request):
488-
response = HttpResponse(json.dumps({"lists": list(org_id_dict.values())}, indent=2), content_type='text/json')
504+
use_branch = request.session.get('branch', 'master')
505+
response = HttpResponse(json.dumps({"lists": list(org_id_dict[use_branch].values())}, indent=2), content_type='text/json')
489506
response['Content-Disposition'] = 'attachment; filename="org-id-{0}.json"'.format(_get_filename())
490507
return response
491508

@@ -502,9 +519,10 @@ def _flatten_list(obj, path=''):
502519

503520

504521
def csv_download(request):
522+
use_branch = request.session.get('branch', 'master')
505523
all_keys = set()
506524
all_rows = []
507-
for item in org_id_dict.values():
525+
for item in org_id_dict[use_branch].values():
508526
row = dict(_flatten_list(item))
509527
all_keys.update(row.keys())
510528
all_rows.append(row)
@@ -529,14 +547,14 @@ def csv_download(request):
529547
import lxml.etree as ET
530548

531549

532-
def make_xml_codelist():
550+
def make_xml_codelist(use_branch):
533551
root = ET.Element("codelist")
534552
meta = ET.SubElement(root, "metadata")
535553
ET.SubElement(ET.SubElement(meta, "name"),"narrative").text = "Organization Identifier Lists"
536554
ET.SubElement(ET.SubElement(meta, "description"),"narrative").text = "Organisation identifier lists and their code. These can be used as the prefix for an organisation identifier. For general guidance about constructing Organisation Identifiers, please see http://iatistandard.org/organisation-identifiers/ This list was formerly maintained by the IATI Secretariat as the Organization Registration Agency codelist. This version is maintained by the Identify-Org project, of which IATI is a member. New code requests should be made via Identify-org.net"
537555
items = ET.SubElement(root, "codelist-items")
538556

539-
for entry in org_id_dict.values():
557+
for entry in org_id_dict[use_branch].values():
540558
if entry['access'] and entry['access']['availableOnline']:
541559
publicdb = str(1)
542560
else:
@@ -569,6 +587,8 @@ def make_xml_codelist():
569587

570588

571589
def xml_download(request):
572-
response = HttpResponse(make_xml_codelist(), content_type='text/xml')
590+
use_branch = request.session.get('branch', 'master')
591+
response = HttpResponse(make_xml_codelist(use_branch), content_type='text/xml')
573592
response['Content-Disposition'] = 'attachment; filename="org-id-{0}.xml"'.format(_get_filename())
574593
return response
594+

0 commit comments

Comments
 (0)