1
+ ##ToDo:
2
+ # * Use auth token throughout the application to avoid getting throttled by GitHub
3
+ # * Continue work on edit view to fetch the updated list
4
+
1
5
import os
2
6
import json
3
7
import glob
6
10
import csv
7
11
from collections import OrderedDict
8
12
9
- from django .shortcuts import render
13
+ from django .shortcuts import render , redirect
10
14
from django .http import HttpResponse , Http404
11
15
from django .conf import settings
16
+
12
17
import requests
13
18
import datetime
14
19
25
30
26
31
##globals
27
32
lookups = None
28
- org_id_dict = None
29
- git_commit_ref = ''
30
-
33
+ org_id_dict = {}
34
+ git_commit_ref = { 'master' : '' }
35
+ branch = 'master'
31
36
32
37
def load_schemas_from_github(branch="master", timeout=30):
    """Download the org-id register archive for a branch and return its schemas.

    Args:
        branch: Name of the branch of the ``org-id/register`` repository whose
            zip archive is downloaded.
        timeout: Seconds to wait for GitHub before giving up. Without a
            timeout ``requests`` waits indefinitely, which would block every
            caller (including the module-level refresh) on a stalled
            connection.

    Returns:
        dict mapping each schema's file name (the part before the first ".")
        to the parsed JSON content of ``schema/<name>.json`` in the archive.

    Raises:
        requests.exceptions.RequestException: on timeout, connection failure,
            or a non-2xx response (for example an unknown branch).
        zipfile.BadZipFile: if the downloaded payload is not a zip archive.
    """
    schemas = {}
    response = requests.get(
        "https://github.com/org-id/register/archive/" + branch + ".zip",
        timeout=timeout,
    )
    # Surface HTTP failures as such instead of letting an HTML error page
    # fall through to ZipFile and fail as an opaque BadZipFile.
    response.raise_for_status()
    with zipfile.ZipFile(io.BytesIO(response.content)) as ziped_repo:
        for filename in ziped_repo.namelist():
            # Drop the first path component (the archive's top-level folder).
            filename_split = filename.split("/")[1:]
            if len(filename_split) == 2 and filename_split[0] == "schema" and filename_split[-1].endswith(".json"):
                with ziped_repo.open(filename) as schema_file:
                    schemas[filename_split[-1].split(".")[0]] = json.loads(schema_file.read().decode('utf-8'))
    print("Loaded schemas from GitHub")
    return schemas
42
48
43
49
@@ -79,20 +85,20 @@ def create_codelist_lookups(schemas):
79
85
return lookups
80
86
81
87
82
def load_org_id_lists_from_github(branch="master", timeout=30):
    """Download the org-id register archive for a branch and return its lists.

    Args:
        branch: Name of the branch of the ``org-id/register`` repository whose
            zip archive is downloaded.
        timeout: Seconds to wait for GitHub before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        list of parsed JSON documents, one per ``lists/<dir>/<name>.json``
        entry found in the archive.

    Raises:
        requests.exceptions.RequestException: on timeout, connection failure,
            or a non-2xx response (for example an unknown branch).
        zipfile.BadZipFile: if the downloaded payload is not a zip archive.
    """
    org_id_lists = []
    response = requests.get(
        "https://github.com/org-id/register/archive/" + branch + ".zip",
        timeout=timeout,
    )
    # Surface HTTP failures as such instead of letting an HTML error page
    # fall through to ZipFile and fail as an opaque BadZipFile.
    response.raise_for_status()
    with zipfile.ZipFile(io.BytesIO(response.content)) as ziped_repo:
        for filename in ziped_repo.namelist():
            # Drop the first path component (the archive's top-level folder).
            filename_split = filename.split("/")[1:]
            if len(filename_split) == 3 and filename_split[0] == "lists" and filename_split[-1].endswith(".json"):
                with ziped_repo.open(filename) as list_file:
                    org_id_lists.append(json.loads(list_file.read().decode('utf-8')))
    return org_id_lists
92
98
93
99
94
100
def load_org_id_lists_from_disk ():
95
- codes_dir = os .path .join (current_dir , '../../codes ' )
101
+ codes_dir = os .path .join (current_dir , '../../lists ' )
96
102
org_id_lists = []
97
103
for org_id_list_file in glob .glob (codes_dir + '/*/*.json' ):
98
104
with open (org_id_list_file ) as org_id_list :
@@ -148,7 +154,7 @@ def augment_structure(org_id_lists):
148
154
if split [0 ] not in prefix ['structure' ]:
149
155
prefix ['structure' ].append (split [0 ])
150
156
151
-
157
+
152
158
def add_titles (org_list ):
153
159
'''Add coverage_titles and subnationalCoverage_titles to organisation lists'''
154
160
coverage_codes = org_list .get ('coverage' )
@@ -170,17 +176,17 @@ def add_titles(org_list):
170
176
org_list ['sector_titles' ] = [tup [1 ] for tup in lookups ['sector' ] if tup [0 ] in sector_codes ]
171
177
172
178
173
- def refresh_data ():
179
+ def refresh_data (branch = "master" ):
174
180
global lookups
175
181
global org_id_dict
176
182
global git_commit_ref
177
183
178
184
try :
179
185
sha = requests .get (
180
- 'https://api.github.com/repos/opendataservices/ org-ids/ branches/master'
186
+ 'https://api.github.com/repos/org-id/register/ branches/' + branch
181
187
).json ()['commit' ]['sha' ]
182
188
using_github = True
183
- if sha == git_commit_ref :
189
+ if sha == git_commit_ref . get ( branch , '' ) :
184
190
return "Not updating as sha has not changed: {}" .format (sha )
185
191
except Exception :
186
192
using_github = False
@@ -190,19 +196,21 @@ def refresh_data():
190
196
191
197
if using_github :
192
198
try :
193
- schemas = load_schemas_from_github ()
199
+ print ("Starting schema load from GitHub" )
200
+ schemas = load_schemas_from_github (branch )
194
201
except Exception :
195
202
raise
196
203
using_github = False
197
204
schemas = load_schemas_from_disk ()
198
205
else :
206
+ print ("Loading from disk" )
199
207
schemas = load_schemas_from_disk ()
200
208
201
209
lookups = create_codelist_lookups (schemas )
202
-
210
+
203
211
if using_github :
204
212
try :
205
- org_id_lists = load_org_id_lists_from_github ()
213
+ org_id_lists = load_org_id_lists_from_github (branch )
206
214
except :
207
215
raise
208
216
using_github = False
@@ -213,20 +221,19 @@ def refresh_data():
213
221
augment_quality (schemas , org_id_lists )
214
222
augment_structure (org_id_lists )
215
223
216
- org_id_dict = {org_id_list ['code' ]: org_id_list for org_id_list in org_id_lists if org_id_list .get ('confirmed' )}
224
+ org_id_dict [ branch ] = {org_id_list ['code' ]: org_id_list for org_id_list in org_id_lists if org_id_list .get ('confirmed' )}
217
225
218
226
if using_github :
219
- git_commit_ref = sha
227
+ git_commit_ref [ branch ] = sha
220
228
return "Loaded from github: {}" .format (sha )
221
229
else :
222
230
return "Loaded from disk"
223
231
232
+ print (refresh_data ())
224
233
225
- refresh_data ()
226
234
227
-
228
- def filter_and_score_results (query ):
229
- indexed = {key : value .copy () for key , value in org_id_dict .items ()}
235
+ def filter_and_score_results (query ,use_branch = "master" ):
236
+ indexed = {key : value .copy () for key , value in org_id_dict [use_branch ].items ()}
230
237
for prefix in list (indexed .values ()):
231
238
prefix ['relevance' ] = 0
232
239
prefix ['relevance_debug' ] = []
@@ -318,7 +325,7 @@ def filter_and_score_results(query):
318
325
319
326
for num , value in enumerate (sorted (indexed .values (), key = lambda k : - (k ['relevance' ] * 100 + k ['quality' ]))):
320
327
add_titles (value )
321
-
328
+
322
329
if (value ['relevance' ] >= RELEVANCE ["SUGGESTED_RELEVANCE_THRESHOLD" ]
323
330
and value ['quality' ] > RELEVANCE ["SUGGESTED_QUALITY_THRESHOLD" ]
324
331
and not all_results ['suggested' ] or (all_results ['suggested' ] and value ['relevance' ] == all_results ['suggested' ][0 ]['relevance' ])):
@@ -441,8 +448,16 @@ def get_lookups(query_dict):
441
448
def update_lists(request):
    """Refresh the cached data and return the refresh status text as the response."""
    status_message = refresh_data()
    return HttpResponse(status_message)
443
450
451
def _is_plausible_branch_name(branch_name):
    """Return True unless branch_name breaks git's ref-name rules.

    Only rejects names git itself refuses (see ``git check-ref-format``), so
    any branch that can exist in the repository is still accepted.
    """
    import re

    if not branch_name or branch_name == '@':
        return False
    if branch_name.startswith(('/', '.', '-')) or branch_name.endswith(('/', '.', '.lock')):
        return False
    if any(bad in branch_name for bad in ('..', '//', '/.', '.lock/', '@{')):
        return False
    # Control characters, space, DEL and ~ ^ : ? * [ \ are never valid in a ref.
    return re.search(r'[\x00-\x20\x7f~^:?*\[\\]', branch_name) is None


def preview_branch(request, branch_name):
    """Load data for branch_name, remember it in the session, and go to 'home'.

    Args:
        request: The incoming request; its session stores the chosen branch
            under the 'branch' key.
        branch_name: Branch to preview. It comes from the URL and is later
            concatenated into GitHub URLs and used as a cache key by
            refresh_data, so it is validated before use.

    Returns:
        A redirect to the 'home' URL name.

    Raises:
        Http404: if branch_name cannot be a valid git branch name.
    """
    if not _is_plausible_branch_name(branch_name):
        raise Http404('Branch {} does not exist'.format(branch_name))

    print("Loading branch " + branch_name)
    refresh_data(branch_name)
    request.session['branch'] = branch_name
    return redirect('home')
456
+
444
457
445
458
def home (request ):
459
+ use_branch = request .session .get ('branch' , 'master' )
460
+
446
461
query = {key : value for key , value in request .GET .items () if value }
447
462
context = {
448
463
"lookups" : {
@@ -453,7 +468,7 @@ def home(request):
453
468
}
454
469
if query :
455
470
context ['lookups' ] = get_lookups (query )
456
- context ['all_results' ] = filter_and_score_results (query )
471
+ context ['all_results' ] = filter_and_score_results (query , use_branch )
457
472
context ['query' ] = query
458
473
else :
459
474
query = {'coverage' : '' , 'structure' : '' , 'sector' : '' }
@@ -462,20 +477,21 @@ def home(request):
462
477
463
478
464
479
context ['local' ] = settings .LOCAL_DATA
465
-
480
+ context [ 'branch' ] = use_branch
466
481
467
482
return render (request , "home.html" , context = context )
468
483
469
484
470
485
def list_details(request, prefix):
    """Render the detail page for one organisation list on the session's branch.

    Args:
        request: The incoming request; the branch to read from is taken from
            the session's 'branch' key, defaulting to 'master'.
        prefix: Code of the organisation list to show.

    Returns:
        The rendered 'list.html' response, with the list under 'org_list' and
        the branch name under 'branch'.

    Raises:
        Http404: if no list with this prefix exists on the branch.
    """
    use_branch = request.session.get('branch', 'master')

    # The session can outlive this process's in-memory cache (restart or a
    # different worker). Load the branch on demand so a valid prefix is not
    # misreported as a missing organisation list.
    if use_branch not in org_id_dict:
        refresh_data(use_branch)

    try:
        org_list = org_id_dict[use_branch][prefix].copy()
        add_titles(org_list)
    except KeyError:
        raise Http404('Organisation list {} does not exist'.format(prefix))
    return render(request, 'list.html', context={'org_list': org_list, 'branch': use_branch})
479
495
480
496
def _get_filename ():
481
497
if git_commit_ref :
@@ -485,7 +501,8 @@ def _get_filename():
485
501
486
502
487
503
def json_download(request):
    """Return every organisation list on the session's branch as a JSON attachment.

    Args:
        request: The incoming request; the branch to export is taken from the
            session's 'branch' key, defaulting to 'master'.

    Returns:
        An HttpResponse whose body is ``{"lists": [...]}`` serialised with a
        two-space indent, served as a file download.
    """
    use_branch = request.session.get('branch', 'master')

    # The session can outlive this process's in-memory cache (restart or a
    # different worker). Load the branch on demand instead of failing with
    # an unhandled KeyError.
    if use_branch not in org_id_dict:
        refresh_data(use_branch)

    payload = {"lists": list(org_id_dict[use_branch].values())}
    response = HttpResponse(json.dumps(payload, indent=2), content_type='text/json')
    response['Content-Disposition'] = 'attachment; filename="org-id-{0}.json"'.format(_get_filename())
    return response
491
508
@@ -502,9 +519,10 @@ def _flatten_list(obj, path=''):
502
519
503
520
504
521
def csv_download (request ):
522
+ use_branch = request .session .get ('branch' , 'master' )
505
523
all_keys = set ()
506
524
all_rows = []
507
- for item in org_id_dict .values ():
525
+ for item in org_id_dict [ use_branch ] .values ():
508
526
row = dict (_flatten_list (item ))
509
527
all_keys .update (row .keys ())
510
528
all_rows .append (row )
@@ -529,14 +547,14 @@ def csv_download(request):
529
547
import lxml .etree as ET
530
548
531
549
532
- def make_xml_codelist ():
550
+ def make_xml_codelist (use_branch ):
533
551
root = ET .Element ("codelist" )
534
552
meta = ET .SubElement (root , "metadata" )
535
553
ET .SubElement (ET .SubElement (meta , "name" ),"narrative" ).text = "Organization Identifier Lists"
536
554
ET .SubElement (ET .SubElement (meta , "description" ),"narrative" ).text = "Organisation identifier lists and their code. These can be used as the prefix for an organisation identifier. For general guidance about constructing Organisation Identifiers, please see http://iatistandard.org/organisation-identifiers/ This list was formerly maintained by the IATI Secretariat as the Organization Registration Agency codelist. This version is maintained by the Identify-Org project, of which IATI is a member. New code requests should be made via Identify-org.net"
537
555
items = ET .SubElement (root , "codelist-items" )
538
556
539
- for entry in org_id_dict .values ():
557
+ for entry in org_id_dict [ use_branch ] .values ():
540
558
if entry ['access' ] and entry ['access' ]['availableOnline' ]:
541
559
publicdb = str (1 )
542
560
else :
@@ -569,6 +587,8 @@ def make_xml_codelist():
569
587
570
588
571
589
def xml_download(request):
    """Return the XML codelist for the session's branch as a file attachment.

    Args:
        request: The incoming request; the branch to export is taken from the
            session's 'branch' key, defaulting to 'master'.

    Returns:
        An HttpResponse carrying the output of make_xml_codelist, served as a
        file download.
    """
    use_branch = request.session.get('branch', 'master')

    # make_xml_codelist indexes org_id_dict by branch. The session can outlive
    # this process's in-memory cache (restart or a different worker), so load
    # the branch on demand instead of failing with an unhandled KeyError.
    if use_branch not in org_id_dict:
        refresh_data(use_branch)

    response = HttpResponse(make_xml_codelist(use_branch), content_type='text/xml')
    response['Content-Disposition'] = 'attachment; filename="org-id-{0}.xml"'.format(_get_filename())
    return response
594
+
0 commit comments