Skip to content

Commit 9db64c0

Browse files
authored
Merge pull request #140 from open-data/feature/sysadmin-ds-dump
DataStore Dump & DataStore Field Sync
2 parents 4600942 + 3f4006f commit 9db64c0

File tree

8 files changed

+107
-14
lines changed

8 files changed

+107
-14
lines changed

changes/140.changes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
`recombinant_update` with `force_update` and `delete_fields` will now DROP old columns from the DataStore for fields that no longer exist in the Schema.

changes/140.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added a sysadmin-only DataStore Dump button (CSV, TSV, JSON, and XML downloads) to the Recombinant resource edit template.

ckanext/recombinant/cli.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,19 +116,26 @@ def remove_empty(dataset_type: Optional[List[str]] = None,
116116
is_flag=True,
117117
help="Force update of tables (required for changes to only primary keys/indexes)",
118118
)
119+
@click.option(
120+
"-d",
121+
"--delete-fields",
122+
is_flag=True,
123+
help="Deletes fields that are no longer in the Schema (requires --force-update)",
124+
)
119125
@click.option('-v', '--verbose', is_flag=True,
120126
type=click.BOOL, help='Increase verbosity.')
121127
def update(dataset_type: Optional[List[str]] = None,
122128
all_types: bool = False,
123129
force_update: bool = False,
130+
delete_fields: bool = False,
124131
verbose: bool = False):
125132
"""
126133
Triggers recombinant update for recombinant resources
127134
128135
Full Usage:\n
129136
recombinant update (-a | DATASET_TYPE ...) [-f [-d]]
130137
"""
131-
_update(dataset_type, all_types, force_update, verbose=verbose)
138+
_update(dataset_type, all_types, force_update, delete_fields, verbose=verbose)
132139

133140

134141
@recombinant.command(short_help="Delete recombinant datasets and all their data.")
@@ -349,6 +356,7 @@ def _show(dataset_type: Optional[str],
349356
def _update(dataset_types: Optional[List[str]],
350357
all_types: bool = False,
351358
force_update: bool = False,
359+
delete_fields: bool = False,
352360
verbose: bool = False):
353361
"""
354362
Triggers recombinant update for recombinant resources
@@ -366,7 +374,8 @@ def _update(dataset_types: Optional[List[str]],
366374
click.echo('%s %s updating' % (dtype, o))
367375
lc.action.recombinant_update(
368376
owner_org=o, dataset_type=dtype,
369-
force_update=force_update)
377+
force_update=force_update,
378+
delete_fields=delete_fields)
370379

371380

372381
def _expand_dataset_types(dataset_types: Optional[List[str]],
@@ -471,7 +480,12 @@ def _load_one_csv_file(name: str) -> int:
471480
_path, csv_name = os.path.split(name)
472481
assert csv_name.endswith('.csv'), csv_name
473482
resource_name = csv_name[:-4]
474-
click.echo(resource_name)
483+
singular_org_name = None
484+
if '.' in resource_name:
485+
singular_org_name, resource_name = tuple(resource_name.split('.'))
486+
click.echo('Resource name: %s' % resource_name)
487+
if singular_org_name:
488+
click.echo('Organization name: %s' % singular_org_name)
475489
chromo = get_chromo(resource_name)
476490

477491
dataset_type = chromo['dataset_type']
@@ -480,6 +494,11 @@ def _load_one_csv_file(name: str) -> int:
480494
errors = 0
481495

482496
for org_name, records in csv_data_batch(name, chromo):
497+
if not org_name and not singular_org_name:
498+
click.echo('could not find any org!')
499+
return 1
500+
if not org_name and singular_org_name:
501+
org_name = singular_org_name
483502
results = lc.action.package_search(
484503
q='type:%s AND organization:%s' % (dataset_type, org_name),
485504
include_private=True,

ckanext/recombinant/logic.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ def recombinant_update(context: Context, data_dict: DataDict):
6262
:param owner_org: organization name or id
6363
:param delete_resources: True to delete extra resources found
6464
:param force_update: True to force updating of datastore tables
65+
:param delete_fields: True to delete old fields not in schema,
66+
requires force_update=True
6567
'''
6668
lc, geno, dataset = _action_get_dataset(context, data_dict)
6769

@@ -70,7 +72,8 @@ def recombinant_update(context: Context, data_dict: DataDict):
7072
delete_resources=asbool(data_dict.get('delete_resources', False)))
7173
_update_datastore(
7274
lc, geno, dataset,
73-
force_update=asbool(data_dict.get('force_update', False)))
75+
force_update=asbool(data_dict.get('force_update', False)),
76+
delete_fields=asbool(data_dict.get('delete_fields', False)))
7477

7578

7679
def recombinant_show(context: Context, data_dict: DataDict) -> Dict[str, Any]:
@@ -113,11 +116,12 @@ def recombinant_show(context: Context, data_dict: DataDict) -> Dict[str, Any]:
113116
try:
114117
ds = lc.action.datastore_search(
115118
resource_id=resource['id'],
116-
limit=1)
119+
limit=0)
117120
datastore_correct = _datastore_match(r['fields'], ds['fields'])
118121
out['datastore_correct'] = datastore_correct
119122
resources_correct = resources_correct and datastore_correct
120123
out['datastore_rows'] = ds.get('total', 0)
124+
out['datastore_active'] = True
121125
except NotFound:
122126
out['error'] = 'datastore table missing'
123127
resources_correct = False
@@ -244,7 +248,8 @@ def _update_dataset(lc: LocalCKAN,
244248
def _update_datastore(lc: LocalCKAN,
245249
geno: Dict[str, Any],
246250
dataset: Dict[str, Any],
247-
force_update: bool = False):
251+
force_update: bool = False,
252+
delete_fields: bool = False):
248253
"""
249254
call lc.action.datastore_create to create tables or add
250255
columns to existing datastore tables based on dataset definition
@@ -259,6 +264,7 @@ def _update_datastore(lc: LocalCKAN,
259264
chromo['resource_name'], dataset['id'])
260265
resource_id = resource_ids[chromo['resource_name']]
261266
fields = datastore_fields(chromo['fields'], datastore_text_types)
267+
do_delete_fields = False
262268
try:
263269
ds = lc.action.datastore_search(resource_id=resource_id, limit=0)
264270
except NotFound:
@@ -273,6 +279,18 @@ def _update_datastore(lc: LocalCKAN,
273279
for f in datastore_fields(chromo['fields'], datastore_text_types):
274280
if f['id'] not in seen:
275281
fields.append(f)
282+
if delete_fields:
283+
# remove any fields from DS not in Schema
284+
new_fields = []
285+
schema_field_ids = set(
286+
f['id'] for f in datastore_fields(chromo['fields'],
287+
datastore_text_types))
288+
for f in fields:
289+
if f['id'] not in schema_field_ids:
290+
do_delete_fields = True
291+
continue
292+
new_fields.append(f)
293+
fields = new_fields
276294

277295
trigger_names = _update_triggers(lc, chromo)
278296

@@ -292,6 +310,7 @@ def _update_datastore(lc: LocalCKAN,
292310
lc.action.datastore_create(
293311
resource_id=resource_id,
294312
fields=fields,
313+
delete_fields=do_delete_fields,
295314
primary_key=chromo.get('datastore_primary_key', []),
296315
foreign_keys=foreign_keys,
297316
indexes=chromo.get('datastore_indexes', []),
@@ -410,7 +429,8 @@ def _datastore_match(fs: List[Dict[str, Any]], fields: List[Dict[str, Any]]) ->
410429
"""
411430
# XXX: does not check types or extra columns at this time
412431
existing = set(c['id'] for c in fields)
413-
return all(f['datastore_id'] in existing for f in fs)
432+
return all(f['datastore_id'] in existing for f in fs
433+
if not f.get('published_resource_computed_field', False))
414434

415435

416436
@chained_action

ckanext/recombinant/read_csv.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,7 @@ def csv_data_batch(csv_path: str,
3939

4040
if strict:
4141
expected = [f['datastore_id'] for f in chromo['fields'] if not f.get(
42-
'published_resource_computed_field')] + \
43-
['owner_org', 'owner_org_title']
42+
'published_resource_computed_field')]
4443
assert cols == expected, 'column mismatch:\n{0}\n{1}'.format(
4544
cols, expected)
4645

ckanext/recombinant/templates/recombinant/resource_edit.html

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,17 @@
4545
{% block action_panels %}
4646
{% if dataset %}
4747
{% if 'error' not in resource %}
48+
{% if g.userobj.sysadmin and not resource.datastore_correct %}
49+
{# only let sysadmins refresh the recombinant record via UI #}
50+
<div class="module-alert alert alert-warning">
51+
<h3>{{ _("The Recombinant resource is out of date") }}{% snippet 'snippets/sysadmin_only.html' %}</h3>
52+
<p style="margin-bottom: 10px;">{{ _('You can refresh your resource in the database to try to solve the problem.') }}</p>
53+
<form id="create-pd-resource" method="post">
54+
{{ h.csrf_input() }}
55+
<button type="submit" class="btn btn-danger mrgn-bttm-md m-b-3" name="refresh-hard">{{_('Refresh…')}}</button>
56+
</form>
57+
</div>
58+
{% endif %}
4859
<div class="wb-tabs">
4960
<div class="tabpanels">
5061
{% block update_panel %}
@@ -59,6 +70,34 @@
5970
{% snippet "recombinant/snippets/xls_download.html",
6071
pkg=dataset, errors=errors %}
6172
{% endif %}
73+
{% if g.userobj.sysadmin and resource.datastore_active %}
74+
{# only let sysadmins datastore dump via UI #}
75+
{% set filename = dataset.owner_org ~ '.' ~ resource.name %}
76+
<h3>{{ _('DataStore Dump') }}{% snippet 'snippets/sysadmin_only.html' %}</h3>
77+
<div>
78+
<button class="btn btn-primary dropdown-toggle" role="button" id="dropdownDownloadFormat" data-bs-toggle="dropdown" aria-expanded="false" aria-label="{{ _('List of downloadable formats') }}">
79+
{{ _('Download') }}&nbsp;
80+
</button>
81+
<ul class="dropdown-menu" aria-labelledby="dropdownDownloadFormat">
82+
<li>
83+
<a class="dropdown-item" href="{{ h.url_for('datastore.dump', resource_id=resource.id, bom=True, filename=filename) }}"
84+
target="_blank" rel="noreferrer"><span>CSV</span></a>
85+
</li>
86+
<li>
87+
<a class="dropdown-item" href="{{ h.url_for('datastore.dump', resource_id=resource.id, format='tsv', bom=True, filename=filename) }}"
88+
target="_blank" rel="noreferrer"><span>TSV</span></a>
89+
</li>
90+
<li>
91+
<a class="dropdown-item" href="{{ h.url_for('datastore.dump', resource_id=resource.id, format='json', filename=filename) }}"
92+
target="_blank" rel="noreferrer"><span>JSON</span></a>
93+
</li>
94+
<li>
95+
<a class="dropdown-item" href="{{ h.url_for('datastore.dump', resource_id=resource.id, format='xml', filename=filename) }}"
96+
target="_blank" rel="noreferrer"><span>XML</span></a>
97+
</li>
98+
</ul>
99+
</div>
100+
{% endif %}
62101
{% block notices %}{% endblock %}
63102
</details>
64103
{% endblock %}
@@ -108,6 +147,7 @@ <h3>{{_("Error loading your records")}}</h3>
108147
<p>{{_("We were unable to retrieve your proactive publication records from our database. Please contact <a href=\"open-ouvert@tbs-sct.gc.ca\">open-ouvert@tbs-sct.gc.ca</a>.")}}</p>
109148
{% if g.userobj.sysadmin %}
110149
{# only let sysadmins refresh the recombinant record via UI #}
150+
<h4>{{ _('Refresh resource') }}{% snippet 'snippets/sysadmin_only.html' %}</h4>
111151
<form id="create-pd-resource" method="post">
112152
{{ h.csrf_input() }}
113153
<button type="submit" class="btn btn-default mrgn-bttm-md m-b-3" name="refresh">{{_('Refresh…')}}</button>
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
&nbsp;<sup style="cursor: help;" title="{{ _('Sysadmin only') }}"><small><span class="fa fa-gavel text-info" aria-hidden="true"></span></small></sup>

ckanext/recombinant/views.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727
from ckan.logic import ValidationError, NotAuthorized
2828
from ckan.model.group import Group
29-
from ckan.authz import has_user_permission_for_group_or_org
29+
from ckan.authz import has_user_permission_for_group_or_org, is_sysadmin
3030

3131
from ckan.views.dataset import _get_package_type
3232

@@ -558,7 +558,10 @@ def preview_table(resource_name: str,
558558
except RecombinantException:
559559
return abort(404, _('Recombinant resource_name not found'))
560560

561-
if 'create' in request.form or 'refresh' in request.form:
561+
if (
562+
'create' in request.form or
563+
'refresh-hard' in request.form or
564+
'refresh' in request.form):
562565
# check if the user can update datasets for organization
563566
# admin and editors should be able to init recombinant records
564567
if not has_user_permission_for_group_or_org(org_object.id,
@@ -572,17 +575,26 @@ def preview_table(resource_name: str,
572575
dataset_type=chromo['dataset_type'], owner_org=owner_org)
573576
# check that the resource has errors
574577
for _r in dataset['resources']:
575-
if _r['name'] == resource_name and 'error' in _r:
578+
if _r['name'] == resource_name and ('error' in _r or
579+
not _r['datastore_correct']):
576580
raise NotFound
577581
except NotFound:
578582
try:
579583
if 'create' in request.form:
580584
lc.action.recombinant_create(
581585
dataset_type=chromo['dataset_type'], owner_org=owner_org)
582-
else:
586+
elif 'refresh-hard' in request.form or 'refresh' in request.form:
587+
if not is_sysadmin(g.user):
588+
# only sysadmins can refresh via UI
589+
return abort(403)
590+
delete_fields = False
591+
if 'refresh-hard' in request.form:
592+
delete_fields = True
583593
lc.action.recombinant_update(
584594
dataset_type=chromo['dataset_type'], owner_org=owner_org,
585-
force_update=True)
595+
force_update=True,
596+
delete_fields=delete_fields)
597+
h.flash_success(_('Resources successfully refreshed.'))
586598
except NotAuthorized as e:
587599
return abort(403, e.message or '')
588600
return h.redirect_to(

0 commit comments

Comments
 (0)