Skip to content

Commit 25bcb3d

Browse files
committed
WIP
1 parent 35d4547 commit 25bcb3d

File tree

6 files changed

+405
-341
lines changed

6 files changed

+405
-341
lines changed

scrunch/datasets.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1129,7 +1129,7 @@ def create_fill_values(self, variables, name, alias, description=''):
11291129
if "variable" in else_case and "name" in else_case:
11301130
raise ValueError("Else case can be either variable or category not both")
11311131

1132-
aliases = {c["variable"] for c in variables}
1132+
aliases = {c["var"] for c in variables}
11331133
vars_by_alias = self.resource.variables.by("alias")
11341134
types = {vars_by_alias[al]["type"] for al in aliases}
11351135
if types != {"categorical"}:
@@ -1162,13 +1162,13 @@ def create_fill_values(self, variables, name, alias, description=''):
11621162
})
11631163

11641164
expr = {"function": "case", "args": args}
1165-
fill_map = {str(cid): {"variable": vars_by_alias[v["variable"]]["id"]}
1165+
fill_map = {str(cid): {"var": v["var"]}
11661166
for cid, v in zip(cat_ids, variables)}
11671167

1168-
if "variable" in else_case:
1168+
if "var" in else_case:
11691169
# We are in the case of a default fill, replace the -1 with the new
11701170
# variable
1171-
fill_map["-1"] = {"variable": vars_by_alias[else_case["variable"]]["id"]}
1171+
fill_map["-1"] = {"var": else_case["var"]}
11721172

11731173
fill_expr = {
11741174
"function": "fill",

scrunch/expressions.py

Lines changed: 72 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151

5252
import sys
5353

54-
PY311 = sys.version_info[:2] == (3, 11)
54+
GT_PY_311 = sys.version_info[:2] >= (3, 11)
5555

5656
if six.PY2:
5757
from urllib import urlencode
@@ -292,7 +292,7 @@ def _parse(node, parent=None):
292292
# We will take the subvariable alias bit from the subscript
293293
# and return an object with the array and subvariable alias
294294
array_alias = dict(ast.iter_fields(fields[0][1]))["id"]
295-
if PY311:
295+
if GT_PY_311:
296296
name_node = dict(ast.iter_fields(fields[1][1]))
297297
subvariable_alias = name_node["id"]
298298
else:
@@ -494,17 +494,17 @@ def get_subvariables_resource(var_url, var_index):
494494
return {sv['alias'].strip('#'): sv['id'] for sv in sub_variables.values()}
495495

496496

497-
def _get_categories_from_var_index(var_index, var_url):
498-
return var_index[var_url].entity.body.categories
497+
def _get_categories_from_var_index(vars_by_alias, var_alias):
498+
return vars_by_alias[var_alias].entity.body.categories
499499

500500

501-
def adapt_multiple_response(var_url, values, var_index):
501+
def adapt_multiple_response(var_alias, values, vars_by_alias):
502502
"""
503503
Converts multiple response arguments
504504
to column.
505505
:return: the new args for multiple_response
506506
"""
507-
aliases = get_subvariables_resource(var_url, var_index)
507+
aliases = get_subvariables_resource(var_alias, vars_by_alias)
508508
result = []
509509

510510
if all(isinstance(value, int) for value in values):
@@ -515,35 +515,38 @@ def adapt_multiple_response(var_url, values, var_index):
515515
# scenario var.any([subvar1, subvar2])
516516
# in this scenario, we only want category ids that refer to `selected` categories
517517
column = [
518-
cat.get("id") for cat in _get_categories_from_var_index(var_index, var_url) if cat.get("selected")
518+
cat.get("id") for cat in _get_categories_from_var_index(vars_by_alias, var_alias) if cat.get("selected")
519519
]
520-
variables = [var_id for alias, var_id in aliases.items() if alias in values]
520+
subvars = [sva for sva in aliases if sva in values]
521521

522-
for variable_id in variables:
523-
variable_url = "{}subvariables/{}/".format(var_url, variable_id)
522+
for sva in subvars:
524523
result.append({
525-
"variable": variable_url,
524+
"var": var_alias,
525+
'axes': [sva],
526526
"column": column
527527
})
528528

529529
return result, True
530530

531531

532-
def _update_values_for_multiple_response(new_values, values, subitem, var_index, arrays):
532+
def _update_values_for_multiple_response(new_values, values, subitem, vars_by_alias, arrays):
533533
"""
534534
- Multiple response does not need the `value` key, but it relies on the `column` key
535535
- Remove from `arrays` (subvariable list) the ones that should not be considered
536536
"""
537-
var_url = subitem.get("variable", "").split("subvariables")[0]
537+
# var_url = subitem.get("variable", "").split("subvariables")[0]
538+
var_alias = subitem.get("var")
538539
column = new_values[0].get("column")
539540
value = values[0].get("value")
540-
if var_url and var_index[var_url]['type'] == 'multiple_response':
541+
# if var_url and var_index[var_url]['type'] == 'multiple_response':
542+
if var_alias and vars_by_alias[var_alias]['type'] == 'multiple_response':
541543
if column:
542544
values[0]['column'] = column
543545
elif value is not None:
544546
values[0]['column'] = value
545547
values[0].pop("value", None)
546-
arrays[0] = [new_value["variable"] for new_value in new_values]
548+
subvar_ids_by_aliases = {v['alias']: k for k, v in vars_by_alias[var_alias]['entity']['subvariables']['index'].items()}
549+
arrays[0] = [subvar_ids_by_aliases[new_value["axes"][0]] for new_value in new_values]
547550

548551

549552
def process_expr(obj, ds):
@@ -567,7 +570,7 @@ def ensure_category_ids(subitems, values, arrays, variables=variables):
567570
def variable_id(variable_url):
568571
return variable_url.split('/')[-2]
569572

570-
def category_ids(var_id, var_value, variables=variables):
573+
def category_ids(var_alias, var_value, variables=variables):
571574
value = None
572575
if isinstance(var_value, list) or isinstance(var_value, tuple):
573576
# {'values': [val1, val2, ...]}
@@ -578,7 +581,7 @@ def category_ids(var_id, var_value, variables=variables):
578581
value.append(val)
579582
continue
580583
for var in variables:
581-
if variables[var]['id'] == var_id:
584+
if variables[var]['alias'] == var_alias:
582585
if 'categories' in variables[var]:
583586
for cat in variables[var]['categories']:
584587
if cat['name'] == val:
@@ -587,22 +590,21 @@ def category_ids(var_id, var_value, variables=variables):
587590
# variable has no categories, return original
588591
# list of values
589592
value = var_value
590-
591593
elif isinstance(var_value, str):
592-
for var in variables:
594+
for va, var in variables.items():
593595
# if the variable is a date, don't try to process its categories
594-
if variables[var]['type'] == 'datetime':
596+
if var['type'] == 'datetime':
595597
return var_value
596-
if variables[var]['id'] == var_id and 'categories' in variables[var]:
598+
if va == var_alias and 'categories' in var:
597599
found = False
598-
for cat in variables[var]['categories']:
600+
for cat in var['categories']:
599601
if cat['name'] == var_value:
600602
value = cat['id']
601603
found = True
602604
break
603605
if not found:
604606
raise ValueError("Couldn't find a category id for category %s in filter for variable %s" % (var_value, var))
605-
elif 'categories' not in variables[var]:
607+
elif 'categories' not in variables[var['alias']]:
606608
return var_value
607609

608610
else:
@@ -612,20 +614,19 @@ def category_ids(var_id, var_value, variables=variables):
612614
# special case for multiple_response variables
613615
if len(subitems) == 2:
614616
_variable, _value = subitems
615-
var_url = _variable.get('variable')
617+
var_alias = _variable.get('var')
616618
_value_key = next(iter(_value))
617-
if _value_key in {'column', "value"} and var_url:
618-
if var_url in var_index and var_index[var_url]['type'] == 'multiple_response':
619-
result = adapt_multiple_response(var_url, _value[_value_key], var_index)
619+
if _value_key in {'column', "value"} and var_alias:
620+
vars_by_alias = {v['alias']: v for _, v in var_index.items()}
621+
if var_alias in vars_by_alias and vars_by_alias[var_alias]['type'] == 'multiple_response':
622+
result = adapt_multiple_response(var_alias, _value[_value_key], vars_by_alias)
620623
# handle the multiple response type
621-
_update_values_for_multiple_response(result[0], values, subitems[0], var_index, arrays)
624+
_update_values_for_multiple_response(result[0], values, subitems[0], vars_by_alias, arrays)
622625
return result
623626

624627
for item in subitems:
625-
if isinstance(item, dict) and 'variable' in item and not isinstance(item["variable"], dict):
626-
var_id = variable_id(item['variable'])
627-
elif isinstance(item, dict) and 'value' in item:
628-
item['value'] = category_ids(var_id, item['value'])
628+
if isinstance(item, dict) and 'value' in item:
629+
item['value'] = category_ids(var_alias, item['value'])
629630
_subitems.append(item)
630631

631632
return _subitems, True
@@ -657,11 +658,12 @@ def _process(obj, variables):
657658
except TypeError:
658659
raise ValueError("Invalid variable alias '%s'" % args[0]['variable'])
659660

661+
new_obj = copy.deepcopy(obj)
660662
for key, val in obj.items():
661663
if isinstance(val, dict) and "array" not in val:
662664
# This is not an array object, then it's a nested ZCL expression
663665
# so we need to proceed for nested processing.
664-
obj[key] = _process(val, variables)
666+
new_obj[key] = _process(val, variables)
665667
elif isinstance(val, (list, tuple)):
666668
subitems = []
667669
for subitem in val:
@@ -671,6 +673,13 @@ def _process(obj, variables):
671673
arrays.append(subitem.pop('subvariables'))
672674
elif 'value' in subitem or 'column' in subitem:
673675
values.append(subitem)
676+
elif 'var' in subitem:
677+
var = variables.get(subitem['var'])
678+
if var['type'] in ARRAY_TYPES and 'axes' not in subitem:
679+
# Add info about the fact that the "var" referenced
680+
# variable is an array, so that the validation can
681+
# work properly in the remainder of the code.
682+
arrays.append(var['subvariables'])
674683
subitems.append(subitem)
675684

676685
has_value = any(
@@ -683,13 +692,14 @@ def _process(obj, variables):
683692
has_value = any('column' in item for item in subitems if not is_number(item))
684693

685694
has_variable = any(
686-
'variable' in item for item in subitems if not is_number(item)
695+
# 'variable' in item for item in subitems if not is_number(item)
696+
'var' in item for item in subitems if not is_number(item)
687697
)
688698

689699
if has_value and has_variable:
690700
subitems, needs_wrap = ensure_category_ids(subitems, values, arrays)
691701

692-
obj[key] = subitems
702+
new_obj[key] = subitems
693703
elif key == 'variable':
694704
if isinstance(val, dict) and "array" in val:
695705
# This is a subvariable reference with this shape:
@@ -707,7 +717,7 @@ def _process(obj, variables):
707717
except KeyError:
708718
raise ValueError("Invalid subvariable `%s` for array '%s'" % (subvariables, array_alias))
709719
subvar_url = "%svariables/%s/subvariables/%s/" % (base_url, array_id, subvar_id)
710-
obj[key] = subvar_url
720+
new_obj[key] = subvar_url
711721
else:
712722
# Otherwise a regular variable references {"variable": alias}
713723
var = variables.get(val)
@@ -716,10 +726,10 @@ def _process(obj, variables):
716726

717727
# TODO: We shouldn't stitch URLs together, use the API
718728
if var.get('is_subvar'):
719-
obj[key] = '%svariables/%s/subvariables/%s/' \
729+
new_obj[key] = '%svariables/%s/subvariables/%s/' \
720730
% (base_url, var['parent_id'], var['id'])
721731
else:
722-
obj[key] = '%svariables/%s/' % (base_url, var['id'])
732+
new_obj[key] = '%svariables/%s/' % (base_url, var['id'])
723733

724734
if var['type'] in ARRAY_TYPES:
725735
subvariables = []
@@ -731,8 +741,19 @@ def _process(obj, variables):
731741
'%svariables/%s/subvariables/%s/'
732742
% (base_url, var['id'], subvar_id)
733743
)
744+
elif key == 'var':
745+
var = variables.get(val)
746+
if not var:
747+
raise ValueError("Invalid variable alias '%s'" % val)
748+
if var.get('is_subvar'):
749+
parents_by_ids = {v['id']: v for v in variables.values() if not v.get('is_subvar')}
750+
parent = parents_by_ids[var['parent_id']]
751+
new_obj[key] = parent['alias']
752+
new_obj['axes'] = [val]
734753
elif key == 'function':
735754
op = val
755+
756+
obj = new_obj
736757

737758
if subvariables:
738759
obj['subvariables'] = subvariables
@@ -778,11 +799,21 @@ def _process(obj, variables):
778799
value['value'] = inner_value[0]
779800

780801
if len(subvariables) == 1:
802+
# obj['function'] = real_op
803+
# obj["args"] = [
804+
# {'variable': subvariables[0]},
805+
# value
806+
# ]
781807
obj['function'] = real_op
782-
obj["args"] = [
783-
{'variable': subvariables[0]},
808+
obj['args'] = [
809+
{
810+
'var': var['alias'],
811+
'axes': [var['subreferences'][subvariables[0]]['alias']]
812+
},
784813
value
785814
]
815+
# obj['args'][0]['axes'] = [var['subreferences'][subvariables[0]]['alias']]
816+
# obj['args'][1] = value
786817
else:
787818
obj = {
788819
'function': expansion_op,
@@ -793,7 +824,7 @@ def _process(obj, variables):
793824
[{
794825
'function': real_op,
795826
'args': [
796-
{'variable': subvar},
827+
{'var': var['alias'], 'axes': [var['subreferences'][subvar]['alias']]},
797828
value
798829
]
799830
} for subvar in subvariables]

scrunch/tests/test_cubes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def test_pass_filter_expression(self, mock_fetch_cube):
4848
"function": ">",
4949
"args": [
5050
{
51-
"variable": "https://test.crunch.io/api/datasets/123456/variables/0001/"
51+
"var": "var1_alias"
5252
},
5353
{"value": 1},
5454
],

0 commit comments

Comments
 (0)