Skip to content

Commit 5f63253

Browse files
authored
fix datasource selection (#326)
* fix datasource selection
1 parent 31ee20e commit 5f63253

File tree

3 files changed

+68
-43
lines changed

3 files changed

+68
-43
lines changed

cid/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11

2-
__version__ = '0.2.2'
2+
__version__ = '0.2.3'

cid/cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def deploy(ctx, **kwargs):
9393
--athena-workgroup TEXT Athena workgroup
9494
--glue-data-catalog TEXT Glue data catalog
9595
--cur-table-name TEXT CUR table name
96-
--quicksight-datasource-arn TEXT QuickSight Datasource ARN (if not found one with provided Athena workgroup)
96+
--quicksight-datasource-id TEXT QuickSight Datasource ID (not needed if only one DataSource exists with a given Athena workgroup)
9797
--quicksight-user TEXT QuickSight user
9898
--dataset-{dataset_name}-id TEXT QuickSight dataset id for a specific dataset
9999
--view-{view_name}-{parameter} TEXT a custom parameter for a view creation, can use variable: {account_id}

cid/common.py

Lines changed: 66 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -948,64 +948,89 @@ def get_dataset_data_from_defintion(self, dataset_definition):
948948
return raw_template
949949

950950

951-
952951
def create_or_update_dataset(self, dataset_definition: dict, dataset_id: str=None,recursive: bool=True, update: bool=False) -> bool:
953952
# Read dataset definition from template
954953
data = self.get_dataset_data_from_defintion(dataset_definition)
955954
template = Template(json.dumps(data))
956955
cur_required = dataset_definition.get('dependsOn', dict()).get('cur')
957956
athena_datasource = None
958957

959-
960958
if not len(self.qs.athena_datasources):
961959
logger.info('No Athena datasources found, attempting to create one')
962960
self.qs.AthenaWorkGroup = self.athena.WorkGroup
963-
self.qs.create_data_source()
961+
self.qs.create_data_source() # FIXME: we need to use name/id provided by user if any
962+
# FIXME: we need to cleanup if datasource creation fails
964963

965-
if not len(self.qs.athena_datasources):
966-
logger.info('No Athena datasources available, failing')
967-
print('No Athena datasources detected and unable to create one. Please create at least one dataset manually if it fails.')
964+
if not self.qs.athena_datasources:
965+
logger.info('No valid DataSources available, failing')
966+
print('No valid DataSources detected and unable to create one. Please create at least one DataSet manually in QuickSight and see why it fails.')
968967
# Not failing here to let views creation below
969968
else:
970-
pre_compiled_dataset = json.loads(template.safe_substitute())
971-
dataset_name = pre_compiled_dataset.get('Name')
972-
973-
# let's find the schema/database and workgroup name
974-
schemas = []
975-
datasources = []
976-
if dataset_id:
977-
schemas = self.qs.get_datasets(id=dataset_id)[0].schemas
978-
datasources = self.qs.get_datasets(id=dataset_id)[0].datasources
979-
else: # try to find dataset and get athena database
980-
found_datasets = self.qs.get_datasets(name=dataset_name)
981-
if found_datasets:
982-
schemas = list(set(sum([d.schemas for d in found_datasets], [])))
983-
datasources = list(set(sum([d.datasources for d in found_datasets], [])))
984-
985-
if len(schemas) == 1:
986-
self.athena.DatabaseName = schemas[0]
987-
# else user will be suggested to choose database
988-
if len(datasources) == 1 and datasources[0] in self.qs.athena_datasources:
989-
athena_datasource = self.qs.get_datasources(id=datasources[0])[0]
969+
datasource_choices = {
970+
f"{datasource.name} {id_} (workgroup={datasource.AthenaParameters.get('WorkGroup')})":id_
971+
for id_, datasource in self.qs.athena_datasources.items()
972+
}
973+
if get_parameters().get('quicksight-datasource-id'):
974+
# We have explicit choice of datasource
975+
datasource_id = get_parameters().get('quicksight-datasource-id')
976+
if datasource_id not in datasource_choices.values():
977+
logger.critical(
978+
f'quicksight-datasource-id={datasource_id} not found or not in a valid state. '
979+
f'Here is a list of available DataSources (Name ID WorkGroup): {datasource_choices.keys()}'
980+
)
981+
exit(1)
982+
athena_datasource = self.qs.athena_datasources[datasource_id]
983+
990984
else:
991-
#try to find a datasource with defined workgroup
992-
workgroup = self.athena.WorkGroup
993-
datasources_with_workgroup = self.qs.get_datasources(athena_workgroup_name=workgroup)
994-
if len(datasources_with_workgroup) == 1:
995-
athena_datasource = datasources_with_workgroup[0]
985+
# Datasources are not obvious for customer so we will try to do our best guess
986+
# - if there is just one? -> take that one
987+
# - if datasource is referenced in an existing dataset? -> take that one
988+
# - if athena workgroup defined -> Try to find a datasource with this workgroup
989+
# - and if still nothing -> ask an explicit choice from the user
990+
pre_compiled_dataset = json.loads(template.safe_substitute())
991+
dataset_name = pre_compiled_dataset.get('Name')
992+
993+
# let's find the schema/database and workgroup name
994+
schemas = []
995+
datasources = []
996+
if dataset_id:
997+
schemas = self.qs.get_datasets(id=dataset_id)[0].schemas
998+
datasources = self.qs.get_datasets(id=dataset_id)[0].datasources
999+
else: # try to find dataset and get athena database
1000+
found_datasets = self.qs.get_datasets(name=dataset_name)
1001+
if found_datasets:
1002+
schemas = list(set(sum([d.schemas for d in found_datasets], [])))
1003+
datasources = list(set(sum([d.datasources for d in found_datasets], [])))
1004+
1005+
if len(schemas) == 1:
1006+
self.athena.DatabaseName = schemas[0]
1007+
# else user will be suggested to choose database anyway
1008+
1009+
if len(datasources) == 1 and datasources[0] in self.qs.athena_datasources:
1010+
athena_datasource = self.qs.get_datasources(id=datasources[0])[0]
9961011
else:
997-
#cannot find the right athena_datasource
998-
athena_datasource = get_parameter(
999-
param_name='quicksight-datasource-arn',
1000-
message=f"Please choose DataSource ARN",
1001-
choices={f"{arn} (workgroup={datasource.AthenaParameters.get('WorkGroup')})":datasource for arn, datasource in self.qs.athena_datasources.items()},
1002-
)
1003-
logger.info(f'Found {len(datasources)} Athena datasources, not using {athena_datasource.id}')
1004-
if isinstance(athena_datasource, Datasource):
1005-
self.athena.WorkGroup = athena_datasource.AthenaParameters.get('WorkGroup')
1012+
#try to find a datasource with defined workgroup
1013+
workgroup = self.athena.WorkGroup
1014+
datasources_with_workgroup = self.qs.get_datasources(athena_workgroup_name=workgroup)
1015+
if len(datasources_with_workgroup) == 1:
1016+
athena_datasource = datasources_with_workgroup[0]
1017+
else:
1018+
#cannot find the right athena_datasource
1019+
logger.info('Multiple DataSources found.')
1020+
datasource_id = get_parameter(
1021+
param_name='quicksight-datasource-id',
1022+
message=f"Please choose DataSource (Choose the first one if not sure).",
1023+
choices=datasource_choices,
1024+
)
1025+
athena_datasource = self.qs.athena_datasources[datasource_id]
1026+
logger.info(f'Found {len(datasources)} Athena datasources, not using {athena_datasource.id}')
1027+
if isinstance(athena_datasource, Datasource):
1028+
self.athena.WorkGroup = athena_datasource.AthenaParameters.get('WorkGroup')
1029+
else:
1030+
logger.debug('Athena_datasource is not defined. Will only create views')
10061031

10071032
# Check for required views
1008-
_views = dataset_definition.get('dependsOn').get('views')
1033+
_views = dataset_definition.get('dependsOn', {}).get('views', [])
10091034
required_views = [(self.cur.tableName if cur_required and name =='${cur_table_name}' else name) for name in _views]
10101035

10111036
self.athena.discover_views(required_views)

0 commit comments

Comments
 (0)