@@ -948,64 +948,89 @@ def get_dataset_data_from_defintion(self, dataset_definition):
948948 return raw_template
949949
950950
951-
952951 def create_or_update_dataset (self , dataset_definition : dict , dataset_id : str = None ,recursive : bool = True , update : bool = False ) -> bool :
953952 # Read dataset definition from template
954953 data = self .get_dataset_data_from_defintion (dataset_definition )
955954 template = Template (json .dumps (data ))
956955 cur_required = dataset_definition .get ('dependsOn' , dict ()).get ('cur' )
957956 athena_datasource = None
958957
959-
960958 if not len (self .qs .athena_datasources ):
961959 logger .info ('No Athena datasources found, attempting to create one' )
962960 self .qs .AthenaWorkGroup = self .athena .WorkGroup
963- self .qs .create_data_source ()
961+ self .qs .create_data_source () # FIXME: we need to use name/id provided by user if any
962+ # FIXME: we need to cleanup if datasource creation fails
964963
965- if not len ( self .qs .athena_datasources ) :
966- logger .info ('No Athena datasources available, failing' )
967- print ('No Athena datasources detected and unable to create one. Please create at least one dataset manually if it fails.' )
964+ if not self .qs .athena_datasources :
965+ logger .info ('No valid DataSources available, failing' )
966+ print ('No valid DataSources detected and unable to create one. Please create at least one DataSet manually in QuickSight and see why it fails.' )
968967 # Not failing here to let views creation below
969968 else :
970- pre_compiled_dataset = json .loads (template .safe_substitute ())
971- dataset_name = pre_compiled_dataset .get ('Name' )
972-
973- # let's find the schema/database and workgroup name
974- schemas = []
975- datasources = []
976- if dataset_id :
977- schemas = self .qs .get_datasets (id = dataset_id )[0 ].schemas
978- datasources = self .qs .get_datasets (id = dataset_id )[0 ].datasources
979- else : # try to find dataset and get athena database
980- found_datasets = self .qs .get_datasets (name = dataset_name )
981- if found_datasets :
982- schemas = list (set (sum ([d .schemas for d in found_datasets ], [])))
983- datasources = list (set (sum ([d .datasources for d in found_datasets ], [])))
984-
985- if len (schemas ) == 1 :
986- self .athena .DatabaseName = schemas [0 ]
987- # else user will be suggested to choose database
988- if len (datasources ) == 1 and datasources [0 ] in self .qs .athena_datasources :
989- athena_datasource = self .qs .get_datasources (id = datasources [0 ])[0 ]
969+ datasource_choices = {
970+ f"{ datasource .name } { id_ } (workgroup={ datasource .AthenaParameters .get ('WorkGroup' )} )" :id_
971+ for id_ , datasource in self .qs .athena_datasources .items ()
972+ }
973+ if get_parameters ().get ('quicksight-datasource-id' ):
974+ # We have explicit choice of datasource
975+ datasource_id = get_parameters ().get ('quicksight-datasource-id' )
976+ if datasource_id not in datasource_choices .values ():
977+ logger .critical (
978+ f'quicksight-datasource-id={ datasource_id } not found or not in a valid state. '
979+ f'Here is a list of available DataSources (Name ID WorkGroup): { datasource_choices .keys ()} '
980+ )
981+ exit (1 )
982+ athena_datasource = self .qs .athena_datasources [datasource_id ]
983+
990984 else :
991- #try to find a datasource with defined workgroup
992- workgroup = self .athena .WorkGroup
993- datasources_with_workgroup = self .qs .get_datasources (athena_workgroup_name = workgroup )
994- if len (datasources_with_workgroup ) == 1 :
995- athena_datasource = datasources_with_workgroup [0 ]
985+ # Datasources are not obvious for customer so we will try to do our best guess
986+ # - if there is just one? -> take that one
987+ # - if datasource is referenced in existing dataset? -> take that one
988+ # - if athena workgroup defined -> Try to find a datasource with this workgroup
989+ # - and if still nothing -> ask an explicit choice from the user
990+ pre_compiled_dataset = json .loads (template .safe_substitute ())
991+ dataset_name = pre_compiled_dataset .get ('Name' )
992+
993+ # let's find the schema/database and workgroup name
994+ schemas = []
995+ datasources = []
996+ if dataset_id :
997+ schemas = self .qs .get_datasets (id = dataset_id )[0 ].schemas
998+ datasources = self .qs .get_datasets (id = dataset_id )[0 ].datasources
999+ else : # try to find dataset and get athena database
1000+ found_datasets = self .qs .get_datasets (name = dataset_name )
1001+ if found_datasets :
1002+ schemas = list (set (sum ([d .schemas for d in found_datasets ], [])))
1003+ datasources = list (set (sum ([d .datasources for d in found_datasets ], [])))
1004+
1005+ if len (schemas ) == 1 :
1006+ self .athena .DatabaseName = schemas [0 ]
1007+ # else user will be suggested to choose database anyway
1008+
1009+ if len (datasources ) == 1 and datasources [0 ] in self .qs .athena_datasources :
1010+ athena_datasource = self .qs .get_datasources (id = datasources [0 ])[0 ]
9961011 else :
997- #cannot find the right athena_datasource
998- athena_datasource = get_parameter (
999- param_name = 'quicksight-datasource-arn' ,
1000- message = f"Please choose DataSource ARN" ,
1001- choices = {f"{ arn } (workgroup={ datasource .AthenaParameters .get ('WorkGroup' )} )" :datasource for arn , datasource in self .qs .athena_datasources .items ()},
1002- )
1003- logger .info (f'Found { len (datasources )} Athena datasources, not using { athena_datasource .id } ' )
1004- if isinstance (athena_datasource , Datasource ):
1005- self .athena .WorkGroup = athena_datasource .AthenaParameters .get ('WorkGroup' )
1012+ #try to find a datasource with defined workgroup
1013+ workgroup = self .athena .WorkGroup
1014+ datasources_with_workgroup = self .qs .get_datasources (athena_workgroup_name = workgroup )
1015+ if len (datasources_with_workgroup ) == 1 :
1016+ athena_datasource = datasources_with_workgroup [0 ]
1017+ else :
1018+ #cannot find the right athena_datasource
1019+ logger .info ('Multiple DataSources found.' )
1020+ datasource_id = get_parameter (
1021+ param_name = 'quicksight-datasource-id' ,
1022+ message = f"Please choose DataSource (Choose the first one if not sure)." ,
1023+ choices = datasource_choices ,
1024+ )
1025+ athena_datasource = self .qs .athena_datasources [datasource_id ]
1026+ logger .info (f'Found { len (datasources )} Athena datasources, not using { athena_datasource .id } ' )
1027+ if isinstance (athena_datasource , Datasource ):
1028+ self .athena .WorkGroup = athena_datasource .AthenaParameters .get ('WorkGroup' )
1029+ else :
1030+ logger .debug ('Athena_datasource is not defined. Will only create views' )
10061031
10071032 # Check for required views
1008- _views = dataset_definition .get ('dependsOn' ).get ('views' )
1033+ _views = dataset_definition .get ('dependsOn' , {} ).get ('views' , [] )
10091034 required_views = [(self .cur .tableName if cur_required and name == '${cur_table_name}' else name ) for name in _views ]
10101035
10111036 self .athena .discover_views (required_views )
0 commit comments