@@ -134,16 +134,16 @@ def create_grants_df(database_name: str,object_type: str, object_key: str):
     return grants_df
 
 
-def create_table_ACLSs_df_for_databases(database_names: List[str]):
+def create_table_ACLSs_df_for_databases(database_names: List[str], include_catalog: bool):
 
     # TODO check Catalog heuristic:
     # if all databases are exported, we include the Catalog grants as well
     #. if only a few databases are exported: we exclude the Catalog
-    if database_names is None or database_names == '':
-        database_names = get_database_names()
-        include_catalog = True
-    else:
-        include_catalog = False
+    # if database_names is None or database_names == '':
+    #     database_names = get_database_names()
+    #     include_catalog = True
+    # else:
+    #     include_catalog = False
 
     num_databases_processed = len(database_names)
     num_tables_or_views_processed = 0
@@ -201,35 +201,50 @@ def create_table_ACLSs_df_for_databases(database_names: List[str]):
 # COMMAND ----------
 
 # DBTITLE 1,Run Export
+def chunks(lst, n):
+    """Yield successive n-sized chunks from lst."""
+    for i in range(0, len(lst), n):
+        yield lst[i:i + n]
+
+
 databases_raw = dbutils.widgets.get("Databases")
 output_path = dbutils.widgets.get("OutputPath")
 
 if databases_raw.rstrip() == '':
-    databases = None
+    databases = get_database_names()
+    include_catalog = True
     print(f"Exporting all databases")
 else:
     databases = [x.rstrip().lstrip() for x in databases_raw.split(",")]
+    include_catalog = False
     print(f"Exporting the following databases: {databases}")
 
+counter = 1
+for databases_chunks in chunks(databases, 1):
+    table_ACLs_df, num_databases_processed, num_tables_or_views_processed = create_table_ACLSs_df_for_databases(
+        databases_chunks, include_catalog
+    )
+
+    print(
+        f"{datetime.datetime.now()} total number processed chunk {counter}: databases: {num_databases_processed}, tables or views: {num_tables_or_views_processed}")
+    print(f"{datetime.datetime.now()} writing table ACLs to {output_path}")
+
+    # with table ACLs active, a direct write to DBFS is not allowed, so we store
+    # the dataframe as a single zipped JSON file, sorted for consistent file diffs
+    (
+        table_ACLs_df
+        # .coalesce(1)
+        .selectExpr("Database", "Principal", "ActionTypes", "ObjectType", "ObjectKey", "ExportTimestamp")
+        # .sort("Database","Principal","ObjectType","ObjectKey")
+        .write
+        .format("JSON")
+        .option("compression", "gzip")
+        .mode("append" if counter > 1 else "overwrite")
+        .save(output_path)
+    )
 
-table_ACLs_df, num_databases_processed, num_tables_or_views_processed = create_table_ACLSs_df_for_databases(databases)
-
-print(f"{datetime.datetime.now()} total number processed: databases: {num_databases_processed}, tables or views: {num_tables_or_views_processed}")
-print(f"{datetime.datetime.now()} writing table ACLs to {output_path}")
-
-# with table ACLS active, I direct write to DBFS is not allowed, so we store
-# the dateframe as a table for single zipped JSON file sorted, for consitent file diffs
-(
-    table_ACLs_df
-    .coalesce(1)
-    .selectExpr("Database","Principal","ActionTypes","ObjectType","ObjectKey","ExportTimestamp")
-    .sort("Database","Principal","ObjectType","ObjectKey")
-    .write
-    .format("JSON")
-    .option("compression","gzip")
-    .mode("overwrite")
-    .save(output_path)
-)
+    counter += 1
+    include_catalog = False
 
 
 # COMMAND ----------
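
Outside the notebook, the chunked-export control flow introduced above can be sketched in plain Python. This is a minimal illustration under stated assumptions, not the notebook's code: it uses no Spark or dbutils, replaces create_table_ACLSs_df_for_databases with a made-up row builder, and writes gzipped JSON lines directly so the overwrite-then-append behaviour of the first versus later chunks can be run locally (export_acls and all row values below are hypothetical).

import datetime
import gzip
import json
from typing import Iterator, List


def chunks(lst: List[str], n: int) -> Iterator[List[str]]:
    """Yield successive n-sized chunks from lst (same helper as in the diff)."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]


def export_acls(databases: List[str], output_path: str) -> None:
    counter = 1
    include_catalog = True  # catalog-level grants are only exported with the first chunk
    for databases_chunk in chunks(databases, 1):
        # stand-in for create_table_ACLSs_df_for_databases(databases_chunk, include_catalog)
        rows = []
        if include_catalog:
            rows.append({"Database": None, "Principal": "admins", "ActionTypes": ["OWN"],
                         "ObjectType": "CATALOG", "ObjectKey": "",
                         "ExportTimestamp": datetime.datetime.now().isoformat()})
        for db in databases_chunk:
            rows.append({"Database": db, "Principal": "users", "ActionTypes": ["SELECT"],
                         "ObjectType": "DATABASE", "ObjectKey": db,
                         "ExportTimestamp": datetime.datetime.now().isoformat()})

        # first chunk overwrites any previous export, later chunks append,
        # mirroring .mode("append" if counter > 1 else "overwrite") above
        mode = "ab" if counter > 1 else "wb"
        with gzip.open(output_path, mode) as f:
            for row in rows:
                f.write((json.dumps(row) + "\n").encode("utf-8"))

        print(f"{datetime.datetime.now()} processed chunk {counter}: {databases_chunk}")
        counter += 1
        include_catalog = False


export_acls(["sales", "hr", "finance"], "/tmp/table_acls.json.gz")

Note that appending creates one gzip member per chunk; gzip readers concatenate members transparently, so the output still reads as a single logical JSON-lines file.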