@@ -413,33 +413,33 @@ def get_databases(self, catalog_id: Optional[str] = None) -> Iterator[Dict[str,
413413 def get_tables (self ,
414414 catalog_id : Optional [str ] = None ,
415415 database : Optional [str ] = None ,
416- search : Optional [str ] = None ,
417- prefix : Optional [str ] = None ,
418- suffix : Optional [str ] = None ) -> Iterator [Dict [str , Any ]]:
416+ name_contains : Optional [str ] = None ,
417+ name_prefix : Optional [str ] = None ,
418+ name_suffix : Optional [str ] = None ) -> Iterator [Dict [str , Any ]]:
419419 """
420420 Get an iterator of tables
421421
422422 :param catalog_id: The ID of the Data Catalog from which to retrieve Databases. If none is provided, the AWS account ID is used by default.
423423 :param database: Filter a specific database
424- :param search : Select by a specific string on table name
425- :param prefix : Select by a specific prefix on table name
426- :param suffix : Select by a specific suffix on table name
424+ :param name_contains : Select by a specific string on table name
425+ :param name_prefix : Select by a specific prefix on table name
426+ :param name_suffix : Select by a specific suffix on table name
427427 :return: Iterator[Dict[str, Any]] of Tables
428428 """
429429 paginator = self ._client_glue .get_paginator ("get_tables" )
430430 args : Dict [str , str ] = {}
431431 if catalog_id is not None :
432432 args ["CatalogId" ] = catalog_id
433- if (prefix is not None ) and (suffix is not None ) and (search is not None ):
434- args ["Expression" ] = f"{ prefix } .*{ search } .*{ suffix } "
435- elif (prefix is not None ) and (suffix is not None ):
436- args ["Expression" ] = f"{ prefix } .*{ suffix } "
437- elif search is not None :
438- args ["Expression" ] = f".*{ search } .*"
439- elif prefix is not None :
440- args ["Expression" ] = f"{ prefix } .*"
441- elif suffix is not None :
442- args ["Expression" ] = f".*{ suffix } "
433+ if (name_prefix is not None ) and (name_suffix is not None ) and (name_contains is not None ):
434+ args ["Expression" ] = f"{ name_prefix } .*{ name_contains } .*{ name_suffix } "
435+ elif (name_prefix is not None ) and (name_suffix is not None ):
436+ args ["Expression" ] = f"{ name_prefix } .*{ name_suffix } "
437+ elif name_contains is not None :
438+ args ["Expression" ] = f".*{ name_contains } .*"
439+ elif name_prefix is not None :
440+ args ["Expression" ] = f"{ name_prefix } .*"
441+ elif name_suffix is not None :
442+ args ["Expression" ] = f".*{ name_suffix } "
443443 if database is not None :
444444 databases = [database ]
445445 else :
@@ -455,27 +455,41 @@ def tables(self,
455455 limit : int = 100 ,
456456 catalog_id : Optional [str ] = None ,
457457 database : Optional [str ] = None ,
458- search : Optional [str ] = None ,
459- prefix : Optional [str ] = None ,
460- suffix : Optional [str ] = None ) -> DataFrame :
458+ search_text : Optional [str ] = None ,
459+ name_contains : Optional [str ] = None ,
460+ name_prefix : Optional [str ] = None ,
461+ name_suffix : Optional [str ] = None ) -> DataFrame :
461462 """
462- Get iterator of tables filtered by a search term, prefix, suffix.
463+ Get a Dataframe with tables filtered by a search term, prefix, suffix.
463464
464465 :param limit: Max number of tables
465466 :param catalog_id: The ID of the Data Catalog from which to retrieve Databases. If none is provided, the AWS account ID is used by default.
466467 :param database: Glue database name
467- :param search : Select only tables with the given string in the name.
468- :param prefix : Select only tables with the given string in the name prefix.
469- :param suffix : Select only tables with the given string in the name suffix.
470-
468+ :param search_text : Select only tables with the given string in table's properties
469+ :param name_contains : Select by a specific string on table name
470+ :param name_prefix : Select only tables with the given string in the name prefix
471+ :param name_suffix: Select only tables with the given string in the name suffix
471472 :return: Pandas Dataframe filled by formatted infos
472473 """
473- table_iter = self .get_tables (catalog_id = catalog_id ,
474- database = database ,
475- search = search ,
476- prefix = prefix ,
477- suffix = suffix )
478- tables = islice (table_iter , limit )
474+ if search_text is None :
475+ table_iter = self .get_tables (catalog_id = catalog_id ,
476+ database = database ,
477+ name_contains = name_contains ,
478+ name_prefix = name_prefix ,
479+ name_suffix = name_suffix )
480+ tables : List [Dict [str , Any ]] = list (islice (table_iter , limit ))
481+ else :
482+ tables = list (self .search_tables (text = search_text , catalog_id = catalog_id ))
483+ if database is not None :
484+ tables = [x for x in tables if x ["DatabaseName" ] == database ]
485+ if name_contains is not None :
486+ tables = [x for x in tables if name_contains in x ["Name" ]]
487+ if name_prefix is not None :
488+ tables = [x for x in tables if x ["Name" ].startswith (name_prefix )]
489+ if name_suffix is not None :
490+ tables = [x for x in tables if x ["Name" ].endswith (name_suffix )]
491+ tables = tables [:limit ]
492+
479493 df_dict : Dict [str , List ] = {"Database" : [], "Table" : [], "Description" : [], "Columns" : [], "Partitions" : []}
480494 for table in tables :
481495 df_dict ["Database" ].append (table ["DatabaseName" ])
@@ -488,6 +502,26 @@ def tables(self,
488502 df_dict ["Partitions" ].append (", " .join ([x ["Name" ] for x in table ["PartitionKeys" ]]))
489503 return DataFrame (data = df_dict )
490504
def search_tables(self, text: str, catalog_id: Optional[str] = None) -> Iterator[Dict[str, Any]]:
    """
    Get iterator of tables filtered by a search string.

    Pages are requested from the Glue client one at a time, only as the
    iterator is consumed, and every entry of each page's "TableList" is yielded.

    :param text: Select only tables with the given string in table's properties.
    :param catalog_id: The ID of the Data Catalog from which to retrieve Databases. If none is provided, the AWS account ID is used by default.
    :return: Iterator[Dict[str, Any]] of Tables
    """
    args: Dict[str, Any] = {"SearchText": text}
    if catalog_id is not None:
        args["CatalogId"] = catalog_id
    while True:
        response = self._client_glue.search_tables(**args)
        yield from response.get("TableList", [])
        # A missing, None or empty token all mean "no more pages"; sending such
        # a token back to the service would not be a valid continuation.
        next_token = response.get("NextToken")
        if not next_token:
            break
        args["NextToken"] = next_token
524+
491525 def databases (self , limit : int = 100 , catalog_id : Optional [str ] = None ) -> DataFrame :
492526 """
493527 Get iterator of databases.
0 commit comments