Skip to content

Commit 4daad90

Browse files
committed
Documenting
1 parent a1f6271 commit 4daad90

File tree

2 files changed

+58
-31
lines changed

2 files changed

+58
-31
lines changed

awswrangler/glue.py

Lines changed: 52 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
from typing import Dict, Optional, Any, Iterator
1+
from typing import Dict, Optional, Any, Iterator, List
22
from math import ceil
33
from itertools import islice
44
import re
55
import logging
66

7-
from pandas import DataFrame
7+
from pandas import DataFrame # type: ignore
88

99
from awswrangler import data_types
1010
from awswrangler.athena import Athena
@@ -410,7 +410,12 @@ def get_databases(self, catalog_id: Optional[str] = None) -> Iterator[Dict[str,
410410
for db in page["DatabaseList"]:
411411
yield db
412412

413-
def get_tables(self, catalog_id: Optional[str] = None, database: Optional[str] = None, search: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None) -> Iterator[Dict[str, Any]]:
413+
def get_tables(self,
414+
catalog_id: Optional[str] = None,
415+
database: Optional[str] = None,
416+
search: Optional[str] = None,
417+
prefix: Optional[str] = None,
418+
suffix: Optional[str] = None) -> Iterator[Dict[str, Any]]:
414419
"""
415420
Get an iterator of tables
416421
@@ -446,16 +451,32 @@ def get_tables(self, catalog_id: Optional[str] = None, database: Optional[str] =
446451
for tbl in page["TableList"]:
447452
yield tbl
448453

449-
def tables(self, limit: int = 100, catalog_id: Optional[str] = None, database: Optional[str] = None, search: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None) -> DataFrame:
450-
table_iter = self.get_tables(catalog_id=catalog_id, database=database, search=search, prefix=prefix, suffix=suffix)
454+
def tables(self,
455+
limit: int = 100,
456+
catalog_id: Optional[str] = None,
457+
database: Optional[str] = None,
458+
search: Optional[str] = None,
459+
prefix: Optional[str] = None,
460+
suffix: Optional[str] = None) -> DataFrame:
461+
"""
462+
Get a Pandas DataFrame of tables filtered by a search term, prefix, suffix.
463+
464+
:param limit: Max number of tables
465+
:param catalog_id: The ID of the Data Catalog from which to retrieve Databases. If none is provided, the AWS account ID is used by default.
466+
:param database: Glue database name
467+
:param search: Select only tables with the given string in the name.
468+
:param prefix: Select only tables with the given string in the name prefix.
469+
:param suffix: Select only tables with the given string in the name suffix.
470+
471+
:return: Pandas DataFrame filled with formatted information
472+
"""
473+
table_iter = self.get_tables(catalog_id=catalog_id,
474+
database=database,
475+
search=search,
476+
prefix=prefix,
477+
suffix=suffix)
451478
tables = islice(table_iter, limit)
452-
df_dict = {
453-
"Database": [],
454-
"Table": [],
455-
"Description": [],
456-
"Columns": [],
457-
"Partitions": []
458-
}
479+
df_dict: Dict[str, List] = {"Database": [], "Table": [], "Description": [], "Columns": [], "Partitions": []}
459480
for table in tables:
460481
df_dict["Database"].append(table["DatabaseName"])
461482
df_dict["Table"].append(table["Name"])
@@ -468,12 +489,16 @@ def tables(self, limit: int = 100, catalog_id: Optional[str] = None, database: O
468489
return DataFrame(data=df_dict)
469490

470491
def databases(self, limit: int = 100, catalog_id: Optional[str] = None) -> DataFrame:
492+
"""
493+
Get a Pandas DataFrame of databases.
494+
495+
:param limit: Max number of databases
496+
:param catalog_id: The ID of the Data Catalog from which to retrieve Databases. If none is provided, the AWS account ID is used by default.
497+
:return: Pandas Dataframe filled by formatted infos
498+
"""
471499
database_iter = self.get_databases(catalog_id=catalog_id)
472500
dbs = islice(database_iter, limit)
473-
df_dict = {
474-
"Database": [],
475-
"Description": []
476-
}
501+
df_dict: Dict[str, List] = {"Database": [], "Description": []}
477502
for db in dbs:
478503
df_dict["Database"].append(db["Name"])
479504
if "Description" in db:
@@ -483,23 +508,19 @@ def databases(self, limit: int = 100, catalog_id: Optional[str] = None) -> DataF
483508
return DataFrame(data=df_dict)
484509

485510
def table(self, database: str, name: str, catalog_id: Optional[str] = None) -> DataFrame:
511+
"""
512+
Get table details as Pandas Dataframe
513+
514+
:param database: Glue database name
515+
:param name: Table name
516+
:param catalog_id: The ID of the Data Catalog where the table resides. If none is provided, the AWS account ID is used by default.
517+
:return: Pandas Dataframe filled by formatted infos
518+
"""
486519
if catalog_id is None:
487-
table: Dict[str, Any] = self._client_glue.get_table(
488-
DatabaseName=database,
489-
Name=name
490-
)["Table"]
520+
table: Dict[str, Any] = self._client_glue.get_table(DatabaseName=database, Name=name)["Table"]
491521
else:
492-
table = self._client_glue.get_table(
493-
CatalogId=catalog_id,
494-
DatabaseName=database,
495-
Name=name
496-
)["Table"]
497-
df_dict = {
498-
"Column Name": [],
499-
"Type": [],
500-
"Partition": [],
501-
"Comment": []
502-
}
522+
table = self._client_glue.get_table(CatalogId=catalog_id, DatabaseName=database, Name=name)["Table"]
523+
df_dict: Dict[str, List] = {"Column Name": [], "Type": [], "Partition": [], "Comment": []}
503524
for col in table["StorageDescriptor"]["Columns"]:
504525
df_dict["Column Name"].append(col["Name"])
505526
df_dict["Type"].append(col["Type"])

testing/test_awswrangler/test_glue.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,3 +127,9 @@ def test_get_tables_suffix(session, table):
127127
for tbl in tables:
128128
if tbl["Name"] == table:
129129
assert tbl["TableType"] == "EXTERNAL_TABLE"
130+
131+
132+
def test_glue_utils(session, database, table):
133+
assert len(session.glue.databases().index) > 1
134+
assert len(session.glue.tables().index) > 1
135+
assert len(session.glue.table(database=database, name=table).index) > 1

0 commit comments

Comments
 (0)