Skip to content

Commit 38cabdf

Browse files
committed
Add validate_schema to wr.s3.read_parquet_table.
1 parent d2b33ba commit 38cabdf

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

awswrangler/s3/_read.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -861,6 +861,7 @@ def read_parquet_table(
861861
database: str,
862862
filters: Optional[Union[List[Tuple], List[List[Tuple]]]] = None,
863863
columns: Optional[List[str]] = None,
864+
validate_schema: bool = True,
864865
categories: List[str] = None,
865866
chunked: Union[bool, int] = False,
866867
use_threads: bool = True,
@@ -900,6 +901,10 @@ def read_parquet_table(
900901
List of filters to apply, like ``[[('x', '=', 0), ...], ...]``.
901902
columns : List[str], optional
902903
Names of columns to read from the file(s).
904+
validate_schema : bool
905+
Check that individual file schemas are all the same / compatible. Schemas within a
906+
folder prefix should all be the same. Disable if you have schemas that are different
907+
and want to skip this check.
903908
categories: List[str], optional
904909
List of columns names that should be returned as pandas.Categorical.
905910
Recommended for memory restricted environments.
@@ -959,6 +964,7 @@ def read_parquet_table(
959964
path=path,
960965
filters=filters,
961966
columns=columns,
967+
validate_schema=validate_schema,
962968
categories=categories,
963969
chunked=chunked,
964970
dataset=True,

0 commit comments

Comments
 (0)