Skip to content

Commit 691a050

Browse files
Fixing index validation in validate schema (#1735)
1 parent 1a4b130 commit 691a050

File tree

2 files changed

+19
-1
lines changed

2 files changed

+19
-1
lines changed

awswrangler/s3/_read_parquet.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -551,7 +551,7 @@ def _read_parquet(
551 551       )
552 552       if validate_schema and columns:
553 553           for column in columns:
554     -             if column not in df.columns:
    554 +             if column not in df.columns and column not in df.index.names:
555 555                   raise exceptions.InvalidArgument(f"column: {column} does not exist")
556 556       return df
557 557

tests/test_s3_parquet.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -569,3 +569,21 @@ def test_read_parquet_versioned(path) -> None:
569569
df_temp = wr.s3.read_parquet(path_file, version_id=version_id)
570570
assert df_temp.equals(df)
571571
assert version_id == wr.s3.describe_objects(path=path_file, version_id=version_id)[path_file]["VersionId"]
    572 +
    573 +
    574 + def test_read_parquet_schema_validation_with_index_column(path) -> None:
    575 +     path_file = f"{path}file.parquet"
    576 +     df = pd.DataFrame({"idx": [1], "col": [2]})
    577 +     df0 = df.set_index("idx")
    578 +     wr.s3.to_parquet(
    579 +         df=df0,
    580 +         path=path_file,
    581 +         index=True,
    582 +     )
    583 +     df1 = wr.s3.read_parquet(
    584 +         path=path_file,
    585 +         ignore_index=False,
    586 +         columns=["idx", "col"],
    587 +         validate_schema=True,
    588 +     )
    589 +     assert df0.shape == df1.shape

0 commit comments

Comments
 (0)