@@ -462,7 +462,6 @@ def _read_parquet_file(
462462 boto3_session : boto3 .Session ,
463463 s3_additional_kwargs : Optional [Dict [str , str ]],
464464 use_threads : Union [bool , int ],
465- validate_schema : Optional [bool ],
466465 version_id : Optional [str ] = None ,
467466 pyarrow_additional_kwargs : Optional [Dict [str , Any ]] = None ,
468467) -> pa .Table :
@@ -482,12 +481,6 @@ def _read_parquet_file(
482481 read_dictionary = categories ,
483482 coerce_int96_timestamp_unit = pyarrow_args ["coerce_int96_timestamp_unit" ],
484483 )
485- if validate_schema and pq_file and columns :
486- pq_file_columns : List [str ] = pq_file .schema .names
487- for column in columns :
488- if column not in pq_file_columns :
489- raise exceptions .InvalidArgument (f"column: { column } does not exist" )
490-
491484 if pq_file is None :
492485 raise exceptions .InvalidFile (f"Invalid Parquet file: { path } " )
493486 return pq_file .read (columns = columns , use_threads = False , use_pandas_metadata = False )
@@ -536,7 +529,7 @@ def _read_parquet(
536529) -> pd .DataFrame :
537530 pyarrow_args = _set_default_pyarrow_additional_kwargs (pyarrow_additional_kwargs )
538531 boto3_session = _utils .ensure_session (boto3_session )
539- return _arrowtable2df (
532+ df : pd . DataFrame = _arrowtable2df (
540533 table = _read_parquet_file (
541534 path = path ,
542535 columns = columns ,
@@ -545,7 +538,6 @@ def _read_parquet(
545538 s3_additional_kwargs = s3_additional_kwargs ,
546539 use_threads = use_threads ,
547540 version_id = version_id ,
548- validate_schema = validate_schema ,
549541 pyarrow_additional_kwargs = pyarrow_args ,
550542 ),
551543 categories = categories ,
@@ -557,6 +549,11 @@ def _read_parquet(
557549 path_root = path_root ,
558550 timestamp_as_object = pyarrow_args ["timestamp_as_object" ],
559551 )
552+ if validate_schema and columns :
553+ for column in columns :
554+ if column not in df .columns :
555+ raise exceptions .InvalidArgument (f"column: { column } does not exist" )
556+ return df
560557
561558
562559def read_parquet (
0 commit comments