@@ -471,10 +471,50 @@ def _get_first_property_value_with_tracking(self, props: Properties, used_keys:
471471 return props [key ]
472472 return None
473473
474+ def _convert_str_to_bool (self , value : Any ) -> bool :
475+ """Convert string or other value to boolean, handling string representations properly."""
476+ if isinstance (value , str ):
477+ return strtobool (value )
478+ return bool (value )
479+
480+ def _resolve_s3_region (
481+ self , provided_region : Optional [str ], resolve_region_override : Any , bucket : Optional [str ]
482+ ) -> Optional [str ]:
483+ """
484+ Resolve S3 region based on configuration and optional bucket-based resolution.
485+
486+ Args:
487+ provided_region: Region explicitly provided in configuration
488+ resolve_region_override: Whether to resolve region from bucket (can be string or bool)
489+ bucket: Bucket name for region resolution
490+
491+ Returns:
492+ The resolved region string, or None if no region could be determined
493+ """
494+ # Handle resolve_region_override conversion
495+ should_resolve_region = False
496+ if resolve_region_override is not None :
497+ should_resolve_region = self ._convert_str_to_bool (resolve_region_override )
498+
499+ # If no region provided or explicit resolve requested, try to resolve from bucket
500+ if provided_region is None or should_resolve_region :
501+ resolved_region = _cached_resolve_s3_region (bucket = bucket )
502+
503+ # Warn if resolved region differs from provided region
504+ if provided_region is not None and resolved_region and resolved_region != provided_region :
505+ logger .warning (
506+ f"PyArrow FileIO overriding S3 bucket region for bucket { bucket } : "
507+ f"provided region { provided_region } , actual region { resolved_region } "
508+ )
509+
510+ return resolved_region or provided_region
511+
512+ return provided_region
513+
474514 def _initialize_oss_fs (self ) -> FileSystem :
475515 from pyarrow .fs import S3FileSystem
476516
477- properties = filter_properties (self .properties , key_predicate = lambda k : k .startswith (("oss ." , "client." )))
517+ properties = filter_properties (self .properties , key_predicate = lambda k : k .startswith (("s3 ." , "client." , "oss ." )))
478518 used_keys : set [str ] = set ()
479519 client_kwargs = {}
480520
@@ -490,12 +530,10 @@ def _initialize_oss_fs(self) -> FileSystem:
490530 client_kwargs ["session_token" ] = session_token
491531 if region := get (S3_REGION , AWS_REGION , "oss.region" ):
492532 client_kwargs ["region" ] = region
493- # Check for force_virtual_addressing in order of preference. For oss FS, defaulting to True if not found
494533 if force_virtual_addressing := get (S3_FORCE_VIRTUAL_ADDRESSING , "oss.force_virtual_addressing" ):
495- if isinstance (force_virtual_addressing , str ): # S3_FORCE_VIRTUAL_ADDRESSING's value can be a string
496- force_virtual_addressing = strtobool (force_virtual_addressing )
497- client_kwargs ["force_virtual_addressing" ] = force_virtual_addressing
534+ client_kwargs ["force_virtual_addressing" ] = self ._convert_str_to_bool (force_virtual_addressing )
498535 else :
536+ # For OSS FS, default to True
499537 client_kwargs ["force_virtual_addressing" ] = True
500538 if proxy_uri := get (S3_PROXY_URI , "oss.proxy_options" ):
501539 client_kwargs ["proxy_options" ] = proxy_uri
@@ -532,26 +570,13 @@ def _initialize_s3_fs(self, netloc: Optional[str]) -> FileSystem:
532570 if session_token := get (S3_SESSION_TOKEN , AWS_SESSION_TOKEN , "s3.session_token" ):
533571 client_kwargs ["session_token" ] = session_token
534572
535- provided_region = get (S3_REGION , AWS_REGION )
536- # Do this when we don't provide the region at all, or when we explicitly enable it
537- if provided_region is None or property_as_bool (self .properties , S3_RESOLVE_REGION , False ) is True :
538- # Resolve region from netloc(bucket), fallback to user-provided region
539- # Only supported by buckets hosted by S3
540- bucket_region = _cached_resolve_s3_region (bucket = netloc ) or provided_region
541- if provided_region is not None and bucket_region != provided_region :
542- logger .warning (
543- f"PyArrow FileIO overriding S3 bucket region for bucket { netloc } : "
544- f"provided region { provided_region } , actual region { bucket_region } "
545- )
546- else :
547- bucket_region = provided_region
548- client_kwargs ["region" ] = bucket_region
549- used_keys .add (S3_RESOLVE_REGION )
573+ # Handle S3 region configuration with optional auto-resolution
574+ client_kwargs ["region" ] = self ._resolve_s3_region (
575+ provided_region = get (S3_REGION , AWS_REGION ), resolve_region_override = get (S3_RESOLVE_REGION ), bucket = netloc
576+ )
550577
551578 if force_virtual_addressing := get (S3_FORCE_VIRTUAL_ADDRESSING , "s3.force_virtual_addressing" ):
552- if isinstance (force_virtual_addressing , str ): # S3_FORCE_VIRTUAL_ADDRESSING's value can be a string
553- force_virtual_addressing = strtobool (force_virtual_addressing )
554- client_kwargs ["force_virtual_addressing" ] = force_virtual_addressing
579+ client_kwargs ["force_virtual_addressing" ] = self ._convert_str_to_bool (force_virtual_addressing )
555580
556581 if proxy_uri := get (S3_PROXY_URI , "s3.proxy_options" ):
557582 client_kwargs ["proxy_options" ] = proxy_uri
0 commit comments