Skip to content

Commit 57724f4

Browse files
authored
fix(delta-table): normalize S3 LocationConstraint values (#547)
The S3 GetBucketLocation operation returns `null` (`None`) for buckets in the legacy `us-east-1` region and `EU` for very old buckets that were created in the historical `EU` region (now `eu-west-1`). For every other region the API already returns the correct AWS region string. Add a function that normalizes bucket location.
1 parent 3953076 commit 57724f4

File tree

3 files changed

+37
-3
lines changed

3 files changed

+37
-3
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
## 1.0.47
2+
3+
* **Fix delta-table: normalize S3 LocationConstraint values to handle us-east-1 and EU buckets**
4+
15
## 1.0.46
26

37
* **Fix delta-table `pyo3_runtime.PanicException: Forked process detected` on Linux**

unstructured_ingest/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "1.0.46" # pragma: no cover
1+
__version__ = "1.0.47" # pragma: no cover

unstructured_ingest/processes/connectors/delta_table.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,15 @@ def precheck(self):
134134

135135
response = s3_client.get_bucket_location(Bucket=bucket_name)
136136

137-
if self.connection_config.aws_region != response.get("LocationConstraint"):
138-
raise ValueError("Wrong AWS Region was provided.")
137+
bucket_region = _normalize_location_constraint(response.get("LocationConstraint"))
138+
139+
if self.connection_config.aws_region != bucket_region:
140+
raise ValueError(
141+
"Wrong AWS region provided: bucket "
142+
f"'{bucket_name}' resides in '{bucket_region}', "
143+
"but configuration specifies "
144+
f"'{self.connection_config.aws_region}'."
145+
)
139146

140147
except Exception as e:
141148
logger.error(f"failed to validate connection: {e}", exc_info=True)
@@ -270,6 +277,29 @@ def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None: # type:
270277
self.upload_dataframe(df=df, file_data=file_data)
271278

272279

280+
def _normalize_location_constraint(location: Optional[str]) -> str:
    """Return the canonical AWS region name for a LocationConstraint value.

    The S3 GetBucketLocation operation returns `null` (`None`) for buckets in
    the legacy `us-east-1` region and `EU` for very old buckets that were
    created in the historical `EU` region (now `eu-west-1`). For every other
    region the API already returns the correct AWS region string. This helper
    normalizes the legacy values so callers can reliably compare regions.

    An empty string is handled the same way as `None`: the underlying
    response carries an empty LocationConstraint element for `us-east-1`,
    and a client or S3-compatible service may surface that as `""` instead
    of `None`.

    Args:
        location: The LocationConstraint value returned by the S3
            GetBucketLocation operation.

    Returns:
        The canonical AWS region name for the given location constraint.
    """
    # A missing constraint (None or "") always means the default region.
    if not location:
        return "us-east-1"
    # "EU" is the historical alias that predates the eu-west-1 region name.
    if location == "EU":
        return "eu-west-1"
    # Every other value is already a canonical region string.
    return location
301+
302+
273303
delta_table_destination_entry = DestinationRegistryEntry(
274304
connection_config=DeltaTableConnectionConfig,
275305
uploader=DeltaTableUploader,

0 commit comments

Comments
 (0)