Unstructured-IO · CyMule · Jul 31, 2025 · Jul 29, 2025 · Jul 30, 2025 · Jul 30, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 1.1.4
+
+* **Fix**: prevent S3 path conflicts by downloading each file into its own unique temporary directory
+
 ## 1.1.3
 
 * **Fix: Remove unnecessary deletion operation in ES connector**

diff --git a/...results/s3-minio/directory_structure.json → ...ed_results/s3-minio/expected_s3_keys.json b/...results/s3-minio/directory_structure.json → ...ed_results/s3-minio/expected_s3_keys.json
@@ -1,5 +1,5 @@
 {
-  "directory_structure": [
+  "s3_keys": [
     "wiki_movie_plots_small.csv"
   ]
 }
diff --git a/...s/s3-specialchar/directory_structure.json → ...ults/s3-specialchar/expected_s3_keys.json b/...s/s3-specialchar/directory_structure.json → ...ults/s3-specialchar/expected_s3_keys.json
@@ -1,5 +1,5 @@
 {
-  "directory_structure": [
+  "s3_keys": [
     "Why_is_the_sky_blue?.txt",
     "[test]?*.txt"
   ]

diff --git a/...ected_results/s3/directory_structure.json → ...expected_results/s3/expected_s3_keys.json b/...ected_results/s3/directory_structure.json → ...expected_results/s3/expected_s3_keys.json
@@ -1,5 +1,5 @@
 {
-  "directory_structure": [
+  "s3_keys": [
     "2023-Jan-economic-outlook.pdf",
     "Silent-Giant-(1).pdf",
     "page-with-formula.pdf",

diff --git a/test/integration/connectors/utils/validation/source.py b/test/integration/connectors/utils/validation/source.py
@@ -167,11 +167,26 @@ def run_expected_download_files_validation(
 
 
 def run_directory_structure_validation(expected_output_dir: Path, download_files: list[str]):
-    directory_record = expected_output_dir / "directory_structure.json"
-    with directory_record.open("r") as directory_file:
-        directory_file_contents = json.load(directory_file)
-    directory_structure = directory_file_contents["directory_structure"]
-    assert directory_structure == download_files
+    s3_keys_file = expected_output_dir / "expected_s3_keys.json"
+
+    if s3_keys_file.exists():
+        with s3_keys_file.open("r") as f:
+            s3_keys = json.load(f)["s3_keys"]
+
+        expected_filenames = [Path(s3_key).name for s3_key in s3_keys]
+        actual_filenames = [Path(download_file).name for download_file in download_files]
+
+        expected_filenames.sort()
+        actual_filenames.sort()
+        assert expected_filenames == actual_filenames, (
+            f"Expected filenames: {expected_filenames}, "
+            f"Got filenames: {actual_filenames}"
+        )
+    else:
+        directory_record = expected_output_dir / "directory_structure.json"
+        with directory_record.open("r") as f:
+            directory_structure = json.load(f)["directory_structure"]
+        assert directory_structure == download_files
 
 
 def update_fixtures(

diff --git a/unstructured_ingest/__version__.py b/unstructured_ingest/__version__.py
@@ -1 +1 @@
-__version__ = "1.1.3"  # pragma: no cover
+__version__ = "1.1.4"  # pragma: no cover
diff --git a/unstructured_ingest/interfaces/downloader.py b/unstructured_ingest/interfaces/downloader.py
@@ -36,9 +36,11 @@ class Downloader(BaseProcess, BaseConnector, ABC):
     def get_download_path(self, file_data: FileData) -> Optional[Path]:
         if not file_data.source_identifiers:
             return None
+
         rel_path = file_data.source_identifiers.relative_path
         if not rel_path:
             return None
+
         rel_path = rel_path[1:] if rel_path.startswith("/") else rel_path
         return self.download_dir / Path(rel_path)
 

diff --git a/unstructured_ingest/processes/connectors/fsspec/fsspec.py b/unstructured_ingest/processes/connectors/fsspec/fsspec.py
@@ -270,6 +270,22 @@ class FsspecDownloader(Downloader):
     download_config: Optional[FsspecDownloaderConfigT] = field(
         default_factory=lambda: FsspecDownloaderConfig()
     )
+
+    def get_download_path(self, file_data: FileData) -> Optional[Path]:
+        if not file_data.source_identifiers:
+            return None
+
+        filename = file_data.source_identifiers.filename
+        if not filename:
+            return None
+
+        mkdir_concurrent_safe(self.download_dir)
+
+        temp_dir = tempfile.mkdtemp(
+            prefix="unstructured_", 
+            dir=self.download_dir
+        )
+        return Path(temp_dir) / filename
 
     def is_async(self) -> bool:
         with self.connection_config.get_client(protocol=self.protocol) as client:

diff --git a/unstructured_ingest/utils/filesystem.py b/unstructured_ingest/utils/filesystem.py
@@ -24,4 +24,4 @@ def mkdir_concurrent_safe(path: Path) -> None:
         path.mkdir(parents=True, exist_ok=True)
     except FileExistsError:
         if not (path.exists() and path.is_dir()):
-            raise
+            raise
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		-__version__ = "1.1.3" # pragma: no cover
		+__version__ = "1.1.4" # pragma: no cover