Skip to content

Commit 47e8d65

Browse files
fix: fsspec download a single file instead of downloading files that match pattern (#541)
### Fixes:
- Use the `get_file` method instead of `get` to download files in `fsspec`-based connectors. `get` matches file name patterns and therefore cannot fetch files whose names contain some special characters (e.g. `[` or `]`).
1 parent 663254b commit 47e8d65

File tree

7 files changed

+268
-222
lines changed

7 files changed

+268
-222
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
## 1.0.45
2+
3+
### Fixes
4+
5+
* **Fix downloading files that have special characters (like `[` or `]`) inside their names, when using `fsspec` based connectors**
6+
17
## 1.0.44
28

39
* **Improve DeltaTable ingestion process and reliability**
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
{
22
"directory_structure": [
3-
"Why_is_the_sky_blue?.txt"
3+
"Why_is_the_sky_blue?.txt",
4+
"[test]?*.txt"
45
]
56
}

test/integration/connectors/expected_results/s3-specialchar/file_data/869bf15f-e840-51dc-a818-8d0b817817c9.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
},
1616
"date_created": "1731627148.0",
1717
"date_modified": "1731627148.0",
18-
"date_processed": "1734441638.470403",
18+
"date_processed": "1750862131.7378173",
1919
"permissions_data": null,
2020
"filesize_bytes": 14
2121
},
@@ -31,6 +31,6 @@
3131
"original_file_path": "utic-dev-tech-fixtures/special-characters/Why_is_the_sky_blue?.txt"
3232
},
3333
"reprocess": false,
34-
"local_download_path": "/private/var/folders/n8/rps3wl195pj4p_0vyxqj5jrw0000gn/T/tmpxwyhhssj/Why_is_the_sky_blue?.txt",
34+
"local_download_path": "/tmp/tmpo4c0jypd/Why_is_the_sky_blue?.txt",
3535
"display_name": "utic-dev-tech-fixtures/special-characters/Why_is_the_sky_blue?.txt"
3636
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
{
2+
"identifier": "e2cb44ab-2f1a-5037-901d-a284371047bb",
3+
"connector_type": "s3",
4+
"source_identifiers": {
5+
"filename": "[test]?*.txt",
6+
"fullpath": "utic-dev-tech-fixtures/special-characters/[test]?*.txt",
7+
"rel_path": "[test]?*.txt"
8+
},
9+
"metadata": {
10+
"url": "s3://utic-dev-tech-fixtures/special-characters/[test]?*.txt",
11+
"version": "aa50658109a5296b6b0fda35084b78be",
12+
"record_locator": {
13+
"protocol": "s3",
14+
"remote_file_path": "s3://utic-dev-tech-fixtures/special-characters/"
15+
},
16+
"date_created": "1750861154.0",
17+
"date_modified": "1750861154.0",
18+
"date_processed": "1750862132.0641234",
19+
"permissions_data": null,
20+
"filesize_bytes": 47
21+
},
22+
"additional_metadata": {
23+
"Key": "utic-dev-tech-fixtures/special-characters/[test]?*.txt",
24+
"LastModified": "2025-06-25T14:19:14+00:00",
25+
"ETag": "\"aa50658109a5296b6b0fda35084b78be\"",
26+
"ChecksumAlgorithm": [
27+
"CRC64NVME"
28+
],
29+
"Size": 47,
30+
"StorageClass": "STANDARD",
31+
"type": "file",
32+
"size": 47,
33+
"name": "utic-dev-tech-fixtures/special-characters/[test]?*.txt",
34+
"original_file_path": "utic-dev-tech-fixtures/special-characters/[test]?*.txt"
35+
},
36+
"reprocess": false,
37+
"local_download_path": "/tmp/tmpo4c0jypd/[test]?*.txt",
38+
"display_name": "utic-dev-tech-fixtures/special-characters/[test]?*.txt"
39+
}

0 commit comments

Comments
 (0)