Skip to content

Commit eb2a401

Browse files
Add wildcard retrieve of files (#336)
* supporting wildcard matching
* tweaking type hinting
* removing extra line breaks
* improving return for legibility
* validation shell script changes
* mypy fixes
* further tweaks to type hinting

Co-authored-by: Nick Miles <[email protected]>
Co-authored-by: Igor Tavares <[email protected]>
1 parent 294d0d2 commit eb2a401

File tree

1 file changed

+7
-1
lines changed

1 file changed

+7
-1
lines changed

awswrangler/s3/_list.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
"""Amazon S3 List Module (PRIVATE)."""
22

33
import datetime
4+
import fnmatch
45
import logging
56
from typing import Any, Dict, List, Optional, Sequence, Union
67

@@ -64,10 +65,12 @@ def _list_objects(
6465
last_modified_begin: Optional[datetime.datetime] = None,
6566
last_modified_end: Optional[datetime.datetime] = None,
6667
boto3_session: Optional[boto3.Session] = None,
68+
wildcard_character: str = "*",
6769
) -> List[str]:
70+
wildcard_prefix: str = path.split(wildcard_character)[0]
6871
bucket: str
6972
prefix: str
70-
bucket, prefix = _utils.parse_path(path=path)
73+
bucket, prefix = _utils.parse_path(path=wildcard_prefix)
7174
_suffix: Union[List[str], None] = [suffix] if isinstance(suffix, str) else suffix
7275
_ignore_suffix: Union[List[str], None] = [ignore_suffix] if isinstance(ignore_suffix, str) else ignore_suffix
7376
client_s3: boto3.client = _utils.client(service_name="s3", session=boto3_session)
@@ -102,6 +105,9 @@ def _list_objects(
102105
key = pfx["Prefix"]
103106
paths.append(f"s3://{bucket}/{key}")
104107

108+
if wildcard_character in path:
109+
paths = fnmatch.filter(paths, path)
110+
105111
return paths if _ignore_suffix is None else [p for p in paths if p.endswith(tuple(_ignore_suffix)) is False]
106112

107113

0 commit comments

Comments
 (0)