@@ -45,83 +45,83 @@ def get_parts(path_string: str):
def has_wildcards(path: str) -> bool:
    """
    Report whether a path contains any glob wildcard character.

    Args:
        path: Path string to inspect

    Returns:
        True if at least one of '*', '?' or '[' occurs in path, else False
    """
    # '[' opens a character class; a lone ']' is not treated as a wildcard.
    for marker in ("*", "?", "["):
        if marker in path:
            return True
    return False
5656
5757
def split_wildcard_path(path: str):
    """
    Separate a wildcard path into a listing prefix and the matching pattern.

    Cloud storage is listed by prefix and the results are then filtered by
    pattern, so this returns the longest leading portion of the path that
    ends on a separator and contains no wildcard characters.

    Args:
        path: Path that may contain wildcards (e.g., "bucket/path/subdir/*.parquet")

    Returns:
        tuple: (prefix, pattern) where:
            - prefix: everything up to and including the last OS_SEP that
              precedes the first wildcard, or "" when no separator precedes it
            - pattern: the input path, unchanged

        When the path has no wildcards at all, both elements are the input path.

    Examples (these assume OS_SEP is "/"):
        >>> split_wildcard_path("bucket/path/*.parquet")
        ('bucket/path/', 'bucket/path/*.parquet')

        >>> split_wildcard_path("bucket/path/file[0-9].parquet")
        ('bucket/path/', 'bucket/path/file[0-9].parquet')

        >>> split_wildcard_path("bucket/*/data.parquet")
        ('bucket/', 'bucket/*/data.parquet')
    """
    if not has_wildcards(path):
        return path, path

    # Offset of the earliest wildcard character of any kind.
    offsets = (path.find(token) for token in ("*", "?", "["))
    first_wildcard = min(
        (offset for offset in offsets if offset != -1),
        default=len(path),
    )

    # Last separator located strictly before that wildcard.
    sep_at = path.rfind(OS_SEP, 0, first_wildcard)

    if sep_at == -1:
        # No separator precedes the wildcard, so there is no usable prefix.
        listing_prefix = ""
    else:
        # Keep the separator itself as the tail of the prefix.
        listing_prefix = path[: sep_at + 1]

    return listing_prefix, path
105105
106106
def match_wildcard(pattern: str, path: str) -> bool:
    """
    Test a path against a wildcard pattern, one path segment at a time.

    Both strings are split on OS_SEP and each segment of the path is compared
    with the corresponding segment of the pattern using fnmatch. As a result:
    - '*' matches any characters EXCEPT path separators
    - '?' matches any single character EXCEPT path separators
    - '**' gets no special treatment, so it cannot match across directory
      boundaries

    The segment-wise comparison is intended to mirror how glob.glob() behaves
    for local files.

    Args:
        pattern: Pattern with wildcards (e.g., "bucket/path/*.parquet")
        path: Path to test (e.g., "bucket/path/file1.parquet")

    Returns:
        True if the path has the same number of segments as the pattern and
        every segment matches; otherwise False

    Examples (these assume OS_SEP is "/"):
        >>> match_wildcard("bucket/path/*.parquet", "bucket/path/file.parquet")
        True
    """
    pattern_segments = pattern.split(OS_SEP)
    path_segments = path.split(OS_SEP)

    # A differing segment count can never match, since wildcards stay within
    # a single directory level.
    if len(pattern_segments) != len(path_segments):
        return False

    # Compare segment by segment, stopping at the first mismatch.
    return all(
        fnmatch.fnmatch(candidate, glob_part)
        for glob_part, candidate in zip(pattern_segments, path_segments)
    )
0 commit comments