Skip to content

Commit 2339a81

Browse files
authored
Merge pull request #361 from reef-technologies/add_ls_wildcard
Add wildcard capabilities to Bucket.ls
2 parents f7c1626 + cdba79b commit 2339a81

File tree

7 files changed

+301
-32
lines changed

7 files changed

+301
-32
lines changed

.github/workflows/cd.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ on:
55
tags: 'v*' # push events to matching v*, i.e. v1.0, v20.15.10
66

77
env:
8-
PYTHON_DEFAULT_VERSION: "3.10"
8+
PYTHON_DEFAULT_VERSION: "3.11"
99

1010
jobs:
1111
deploy:
@@ -14,11 +14,11 @@ jobs:
1414
B2_PYPI_PASSWORD: ${{ secrets.B2_PYPI_PASSWORD }}
1515
runs-on: ubuntu-latest
1616
steps:
17-
- uses: actions/checkout@v2
17+
- uses: actions/checkout@v3
1818
with:
1919
fetch-depth: 0
2020
- name: Set up Python ${{ env.PYTHON_DEFAULT_VERSION }}
21-
uses: actions/setup-python@v2
21+
uses: actions/setup-python@v4
2222
with:
2323
python-version: ${{ env.PYTHON_DEFAULT_VERSION }}
2424
- name: Display Python version

.github/workflows/ci.yml

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,21 +7,21 @@ on:
77
branches: [master]
88

99
env:
10-
PYTHON_DEFAULT_VERSION: "3.10"
10+
PYTHON_DEFAULT_VERSION: "3.11"
1111
SKIP_COVERAGE_PYTHON_VERSION_PREFIX: "pypy"
1212

1313
jobs:
1414
lint:
1515
runs-on: ubuntu-latest
1616
steps:
17-
- uses: actions/checkout@v2
17+
- uses: actions/checkout@v3
1818
with:
1919
fetch-depth: 0
2020
- uses: codespell-project/actions-codespell@2391250ab05295bddd51e36a8c6295edb6343b0e
2121
with:
2222
ignore_words_list: datas
2323
- name: Set up Python ${{ env.PYTHON_DEFAULT_VERSION }}
24-
uses: actions/setup-python@v3
24+
uses: actions/setup-python@v4
2525
with:
2626
python-version: ${{ env.PYTHON_DEFAULT_VERSION }}
2727
cache: "pip"
@@ -30,19 +30,20 @@ jobs:
3030
- name: Run linters
3131
run: nox -vs lint
3232
- name: Validate changelog
33-
if: ${{ ! startsWith(github.ref, 'refs/heads/dependabot/') }}
33+
# The zattoo/changelog action was designed to be used with pull requests only.
34+
if: ${{ github.event_name == 'pull_request' && ! startsWith(github.ref, 'refs/heads/dependabot/') }}
3435
uses: zattoo/changelog@v1
3536
with:
3637
token: ${{ github.token }}
3738
build:
3839
needs: lint
3940
runs-on: ubuntu-latest
4041
steps:
41-
- uses: actions/checkout@v2
42+
- uses: actions/checkout@v3
4243
with:
4344
fetch-depth: 0
4445
- name: Set up Python ${{ env.PYTHON_DEFAULT_VERSION }}
45-
uses: actions/setup-python@v3
46+
uses: actions/setup-python@v4
4647
with:
4748
python-version: ${{ env.PYTHON_DEFAULT_VERSION }}
4849
cache: "pip"
@@ -57,13 +58,13 @@ jobs:
5758
B2_TEST_APPLICATION_KEY_ID: ${{ secrets.B2_TEST_APPLICATION_KEY_ID }}
5859
runs-on: ubuntu-latest
5960
steps:
60-
- uses: actions/checkout@v2
61+
- uses: actions/checkout@v3
6162
if: ${{ env.B2_TEST_APPLICATION_KEY != '' && env.B2_TEST_APPLICATION_KEY_ID != '' }} # TODO: skip this whole job instead
6263
with:
6364
fetch-depth: 0
6465
- name: Set up Python ${{ env.PYTHON_DEFAULT_VERSION }}
6566
if: ${{ env.B2_TEST_APPLICATION_KEY != '' && env.B2_TEST_APPLICATION_KEY_ID != '' }} # TODO: skip this whole job instead
66-
uses: actions/setup-python@v3
67+
uses: actions/setup-python@v4
6768
with:
6869
python-version: ${{ env.PYTHON_DEFAULT_VERSION }}
6970
cache: "pip"
@@ -83,7 +84,7 @@ jobs:
8384
fail-fast: false
8485
matrix:
8586
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
86-
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11.0-beta.1", "pypy-3.7", "pypy-3.8"]
87+
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11.0", "pypy-3.7", "pypy-3.8"]
8788
exclude:
8889
- os: "macos-latest"
8990
python-version: "pypy-3.7"
@@ -94,11 +95,11 @@ jobs:
9495
- os: "windows-latest"
9596
python-version: "pypy-3.8"
9697
steps:
97-
- uses: actions/checkout@v2
98+
- uses: actions/checkout@v3
9899
with:
99100
fetch-depth: 0
100101
- name: Set up Python ${{ matrix.python-version }}
101-
uses: actions/setup-python@v3
102+
uses: actions/setup-python@v4
102103
with:
103104
python-version: ${{ matrix.python-version }}
104105
cache: "pip"
@@ -115,11 +116,11 @@ jobs:
115116
needs: build
116117
runs-on: ubuntu-latest
117118
steps:
118-
- uses: actions/checkout@v2
119+
- uses: actions/checkout@v3
119120
with:
120121
fetch-depth: 0
121122
- name: Set up Python ${{ env.PYTHON_DEFAULT_VERSION }}
122-
uses: actions/setup-python@v3
123+
uses: actions/setup-python@v4
123124
with:
124125
python-version: ${{ env.PYTHON_DEFAULT_VERSION }}
125126
cache: "pip"

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
88

99
### Added
1010
* Authorizing a key for a single bucket ensures that this bucket is cached
11+
* `Bucket.ls` operation supports wildcard matching strings
1112

1213
### Infrastructure
1314
* Additional tests for listing files/versions
15+
* Ensured that changelog validation only happens on pull requests
16+
* Upgraded GitHub Actions: `actions/checkout` to v3, `actions/setup-python` to v4
1417

1518
## [1.18.0] - 2022-09-20
1619

b2sdk/bucket.py

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
#
99
######################################################################
1010

11+
import fnmatch
1112
import logging
13+
import pathlib
1214

1315
from typing import Optional, Tuple
1416

@@ -323,7 +325,8 @@ def ls(
323325
folder_to_list: str = '',
324326
latest_only: bool = True,
325327
recursive: bool = False,
326-
fetch_count: Optional[int] = 10000
328+
fetch_count: Optional[int] = 10000,
329+
with_wildcard: bool = False,
327330
):
328331
"""
329332
Pretend that folders exist and yields the information about the files in a folder.
@@ -339,21 +342,59 @@ def ls(
339342
:param folder_to_list: the name of the folder to list; must not start with "/".
340343
Empty string means top-level folder
341344
:param latest_only: when ``False`` returns info about all versions of a file,
342-
when ``True``, just returns info about the most recent versions
345+
when ``True``, just returns info about the most recent versions
343346
:param recursive: if ``True``, list folders recursively
344347
:param fetch_count: how many entries to return or ``None`` to use the default. Acceptable values: 1 - 10000
348+
:param with_wildcard: Accepts "*", "?", "[]" and "[!]" in folder_to_list, similarly to what shell does.
349+
As of 1.19.0 it can only be enabled when recursive is also enabled.
350+
Also, in this mode, folder_to_list is considered to be a filename or a pattern.
345351
:rtype: generator[tuple[b2sdk.v2.FileVersion, str]]
346352
:returns: generator of (file_version, folder_name) tuples
347353
348354
.. note::
349-
In case of `recursive=True`, folder_name is returned only for first file in the folder.
355+
In case of `recursive=True`, folder_name is not returned.
350356
"""
357+
# Ensure that recursive is enabled when with_wildcard is enabled.
358+
if with_wildcard and not recursive:
359+
raise ValueError('with_wildcard requires recursive to be turned on as well')
360+
351361
# Every file returned must have a name that starts with the
352362
# folder name and a "/".
353363
prefix = folder_to_list
354-
if prefix != '' and not prefix.endswith('/'):
364+
# In case of wildcards, we don't assume that this is a folder that we're searching through.
365+
# It could be an exact file, e.g. 'a/b.txt' that we're trying to locate.
366+
if prefix != '' and not prefix.endswith('/') and not with_wildcard:
355367
prefix += '/'
356368

369+
# If we're running with wildcard-matching, we could get
370+
# a different prefix from it. We search for the first
371+
# occurrence of the special characters and fetch
372+
# parent path from that place.
373+
# Examples:
374+
# 'b/c/*.txt' –> 'b/c/'
375+
# '*.txt' –> ''
376+
# 'a/*/result.[ct]sv' –> 'a/'
377+
if with_wildcard:
378+
for wildcard_character in '*?[':
379+
try:
380+
starter_index = folder_to_list.index(wildcard_character)
381+
except ValueError:
382+
continue
383+
384+
# +1 to include the starter character. Using posix path to
385+
# ensure consistent behaviour on Windows (e.g. case sensitivity).
386+
path = pathlib.PurePosixPath(folder_to_list[:starter_index + 1])
387+
parent_path = str(path.parent)
388+
# Path considers dot to be the empty path.
389+
# There's no shorter path than that.
390+
if parent_path == '.':
391+
prefix = ''
392+
break
393+
# A pattern can contain several wildcards, e.g. 'a/*/result.[ct]sv' has two
394+
# possible parent paths: 'a/' and 'a/*/', with the first one being the correct one
395+
if len(parent_path) < len(prefix):
396+
prefix = parent_path
397+
357398
# Loop until all files in the named directory have been listed.
358399
# The starting point of the first list_file_names request is the
359400
# prefix we're looking for. The prefix ends with '/', which is
@@ -378,7 +419,13 @@ def ls(
378419
if not file_version.file_name.startswith(prefix):
379420
# We're past the files we care about
380421
return
422+
if with_wildcard and not fnmatch.fnmatchcase(
423+
file_version.file_name, folder_to_list
424+
):
425+
# File doesn't match our wildcard rules
426+
continue
381427
after_prefix = file_version.file_name[len(prefix):]
428+
# In case of wildcards, we don't care about folders at all, and it's recursive by default.
382429
if '/' not in after_prefix or recursive:
383430
# This is not a folder, so we'll print it out and
384431
# continue on.

b2sdk/v1/bucket.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,8 @@ def ls(
250250
folder_to_list: str = '',
251251
show_versions: bool = False,
252252
recursive: bool = False,
253-
fetch_count: Optional[int] = 10000
253+
fetch_count: Optional[int] = 10000,
254+
**kwargs
254255
):
255256
"""
256257
Pretend that folders exist and yields the information about the files in a folder.
@@ -273,9 +274,9 @@ def ls(
273274
:returns: generator of (file_version, folder_name) tuples
274275
275276
.. note::
276-
In case of `recursive=True`, folder_name is returned only for first file in the folder.
277+
In case of `recursive=True`, folder_name is not returned.
277278
"""
278-
return super().ls(folder_to_list, not show_versions, recursive, fetch_count)
279+
return super().ls(folder_to_list, not show_versions, recursive, fetch_count, **kwargs)
279280

280281

281282
def download_file_and_return_info_dict(

test/integration/base.py

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,13 @@
88
#
99
######################################################################
1010

11-
from typing import Optional
1211
import http.client
1312
import os
14-
import random
15-
import string
1613

1714
import pytest
1815

1916
from b2sdk.v2 import current_time_millis
20-
17+
from b2sdk.v2.exception import DuplicateBucketName
2118
from .bucket_cleaner import BucketCleaner
2219
from .helpers import GENERAL_BUCKET_NAME_PREFIX, BUCKET_NAME_LENGTH, BUCKET_CREATED_AT_MILLIS, bucket_name_part, authorize
2320

@@ -63,8 +60,32 @@ def write_zeros(self, file, number):
6360
written += line_len
6461

6562
def create_bucket(self):
66-
return self.b2_api.create_bucket(
67-
self.generate_bucket_name(),
68-
'allPublic',
69-
bucket_info={BUCKET_CREATED_AT_MILLIS: str(current_time_millis())}
70-
)
63+
bucket_name = self.generate_bucket_name()
64+
try:
65+
return self.b2_api.create_bucket(
66+
bucket_name,
67+
'allPublic',
68+
bucket_info={BUCKET_CREATED_AT_MILLIS: str(current_time_millis())}
69+
)
70+
except DuplicateBucketName:
71+
self._duplicated_bucket_name_debug_info(bucket_name)
72+
raise
73+
74+
def _duplicated_bucket_name_debug_info(self, bucket_name: str) -> None:
75+
# Trying to obtain as much information as possible about this bucket.
76+
print(' DUPLICATED BUCKET DEBUG START '.center(60, '='))
77+
bucket = self.b2_api.get_bucket_by_name(bucket_name)
78+
79+
print('Bucket metadata:')
80+
bucket_dict = bucket.as_dict()
81+
for info_key, info in bucket_dict.items():
82+
print('\t%s: "%s"' % (info_key, info))
83+
84+
print('All files (and their versions) inside the bucket:')
85+
ls_generator = bucket.ls(recursive=True, latest_only=False)
86+
for file_version, _directory in ls_generator:
87+
# as_dict() is bound to have more info than we can use,
88+
# but maybe some of it will shed some light on the issue.
89+
print('\t%s (%s)' % (file_version.file_name, file_version.as_dict()))
90+
91+
print(' DUPLICATED BUCKET DEBUG END '.center(60, '='))

0 commit comments

Comments
 (0)