Skip to content

Commit f284a1e

Browse files
Merge branch 'main' of github.com:snakemake/snakemake-storage-plugin-gs
2 parents bb92fdb + 27c80dc commit f284a1e

File tree

5 files changed

+95
-191
lines changed

5 files changed

+95
-191
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ jobs:
9494
poetry run python tests/test_fake_gcs.py
9595
9696
- name: Run pytest
97-
run: poetry run coverage run -m pytest -v tests/tests.py
97+
run: poetry run coverage run -m pytest -vv -s tests/tests.py
9898

9999
- name: Run Coverage
100100
run: poetry run coverage report -m

.gitignore

Lines changed: 8 additions & 153 deletions
Original file line numberDiff line numberDiff line change
@@ -1,163 +1,18 @@
1-
poetry.lock
2-
# Byte-compiled / optimized / DLL files
3-
__pycache__/
4-
*.py[cod]
5-
*$py.class
6-
7-
# C extensions
8-
*.so
9-
10-
# Distribution / packaging
11-
.Python
12-
build/
13-
develop-eggs/
14-
dist/
15-
downloads/
16-
eggs/
17-
.eggs/
18-
lib/
19-
lib64/
20-
parts/
21-
sdist/
22-
var/
23-
wheels/
24-
share/python-wheels/
25-
*.egg-info/
26-
.installed.cfg
27-
*.egg
28-
MANIFEST
1+
# coverage
292

30-
# PyInstaller
31-
# Usually these files are written by a python script from a template
32-
# before PyInstaller builds the exe, so as to inject date/other infos into it.
33-
*.manifest
34-
*.spec
35-
36-
# Installer logs
37-
pip-log.txt
38-
pip-delete-this-directory.txt
39-
40-
# Unit test / coverage reports
41-
htmlcov/
42-
.tox/
43-
.nox/
443
.coverage
45-
.coverage.*
46-
.cache
47-
nosetests.xml
48-
coverage.xml
49-
*.cover
50-
*.py,cover
51-
.hypothesis/
52-
.pytest_cache/
53-
cover/
54-
55-
# Translations
56-
*.mo
57-
*.pot
58-
59-
# Django stuff:
60-
*.log
61-
local_settings.py
62-
db.sqlite3
63-
db.sqlite3-journal
64-
65-
# Flask stuff:
66-
instance/
67-
.webassets-cache
68-
69-
# Scrapy stuff:
70-
.scrapy
71-
72-
# Sphinx documentation
73-
docs/_build/
74-
75-
# PyBuilder
76-
.pybuilder/
77-
target/
78-
79-
# Jupyter Notebook
80-
.ipynb_checkpoints
81-
82-
# IPython
83-
profile_default/
84-
ipython_config.py
85-
86-
# pyenv
87-
# For a library or package, you might want to ignore these files since the code is
88-
# intended to run in multiple environments; otherwise, check them in:
89-
# .python-version
90-
91-
# pipenv
92-
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93-
# However, in case of collaboration, if having platform-specific dependencies or dependencies
94-
# having no cross-platform support, pipenv may install dependencies that don't work, or not
95-
# install all needed dependencies.
96-
#Pipfile.lock
97-
98-
# poetry
99-
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
100-
# This is especially recommended for binary packages to ensure reproducibility, and is more
101-
# commonly ignored for libraries.
102-
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
103-
#poetry.lock
104-
105-
# pdm
106-
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
107-
#pdm.lock
108-
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
109-
# in version control.
110-
# https://pdm.fming.dev/#use-with-ide
111-
.pdm.toml
112-
113-
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
114-
__pypackages__/
115-
116-
# Celery stuff
117-
celerybeat-schedule
118-
celerybeat.pid
119-
120-
# SageMath parsed files
121-
*.sage.py
122-
123-
# Environments
124-
.env
125-
.venv
126-
env/
127-
venv/
128-
ENV/
129-
env.bak/
130-
venv.bak/
131-
132-
# Spyder project settings
133-
.spyderproject
134-
.spyproject
135-
136-
# Rope project settings
137-
.ropeproject
1384

139-
# mkdocs documentation
140-
/site
5+
# .pytest_cache
1416

142-
# mypy
143-
.mypy_cache/
144-
.dmypy.json
145-
dmypy.json
7+
pytest_cache
1468

147-
# Pyre type checker
148-
.pyre/
1499

150-
# pytype static type analyzer
151-
.pytype/
10+
# __pycache__ and any .pyc files
15211

153-
# Cython debug symbols
154-
cython_debug/
12+
__pycache__
13+
*.pyc
15514

156-
# PyCharm
157-
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
158-
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
159-
# and can be added to the global gitignore or merged into this file. For a more nuclear
160-
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
161-
#.idea/
15+
**/__pycache__/
16+
**/*.pyc
16217

16318
poetry.lock

snakemake_storage_plugin_gcs/__init__.py

Lines changed: 60 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,12 @@
2121
get_constant_prefix,
2222
Mtime,
2323
)
24+
from snakemake_interface_common.logging import get_logger
25+
2426
from urllib.parse import urlparse
2527
import base64
2628
import os
27-
29+
from pathlib import Path
2830
import google.cloud.exceptions
2931
from google.cloud import storage
3032
from google.api_core import retry
@@ -266,6 +268,7 @@ def __post_init__(self):
266268
self.key = parsed.path.lstrip("/")
267269
self._local_suffix = self._local_suffix_from_key(self.key)
268270
self._is_dir = None
271+
self.logger = get_logger()
269272

270273
def cleanup(self):
271274
# Close any open connections, unmount stuff, etc.
@@ -288,8 +291,8 @@ async def inventory(self, cache: IOCacheStorageInterface):
288291
- cache.size
289292
"""
290293
if self.get_inventory_parent() in cache.exists_in_storage:
291-
# bucket has been inventorized before, stop here
292-
return
294+
# bucket has been inventorized before, stop here
295+
return
293296

294297
# check if bucket exists
295298
if not self.bucket.exists():
@@ -381,29 +384,20 @@ def store_object(self):
381384
382385
TODO: note from vsoch - I'm not sure I read this function name right,
383386
but I didn't find an equivalent "upload" function so I thought this might
384-
be it. The original function comment is below.
387+
be it. The original function comment is below.
385388
"""
386389
# Ensure that the object is stored at the location specified by
387390
# self.local_path().
388391
try:
389-
if not self.bucket.exists():
390-
self.client.create_bucket(self.bucket)
392+
self.ensure_bucket_exists()
391393

392394
# Distinguish between single file, and folder
393-
f = self.local_path()
394-
if os.path.isdir(f):
395-
# Ensure the "directory" exists
396-
self.blob.upload_from_string(
397-
"", content_type="application/x-www-form-urlencoded;charset=UTF-8"
398-
)
399-
for root, _, files in os.walk(f):
400-
for filename in files:
401-
filename = os.path.join(root, filename)
402-
bucket_path = filename.lstrip(self.bucket.name).lstrip("/")
403-
blob = self.bucket.blob(bucket_path)
404-
blob.upload_from_filename(filename)
395+
local_object = self.local_path()
396+
if os.path.isdir(local_object):
397+
self.upload_directory(local_directory_path=local_object)
405398
else:
406-
self.blob.upload_from_filename(f)
399+
self.blob.upload_from_filename(local_object)
400+
407401
except google.cloud.exceptions.Forbidden as e:
408402
raise WorkflowError(
409403
e,
@@ -413,13 +407,57 @@ def store_object(self):
413407
"--scopes (see Snakemake documentation).",
414408
)
415409

410+
def ensure_bucket_exists(self) -> None:
411+
"""
412+
Check that the bucket exists; if it does not, create it.
413+
"""
414+
if not self.bucket.exists():
415+
self.client.create_bucket(self.bucket)
416+
417+
def upload_directory(self, local_directory_path: Path):
418+
"""
419+
Upload a directory to the storage.
420+
"""
421+
self.ensure_bucket_exists()
422+
423+
# if the local directory is empty, we need to create a blob
424+
# with no content to represent the directory
425+
if not os.listdir(local_directory_path):
426+
self.blob.upload_from_string(
427+
"", content_type="application/x-www-form-urlencoded;charset=UTF-8"
428+
)
429+
430+
for root, _, files in os.walk(local_directory_path):
431+
for filename in files:
432+
relative_filepath = os.path.join(root, filename)
433+
local_prefix = self.provider.local_prefix.as_posix()
434+
435+
# remove the prefix (".snakemake/storage/gcs/{bucket_name}/")
436+
# this gives us the path to the file relative to the bucket
437+
bucket_file_path = (
438+
relative_filepath.removeprefix(local_prefix)
439+
.lstrip("/")
440+
.removeprefix(self.bucket_name)
441+
.lstrip("/")
442+
)
443+
444+
blob = self.bucket.blob(bucket_file_path)
445+
blob.upload_from_filename(relative_filepath)
446+
416447
@retry.Retry(predicate=google_cloud_retry_predicate)
417-
def remove(self):
448+
def remove(self) -> None:
418449
"""
419450
Remove the object from the storage.
420451
"""
421-
# This was a total guess lol
422-
self.blob.delete()
452+
if self.is_directory():
453+
prefix = self.key
454+
if not prefix.endswith("/"):
455+
prefix += "/"
456+
blobs = self.client.list_blobs(self.bucket_name, prefix=prefix)
457+
for blob in blobs:
458+
blob.delete()
459+
else:
460+
self.blob.delete()
423461

424462
# The following two methods are only required if the class inherits from
425463
# StorageObjectGlob.

tests/test_fake_gcs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838

3939
# Create a new Bucket
4040
bucket = client.bucket("snakemake-test-bucket")
41+
4142
try:
4243
client.create_bucket(bucket)
4344
except Conflict:
@@ -54,6 +55,5 @@
5455
blob = bucket.blob(file_name)
5556
blob.upload_from_string(contents)
5657

57-
5858
assert not bucket.blob("foo").exists()
5959
print(list(bucket.list_blobs()))

0 commit comments

Comments
 (0)