Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions fsspec/implementations/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,3 +265,66 @@ def _open(
cache_options=cache_options,
**kwargs,
)

def rm(self, path, recursive=False, maxdepth=None):
    """
    Remove one or more paths by deleting each expanded entry individually.

    Parameters
    ----------
    path: str or list of str
        Path(s) to remove; handed unchanged to ``expand_path``.
    recursive: bool
        Forwarded to ``expand_path``.
    maxdepth: int, optional
        Forwarded to ``expand_path``.
    """
    targets = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
    # Delete in the reverse of the order produced by expand_path.
    for target in targets[::-1]:
        self.rm_file(target)

def rm_file(self, path, message=None, **kwargs):
    """
    Remove a file from a specified branch using a given commit message.

    Since Github DELETE operation requires a branch name, and we can't reliably
    determine whether the provided SHA refers to a branch, tag, or commit, we
    assume it's a branch. If it's not, the user will encounter an error when
    attempting to retrieve the file SHA or delete the file.

    Parameters
    ----------
    path: str
        The file's location relative to the repository root.
    message: str, optional
        The commit message for the deletion. When omitted, ``"Delete <path>"``
        is used.

    Raises
    ------
    ValueError
        If the filesystem was created without a username.
    FileNotFoundError
        If the SHA lookup request answers with HTTP 404.
    IsADirectoryError
        If the SHA lookup returns a listing instead of a single file object.
    """
    if not self.username:
        raise ValueError("Authentication required")

    path = self._strip_protocol(path)

    # Build the contents URL once, without a leading slash, and use it for
    # both the SHA lookup and the DELETE so the two requests always address
    # the same resource (previously only the lookup stripped the slash).
    contents_url = self.content_url.format(
        org=self.org, repo=self.repo, path=path.lstrip("/"), sha=self.root
    )

    # Attempt to get SHA from cache or Github API
    sha = self._get_sha_from_cache(path)
    if not sha:
        r = requests.get(contents_url, timeout=self.timeout, **self.kw)
        if r.status_code == 404:
            raise FileNotFoundError(path)
        r.raise_for_status()
        payload = r.json()
        # The contents endpoint returns a JSON array for a directory; a
        # directory has no single blob SHA that could be deleted.
        if isinstance(payload, list):
            raise IsADirectoryError(path)
        sha = payload["sha"]

    # Delete the file; the branch key is only sent when a ref is configured.
    branch = self.root
    data = {
        "message": message or f"Delete {path}",
        "sha": sha,
        **({"branch": branch} if branch else {}),
    }

    r = requests.delete(contents_url, json=data, timeout=self.timeout, **self.kw)
    r.raise_for_status()

    self.invalidate_cache(path)

def _get_sha_from_cache(self, path):
    """
    Look up the SHA recorded for ``path`` in the directory listing cache.

    Every cached listing in ``self.dircache`` is scanned; the first entry
    whose non-empty ``"name"`` equals ``path`` and which carries a ``"sha"``
    key supplies the result.

    Returns
    -------
    The cached ``"sha"`` value, or ``None`` when no such entry exists.
    """
    matches = (
        item["sha"]
        for listing in self.dircache.values()
        for item in listing
        if item.get("name") and item.get("name") == path and "sha" in item
    )
    return next(matches, None)
16 changes: 16 additions & 0 deletions fsspec/implementations/tests/test_github.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pytest

import fsspec


Expand Down Expand Up @@ -46,3 +48,17 @@ def test_github_ls():
expected = {"brain_networks.csv", "mpg.csv", "penguins.csv", "README.md", "raw"}
# check if the result is a subset of the expected files
assert expected.issubset(ls_result)


def test_github_rm():
    """Exercise the two error paths of ``rm`` on the github filesystem."""
    repo_kwargs = {"org": "mwaskom", "repo": "seaborn-data"}

    # Without authentication, removing a file must raise ValueError.
    anonymous_fs = fsspec.filesystem("github", **repo_kwargs)
    with pytest.raises(ValueError):
        anonymous_fs.rm("mpg.csv")

    # With a username and token supplied, removing a file which doesn't
    # exist must raise FileNotFoundError.
    authenticated_fs = fsspec.filesystem(
        "github", **repo_kwargs, username="user", token="token"
    )
    with pytest.raises(FileNotFoundError):
        authenticated_fs.rm("/this-file-doesnt-exist")
Loading