Skip to content

Add support for rm_file in GitHubFileSystem implementation #1839

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 7, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions fsspec/implementations/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,3 +265,63 @@ def _open(
cache_options=cache_options,
**kwargs,
)

def rm(self, path, **kwargs):
    """
    Remove the file at ``path``.

    This is a thin wrapper: the path and every keyword argument are handed
    to :meth:`rm_file` unchanged, and nothing is returned.
    """
    delete_one = self.rm_file
    delete_one(path, **kwargs)

def rm_file(self, path, branch=None, message=None, **kwargs):
    """
    Remove a file from a specified branch using a given commit message.

    Parameters
    ----------
    path : str
        The file's location relative to the repository root.
    branch : str, optional
        The branch containing the file. When given, the file's SHA is looked
        up on this branch and the deletion is committed to it. When omitted,
        the SHA is resolved against ``self.root`` and no branch is sent with
        the request, so the repository's default branch is used.
    message : str, optional
        The commit message for the deletion. Defaults to ``"Delete <path>"``.
    **kwargs
        Accepted so generic callers can pass extra options; not used here.

    Raises
    ------
    ValueError
        If the filesystem was created without a username.
    FileNotFoundError
        If the SHA lookup request answers with HTTP 404.
    IsADirectoryError
        If the SHA lookup returns a listing (a JSON array) instead of a
        single file object.
    requests.HTTPError
        For any other unsuccessful response from the lookup or the delete.
    """

    if not self.username:
        raise ValueError("Authentication required")

    path = self._strip_protocol(path)
    # Normalise once so the lookup and the delete hit the same URL; a
    # leading "/" would otherwise produce ".../contents//<path>".
    api_path = path.lstrip("/")
    # Resolve the SHA on the branch the deletion will be committed to.
    ref = branch or self.root

    # Attempt to get SHA from cache or Github API. The cache is only
    # consulted when the target ref is self.root.
    # NOTE(review): presumably dircache entries are listed at self.root -
    # confirm against ls().
    sha = self._get_sha_from_cache(path) if ref == self.root else None
    if not sha:
        url = self.content_url.format(
            org=self.org, repo=self.repo, path=api_path, sha=ref
        )
        r = requests.get(url, timeout=self.timeout, **self.kw)
        if r.status_code == 404:
            raise FileNotFoundError(path)
        r.raise_for_status()
        payload = r.json()
        if isinstance(payload, list):
            # A JSON array is a directory listing, which has no single SHA.
            raise IsADirectoryError(path)
        sha = payload["sha"]

    # Delete the file
    delete_url = self.content_url.format(
        org=self.org, repo=self.repo, path=api_path, sha=ref
    )
    data = {
        "message": message or f"Delete {path}",
        "sha": sha,
    }
    if branch:
        data["branch"] = branch
    r = requests.delete(delete_url, json=data, timeout=self.timeout, **self.kw)
    r.raise_for_status()

    # Drop cached listings so the removed file no longer appears.
    self.invalidate_cache(path)

def _get_sha_from_cache(self, path):
sha = None
for entries in self.dircache.values():
for entry in entries:
entry_path = entry.get("name")
if entry_path and entry_path == path and "sha" in entry:
sha = entry["sha"]
break
if sha:
break
return sha
15 changes: 15 additions & 0 deletions fsspec/implementations/tests/test_github.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import fsspec
import pytest


def test_github_open_small_file():
Expand Down Expand Up @@ -46,3 +47,17 @@ def test_github_ls():
expected = {"brain_networks.csv", "mpg.csv", "penguins.csv", "README.md", "raw"}
# check if the result is a subset of the expected files
assert expected.issubset(ls_result)


def test_github_rm():
    """Exercise the two error paths of ``rm`` on the GitHub filesystem."""
    # Without credentials, a removal attempt must raise ValueError.
    anonymous_fs = fsspec.filesystem("github", org="mwaskom", repo="seaborn-data")
    with pytest.raises(ValueError):
        anonymous_fs.rm("mpg.csv")

    # With credentials supplied, removing a path that does not exist in the
    # repository must raise FileNotFoundError.
    credentials = {"username": "user", "token": "token"}
    authed_fs = fsspec.filesystem(
        "github", org="mwaskom", repo="seaborn-data", **credentials
    )
    with pytest.raises(FileNotFoundError):
        authed_fs.rm(
            "/this-file-doesnt-exist", branch="master", message="Delete my file"
        )
Loading