Skip to content

Commit b0c88ec

Browse files
authored
Add support for rm_file in GitHubFileSystem implementation (#1839)
1 parent cec29ba commit b0c88ec

File tree

2 files changed

+84
-0
lines changed

2 files changed

+84
-0
lines changed

fsspec/implementations/github.py

+68
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import base64
2+
import re
23

34
import requests
45

@@ -265,3 +266,70 @@ def _open(
265266
cache_options=cache_options,
266267
**kwargs,
267268
)
269+
270+
def rm(self, path, recursive=False, maxdepth=None, message=None):
    """
    Remove every path matched by ``path``.

    The input is expanded with ``expand_path`` and each resulting entry is
    handed to ``rm_file`` individually, last entry first.

    Parameters
    ----------
    path: str or list of str
        Path(s) to remove; forwarded to ``expand_path``.
    recursive: bool
        Forwarded to ``expand_path``.
    maxdepth: int, optional
        Forwarded to ``expand_path``.
    message: str, optional
        Commit message passed to every ``rm_file`` call.
    """
    expanded = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
    # Walk the expansion back-to-front (presumably so nested entries are
    # handled before their parents -- depends on expand_path's ordering).
    for target in reversed(expanded):
        self.rm_file(target, message=message)
274+
275+
def rm_file(self, path, message=None, **kwargs):
    """
    Remove a file from a specified branch using a given commit message.

    Since Github DELETE operation requires a branch name, and we can't reliably
    determine whether the provided SHA refers to a branch, tag, or commit, we
    assume it's a branch. If it's not, the user will encounter an error when
    attempting to retrieve the file SHA or delete the file.

    Parameters
    ----------
    path: str
        The file's location relative to the repository root.
    message: str, optional
        The commit message for the deletion. Defaults to ``"Delete <path>"``.

    Raises
    ------
    ValueError
        If the filesystem has no username (authentication is required), or
        if Github reports that the branch was not found.
    FileNotFoundError
        If the SHA lookup for ``path`` returns HTTP 404.
    requests.HTTPError
        For any other unsuccessful HTTP response (via ``raise_for_status``).
    """

    if not self.username:
        raise ValueError("Authentication required")

    path = self._strip_protocol(path)
    # The contents endpoint expects a repository-relative path; normalise it
    # once so the lookup and the delete hit exactly the same URL.
    api_path = path.lstrip("/")
    contents_url = self.content_url.format(
        org=self.org, repo=self.repo, path=api_path, sha=self.root
    )

    # Attempt to get SHA from cache or Github API
    sha = self._get_sha_from_cache(path)
    if not sha:
        r = requests.get(contents_url, timeout=self.timeout, **self.kw)
        if r.status_code == 404:
            raise FileNotFoundError(path)
        r.raise_for_status()
        sha = r.json()["sha"]

    # Delete the file
    branch = self.root
    data = {
        "message": message or f"Delete {path}",
        "sha": sha,
        **({"branch": branch} if branch else {}),
    }

    r = requests.delete(contents_url, json=data, timeout=self.timeout, **self.kw)
    if not r.ok:
        # Only inspect the body on failure, and tolerate bodies that are not
        # JSON objects so the real HTTP error is not masked by a decode error.
        try:
            payload = r.json()
        except ValueError:
            payload = None
        error_message = payload.get("message") if isinstance(payload, dict) else None
        if isinstance(error_message, str) and re.search(
            r"Branch .+ not found", error_message
        ):
            error = "Remove only works when the filesystem is initialised from a branch or default (None)"
            raise ValueError(error)
        r.raise_for_status()

    self.invalidate_cache(path)
328+
329+
def _get_sha_from_cache(self, path):
330+
for entries in self.dircache.values():
331+
for entry in entries:
332+
entry_path = entry.get("name")
333+
if entry_path and entry_path == path and "sha" in entry:
334+
return entry["sha"]
335+
return None

fsspec/implementations/tests/test_github.py

+16
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import pytest
2+
13
import fsspec
24

35

@@ -46,3 +48,17 @@ def test_github_ls():
4648
expected = {"brain_networks.csv", "mpg.csv", "penguins.csv", "README.md", "raw"}
4749
# check if the result is a subset of the expected files
4850
assert expected.issubset(ls_result)
51+
52+
53+
def test_github_rm():
    """Check the error paths of ``rm`` on the github filesystem."""
    repo_location = {"org": "mwaskom", "repo": "seaborn-data"}

    # Without credentials, removing a file must be rejected with ValueError.
    anonymous_fs = fsspec.filesystem("github", **repo_location)
    with pytest.raises(ValueError):
        anonymous_fs.rm("mpg.csv")

    # With credentials supplied, removing a path that does not exist must
    # raise FileNotFoundError.
    authenticated_fs = fsspec.filesystem(
        "github", **repo_location, username="user", token="token"
    )
    with pytest.raises(FileNotFoundError):
        authenticated_fs.rm("/this-file-doesnt-exist")

0 commit comments

Comments
 (0)