Skip to content

Implement compress #54

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Aug 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 38 additions & 3 deletions pyiron_snippets/files.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from __future__ import annotations

import tarfile
from pathlib import Path
from typing import cast


def delete_files_and_directories_recursively(path):
Expand Down Expand Up @@ -44,11 +46,12 @@ def __init__(
self, directory: str | Path | DirectoryObject, protected: bool = False
):
if isinstance(directory, str):
self.path = Path(directory)
path = Path(directory)
elif isinstance(directory, Path):
self.path = directory
path = directory
elif isinstance(directory, DirectoryObject):
self.path = directory.path
path = directory.path
self.path: Path = path
self.create()
self._protected = protected

Expand Down Expand Up @@ -97,3 +100,35 @@ def remove_files(self, *files: str):
path = self.get_path(file)
if path.is_file():
path.unlink()

def compress(self, exclude_files: list[str | Path] | None = None):
Copy link
Preview

Copilot AI Aug 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The compress method lacks docstring documentation. It should document the purpose, parameters (especially the exclude_files behavior), return value, and the silent behavior when target file exists.

Suggested change
def compress(self, exclude_files: list[str | Path] | None = None):
def compress(self, exclude_files: list[str | Path] | None = None):
"""
Compress the directory into a `.tar.gz` archive, optionally excluding specified files.
Parameters
----------
exclude_files : list of str or pathlib.Path, optional
A list of file paths (relative to the directory or absolute) to exclude from the archive.
Each path is resolved relative to the directory if not absolute. Only files matching
the resolved paths are excluded; directories are not excluded recursively.
Returns
-------
None
Notes
-----
- If the target archive file (`<directory>.tar.gz`) already exists, the method does nothing and returns silently.
- Only files are added to the archive; directories themselves are not stored.
"""

Copilot uses AI. Check for mistakes.

directory = self.path.resolve()
output_tar_path = directory.with_suffix(".tar.gz")
if output_tar_path.exists():
return
Comment on lines +104 to +108
Copy link
Preview

Copilot AI Aug 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The method silently returns without indication when the target file already exists. Consider raising an exception, returning a boolean status, or providing an overwrite parameter to make this behavior explicit to callers.

Suggested change
def compress(self, exclude_files: list[str | Path] | None = None):
directory = self.path.resolve()
output_tar_path = directory.with_suffix(".tar.gz")
if output_tar_path.exists():
return
def compress(self, exclude_files: list[str | Path] | None = None, overwrite: bool = False):
directory = self.path.resolve()
output_tar_path = directory.with_suffix(".tar.gz")
if output_tar_path.exists():
if not overwrite:
raise FileExistsError(f"Archive '{output_tar_path}' already exists. Set overwrite=True to overwrite.")
else:
output_tar_path.unlink()

Copilot uses AI. Check for mistakes.

if exclude_files is None:
exclude_files = []
else:
exclude_files = [Path(f) for f in exclude_files]
exclude_set = {
f.resolve() if f.is_absolute() else (directory / f).resolve()
for f in cast(list[Path], exclude_files)
}
files_to_delete = []
with tarfile.open(output_tar_path, "w:gz") as tar:
for file in directory.rglob("*"):
if file.is_file() and file.resolve() not in exclude_set:
arcname = file.relative_to(directory)
tar.add(file, arcname=arcname)
files_to_delete.append(file)
for file in files_to_delete:
file.unlink()

def decompress(self):
directory = self.path.resolve()
tar_path = directory.with_suffix(".tar.gz")
if not tar_path.exists():
return
with tarfile.open(tar_path, "r:gz") as tar:
tar.extractall(path=directory, filter="fully_trusted")
tar_path.unlink()
37 changes: 37 additions & 0 deletions tests/unit/test_files.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pickle
import tarfile
import unittest
from pathlib import Path

Expand Down Expand Up @@ -92,6 +93,42 @@ def test_remove(self):
msg="Should be able to remove just one file",
)

def test_compress(self):
    """
    Round-trip the directory through ``compress`` and ``decompress``.

    Checks that an excluded file stays out of the archive and remains on
    disk, that archived files are removed from the directory, that a
    second ``compress`` call with the archive already present does not
    raise, and that ``decompress`` restores the files and deletes the
    archive.
    """
    # NOTE(review): assumes setUp creates self.directory at "test" in the
    # current working directory, so the archive lands at "test.tar.gz" --
    # confirm against setUp.
    archive = Path("test.tar.gz")
    # Start from a clean slate in case an earlier run left an archive behind
    archive.unlink(missing_ok=True)
    self.directory.write(file_name="test1.txt", content="something")
    self.directory.write(file_name="test2.txt", content="something")
    self.directory.compress(exclude_files=["test1.txt"])
    self.assertTrue(archive.exists())
    with tarfile.open(archive, "r:*") as f:
        content = f.getnames()
    self.assertNotIn(
        "test1.txt", content, msg="Excluded file should not be in archive"
    )
    self.assertIn(
        "test2.txt", content, msg="Included file should be in archive"
    )
    self.assertFalse(
        self.directory.file_exists("test2.txt"),
        msg="Compressed files should not be in the directory",
    )
    self.assertTrue(
        self.directory.file_exists("test1.txt"),
        msg="Excluded file should still be in the directory",
    )
    # Test that compressing again does not raise an error
    self.directory.compress()
    self.assertTrue(archive.exists())
    self.directory.decompress()
    self.assertTrue(
        self.directory.file_exists("test2.txt"),
        msg="Decompressed files should be back in the directory",
    )
    self.assertFalse(
        archive.exists(),
        msg="Archive should be deleted after decompression",
    )


# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Loading