Skip to content

Commit 5781566

Browse files
authored
Merge pull request #237 from KumarLabJax/add-purge-cli
add jabs-cli prune command; implement project metadata merging for jabs-init command
2 parents b337414 + 2febb2d commit 5781566

File tree

4 files changed

+186
-29
lines changed

4 files changed

+186
-29
lines changed

src/jabs/project/project.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,60 @@ def project_paths(self) -> ProjectPaths:
146146
"""get the project paths object for this project"""
147147
return self._paths
148148

149+
def get_derived_file_paths(self, video_name: str) -> list[Path]:
    """Return a list of paths for files derived from a given video.

    Includes:
      - all files under features/<video base name>/** (recursive)
      - all files under cache/convex_hulls/<video base name>/** (recursive)
      - cache/<video base name>_pose_est_v*_cache.h5
      - predictions/<video base name>.h5
      - annotations/<video base name>.json

    Excludes:
      - video file
      - pose file

    Only paths that exist on disk at call time are returned. Cache lookups
    are skipped entirely when the project has no cache directory.

    Args:
        video_name: File name (or key) of the video in this project.

    Returns:
        List of pathlib.Path objects for all related files.
    """
    from glob import escape as glob_escape

    paths: list[Path] = []
    base = Path(video_name).with_suffix("").name

    # Feature files (recursive under features/<base>/)
    feature_root = self._paths.feature_dir / base
    if feature_root.exists():
        paths.extend(p for p in feature_root.rglob("*") if p.is_file())

    cache_dir = self._paths.cache_dir
    if cache_dir is not None:
        # Cached convex hulls (recursive under cache/convex_hulls/<base>/)
        ch_root = cache_dir / "convex_hulls" / base
        if ch_root.exists():
            paths.extend(p for p in ch_root.rglob("*") if p.is_file())

        # Cached pose files: cache/<base>_pose_est_v*_cache.h5
        # The base name is escaped so that glob metacharacters in a video
        # name (e.g. "[", "]", "*", "?") are matched literally; otherwise
        # the pattern could select another video's cache files.
        pose_cache_pattern = f"{glob_escape(base)}_pose_est_v*_cache.h5"
        paths.extend(p for p in cache_dir.glob(pose_cache_pattern) if p.is_file())

    # Predictions file: predictions/<base>.h5
    prediction = self._paths.prediction_dir / f"{base}.h5"
    if prediction.exists():
        paths.append(prediction)

    # Annotation file: annotations/<base>.json
    annotation = self._paths.annotations_dir / f"{base}.json"
    if annotation.exists():
        paths.append(annotation)

    return paths
202+
149203
@property
150204
def labeler(self) -> str | None:
151205
"""return name of labeler

src/jabs/project/settings_manager.py

Lines changed: 46 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -97,49 +97,73 @@ def video_metadata(self, video: str) -> dict:
9797
"""
9898
return self._project_info["video_files"][video].get("metadata", {})
9999

100-
def set_project_metadata(self, metadata: dict) -> None:
101-
"""Set or replace project and per-video metadata.
100+
def remove_video_from_project_file(self, video_name: str, sync=True) -> None:
    """Drop a video's entry from the project info, if one is present.

    Only the project.json record is touched: the video itself and any data
    files associated with it are left on disk. Removing a video that is not
    listed is a no-op.

    Args:
        video_name: Name of the video file whose entry should be removed.
        sync: When True (the default), write the project file afterwards so
            the on-disk copy matches. Pass False to batch several removals
            and save once.
    """
    # pop with a default so a missing entry is silently ignored
    self._project_info.get("video_files", {}).pop(video_name, None)

    if not sync:
        return
    self.save_project_file()
115+
116+
def set_project_metadata(self, metadata: dict, replace: bool = False) -> None:
    """Set or merge project and per-video metadata.

    By default, existing metadata is merged: new fields are added and existing
    fields are updated. If replace=True, all existing project-level and
    per-video metadata is cleared first, so only the supplied metadata remains.

    The project file is saved once after all updates have been applied.

    Args:
        metadata (dict): Dictionary containing new project-level metadata under the
            "project" key, and per-video metadata under the "videos" key. Example:
            {
                "project": {...},
                "videos": {
                    "video1": {...},
                    ...
                }
            }
        replace (bool): If true, replace existing metadata instead of merge.
            Defaults to False.

    Raises:
        KeyError: If metadata is provided for a video not present in the project.
            Raised before anything is modified, so the project info is left
            untouched in that case.

    Note:
        See src/jabs/schema/metadata.py for metadata schema.
    """
    video_files = self._project_info.get("video_files", {})
    per_video_updates = metadata.get("videos", {})

    # Validate every video name up front so that a bad entry cannot leave the
    # in-memory project info partially cleared or partially merged.
    for video_name in per_video_updates:
        if video_name not in video_files:
            raise KeyError(f"Video '{video_name}' not found in project.")

    if replace:
        # Remove all existing metadata; the merges below then start from empty.
        self._project_info.pop("metadata", None)
        for video_entry in video_files.values():
            video_entry.pop("metadata", None)

    # Merge project-level metadata (new values win on key collisions)
    if "project" in metadata:
        self._project_info["metadata"] = {
            **self._project_info.get("metadata", {}),
            **metadata["project"],
        }

    # Merge per-video metadata; other fields of each video entry are preserved
    for video_name, video_metadata in per_video_updates.items():
        video_entry = video_files[video_name]
        video_entry["metadata"] = {
            **video_entry.get("metadata", {}),
            **video_metadata,
        }

    self.save_project_file()
144168

145169
def save_behavior(self, behavior: str, data: dict):

src/jabs/scripts/cli.py

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from rich.console import Console
1414

1515
from jabs.classifier import Classifier
16-
from jabs.project import Project, export_training_data
16+
from jabs.project import Project, export_training_data, get_videos_to_prune
1717
from jabs.types import ClassifierType
1818

1919
# find out which classifiers are supported in this environment
@@ -156,6 +156,82 @@ def rename_behavior(ctx, directory: Path, old_name: str, new_name: str) -> None:
156156
jabs_project.rename_behavior(old_name, new_name)
157157

158158

159+
@cli.command(name="prune")
@click.argument(
    "directory",
    type=click.Path(
        exists=True,
        file_okay=False,
        dir_okay=True,
        path_type=Path,
    ),
)
@click.option(
    "--behavior",
    type=str,
    default=None,
    help="Filter by behavior name. If provided, only videos labeled for this behavior will be retained; otherwise, all videos with any labeled behavior are kept.",
)
@click.pass_context
def prune(ctx, directory: Path, behavior: str | None):
    """Prune unused videos from a JABS project directory."""
    # Flow: validate the directory, list the candidate videos, ask for
    # confirmation, then delete each video, its pose file and its derived
    # files (best effort) and drop the video from project.json.
    if not Project.is_valid_project_directory(directory):
        raise click.ClickException(f"Invalid JABS project directory: {directory}")

    project = Project(directory)
    candidates = get_videos_to_prune(project, behavior)
    if not candidates:
        click.echo("No videos to prune.")
        return

    click.echo(
        f"Found {len(candidates)} videos to prune out of {len(project.video_manager.videos)} total videos."
    )
    click.echo("The following videos will be removed:")
    for entry in candidates:
        click.echo(f" - {entry.video_path.name}")

    if not click.confirm("Do you want to proceed with pruning these videos?", default=False):
        click.echo("Pruning cancelled.")
        return

    for entry in candidates:
        # collect related files before anything is deleted
        derived = project.get_derived_file_paths(entry.video_path.name)

        # Video first, then pose file. A file that is already gone is fine;
        # any other failure is reported as a warning and pruning continues.
        for attr in ("video_path", "pose_path"):
            try:
                getattr(entry, attr).unlink()
            except FileNotFoundError:
                pass
            except Exception as err:
                click.echo(f"Warning: failed to delete video or pose file: {err}")

        for file in derived:
            try:
                file.unlink()
            except FileNotFoundError:
                continue
            except Exception as err:
                click.echo(f"Warning: failed to delete derived file {file}: {err}")

        # remove from the project.json file; saving is deferred (sync=False)
        project.settings_manager.remove_video_from_project_file(
            entry.video_path.name, sync=False
        )

    # NOTE(review): placed after the loop because each removal passes
    # sync=False; the original indentation was lost -- confirm.
    project.settings_manager.save_project_file()
233+
234+
159235
def main():
160236
"""Entry point for the JABS CLI."""
161237
cli(obj={})

src/jabs/scripts/initialize_project.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ def main():
227227
distance_unit = project.feature_manager.distance_unit
228228
if metadata:
229229
has_metadata = False
230+
replace = True
230231

231232
if project.settings_manager.project_metadata != {}:
232233
has_metadata = True
@@ -236,14 +237,16 @@ def main():
236237
has_metadata = True
237238
break
238239

239-
if has_metadata and not args.force:
240+
if has_metadata:
240241
response = (
241-
input("Warning: Project already has metadata. Overwrite? [y/N]: ").strip().lower()
242+
input(
243+
"Metadata already exists. Apply new metadata by [M]erge (default) or [R]eplace (clear existing)? [M/r]: "
244+
)
245+
.strip()
246+
.lower()
242247
)
243-
if response != "y":
244-
print("Aborting. Use --force to overwrite without prompt.")
245-
sys.exit(1)
246-
project.settings_manager.set_project_metadata(metadata)
248+
replace = response == "r"
249+
project.settings_manager.set_project_metadata(metadata, replace=replace)
247250

248251
# iterate over each video and try to pair it with an h5 file
249252
# this test is quick, don't bother to parallelize

0 commit comments

Comments
 (0)