From 500cd59af14fc62fa0cf51f2e16dc10fb469fec5 Mon Sep 17 00:00:00 2001 From: Glen Beane <356266+gbeane@users.noreply.github.com> Date: Wed, 15 Oct 2025 23:33:38 -0400 Subject: [PATCH 01/14] initial implementation of model probability calibration --- src/jabs/classifier/classifier.py | 200 ++++++++++++++++++----- src/jabs/scripts/classify.py | 6 +- src/jabs/scripts/stats.py | 2 +- src/jabs/ui/central_widget.py | 2 +- src/jabs/ui/main_window.py | 14 ++ src/jabs/ui/settings_dialog.py | 262 ++++++++++++++++++++++++++++++ 6 files changed, 440 insertions(+), 46 deletions(-) create mode 100644 src/jabs/ui/settings_dialog.py diff --git a/src/jabs/classifier/classifier.py b/src/jabs/classifier/classifier.py index a38bfed3..df66ca01 100644 --- a/src/jabs/classifier/classifier.py +++ b/src/jabs/classifier/classifier.py @@ -8,6 +8,7 @@ import joblib import numpy as np import pandas as pd +from sklearn.calibration import CalibratedClassifierCV from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier from sklearn.exceptions import InconsistentVersionWarning from sklearn.metrics import ( @@ -21,7 +22,7 @@ from jabs.types import ClassifierType from jabs.utils import hash_file -_VERSION = 9 +_VERSION = 10 _classifier_choices = [ClassifierType.RANDOM_FOREST, ClassifierType.GRADIENT_BOOSTING] @@ -60,6 +61,7 @@ class Classifier: def __init__(self, classifier=ClassifierType.RANDOM_FOREST, n_jobs=1): self._classifier_type = classifier self._classifier = None + self._behavior_settings = None self._project_settings = None self._behavior = None self._feature_names = None @@ -141,10 +143,10 @@ def classifier_hash(self) -> str: return "NO HASH" @property - def project_settings(self) -> dict: - """return a copy of dictionary of project settings for this classifier""" - if self._project_settings is not None: - return dict(self._project_settings) + def behavior_settings(self) -> dict: + """return a copy of dictionary of behavior-specific settings for this classifier""" + if self._behavior_settings is not None: + return dict(self._behavior_settings) return {} @property @@ -361,19 +363,21 @@ def set_project_settings(self, project: Project): if no behavior is currently set will use project defaults """ if self._behavior is None: - self._project_settings = project.get_project_defaults() + self._behavior_settings = project.get_project_defaults() else: - self._project_settings = project.settings_manager.get_behavior(self._behavior) + self._behavior_settings = project.settings_manager.get_behavior(self._behavior) + + self._project_settings = project.settings_manager.project_settings.get("settings", {}) def set_dict_settings(self, settings: dict): - """assign project settings via a dict to the classifier + """assign behavior-specific settings via a dict to the classifier Args: settings: dict of project settings. Must be same structure as project.settings_manager.get_behavior TODO: Add checks to enforce conformity to project settings """ - self._project_settings = dict(settings) + self._behavior_settings = dict(settings) def classifier_choices(self): """get the available classifier types @@ -403,7 +407,7 @@ def train(self, data, random_seed: int | None = None): raises ValueError for having either unset project settings or an unset classifier """ - if self._project_settings is None: + if self._behavior_settings is None: raise ValueError("Project settings for classifier unset, cannot train classifier.") # Assume that feature names is provided, otherwise extract it from the dataframe @@ -416,32 +420,90 @@ def train(self, data, random_seed: int | None = None): features = data["training_data"] labels = data["training_labels"] # Symmetric augmentation should occur before balancing so that the class with more labels can sample from the whole set - if self._project_settings.get("symmetric_behavior", False): + if self._behavior_settings.get("symmetric_behavior", False): features, labels = self.augment_symmetric(features, labels) - if self._project_settings.get("balance_labels", False): + if self._behavior_settings.get("balance_labels", False): features, labels = self.downsample_balance(features, labels, random_seed) - if self._classifier_type == ClassifierType.RANDOM_FOREST: - self._classifier = self._fit_random_forest(features, labels, random_seed=random_seed) - elif self._classifier_type == ClassifierType.GRADIENT_BOOSTING: - self._classifier = self._fit_gradient_boost(features, labels, random_seed=random_seed) - elif _xgboost is not None and self._classifier_type == ClassifierType.XGBOOST: + # Optional probability calibration, this is currently set at the project level + calibrate = self._project_settings.get("calibrate_probabilities", False) + calibration_method = self._project_settings.get( + "calibration_method", "isotonic" + ) # or 'sigmoid' + calibration_cv = self._project_settings.get("calibration_cv", 3) + + if calibrate: + # Build an unfitted base estimator + if self._classifier_type == ClassifierType.RANDOM_FOREST: + base_estimator = self._make_random_forest(random_seed=random_seed) + elif self._classifier_type == ClassifierType.GRADIENT_BOOSTING: + base_estimator = self._make_gradient_boost(random_seed=random_seed) + elif _xgboost is not None and self._classifier_type == ClassifierType.XGBOOST: + base_estimator = self._make_xgboost(random_seed=random_seed) + else: + raise ValueError("Unsupported classifier") + + # Wrap with calibrated classifier and fit + self._classifier = CalibratedClassifierCV( + estimator=base_estimator, method=calibration_method, cv=calibration_cv + ) with warnings.catch_warnings(): warnings.simplefilter("ignore", category=FutureWarning) - self._classifier = self._fit_xgboost(features, labels, random_seed=random_seed) + self._classifier.fit(self._clean_features_for_training(features), labels) else: - raise ValueError("Unsupported classifier") + # Fit without calibration (original behavior) + if self._classifier_type == ClassifierType.RANDOM_FOREST: + self._classifier = self._fit_random_forest( + features, labels, random_seed=random_seed + ) + elif self._classifier_type == ClassifierType.GRADIENT_BOOSTING: + self._classifier = self._fit_gradient_boost( + features, labels, random_seed=random_seed + ) + elif _xgboost is not None and self._classifier_type == ClassifierType.XGBOOST: + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=FutureWarning) + self._classifier = self._fit_xgboost(features, labels, random_seed=random_seed) + else: + raise ValueError("Unsupported classifier") # Classifier may have been re-used from a prior training, blank the logging attributes self._classifier_file = None self._classifier_hash = None self._classifier_source = None + def _clean_features_for_training(self, features: pd.DataFrame): + """Clean feature matrix prior to fitting based on classifier type. + + For XGBoost, only replace +/- inf with 0 (XGBoost can handle NaN). + For sklearn tree models, also fill NaNs with 0. + """ + if self._classifier_type == ClassifierType.XGBOOST: + return features.replace([np.inf, -np.inf], 0) + return features.replace([np.inf, -np.inf], 0).fillna(0) + + def _make_random_forest(self, random_seed: int | None = None): + if random_seed is not None: + return RandomForestClassifier(n_jobs=self._n_jobs, random_state=random_seed) + return RandomForestClassifier(n_jobs=self._n_jobs) + + def _make_gradient_boost(self, random_seed: int | None = None): + if random_seed is not None: + return GradientBoostingClassifier(random_state=random_seed) + return GradientBoostingClassifier() + + def _make_xgboost(self, random_seed: int | None = None): + if random_seed is not None: + return _xgboost.XGBClassifier(n_jobs=self._n_jobs, random_state=random_seed) + return _xgboost.XGBClassifier(n_jobs=self._n_jobs) + def sort_features_to_classify(self, features): """sorts features to match the current classifier""" - if self._classifier_type == ClassifierType.XGBOOST: + if isinstance(self._classifier, CalibratedClassifierCV): + # Use the training-time feature order we stored + classifier_columns = self._feature_names + elif self._classifier_type == ClassifierType.XGBOOST: classifier_columns = self._classifier.get_booster().feature_names - # sklearn places feature names in the same spot else: classifier_columns = self._classifier.feature_names_in_ features_sorted = features[classifier_columns] @@ -518,6 +580,7 @@ def load(self, path: Path): self._classifier = c._classifier self._behavior = c._behavior + self._behavior_settings = c._behavior_settings self._project_settings = c._project_settings self._classifier_type = c._classifier_type if c._classifier_file is not None: @@ -568,27 +631,65 @@ def combine_data(per_frame, window): return pd.concat([per_frame, window], axis=1) def _fit_random_forest(self, features, labels, random_seed: int | None = None): - if random_seed is not None: - classifier = RandomForestClassifier(n_jobs=self._n_jobs, random_state=random_seed) - else: - classifier = RandomForestClassifier(n_jobs=self._n_jobs) + classifier = self._make_random_forest(random_seed=random_seed) return classifier.fit(features.replace([np.inf, -np.inf], 0).fillna(0), labels) def _fit_gradient_boost(self, features, labels, random_seed: int | None = None): - if random_seed is not None: - classifier = GradientBoostingClassifier(random_state=random_seed) - else: - classifier = GradientBoostingClassifier() + classifier = self._make_gradient_boost(random_seed=random_seed) return classifier.fit(features.replace([np.inf, -np.inf], 0).fillna(0), labels) def _fit_xgboost(self, features, labels, random_seed: int | None = None): - if random_seed is not None: - classifier = _xgboost.XGBClassifier(n_jobs=self._n_jobs, random_state=random_seed) - else: - classifier = _xgboost.XGBClassifier(n_jobs=self._n_jobs) + classifier = self._make_xgboost(random_seed=random_seed) classifier.fit(features.replace([np.inf, -np.inf]), labels) return classifier + def _get_estimator_with_feature_importances(self): + """Return the underlying estimator that exposes `feature_importances_`, if available. + + Handles calibrated classifiers by retrieving the estimator from the first + calibrated fold. Returns None if no estimator with `feature_importances_` is found. + """ + est = self._classifier + # If wrapped by CalibratedClassifierCV, peel off the estimator + if isinstance(est, CalibratedClassifierCV): + try: + cc0 = est.calibrated_classifiers_[0] + est = cc0.estimator + except Exception: + return None + # Some sklearn/xgboost estimators expose feature_importances_ + return est if hasattr(est, "feature_importances_") else None + + def get_calibrated_feature_importances(self): + """Return averaged feature importances across calibrated folds. + + For CalibratedClassifierCV with tree-based base estimators (RF/GBT/XGBoost), + this computes the mean and std of `feature_importances_` across + `calibrated_classifiers_` estimators and returns a list of tuples: + [(feature_name, mean_importance, std_importance), ...] sorted by mean desc. + + Returns None if unavailable (e.g., non-tree base estimators). + """ + est = self._classifier + if not isinstance(est, CalibratedClassifierCV): + return None + try: + base_ests = [cc.estimator for cc in est.calibrated_classifiers_] + except Exception: + return None + base_ests = [be for be in base_ests if hasattr(be, "feature_importances_")] + if not base_ests: + return None + importances = np.vstack([be.feature_importances_ for be in base_ests]) + mean_imp = importances.mean(axis=0) + std_imp = importances.std(axis=0) + names = self._feature_names or [f"feature_{i}" for i in range(mean_imp.shape[0])] + if len(names) != len(mean_imp): + names = [f"feature_{i}" for i in range(len(mean_imp))] + items = list(zip(names, mean_imp, std_imp, strict=True)) + items.sort(key=lambda t: t[1], reverse=True) + return items + def print_feature_importance(self, feature_list, limit=20): """print the most important features and their importance @@ -596,20 +697,37 @@ def print_feature_importance(self, feature_list, limit=20): feature_list: list of feature names used in the classifier limit: maximum number of features to print, defaults to 20 """ - # Get numerical feature importance - importances = list(self._classifier.feature_importances_) - # List of tuples with variable and importance + # Prefer calibrated importances if available + if isinstance(self._classifier, CalibratedClassifierCV): + items = self.get_calibrated_feature_importances() + if items is not None: + print(f"{'Feature Name':100} Mean Importance Std") + print("-" * 120) + for name, mean_imp, std_imp in items[:limit]: + print(f"{name:100} {mean_imp:0.4f} {std_imp:0.4f}") + return + # fall through to base-estimator single-source path if calibrated but no importances + + # Fallback: single estimator feature_importances_ + est = self._get_estimator_with_feature_importances() + if est is None: + print( + "Feature importances are unavailable for the current classifier (e.g., calibrated logistic/linear models)." + ) + return + importances = list(est.feature_importances_) + names = feature_list if feature_list is not None else (self._feature_names or []) + if len(importances) != len(names): + names = [f"feature_{i}" for i in range(len(importances))] feature_importance = [ - (feature, round(importance, 2)) - for feature, importance in zip(feature_list, importances, strict=True) + (feature, round(importance, 4)) + for feature, importance in zip(names, importances, strict=False) ] - # Sort the feature importance by most important first feature_importance = sorted(feature_importance, key=lambda x: x[1], reverse=True) - # Print out the feature and importance print(f"{'Feature Name':100} Importance") print("-" * 120) for feature, importance in feature_importance[:limit]: - print(f"{feature:100} {importance:0.2f}") + print(f"{feature:100} {importance:0.4f}") @staticmethod def count_label_threshold(all_counts: dict): diff --git a/src/jabs/scripts/classify.py b/src/jabs/scripts/classify.py index cfd38c1e..90478157 100755 --- a/src/jabs/scripts/classify.py +++ b/src/jabs/scripts/classify.py @@ -105,7 +105,7 @@ def classify_pose( prediction_labels = np.full((pose_est.num_identities, pose_est.num_frames), -1, dtype=np.int8) prediction_prob = np.zeros_like(prediction_labels, dtype=np.float32) - classifier_settings = classifier.project_settings + classifier_settings = classifier.behavior_settings print(f"Classifying {input_pose_file}...") @@ -188,7 +188,7 @@ def train(training_file: Path) -> Classifier: Classifier: The trained classifier instance. """ classifier = Classifier.from_training_file(training_file) - classifier_settings = classifier.project_settings + classifier_settings = classifier.behavior_settings print("Training classifier for:", classifier.behavior_name) print(f" Classifier Type: {__CLASSIFIER_CHOICES[classifier.classifier_type]}") @@ -315,7 +315,7 @@ def classify_main(): sys.exit(str(e)) behavior = classifier.behavior_name - classifier_settings = classifier.project_settings + classifier_settings = classifier.behavior_settings print(f"Classifying using trained classifier: {args.classifier}") try: diff --git a/src/jabs/scripts/stats.py b/src/jabs/scripts/stats.py index 2eda38ba..6d522e99 100644 --- a/src/jabs/scripts/stats.py +++ b/src/jabs/scripts/stats.py @@ -124,7 +124,7 @@ def main(): print(f"\nClassifier: {classifier.classifier_name}") print(f"Behavior: {features['behavior']}") unit = ( - "cm" if classifier.project_settings["cm_units"] == ProjectDistanceUnit.CM else "pixel" + "cm" if classifier.behavior_settings["cm_units"] == ProjectDistanceUnit.CM else "pixel" ) print(f"Feature Distance Unit: {unit}") print("-" * 70) diff --git a/src/jabs/ui/central_widget.py b/src/jabs/ui/central_widget.py index f1f0fc64..47856de5 100644 --- a/src/jabs/ui/central_widget.py +++ b/src/jabs/ui/central_widget.py @@ -655,7 +655,7 @@ def _update_classifier_controls(self) -> None: self._controls.set_classifier_selection(self._classifier.classifier_type) # does the classifier match the current settings? - classifier_settings = self._classifier.project_settings + classifier_settings = self._classifier.behavior_settings if ( classifier_settings is not None and classifier_settings.get("window_size", None) == self.window_size diff --git a/src/jabs/ui/main_window.py b/src/jabs/ui/main_window.py index e4863f8d..0eead518 100644 --- a/src/jabs/ui/main_window.py +++ b/src/jabs/ui/main_window.py @@ -20,6 +20,7 @@ from .progress_dialog import create_progress_dialog from .project_loader_thread import ProjectLoaderThread from .project_pruning_dialog import ProjectPruningDialog +from .settings_dialog import JabsSettingsDialog from .stacked_timeline_widget import StackedTimelineWidget from .user_guide_dialog import UserGuideDialog from .util import send_file_to_recycle_bin @@ -119,6 +120,13 @@ def __init__(self, app_name: str, app_name_long: str, *args, **kwargs) -> None: self._clear_cache.triggered.connect(self._clear_cache_action) app_menu.addAction(self._clear_cache) + # model calibration settings + self._settings_action = QtGui.QAction("JABS Settings", self) + self._settings_action.setStatusTip("Open settings dialog") + self._settings_action.setEnabled(False) + self._settings_action.triggered.connect(self._open_settings_dialog) + app_menu.addAction(self._settings_action) + # exit action exit_action = QtGui.QAction(f" &Quit {self._app_name}", self) exit_action.setShortcut(QtGui.QKeySequence("Ctrl+Q")) @@ -725,6 +733,7 @@ def _project_loaded_callback(self) -> None: self._project.feature_manager.can_use_segmentation_features ) self._clear_cache.setEnabled(True) + self._settings_action.setEnabled(True) available_objects = self._project.feature_manager.static_objects for static_object, menu_item in self.enable_landmark_features.items(): if static_object in available_objects: @@ -990,3 +999,8 @@ def _view_license(self) -> None: """View the license agreement (JABS->View License Agreement menu action)""" dialog = LicenseAgreementDialog(self, view_only=True) dialog.exec_() + + def _open_settings_dialog(self) -> None: + """Open the settings dialog (JABS->Settings menu action)""" + dialog = JabsSettingsDialog(parent=self, project_settings=self._project.settings_manager) + dialog.exec_() diff --git a/src/jabs/ui/settings_dialog.py b/src/jabs/ui/settings_dialog.py new file mode 100644 index 00000000..be5efaa8 --- /dev/null +++ b/src/jabs/ui/settings_dialog.py @@ -0,0 +1,262 @@ +from PySide6.QtCore import Qt, Signal +from PySide6.QtWidgets import ( + QAbstractScrollArea, + QCheckBox, + QComboBox, + QDialog, + QDialogButtonBox, + QFrame, + QGridLayout, + QGroupBox, + QLabel, + QLayout, + QScrollArea, + QSizePolicy, + QSpacerItem, + QSpinBox, + QToolButton, + QVBoxLayout, + QWidget, +) + +from jabs.project.settings_manager import SettingsManager + + +class CollapsibleSection(QWidget): + """A simple collapsible section with a header ToolButton and a content area.""" + + sizeChanged = Signal() + + def __init__(self, title: str, content: QWidget, parent: QWidget | None = None) -> None: + super().__init__(parent) + self._content = content + self._toggle_btn = QToolButton(self) + self._toggle_btn.setStyleSheet("QToolButton { border: none; }") + self._toggle_btn.setToolButtonStyle(Qt.ToolButtonStyle.ToolButtonTextBesideIcon) + self._toggle_btn.setArrowType(Qt.ArrowType.RightArrow) + self._toggle_btn.setText(title) + self._toggle_btn.setCheckable(True) + self._toggle_btn.setChecked(False) + self._toggle_btn.toggled.connect(self._on_toggled) + + line = QFrame(self) + line.setFrameShape(QFrame.Shape.HLine) + line.setFrameShadow(QFrame.Shadow.Sunken) + + self._content.setVisible(False) + # Ensure the collapsible widget and its content expand to fit content + self.setSizePolicy(QSizePolicy.Policy.Preferred, QSizePolicy.Policy.Preferred) + self._content.setSizePolicy(QSizePolicy.Policy.Preferred, QSizePolicy.Policy.Preferred) + + lay = QVBoxLayout(self) + lay.setContentsMargins(0, 0, 0, 0) + lay.addWidget(self._toggle_btn) + lay.addWidget(line) + lay.addWidget(self._content) + + def _on_toggled(self, checked: bool) -> None: + self._toggle_btn.setArrowType( + Qt.ArrowType.DownArrow if checked else Qt.ArrowType.RightArrow + ) + self._content.setVisible(checked) + self._content.updateGeometry() + # Ask ancestors to recompute layout so the page grows inside the scroll area + p = self.parentWidget() + if p is not None and p.layout() is not None: + p.layout().activate() + lg = self.layout() + if lg is not None: + lg.activate() + # Let ancestors recompute size hints and notify listeners + pw = self.parentWidget() + if pw is not None: + pw.updateGeometry() + self.updateGeometry() + self.sizeChanged.emit() + + +class JabsSettingsDialog(QDialog): + """Dialog for changing project settings. + + Parameters + ---------- + project_settings: ProjectSettings + Object providing .get(key, default) and either .set(key, value) or item assignment. + parent: QWidget | None + """ + + def __init__(self, project_settings: SettingsManager, parent: QWidget | None = None) -> None: + super().__init__(parent) + self.setWindowTitle("Project Settings") + self._settings_manager = project_settings + + # Allow resizing and show scrollbars if content overflows + self.setSizeGripEnabled(True) + + # Widgets + self._calibrate_checkbox = QCheckBox( + "Enable probability calibration (calibrate_probabilities)" + ) + self._method_selection = QComboBox() + self._method_selection.addItems( + ["isotonic", "sigmoid"] + ) # default will be set from settings + self._cv_selection = QSpinBox() + self._cv_selection.setRange(2, 10) + self._cv_selection.setAccelerated(True) + self._cv_selection.setToolTip("Number of CV folds used inside the calibrator") + + # Load current values from project settings (keys must match classifier usage) + current_settings = project_settings.project_settings.get("settings", {}) + calibrate = current_settings.get("calibrate_probabilities", False) + method = current_settings.get("calibration_method", "isotonic") + cv = current_settings.get("calibration_cv", 3) + + self._calibrate_checkbox.setChecked(calibrate) + idx = max(0, self._method_selection.findText(method)) + self._method_selection.setCurrentIndex(idx) + self._cv_selection.setValue(cv) + + # Layout for form + form = QWidget(self) + grid = QGridLayout(form) + grid.setContentsMargins(0, 0, 0, 0) + grid.setHorizontalSpacing(12) + grid.setVerticalSpacing(8) + grid.setColumnStretch(0, 0) # labels column: natural size + grid.setColumnStretch(1, 0) # inputs column: natural size + grid.setColumnStretch(2, 1) # expanding whitespace to the right + + # Keep inputs compact; whitespace grows in column 2 + self._method_selection.setSizeAdjustPolicy(QComboBox.SizeAdjustPolicy.AdjustToContents) + self._method_selection.setFixedWidth(self._method_selection.sizeHint().width() + 24) + self._method_selection.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + + self._cv_selection.setFixedWidth(90) + self._cv_selection.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + + self._calibrate_checkbox.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + + grid.addWidget(QLabel("Calibrate probabilities:"), 0, 0, Qt.AlignRight) + grid.addWidget(self._calibrate_checkbox, 0, 1) + + grid.addWidget(QLabel("Calibration method:"), 1, 0, Qt.AlignRight) + grid.addWidget(self._method_selection, 1, 1) + + grid.addWidget(QLabel("calibration cv (folds):"), 2, 0, Qt.AlignRight) + grid.addWidget(self._cv_selection, 2, 1) + + grid.addItem(QSpacerItem(0, 0, QSizePolicy.Expanding, QSizePolicy.Minimum), 0, 2, 3, 1) + + # Help / inline docs (rich text) + help_label = QLabel(self) + help_label.setTextFormat(Qt.TextFormat.RichText) + help_label.setWordWrap(True) + help_label.setText( + """ +
Calibrate probabilities remaps raw model scores to better probabilities, using a small + cross-validation inside training. This improves metrics like log-loss and decision thresholds. +
+Tip: If you see probabilities stuck near 0/1, try enabling calibration.
+ """ + ) + help_label.setSizePolicy(QSizePolicy.Policy.Preferred, QSizePolicy.Policy.Preferred) + + calibration_help_panel = CollapsibleSection("What do these do?", help_label, self) + calibration_help_panel.setSizePolicy( + QSizePolicy.Policy.Preferred, QSizePolicy.Policy.Preferred + ) + help_label.setSizePolicy(QSizePolicy.Policy.Preferred, QSizePolicy.Policy.Preferred) + _help_toggle_btn = ( + calibration_help_panel._toggle_btn + ) # internal; used to wire scrolling after scroll area is created + + # When the collapsible expands/collapses, grow the group box to fit content and leave scrolling to the dialog + def _reflow_calibration_group(): + calibration_group.adjustSize() + page_layout.activate() + scroll.ensureWidgetVisible(calibration_help_panel) + + calibration_help_panel.sizeChanged.connect(_reflow_calibration_group) + + # Group box for Model Calibration section + calibration_group = QGroupBox("Model Calibration", self) + calibration_group.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Preferred) + group_vbox = QVBoxLayout(calibration_group) + group_vbox.setContentsMargins(12, 12, 12, 12) + group_vbox.setSpacing(8) + group_vbox.addWidget(form) + group_vbox.addWidget(calibration_help_panel) + group_vbox.addStretch(0) + + # Scrollable page to host settings sections + page = QWidget(self) + page.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Preferred) + page_layout = QVBoxLayout(page) + page_layout.setSizeConstraint(QLayout.SizeConstraint.SetMinAndMaxSize) + page_layout.setContentsMargins(0, 0, 0, 0) + page_layout.setSpacing(10) + page_layout.addWidget(calibration_group) + page_layout.setAlignment(calibration_group, Qt.AlignTop) + page_layout.addStretch(1) + + scroll = QScrollArea(self) + scroll.setWidget(page) + scroll.setWidgetResizable(True) + scroll.setFrameShape(QFrame.Shape.NoFrame) + scroll.setHorizontalScrollBarPolicy(Qt.ScrollBarPolicy.ScrollBarAlwaysOff) + scroll.setVerticalScrollBarPolicy(Qt.ScrollBarPolicy.ScrollBarAsNeeded) + scroll.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Expanding) + # Keep content width constant: viewport gutter above matches scrollbar width + scroll.setSizeAdjustPolicy(QAbstractScrollArea.SizeAdjustPolicy.AdjustToContents) + + # When the help section is expanded, ensure it is visible within the scroll area + try: + + def _on_help_toggled(checked: bool) -> None: + if checked: + # Only scroll; don't resize caps — keeps scrollbar on the window only + scroll.ensureWidgetVisible(calibration_help_panel) + + _help_toggle_btn.toggled.connect(_on_help_toggled) + except Exception: + pass + + # Buttons + btn_box = QDialogButtonBox(self) + btn_save = btn_box.addButton("Save", QDialogButtonBox.ButtonRole.AcceptRole) + btn_close = btn_box.addButton("Close", QDialogButtonBox.ButtonRole.RejectRole) + btn_save.clicked.connect(self._on_save) + btn_close.clicked.connect(self.reject) + + # Main layout + main = QVBoxLayout(self) + main.addWidget(scroll, 1) + main.addWidget(btn_box) + + self.setLayout(main) + + # Size to content initially, then give a taller starting height; user-resize preserved later + self.adjustSize() + self.resize(max(self.width(), 720), max(self.height(), 1000)) + + def _on_save(self) -> None: + # Persist back to project settings + settings = { + "settings": { + "calibrate_probabilities": self._calibrate_checkbox.isChecked(), + "calibration_method": self._method_selection.currentText(), + "calibration_cv": self._cv_selection.value(), + } + } + self._settings_manager.save_project_file(settings) + self.accept() From e4041d0de5017d1ebc0b6b2e66591abd81353c7e Mon Sep 17 00:00:00 2001 From: Glen Beane <356266+gbeane@users.noreply.github.com> Date: Thu, 16 Oct 2025 08:29:28 -0400 Subject: [PATCH 02/14] fix some Qt flag usage --- src/jabs/ui/settings_dialog.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/jabs/ui/settings_dialog.py b/src/jabs/ui/settings_dialog.py index be5efaa8..95b4e40d 100644 --- a/src/jabs/ui/settings_dialog.py +++ b/src/jabs/ui/settings_dialog.py @@ -130,23 +130,25 @@ def __init__(self, project_settings: SettingsManager, parent: QWidget | None = N # Keep inputs compact; whitespace grows in column 2 self._method_selection.setSizeAdjustPolicy(QComboBox.SizeAdjustPolicy.AdjustToContents) self._method_selection.setFixedWidth(self._method_selection.sizeHint().width() + 24) - self._method_selection.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + self._method_selection.setSizePolicy(QSizePolicy.Policy.Fixed, QSizePolicy.Policy.Fixed) self._cv_selection.setFixedWidth(90) - self._cv_selection.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + self._cv_selection.setSizePolicy(QSizePolicy.Policy.Fixed, QSizePolicy.Policy.Fixed) - self._calibrate_checkbox.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + self._calibrate_checkbox.setSizePolicy(QSizePolicy.Policy.Fixed, QSizePolicy.Policy.Fixed) - grid.addWidget(QLabel("Calibrate probabilities:"), 0, 0, Qt.AlignRight) + grid.addWidget(QLabel("Calibrate probabilities:"), 0, 0, Qt.AlignmentFlag.AlignRight) grid.addWidget(self._calibrate_checkbox, 0, 1) - grid.addWidget(QLabel("Calibration method:"), 1, 0, Qt.AlignRight) + grid.addWidget(QLabel("Calibration method:"), 1, 0, Qt.AlignmentFlag.AlignRight) grid.addWidget(self._method_selection, 1, 1) - grid.addWidget(QLabel("calibration cv (folds):"), 2, 0, Qt.AlignRight) + grid.addWidget(QLabel("calibration cv (folds):"), 2, 0, Qt.AlignmentFlag.AlignRight) grid.addWidget(self._cv_selection, 2, 1) - grid.addItem(QSpacerItem(0, 0, QSizePolicy.Expanding, QSizePolicy.Minimum), 0, 2, 3, 1) + grid.addItem( + QSpacerItem(0, 0, QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Minimum), 0, 2, 3, 1 + ) # Help / inline docs (rich text) help_label = QLabel(self) @@ -206,7 +208,7 @@ def _reflow_calibration_group(): page_layout.setContentsMargins(0, 0, 0, 0) page_layout.setSpacing(10) page_layout.addWidget(calibration_group) - page_layout.setAlignment(calibration_group, Qt.AlignTop) + page_layout.setAlignment(calibration_group, Qt.AlignmentFlag.AlignTop) page_layout.addStretch(1) scroll = QScrollArea(self) From 76b3bdf9224472a4c194dc69d26b3a91f792f1e4 Mon Sep 17 00:00:00 2001 From: Glen Beane <356266+gbeane@users.noreply.github.com> Date: Thu, 16 Oct 2025 08:45:14 -0400 Subject: [PATCH 03/14] some refactoring in classifier.py --- src/jabs/classifier/classifier.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/jabs/classifier/classifier.py b/src/jabs/classifier/classifier.py index df66ca01..02b81798 100644 --- a/src/jabs/classifier/classifier.py +++ b/src/jabs/classifier/classifier.py @@ -670,23 +670,25 @@ def get_calibrated_feature_importances(self): Returns None if unavailable (e.g., non-tree base estimators). """ - est = self._classifier - if not isinstance(est, CalibratedClassifierCV): + if not isinstance(self._classifier, CalibratedClassifierCV): return None try: - base_ests = [cc.estimator for cc in est.calibrated_classifiers_] + base_ests = [cc.estimator for cc in self._classifier.calibrated_classifiers_] except Exception: return None + + # get the base estimators that have feature_importances_ base_ests = [be for be in base_ests if hasattr(be, "feature_importances_")] if not base_ests: return None + + # get the mean and standard deviation of feature importances from the base estimators importances = np.vstack([be.feature_importances_ for be in base_ests]) mean_imp = importances.mean(axis=0) std_imp = importances.std(axis=0) - names = self._feature_names or [f"feature_{i}" for i in range(mean_imp.shape[0])] - if len(names) != len(mean_imp): - names = [f"feature_{i}" for i in range(len(mean_imp))] - items = list(zip(names, mean_imp, std_imp, strict=True)) + + # combine with feature names and sort by mean importance + items = list(zip(self._feature_names, mean_imp, std_imp, strict=True)) items.sort(key=lambda t: t[1], reverse=True) return items @@ -701,19 +703,17 @@ def print_feature_importance(self, feature_list, limit=20): if isinstance(self._classifier, CalibratedClassifierCV): items = self.get_calibrated_feature_importances() if items is not None: - print(f"{'Feature Name':100} Mean Importance Std") + print(f"{'Feature Name':100} Mean Importance Std") print("-" * 120) for name, mean_imp, std_imp in items[:limit]: - print(f"{name:100} {mean_imp:0.4f} {std_imp:0.4f}") + print(f"{name:100} {mean_imp:0.4f} {std_imp:0.4f}") return # fall through to base-estimator single-source path if calibrated but no importances # Fallback: single estimator feature_importances_ est = self._get_estimator_with_feature_importances() if est is None: - print( - "Feature importances are unavailable for the current classifier (e.g., calibrated logistic/linear models)." - ) + print("Feature importances are unavailable for the current classifier.") return importances = list(est.feature_importances_) names = feature_list if feature_list is not None else (self._feature_names or []) From 3fe7536cf87d2309fdafa1c32ed2fb7011b2a59c Mon Sep 17 00:00:00 2001 From: Glen Beane <356266+gbeane@users.noreply.github.com> Date: Thu, 16 Oct 2025 09:35:06 -0400 Subject: [PATCH 04/14] refactoring in JabsSettingsDialog --- src/jabs/ui/settings_dialog.py | 50 ++++++++++++++++------------------ 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/src/jabs/ui/settings_dialog.py b/src/jabs/ui/settings_dialog.py index 95b4e40d..e103fd1e 100644 --- a/src/jabs/ui/settings_dialog.py +++ b/src/jabs/ui/settings_dialog.py @@ -60,29 +60,29 @@ def _on_toggled(self, checked: bool) -> None: ) self._content.setVisible(checked) self._content.updateGeometry() + # Ask ancestors to recompute layout so the page grows inside the scroll area - p = self.parentWidget() - if p is not None and p.layout() is not None: - p.layout().activate() - lg = self.layout() - if lg is not None: - lg.activate() + parent = self.parentWidget() + if parent is not None and parent.layout() is not None: + parent.layout().activate() + + if self.layout() is not None: + self.layout().activate() + # Let ancestors recompute size hints and notify listeners - pw = self.parentWidget() - if pw is not None: - pw.updateGeometry() + if parent is not None: + parent.updateGeometry() self.updateGeometry() self.sizeChanged.emit() class JabsSettingsDialog(QDialog): - """Dialog for changing project settings. + """ + Dialog for changing project settings. - Parameters - ---------- - project_settings: ProjectSettings - Object providing .get(key, default) and either .set(key, value) or item assignment. - parent: QWidget | None + Args: + project_settings (SettingsManager): Project settings manager used to load and save settings. + parent (QWidget | None, optional): Parent widget for this dialog. Defaults to None. """ def __init__(self, project_settings: SettingsManager, parent: QWidget | None = None) -> None: @@ -218,20 +218,16 @@ def _reflow_calibration_group(): scroll.setHorizontalScrollBarPolicy(Qt.ScrollBarPolicy.ScrollBarAlwaysOff) scroll.setVerticalScrollBarPolicy(Qt.ScrollBarPolicy.ScrollBarAsNeeded) scroll.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Expanding) + # Keep content width constant: viewport gutter above matches scrollbar width scroll.setSizeAdjustPolicy(QAbstractScrollArea.SizeAdjustPolicy.AdjustToContents) - # When the help section is expanded, ensure it is visible within the scroll area - try: - - def _on_help_toggled(checked: bool) -> None: - if checked: - # Only scroll; don't resize caps — keeps scrollbar on the window only - scroll.ensureWidgetVisible(calibration_help_panel) + def _on_help_toggled(checked: bool) -> None: + if checked: + # Only scroll; don't resize caps — keeps scrollbar on the window only + scroll.ensureWidgetVisible(calibration_help_panel) - _help_toggle_btn.toggled.connect(_on_help_toggled) - except Exception: - pass + _help_toggle_btn.toggled.connect(_on_help_toggled) # Buttons btn_box = QDialogButtonBox(self) @@ -249,10 +245,10 @@ def _on_help_toggled(checked: bool) -> None: # Size to content initially, then give a taller starting height; user-resize preserved later self.adjustSize() - self.resize(max(self.width(), 720), max(self.height(), 1000)) + self.resize(max(self.width(), 700), max(self.height(), 600)) def _on_save(self) -> None: - # Persist back to project settings + """Save settings to project and close dialog.""" settings = { "settings": { "calibrate_probabilities": self._calibrate_checkbox.isChecked(), From 0fe5d90c2e0f1136852a375a9f921397ebfe7b80 Mon Sep 17 00:00:00 2001 From: Glen Beane <356266+gbeane@users.noreply.github.com> Date: Fri, 17 Oct 2025 14:52:47 -0400 Subject: [PATCH 05/14] save the probability calibration settings in exported training data --- src/jabs/classifier/classifier.py | 29 +++++++++++++++----------- src/jabs/constants.py | 3 +++ src/jabs/project/export_training.py | 12 +++++++++++ src/jabs/project/prediction_manager.py | 2 +- src/jabs/project/project.py | 7 +++++-- src/jabs/project/read_training.py | 19 ++++++++++++++++- src/jabs/project/settings_manager.py | 11 +++++++++- src/jabs/project/video_manager.py | 2 +- src/jabs/ui/central_widget.py | 2 +- src/jabs/ui/main_window.py | 2 +- 10 files changed, 69 insertions(+), 20 deletions(-) diff --git a/src/jabs/classifier/classifier.py b/src/jabs/classifier/classifier.py index 02b81798..c84e97a3 100644 --- a/src/jabs/classifier/classifier.py +++ b/src/jabs/classifier/classifier.py @@ -18,6 +18,7 @@ ) from sklearn.model_selection import LeaveOneGroupOut, train_test_split +from jabs.constants import DEFAULT_CALIBRATION_CV, DEFAULT_CALIBRATION_METHOD from jabs.project import Project, TrackLabels, load_training_data from jabs.types import ClassifierType from jabs.utils import hash_file @@ -62,7 +63,7 @@ def __init__(self, classifier=ClassifierType.RANDOM_FOREST, n_jobs=1): self._classifier_type = classifier self._classifier = None self._behavior_settings = None - self._project_settings = None + self._jabs_settings = None self._behavior = None self._feature_names = None self._n_jobs = n_jobs @@ -93,7 +94,9 @@ def from_training_file(cls, path: Path): classifier = cls() classifier.behavior_name = behavior - classifier.set_dict_settings(loaded_training_data["settings"]) + classifier.set_behavior_settings(loaded_training_data["behavior_settings"]) + classifier._jabs_settings = loaded_training_data["jabs_settings"] + classifier_type = ClassifierType(loaded_training_data["classifier_type"]) if classifier_type in classifier.classifier_choices(): classifier.set_classifier(classifier_type) @@ -101,6 +104,7 @@ def from_training_file(cls, path: Path): print( f"Specified classifier type {classifier_type.name} is unavailable, using default: {classifier.classifier_type.name}" ) + training_features = classifier.combine_data( loaded_training_data["per_frame"], loaded_training_data["window"] ) @@ -367,9 +371,10 @@ def set_project_settings(self, project: Project): else: self._behavior_settings = project.settings_manager.get_behavior(self._behavior) - self._project_settings = project.settings_manager.project_settings.get("settings", {}) + # grab other JABS settings from settings manager, some might be used by the classifier + self._jabs_settings = project.settings_manager.settings - def set_dict_settings(self, settings: dict): + def set_behavior_settings(self, settings: dict): """assign behavior-specific settings via a dict to the classifier Args: @@ -425,14 +430,14 @@ def train(self, data, random_seed: int | None = None): if self._behavior_settings.get("balance_labels", False): features, labels = self.downsample_balance(features, labels, random_seed) - # Optional probability calibration, this is currently set at the project level - calibrate = self._project_settings.get("calibrate_probabilities", False) - calibration_method = self._project_settings.get( - "calibration_method", "isotonic" - ) # or 'sigmoid' - calibration_cv = self._project_settings.get("calibration_cv", 3) + # Optional probability calibration + calibrate_probabilities = self._jabs_settings.get("calibrate_probabilities", False) + if calibrate_probabilities: + calibration_method = self._jabs_settings.get( + "calibration_method", DEFAULT_CALIBRATION_METHOD + ) + calibration_cv = self._jabs_settings.get("calibration_cv", DEFAULT_CALIBRATION_CV) - if calibrate: # Build an unfitted base estimator if self._classifier_type == ClassifierType.RANDOM_FOREST: base_estimator = self._make_random_forest(random_seed=random_seed) @@ -581,7 +586,7 @@ def load(self, path: Path): self._classifier = c._classifier self._behavior = c._behavior self._behavior_settings = c._behavior_settings - self._project_settings = c._project_settings + self._jabs_settings = c._jabs_settings self._classifier_type = c._classifier_type if c._classifier_file is not None: self._classifier_file = c._classifier_file diff --git a/src/jabs/constants.py b/src/jabs/constants.py index a4a585d0..80c0e853 100644 --- a/src/jabs/constants.py +++ b/src/jabs/constants.py @@ -8,3 +8,6 @@ # some defaults for compressing hdf5 output COMPRESSION = "gzip" COMPRESSION_OPTS_DEFAULT = 6 + +DEFAULT_CALIBRATION_METHOD = "isotonic" +DEFAULT_CALIBRATION_CV = 3 diff --git a/src/jabs/project/export_training.py b/src/jabs/project/export_training.py index 3565454d..12248fae 100644 --- a/src/jabs/project/export_training.py +++ b/src/jabs/project/export_training.py @@ -7,6 +7,7 @@ import jabs.feature_extraction import jabs.version +from jabs.constants import DEFAULT_CALIBRATION_CV, DEFAULT_CALIBRATION_METHOD from jabs.project.project_utils import to_safe_name from jabs.utils import FINAL_TRAIN_SEED @@ -64,6 +65,17 @@ def export_training_data( write_project_settings(out_h5, project.settings_manager.get_behavior(behavior), "settings") out_h5.attrs["classifier_type"] = classifier_type.value out_h5.attrs["training_seed"] = training_seed + out_h5.attrs["calibrate_probabilities"] = project.settings_manager.settings.get( + "calibrate_probabilities", False + ) + if out_h5.attrs["calibrate_probabilities"]: + out_h5.attrs["calibration_method"] = project.settings_manager.settings.get( + "calibration_method", DEFAULT_CALIBRATION_METHOD + ) + out_h5.attrs["calibration_cv"] = project.settings_manager.settings.get( + "calibration_cv", DEFAULT_CALIBRATION_CV + ) + feature_group = out_h5.create_group("features") for feature, data in features["per_frame"].items(): feature_group.create_dataset(f"per_frame/{feature}", data=data) diff --git a/src/jabs/project/prediction_manager.py b/src/jabs/project/prediction_manager.py index af3407d3..d4e63caa 100644 --- a/src/jabs/project/prediction_manager.py +++ b/src/jabs/project/prediction_manager.py @@ -126,7 +126,7 @@ def load_predictions(self, video: str, behavior: str): file_base = Path(video).with_suffix("").name + ".h5" path = self._project.project_paths.prediction_dir / file_base - nident = self._project.settings_manager.project_settings["video_files"][video][ + nident = self._project.settings_manager.project_dictionary["video_files"][video][ "identities" ] diff --git a/src/jabs/project/project.py b/src/jabs/project/project.py index 03abd2ec..b84cc474 100644 --- a/src/jabs/project/project.py +++ b/src/jabs/project/project.py @@ -69,7 +69,10 @@ def __init__( self._session_tracker = SessionTracker(self, tracking_enabled=enable_session_tracker) # write out the defaults to the project file - if self._settings_manager.project_settings.get("defaults") != self.get_project_defaults(): + if ( + self._settings_manager.project_dictionary.get("defaults") + != self.get_project_defaults() + ): self._settings_manager.save_project_file({"defaults": self.get_project_defaults()}) # Start a session tracker for this project. @@ -110,7 +113,7 @@ def classifier_dir(self): @property def settings(self): """get the project metadata and preferences.""" - return self._settings_manager.project_settings + return self._settings_manager.project_dictionary @property def settings_manager(self) -> SettingsManager: diff --git a/src/jabs/project/read_training.py b/src/jabs/project/read_training.py index 5e5ed492..3a6bdbc7 100644 --- a/src/jabs/project/read_training.py +++ b/src/jabs/project/read_training.py @@ -4,6 +4,7 @@ import h5py import pandas as pd +from jabs.constants import DEFAULT_CALIBRATION_CV, DEFAULT_CALIBRATION_METHOD from jabs.types import ClassifierType, ProjectDistanceUnit @@ -79,9 +80,10 @@ def load_training_data(training_file: Path): with h5py.File(training_file, "r") as in_h5: features["min_pose_version"] = in_h5.attrs["min_pose_version"] features["behavior"] = in_h5.attrs["behavior"] - features["settings"] = read_project_settings(in_h5["settings"]) + features["behavior_settings"] = read_project_settings(in_h5["settings"]) features["training_seed"] = in_h5.attrs["training_seed"] features["classifier_type"] = ClassifierType(in_h5.attrs["classifier_type"]) + # convert the string distance_unit attr to corresponding # ProjectDistanceUnit enum unit = in_h5.attrs.get("distance_unit") @@ -92,6 +94,21 @@ def load_training_data(training_file: Path): else: features["distance_unit"] = ProjectDistanceUnit[unit] + features["jabs_settings"] = {} + + # load other jabs settings that might or might not be present + calibrate_probabilities = in_h5.attrs.get("calibrate_probabilities", False) + if calibrate_probabilities: + features["jabs_settings"].extend( + { + "calibrate_probabilities": calibrate_probabilities, + "calibration_method": in_h5.attrs.get( + "calibration_method", DEFAULT_CALIBRATION_METHOD + ), + "calibration_cv": in_h5.attrs.get("calibration_cv", DEFAULT_CALIBRATION_CV), + } + ) + features["labels"] = in_h5["label"][:] features["groups"] = in_h5["group"][:] diff --git a/src/jabs/project/settings_manager.py b/src/jabs/project/settings_manager.py index d6aaeaf6..f5ddb031 100644 --- a/src/jabs/project/settings_manager.py +++ b/src/jabs/project/settings_manager.py @@ -57,7 +57,7 @@ def save_project_file(self, data: dict | None = None): tmp.replace(self._paths.project_file) @property - def project_settings(self) -> dict: + def project_dictionary(self) -> dict: """Get a copy of the current project properties and settings. Returns: @@ -65,6 +65,15 @@ def project_settings(self) -> dict: """ return dict(self._project_info) + @property + def settings(self) -> dict: + """Get a copy of general JABS settings from project file + + Returns: + dict + """ + return dict(self._project_info.get("settings", {})) + @property def behavior_names(self) -> list[str]: """Get a list of all behaviors defined in the project settings. diff --git a/src/jabs/project/video_manager.py b/src/jabs/project/video_manager.py index e02ef73d..60e451e3 100644 --- a/src/jabs/project/video_manager.py +++ b/src/jabs/project/video_manager.py @@ -142,7 +142,7 @@ def get_video_identity_count(self, video_name: str) -> int: def _load_video_metadata(self): """Load metadata for each video and calculate total identities.""" - video_metadata = self._settings_manager.project_settings.get("video_files", {}) + video_metadata = self._settings_manager.project_dictionary.get("video_files", {}) flush = False for video in self._videos: vinfo = video_metadata.get(video, {}) diff --git a/src/jabs/ui/central_widget.py b/src/jabs/ui/central_widget.py index 47856de5..39015d5f 100644 --- a/src/jabs/ui/central_widget.py +++ b/src/jabs/ui/central_widget.py @@ -675,7 +675,7 @@ def _train_button_clicked(self) -> None: # make sure video playback is stopped self._player_widget.stop() - # setup training thread + # setup training thread, training thread will configure self._classifier with current settings self._training_thread = TrainingThread( self._classifier, self._project, diff --git a/src/jabs/ui/main_window.py b/src/jabs/ui/main_window.py index 0eead518..aaa92766 100644 --- a/src/jabs/ui/main_window.py +++ b/src/jabs/ui/main_window.py @@ -532,7 +532,7 @@ def behavior_label_add_event(self, behaviors: list[str]) -> None: """handle project updates required when user adds new behavior labels""" # check for new behaviors for behavior in behaviors: - if behavior not in self._project.settings_manager.project_settings["behavior"]: + if behavior not in self._project.settings_manager.project_dictionary["behavior"]: # save new behavior with default settings self._project.settings_manager.save_behavior(behavior, {}) From 4b9d258cb42b1242d7f74ec46af0950a17f7da11 Mon Sep 17 00:00:00 2001 From: Glen Beane <356266+gbeane@users.noreply.github.com> Date: Fri, 17 Oct 2025 17:17:39 -0400 Subject: [PATCH 06/14] finalizing jabs settings dialog and classifier probability calibration --- src/jabs/behavior_search/behavior_search_util.py | 2 +- src/jabs/classifier/classifier.py | 10 +++++----- src/jabs/scripts/classify.py | 1 + src/jabs/scripts/initialize_project.py | 2 +- src/jabs/ui/settings_dialog.py | 7 ++++--- 5 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/jabs/behavior_search/behavior_search_util.py b/src/jabs/behavior_search/behavior_search_util.py index 5334b5ac..b9a11eb6 100644 --- a/src/jabs/behavior_search/behavior_search_util.py +++ b/src/jabs/behavior_search/behavior_search_util.py @@ -114,7 +114,7 @@ def _search_behaviors_gen( ) case PredictionBehaviorSearchQuery() as pred_query: - proj_settings = project.settings_manager.project_settings + proj_settings = project.settings_manager.project_dictionary if pred_query.behavior_label is None: behavior_dict = proj_settings.get("behavior", {}) behaviors = list(behavior_dict.keys()) diff --git a/src/jabs/classifier/classifier.py b/src/jabs/classifier/classifier.py index c84e97a3..873cb770 100644 --- a/src/jabs/classifier/classifier.py +++ b/src/jabs/classifier/classifier.py @@ -32,11 +32,11 @@ # we were able to import xgboost, make it available as an option: _classifier_choices.append(ClassifierType.XGBOOST) except Exception: - # we were unable to import the xgboost module. It's either not - # installed (it should be if the user used our requirements-old.txt) - # or it may have been unable to be imported due to a missing - # libomp. Either way, we won't add it to the available choices and - # we can otherwise ignore this exception + # we were unable to import the xgboost module -- possibly due to a missing + # libomp (which is not available by default on macOS). Mac users should + # install libomp via Homebrew (brew install libomp) to enable XGBoost support (this is + # detailed in the installation instructions). + # we won't add it to the available choices and we can otherwise ignore this exception _xgboost = None diff --git a/src/jabs/scripts/classify.py b/src/jabs/scripts/classify.py index 90478157..319cacf8 100755 --- a/src/jabs/scripts/classify.py +++ b/src/jabs/scripts/classify.py @@ -197,6 +197,7 @@ def train(training_file: Path) -> Classifier: print(f" Balanced Labels: {classifier_settings['balance_labels']}") print(f" Symmetric Behavior: {classifier_settings['symmetric_behavior']}") print(f" CM Units: {bool(classifier_settings['cm_units'])}") + print(f" Calibrate Probabilities: {classifier.calibrate_probabilities}") return classifier diff --git a/src/jabs/scripts/initialize_project.py b/src/jabs/scripts/initialize_project.py index d67cd9b5..f1f7db4e 100755 --- a/src/jabs/scripts/initialize_project.py +++ b/src/jabs/scripts/initialize_project.py @@ -295,7 +295,7 @@ def validation_job_producer(): # save window sizes to project settings deduped_window_sizes = set( - project.settings_manager.project_settings.get("window_sizes", []) + window_sizes + project.settings_manager.project_dictionary.get("window_sizes", []) + window_sizes ) project.settings_manager.save_project_file({"window_sizes": list(deduped_window_sizes)}) diff --git a/src/jabs/ui/settings_dialog.py b/src/jabs/ui/settings_dialog.py index e103fd1e..7b0c0e08 100644 --- a/src/jabs/ui/settings_dialog.py +++ b/src/jabs/ui/settings_dialog.py @@ -19,6 +19,7 @@ QWidget, ) +from jabs.constants import DEFAULT_CALIBRATION_CV, DEFAULT_CALIBRATION_METHOD from jabs.project.settings_manager import SettingsManager @@ -107,10 +108,10 @@ def __init__(self, project_settings: SettingsManager, parent: QWidget | None = N self._cv_selection.setToolTip("Number of CV folds used inside the calibrator") # Load current values from project settings (keys must match classifier usage) - current_settings = project_settings.project_settings.get("settings", {}) + current_settings = project_settings.project_dictionary.get("settings", {}) calibrate = current_settings.get("calibrate_probabilities", False) - method = current_settings.get("calibration_method", "isotonic") - cv = current_settings.get("calibration_cv", 3) + method = current_settings.get("calibration_method", DEFAULT_CALIBRATION_METHOD) + cv = current_settings.get("calibration_cv", DEFAULT_CALIBRATION_CV) self._calibrate_checkbox.setChecked(calibrate) idx = max(0, self._method_selection.findText(method)) From 34cf9eb19bfa69c9299741c84d9ccc1d309876c1 Mon Sep 17 00:00:00 2001 From: Glen Beane <356266+gbeane@users.noreply.github.com> Date: Fri, 17 Oct 2025 17:41:45 -0400 Subject: [PATCH 07/14] edit help text --- src/jabs/ui/settings_dialog.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/jabs/ui/settings_dialog.py b/src/jabs/ui/settings_dialog.py index 7b0c0e08..56077d4f 100644 --- a/src/jabs/ui/settings_dialog.py +++ b/src/jabs/ui/settings_dialog.py @@ -169,6 +169,7 @@ def __init__(self, project_settings: SettingsManager, parent: QWidget | None = NIsotonic is recommended, since it should work well in most cases (>~2000 labeled samples).
Tip: If you see probabilities stuck near 0/1, try enabling calibration.
""" ) From d56f9c7b9ae649eb5db902273d48eef8e8db52c8 Mon Sep 17 00:00:00 2001 From: Glen Beane <356266+gbeane@users.noreply.github.com> Date: Fri, 17 Oct 2025 21:14:27 -0400 Subject: [PATCH 08/14] fix unittests --- tests/project/test_prediction_manager.py | 2 +- tests/test_behavior_search.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/project/test_prediction_manager.py b/tests/project/test_prediction_manager.py index 9f6b3016..899c2268 100644 --- a/tests/project/test_prediction_manager.py +++ b/tests/project/test_prediction_manager.py @@ -25,7 +25,7 @@ class MockSettingsManager: """Class to simulate project settings.""" def __init__(self): - self.project_settings = {"video_files": {"test_video.avi": {"identities": 2}}} + self.project_dictionary = {"video_files": {"test_video.avi": {"identities": 2}}} @pytest.fixture diff --git a/tests/test_behavior_search.py b/tests/test_behavior_search.py index 6ffbe152..2c22dd60 100644 --- a/tests/test_behavior_search.py +++ b/tests/test_behavior_search.py @@ -40,7 +40,7 @@ def load_predictions(video, behavior): # SettingsManager mock settings_manager = MagicMock() - settings_manager.project_settings = project_settings or {} + settings_manager.project_dictionary = project_settings or {} # Project mock project = MagicMock() From 745288d2830de7b51eb5d51f33be8de9f2950d33 Mon Sep 17 00:00:00 2001 From: Glen Beane <356266+gbeane@users.noreply.github.com> Date: Fri, 17 Oct 2025 21:16:49 -0400 Subject: [PATCH 09/14] fix typo in docstring --- tests/test_behavior_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_behavior_search.py b/tests/test_behavior_search.py index 2c22dd60..c99902ca 100644 --- a/tests/test_behavior_search.py +++ b/tests/test_behavior_search.py @@ -51,7 +51,7 @@ def load_predictions(video, behavior): def _correct_probs(preds, probs): - """Move 0.0 - 1.0 probaabilities into 0.5 - 1.0 range based on predictions.""" + """Move 0.0 - 1.0 probabilities into 0.5 - 1.0 range based on predictions.""" probs = probs.copy() probs[preds == 0] = 1.0 - probs[preds == 0] From 876f48251991e3a39fa5eed5270c011bef707166 Mon Sep 17 00:00:00 2001 From: Glen Beane <356266+gbeane@users.noreply.github.com> Date: Mon, 20 Oct 2025 10:15:10 -0400 Subject: [PATCH 10/14] update inline help text --- src/jabs/ui/settings_dialog.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/jabs/ui/settings_dialog.py b/src/jabs/ui/settings_dialog.py index 56077d4f..6a884766 100644 --- a/src/jabs/ui/settings_dialog.py +++ b/src/jabs/ui/settings_dialog.py @@ -159,18 +159,21 @@ def __init__(self, project_settings: SettingsManager, parent: QWidget | None = N """Calibrate probabilities remaps raw model scores to better probabilities, using a small - cross-validation inside training. This improves metrics like log-loss and decision thresholds. -
+ cross-validation inside training. This improves metrics like log-loss and decision thresholds.Isotonic is recommended, since it should work well in most cases (>~2000 labeled samples).
-Tip: If you see probabilities stuck near 0/1, try enabling calibration.
+When to use isotonic: If you have thousands of labeled frames and probabilities seem too extreme + (many near 0 or 1), isotonic will give smoother and more realistic confidence scores.
+Tip: If training is slow or your dataset is small, start with sigmoid and
+ calibration_cv = 3. You can switch to isotonic later as your dataset grows.
Calibrate probabilities remaps raw model scores to better probabilities, using a small - cross-validation inside training. This improves metrics like log-loss and decision thresholds.
+Calibrate probabilities remaps raw model scores to better probabilities using + cross-validation inside training. This improves log-loss, Brier score, and makes thresholding + (e.g., show if p ≥ 0.7) more reliable.
+calibration_cv setting) increase the data required for selecting
+ isotonic.When to use isotonic: If you have thousands of labeled frames and probabilities seem too extreme - (many near 0 or 1), isotonic will give smoother and more realistic confidence scores.
-Tip: If training is slow or your dataset is small, start with sigmoid and
- calibration_cv = 3. You can switch to isotonic later as your dataset grows.
Guidance: If your dataset is large (thousands of labeled frames and roughly balanced),
+ auto will select isotonic. If it selects sigmoid, you can collect more labels or reduce
+ calibration_cv to allow isotonic to activate.
Tip: Most users should leave calibration_method = auto.
calibration_cv to allow isotonic to activate.
Tip: Most users should leave calibration_method = auto.
Saving reliability plots: If Save reliability plots is enabled, JABS will write reliability
+ figures after training/validation to <project dir>/plots/<timestamp>/.
+ Each run creates a new timestamped folder so results are easy to compare.