diff --git a/CHANGELOG.md b/CHANGELOG.md index 880ab649..3bc7968c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,11 @@ # v0.6.6 (Upcoming) ### New Checks +* Added checks for the duration of DynamicTables by checking start_time, stop_time, timestamp, duration, and spike_times columns. [#628](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/628) * Added `check_file_extension` for NWB file extension best practice recommendations (`.nwb`, `.nwb.h5`, or `.nwb.zarr`) [#625](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/625) ### Improvements -* Added documentation to API and CLI docs on how to use the dandi config option. [#624](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/624) +* Added documentation to API and CLI docs on how to use the dandi config option. [#628](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/628) # v0.6.5 (July 25, 2025) diff --git a/src/nwbinspector/checks/__init__.py b/src/nwbinspector/checks/__init__.py index 3011d187..72ceb832 100644 --- a/src/nwbinspector/checks/__init__.py +++ b/src/nwbinspector/checks/__init__.py @@ -73,6 +73,7 @@ check_ids_unique, check_single_row, check_table_time_columns_are_not_negative, + check_table_time_columns_duration, check_table_values_for_dict, check_time_interval_time_columns, check_time_intervals_stop_after_start, @@ -142,6 +143,7 @@ "check_time_intervals_stop_after_start", "check_table_values_for_dict", "check_table_time_columns_are_not_negative", + "check_table_time_columns_duration", "check_resolution", "check_missing_unit", "check_regular_timestamps", diff --git a/src/nwbinspector/checks/_tables.py b/src/nwbinspector/checks/_tables.py index ef99d02e..a77ba144 100644 --- a/src/nwbinspector/checks/_tables.py +++ b/src/nwbinspector/checks/_tables.py @@ -292,3 +292,90 @@ def check_table_time_columns_are_not_negative(table: DynamicTable) -> Optional[I ) return None + + +@register_check(importance=Importance.BEST_PRACTICE_SUGGESTION, neurodata_type=DynamicTable) +def check_table_time_columns_duration( + table: DynamicTable, duration_threshold: float = 31557600.0 +) -> Optional[InspectorMessage]: + """ + Check if the duration spanned by time columns in a DynamicTable exceeds a threshold. + + This check examines time-related columns (start_time, stop_time, timestamp, spike_times) + and calculates the duration as max(time) - min(time). If this exceeds the threshold + (default: 1 year = 31,557,600 seconds), a warning is issued. + + Parameters + ---------- + table: DynamicTable + The table to check + duration_threshold: float, optional + Maximum expected duration in seconds. Default is 1 year (365.25 days). + + Returns + ------- + Optional[InspectorMessage] + Warning message if duration exceeds threshold, None otherwise + """ + if len(table.id) == 0: + return None # Empty table + + start_times = [] + end_times = [] + + # Check for start_time and stop_time columns (e.g., trials) + if "start_time" in table.colnames and len(table["start_time"]) > 0: + start_times.append(float(table["start_time"][0])) + if "stop_time" in table.colnames and len(table["stop_time"]) > 0: + end_times.append(float(table["stop_time"][-1])) + + # Check for timestamp column (possibly with duration) + if "timestamp" in table.colnames and len(table["timestamp"]) > 0: + timestamp_data = table["timestamp"] + start_times.append(float(timestamp_data[0])) + + if "duration" in table.colnames and len(table["duration"]) > 0: + duration_data = table["duration"] + end_times.append(float(timestamp_data[-1] + duration_data[-1])) + else: + end_times.append(float(timestamp_data[-1])) + + # Check for spike_times column (Units table) + # Assume spike times are ordered within each unit + if "spike_times" in table.colnames and len(table["spike_times"]) > 0: + idxs = table["spike_times"].data[:] + + # Remove zeros from idxs (units with no spikes) + idxs = idxs[idxs != 0] + + if len(idxs) > 0: + st_data = table["spike_times"].target + + if len(idxs) > 1: + start = float(np.min(np.r_[st_data[0], st_data[idxs[:-1]]])) + else: + start = float(st_data[0]) + + end = float(np.max(st_data[idxs - 1])) + start_times.append(start) + end_times.append(end) + + # Calculate duration if we found any time data + if start_times and end_times: + duration = max(end_times) - min(start_times) + + # Check if duration exceeds threshold + if duration > duration_threshold: + # Convert to years for the message + duration_years = duration / 31557600.0 + threshold_years = duration_threshold / 31557600.0 + return InspectorMessage( + message=( + f"DynamicTable '{table.name}' has a duration of {duration:.2f} seconds " + f"({duration_years:.2f} years), which exceeds the threshold of " + f"{duration_threshold:.2f} seconds ({threshold_years:.2f} years). " + "Please verify that this is correct." + ) + ) + + return None diff --git a/tests/unit_tests/test_tables.py b/tests/unit_tests/test_tables.py index 3e0b8027..b7055515 100644 --- a/tests/unit_tests/test_tables.py +++ b/tests/unit_tests/test_tables.py @@ -16,6 +16,7 @@ check_ids_unique, check_single_row, check_table_time_columns_are_not_negative, + check_table_time_columns_duration, check_table_values_for_dict, check_time_interval_time_columns, check_time_intervals_stop_after_start, @@ -498,3 +499,82 @@ def test_table_time_columns_are_not_negative_multidimensional_pass(): test_table.add_row(test_time=[0.0, 1.0, 2.0, 3.0]) assert check_table_time_columns_are_not_negative(test_table) is None + + +def test_check_table_time_columns_duration_pass_short(): + """Test that short duration tables pass the check.""" + table = TimeIntervals(name="trials", description="test trials") + table.add_row(start_time=0.0, stop_time=10.0) + table.add_row(start_time=15.0, stop_time=25.0) + table.add_row(start_time=30.0, stop_time=100.0) + + assert check_table_time_columns_duration(table) is None + + +def test_check_table_time_columns_duration_fail_exceeds_threshold(): + """Test that tables with duration exceeding 1 year fail.""" + one_year = 31557600.0 + table = TimeIntervals(name="trials", description="test trials") + table.add_row(start_time=0.0, stop_time=100.0) + table.add_row(start_time=one_year + 1000, stop_time=one_year + 2000) + + result = check_table_time_columns_duration(table) + assert result is not None + assert "trials" in result.message + assert "exceeds the threshold" in result.message + assert result.importance == Importance.BEST_PRACTICE_SUGGESTION + + +def test_check_table_time_columns_duration_pass_empty(): + """Test that empty tables pass.""" + table = TimeIntervals(name="trials", description="test trials") + assert check_table_time_columns_duration(table) is None + + +def test_check_table_time_columns_duration_pass_custom_threshold(): + """Test that custom threshold works correctly.""" + table = TimeIntervals(name="trials", description="test trials") + table.add_row(start_time=0.0, stop_time=100.0) + table.add_row(start_time=150.0, stop_time=200.0) + + # Should fail with 100 second threshold + result = check_table_time_columns_duration(table, duration_threshold=100.0) + assert result is not None + + # Should pass with 300 second threshold + result = check_table_time_columns_duration(table, duration_threshold=300.0) + assert result is None + + +def test_check_table_time_columns_duration_with_timestamp(): + """Test with timestamp column.""" + table = DynamicTable(name="events", description="test events") + table.add_column(name="timestamp", description="event timestamps") + table.add_row(timestamp=0.0) + table.add_row(timestamp=100.0) + + assert check_table_time_columns_duration(table) is None + + +def test_check_table_time_columns_duration_with_timestamp_and_duration(): + """Test with timestamp and duration columns.""" + one_year = 31557600.0 + table = DynamicTable(name="events", description="test events") + table.add_column(name="timestamp", description="event timestamps") + table.add_column(name="duration", description="event durations") + table.add_row(timestamp=0.0, duration=10.0) + table.add_row(timestamp=one_year, duration=1000.0) + + result = check_table_time_columns_duration(table) + assert result is not None + assert "exceeds the threshold" in result.message + + +def test_check_table_time_columns_duration_no_time_columns(): + """Test that tables without time columns pass.""" + table = DynamicTable(name="test_table", description="test") + table.add_column(name="value", description="some data") + table.add_row(value=123) + table.add_row(value=456) + + assert check_table_time_columns_duration(table) is None