Improves GenerateHeatmap transform and documentation

eclipse0922 · eclipse0922 · commit 40c492dfa2bb · 2025-09-27T00:06:32.000+09:00
Enhances the GenerateHeatmap transform with better normalization,
spatial metadata handling, and comprehensive documentation.

The changes ensure correct heatmap normalization and improve
handling of spatial metadata inheritance from reference images.
Also improves input validation and fixes shape inconsistencies.

Adds new test cases to cover edge cases and improve code reliability.

Signed-off-by: sewon.jeon <sewon.jeon@connecteve.com>
diff --git a/monai/transforms/post/array.py b/monai/transforms/post/array.py
@@ -757,14 +757,16 @@ class GenerateHeatmap(Transform):
 
     Notes:
         - Coordinates are interpreted in voxel units and expected in (Y, X) for 2D or (Z, Y, X) for 3D.
-        - Output shape:
-            - Non-batched points (N, D): (N, H, W[, D])
-            - Batched points (B, N, D): (B, N, H, W[, D])
+        - Target spatial_shape is (Y, X) for 2D and (Z, Y, X) for 3D.
+        - Output layout uses channel-first convention with one channel per landmark:
+            - Non-batched points (N, D): (N, Y, X) for 2D or (N, Z, Y, X) for 3D
+            - Batched points (B, N, D): (B, N, Y, X) for 2D or (B, N, Z, Y, X) for 3D
         - Each channel corresponds to one landmark.
 
     Args:
         sigma: gaussian standard deviation. A single value is broadcast across all spatial dimensions.
         spatial_shape: optional fallback spatial shape. If ``None`` it must be provided when calling the transform.
+            A single int value will be broadcast to all spatial dimensions.
         truncated: extent, in multiples of ``sigma``, used to crop the gaussian support window.
         normalize: normalize every heatmap channel to ``[0, 1]`` when ``True``.
         dtype: target dtype for the generated heatmaps (accepts numpy or torch dtypes).
@@ -840,9 +842,9 @@ def __call__(self, points: NdarrayOrTensor, spatial_shape: Sequence[int] | None
                 # write back
                 region.copy_(updated)
                 if self.normalize:
-                    peak = updated.max()
-                    if peak.item() > 0:
-                        heatmap[b_idx, idx] /= peak
+                    peak = updated.amax()
+                    denom = torch.where(peak > 0, peak, torch.ones_like(peak))
+                    heatmap[b_idx, idx] = heatmap[b_idx, idx] / denom
 
         if not is_batched:
             heatmap = heatmap.squeeze(0)
diff --git a/monai/transforms/post/dictionary.py b/monai/transforms/post/dictionary.py
@@ -518,12 +518,35 @@ class GenerateHeatmapd(MapTransform):
     Dictionary-based wrapper of :py:class:`monai.transforms.GenerateHeatmap`.
     Converts landmark coordinates into gaussian heatmaps and optionally copies metadata from a reference image.
 
+    Args:
+        keys: keys of the corresponding items in the dictionary.
+        sigma: standard deviation of the Gaussian kernel; a single value or one value per spatial dimension.
+        heatmap_keys: keys to store output heatmaps. Default: "{key}_heatmap" for each key.
+        ref_image_keys: keys of reference images to inherit spatial metadata from. When provided, heatmaps take
+            the affine of the reference image and, unless ``spatial_shape`` is set, its spatial shape as well.
+        spatial_shape: spatial dimensions of output heatmaps. Can be:
+            - Single shape (tuple): applied to all keys
+            - List of shapes: one per key (must match keys length)
+        truncated: truncation distance for Gaussian kernel computation (in sigmas).
+        normalize: if True, normalize each heatmap's peak value to 1.0.
+        dtype: output data type for heatmaps. Defaults to np.float32.
+        allow_missing_keys: if True, don't raise error if some keys are missing in data.
+
+    Returns:
+        Dictionary with original data plus generated heatmaps at specified keys.
+
+    Raises:
+        ValueError: If heatmap_keys/ref_image_keys length doesn't match keys length.
+        ValueError: If no spatial shape can be determined (need spatial_shape or ref_image_keys).
+        ValueError: If input points have invalid shape (must be a 2D (N, D) or 3D (B, N, D) array).
+
     Notes:
         - Default heatmap_keys are generated as "{key}_heatmap" for each input key
         - Shape inference precedence: static spatial_shape > ref_image
         - Output shapes:
             - Non-batched points (N, D): (N, H, W[, D])
             - Batched points (B, N, D): (B, N, H, W[, D])
+        - When using ref_image_keys, heatmaps inherit affine and spatial metadata from reference
     """
 
     backend = GenerateHeatmap.backend
@@ -575,7 +598,7 @@ def __call__(self, data: Mapping[Hashable, Any]) -> dict[Hashable, Any]:
                 # Copy metadata if reference is MetaTensor
                 if isinstance(reference, MetaTensor) and isinstance(heatmap, MetaTensor):
                     heatmap.affine = reference.affine
-                    self._update_spatial_metadata(heatmap, reference)
+                    self._update_spatial_metadata(heatmap, shape)
             d[out_key] = heatmap
         return d
 
@@ -628,7 +651,7 @@ def _determine_shape(
             return static_shape
         points_t = convert_to_tensor(points, dtype=torch.float32, track_meta=False)
         if points_t.ndim not in (2, 3):
-            raise ValueError(self._ERR_INVALID_POINTS)
+            raise ValueError(f"{self._ERR_INVALID_POINTS} Got {points_t.ndim}D tensor.")
         spatial_dims = int(points_t.shape[-1])
         if ref_key is not None and ref_key in data:
             return self._shape_from_reference(data[ref_key], spatial_dims)
@@ -646,10 +669,8 @@ def _shape_from_reference(self, reference: Any, spatial_dims: int) -> tuple[int,
             return tuple(int(v) for v in reference.shape[-spatial_dims:])
         raise ValueError(self._ERR_REF_NO_SHAPE)
 
-    def _update_spatial_metadata(self, heatmap: MetaTensor, reference: MetaTensor) -> None:
-        """Update spatial metadata of heatmap based on its dimensions."""
-        # trailing dims after channel are spatial regardless of batch presence
-        spatial_shape = heatmap.shape[-(reference.ndim - 1) :]
+    def _update_spatial_metadata(self, heatmap: MetaTensor, spatial_shape: tuple[int, ...]) -> None:
+        """Set spatial_shape explicitly from resolved shape."""
         heatmap.meta["spatial_shape"] = tuple(int(v) for v in spatial_shape)
 
 
diff --git a/tests/transforms/test_generate_heatmapd.py b/tests/transforms/test_generate_heatmapd.py
@@ -128,10 +128,16 @@ def test_dict_static_shape(self, _, points, params, expected_shape, expected_dty
         self.assertEqual(heatmap.shape, expected_shape)
         self.assertEqual(heatmap.dtype, expected_dtype)
 
+        # Verify no NaN or Inf values
+        self.assertFalse(np.isnan(heatmap).any() or np.isinf(heatmap).any())
+
+        # Verify max value is 1.0 for normalized heatmaps
+        np.testing.assert_allclose(heatmap.max(), 1.0, rtol=1e-5)
+
     def test_dict_missing_shape_raises(self):
         # Without ref image or explicit spatial_shape, must raise
         transform = GenerateHeatmapd(keys="points", heatmap_keys="heatmap")
-        with self.assertRaises(ValueError):
+        with self.assertRaisesRegex(ValueError, "spatial_shape|ref_image_keys"):
             transform({"points": np.zeros((1, 2), dtype=np.float32)})
 
     @parameterized.expand(TEST_CASES_DTYPE)
@@ -203,6 +209,35 @@ def test_dict_multiple_keys(self):
         # Verify peaks are at different locations
         self.assertNotEqual(np.argmax(result["hm1"]), np.argmax(result["hm2"]))
 
+    def test_dict_mismatched_heatmap_keys_length(self):
+        """Test ValueError when heatmap_keys length doesn't match keys"""
+        with self.assertRaises(ValueError):
+            GenerateHeatmapd(
+                keys=["pts1", "pts2"],
+                heatmap_keys=["hm1", "hm2", "hm3"],  # Mismatch: 3 heatmap keys for 2 input keys
+                spatial_shape=(8, 8),
+            )
+
+    def test_dict_mismatched_ref_image_keys_length(self):
+        """Test ValueError when ref_image_keys length doesn't match keys"""
+        with self.assertRaises(ValueError):
+            GenerateHeatmapd(
+                keys=["pts1", "pts2"],
+                heatmap_keys=["hm1", "hm2"],
+                ref_image_keys=["img1", "img2", "img3"],  # Mismatch: 3 ref keys for 2 input keys
+                spatial_shape=(8, 8),
+            )
+
+    def test_dict_per_key_spatial_shape_mismatch(self):
+        """Test ValueError when per-key spatial_shape length doesn't match keys"""
+        with self.assertRaises(ValueError):
+            GenerateHeatmapd(
+                keys=["pts1", "pts2"],
+                heatmap_keys=["hm1", "hm2"],
+                spatial_shape=[(8, 8), (8, 8), (8, 8)],  # Mismatch: 3 shapes for 2 keys
+                sigma=1.0,
+            )
+
     def test_metatensor_points_with_ref(self):
         """Test MetaTensor points with reference image - documents current behavior"""
         from monai.data import MetaTensor
@@ -224,9 +259,8 @@ def test_metatensor_points_with_ref(self):
         self.assertIsInstance(heatmap, MetaTensor)
         self.assertEqual(tuple(heatmap.shape), (2, 8, 8, 8))
 
-        # Note: Currently the heatmap may inherit affine from points MetaTensor
-        # This test documents the current behavior
-        # Ideally, the heatmap should use the reference image's affine
+        # Heatmap should inherit affine from the reference image
+        assert_allclose(heatmap.affine, image.affine, type_test=False)
 
 
 if __name__ == "__main__":