add tests for unique_rows

mawright · mawright · commit 610c582b46b0 · 2025-07-21T18:33:08.000-07:00
diff --git a/pytorch_sparse_utils/indexing/unique.py b/pytorch_sparse_utils/indexing/unique.py
@@ -10,7 +10,7 @@ def unique_rows(tensor: Tensor, sorted: bool = True) -> Tensor:
     Args:
         tensor (Tensor): A 2D tensor of integer type.
         sorted (bool): Whether to sort the indices of unique rows before returning.
-            If False, returned indices will be in lexicographic order.
+            If False, returned indices will be in lexicographic order of the rows.
 
     Returns:
         Tensor: A 1D tensor whose elements are the indices of the unique rows of
@@ -43,17 +43,15 @@ def unique_rows(tensor: Tensor, sorted: bool = True) -> Tensor:
     max_vals = tensor.max(0).values
     min_vals = tensor.min(0).values
 
-    # Handle negative values by shifting to nonnegative
-    has_negs = min_vals < 0
-    if has_negs.any():
-        # Shift each column to be nonnegative
-        neg_shift = torch.where(has_negs, min_vals, min_vals.new_zeros([]))
-        tensor = tensor - neg_shift
-        max_vals = max_vals - neg_shift
-
     # Check for overflow problems
-    log_sum = (max_vals + 1).log().sum()
     INT64_MAX = 9223372036854775807
+    if (max_vals >= INT64_MAX).any():
+        raise OverflowError(
+            f"Tensor contains values at or near maximum int64 value ({INT64_MAX}), "
+            "which would lead to overflow errors when computing unique rows."
+        )
+
+    log_sum = (max_vals + 1).log().sum()
     log_max = torch.tensor(INT64_MAX, device=max_vals.device).log()
 
     if log_sum > log_max:
@@ -62,6 +60,14 @@ def unique_rows(tensor: Tensor, sorted: bool = True) -> Tensor:
             f"approx {log_sum.exp()} compared to max int64 value of {INT64_MAX}."
         )
 
+    # Shift negatives to nonnegative (overflow check above used unshifted max_vals)
+    has_negs = min_vals < 0
+    if has_negs.any():
+        # Shift each column to be nonnegative
+        neg_shift = torch.where(has_negs, min_vals, min_vals.new_zeros([]))
+        tensor = tensor - neg_shift
+        max_vals = max_vals - neg_shift
+
     tensor_flat, _ = flatten_nd_indices(tensor.T.long(), max_vals)
     tensor_flat: Tensor = tensor_flat.squeeze(0)
 
diff --git a/tests/indexing/test_misc.py b/tests/indexing/test_misc.py
@@ -0,0 +1,88 @@
+import torch
+import pytest
+
+from pytorch_sparse_utils.indexing.unique import unique_rows
+from pytorch_sparse_utils.indexing.scatter import scatter_to_sparse_tensor
+
+@pytest.mark.cpu_and_cuda  # NOTE(review): custom marker; presumably supplies the `device` fixture for CPU and CUDA - confirm in conftest
+class TestUniqueRows:  # Tests for unique_rows: returned indices, the `sorted` flag, and error cases
+    def test_basic_functionality(self, device):  # duplicate rows collapse to one index per distinct row
+        tensor = torch.tensor([
+            [1, 2, 3],
+            [1, 2, 3],
+            [7, 8, 9],
+            [4, 5, 6],
+            [4, 5, 6],
+        ], device=device
+        )
+        unique_inds = unique_rows(tensor)
+
+        assert torch.equal(unique_inds, torch.tensor([0, 2, 3], device=device))  # row 1 duplicates row 0, row 4 duplicates row 3
+
+    def test_negative_values(self, device):  # same check with negative entries in the rows
+        tensor = torch.tensor(
+            [
+                [1, 2, 3],
+                [-1, -2, -3],
+                [-1, -2, -3],
+                [4, -10, 3],
+            ], device=device
+        )
+        unique_inds = unique_rows(tensor)
+
+        assert torch.equal(unique_inds, torch.tensor([0, 1, 3], device=device))  # row 2 duplicates row 1
+
+    def test_sorted(self, device):  # compares the index order returned for sorted=True vs sorted=False
+        tensor = torch.tensor([
+            [-1, -3, 5],
+            [3, 20, 44],
+            [1, 2, 3],
+            [-1, -3, 5],
+            [3, 20, 44]
+        ], device=device)
+
+        unique_unsorted = unique_rows(tensor, sorted=False)
+        unique_sorted = unique_rows(tensor, sorted=True)
+
+        assert not torch.equal(unique_sorted, unique_unsorted)
+        assert torch.equal(unique_sorted, torch.tensor([0, 1, 2], device=device))  # ascending index order
+        assert torch.equal(unique_unsorted, torch.tensor([0, 2, 1], device=device))  # lexicographic row order: [-1,-3,5] < [1,2,3] < [3,20,44]
+
+    def test_error_wrong_dim(self, device):  # a 1D input must be rejected
+        tensor = torch.randint(0, 100, size=(10,), device=device)
+        with pytest.raises(
+            (ValueError, torch.jit.Error),  # pyright: ignore[reportArgumentType]
+            match="Expected a 2D tensor"
+        ):
+            unique_rows(tensor)
+
+    def test_error_not_int(self, device):  # float and complex inputs must be rejected
+        tensor_float = torch.randn(10, 10, device=device)
+        with pytest.raises(
+            (ValueError, torch.jit.Error),  # pyright: ignore[reportArgumentType]
+            match="Expected integer tensor"
+        ):
+            unique_rows(tensor_float)
+
+        tensor_complex = torch.randn(10, 10, device=device, dtype=torch.complex64)
+        with pytest.raises(
+            (ValueError, torch.jit.Error),  # pyright: ignore[reportArgumentType]
+            match="Expected integer tensor"
+        ):
+            unique_rows(tensor_complex)
+
+    def test_error_overflow(self, device):  # exercises both overflow error messages
+        tensor = torch.randint(-100, 100, size=(10, 4), device=device, dtype=torch.long)
+        tensor[0, :] = torch.iinfo(torch.long).max  # int64 max in every column of row 0
+        with pytest.raises(
+            (OverflowError, torch.jit.Error),  # pyright: ignore[reportArgumentType]
+            match="Tensor contains values at or near"
+        ):
+            unique_rows(tensor)
+
+        tensor[0, :] = torch.iinfo(torch.long).max - 100  # no longer at int64 max; the second overflow message is expected
+        with pytest.raises(
+            (OverflowError, torch.jit.Error),  # pyright: ignore[reportArgumentType]
+            match="would cause integer overflow"
+        ):
+            unique_rows(tensor)