Skip to content

Commit 8baa4eb

Browse files
committed
bugfix
1 parent 41b0d65 commit 8baa4eb

File tree

3 files changed

+31
-9
lines changed

3 files changed

+31
-9
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ tests = [
2222
"pytest-env",
2323
"hypothesis",
2424
"pytest-cov",
25+
"pytest-xdist",
2526
]
2627
minkowskiengine = ["MinkowskiEngine"]
2728
spconv = ["spconv"]

pytorch_sparse_utils/utils/batch_topk.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,9 @@ def batch_topk(
8181
each concatenated subsequence. Default: 0 (sequence dimension).
8282
largest (bool, optional): If True, returns the indices of the largest elements.
8383
If False, returns those of the smallest elements. Default: True.
84-
sorted (bool, optional): If True, returns the elements in sorted order.
85-
Default: True.
84+
sorted (bool, optional): If True, always returns the elements in sorted order.
85+
For technical reasons, the returned elements may be sorted in some cases
86+
even when False. Default: True.
8687
return_values (bool, optional): If True, the output namedtuple will include the
8788
topk values in addition to the indices and offsets. Default: False.
8889
@@ -213,8 +214,8 @@ def batch_topk(
213214
topk_dim = dim + 1 # account for new leading batch dim
214215

215216
values_all, indices_all = tensor.reshape(batch_shape).topk(
216-
k_max_int, topk_dim, largest=largest, sorted=sorted
217-
)
217+
k_max_int, topk_dim, largest=largest, sorted=True
218+
) # Need to be sorted to be able to select first k for each subseq
218219

219220
# If topk is along sequence length, need to add offsets to indices
220221
# to globalize them

tests/utils/test_batch_topk.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,19 @@
22

33
import pytest
44
import torch
5-
from hypothesis import HealthCheck, given, settings
5+
from hypothesis import HealthCheck, example, given, settings
66
from hypothesis import strategies as st
77
from torch import Tensor
88

9+
from pytorch_sparse_utils.batching import (
10+
batch_offsets_to_seq_lengths,
11+
seq_lengths_to_batch_offsets,
12+
)
913
from pytorch_sparse_utils.utils import (
10-
batch_topk,
1114
BatchTopK,
15+
batch_topk,
1216
unpack_batch_topk,
1317
)
14-
from pytorch_sparse_utils.batching import (
15-
seq_lengths_to_batch_offsets,
16-
)
1718

1819

1920
# Helper utils
@@ -242,6 +243,17 @@ def test_negative_k_raises(self, device):
242243
batch_topk(t, off, k=-1)
243244

244245
# Property-based test
246+
@example(
247+
params={
248+
"seq_lens": [3, 3],
249+
"extra_dims": [],
250+
"dim": 0,
251+
"k": [1, 3],
252+
"largest": False,
253+
"sorted_": False,
254+
"seed": 0,
255+
},
256+
)
245257
@settings(deadline=None, suppress_health_check=[HealthCheck.differing_executors])
246258
@given(params=batch_topk_inputs())
247259
def test_property(self, params, device):
@@ -275,6 +287,14 @@ def test_property(self, params, device):
275287
else: # tensor
276288
k_per_batch = params["k"].tolist()
277289

290+
# Determine if batch_topk will need to actually sort indices even if
291+
# sorted = False
292+
if not params["sorted_"]:
293+
n_seq_lengths = batch_offsets_to_seq_lengths(offsets).unique()
294+
if n_seq_lengths.numel() == 1:
295+
params["sorted_"] = True
296+
297+
278298
ref_idx, ref_off, ref_vals = topk_reference(
279299
tensor_ref,
280300
offsets,

0 commit comments

Comments (0)