Commit e36d562

[BugFix] Fix cuda cache empty in GRPO scripts (#3016)
1 parent d6d0b13 commit e36d562

3 files changed: +10 −4 lines

sota-implementations/grpo/grpo-async.py

Lines changed: 4 additions & 2 deletions
@@ -354,7 +354,8 @@ def train(
         with timeit("update_policy_weights"):
             torchrl_logger.info("Updating policy weights...")
             weight_updater.push_weights(policy_training)
-            torch.cuda.empty_cache()
+            # TODO: do we need this? Does it interfere with other processes?
+            # torch.cuda.empty_cache()
             gc.collect()

         # Checkpointing disabled to prevent disk space issues
@@ -380,7 +381,8 @@ def train(

         # Clear memory
         del loss_val
-        torch.cuda.empty_cache()
+        # TODO: do we need this? Does it interfere with other processes?
+        # torch.cuda.empty_cache()
         gc.collect()

     pbar.close()

sota-implementations/grpo/grpo-sync.py

Lines changed: 4 additions & 2 deletions
@@ -288,7 +288,8 @@ def train(

             # Clear memory
             del loss_val
-            torch.cuda.empty_cache()
+            # TODO: do we need this? Does it interfere with other processes?
+            # torch.cuda.empty_cache()
             gc.collect()

             # Update metrics
@@ -387,7 +388,8 @@ def train(
         with timeit("update_policy_weights"):
             torchrl_logger.info("Updating policy weights...")
             weight_updater.push_weights(policy_training)
-            torch.cuda.empty_cache()
+            # TODO: do we need this? Does it interfere with other processes?
+            # torch.cuda.empty_cache()
             gc.collect()

         timeit.print(prefix="timeit")
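Both GRPO scripts get the same change: the explicit torch.cuda.empty_cache() that ran after pushing updated policy weights and after deleting the loss value is commented out, leaving only gc.collect(), since flushing the CUDA caching allocator can stall other processes sharing the GPU. As a minimal sketch of an alternative (not part of this commit; the helper name and flag are hypothetical), the flush could be kept behind an opt-in switch instead of being removed outright:

import gc

import torch


def clear_memory(empty_cuda_cache: bool = False) -> None:
    """Collect Python garbage; optionally flush the CUDA caching allocator.

    torch.cuda.empty_cache() returns cached blocks to the driver, which can
    slow down or stall other processes sharing the device, so it is opt-in.
    """
    gc.collect()
    if empty_cuda_cache and torch.cuda.is_available():
        torch.cuda.empty_cache()

The training loops would then call clear_memory() by default and clear_memory(empty_cuda_cache=True) only when a run is known to need the allocator flushed.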

torchrl/collectors/llm/ray_collector.py

Lines changed: 2 additions & 0 deletions
@@ -69,6 +69,8 @@ class RayLLMCollector(LLMCollector):
             >>> for data in collector:  # non-blocking
             ...     # expensive operation - collector is collecting data

+            This is somewhat equivalent to using :class:`~torchrl.collectors.MultiSyncDataCollector` (`sync_iter=True`) or
+            :class:`~torchrl.collectors.MultiAsyncDataCollector` (`sync_iter=False`).
             Defaults to `True`.
         verbose (bool, optional): if ``True``, the collector will print progress information.
             Defaults to `False`.
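The docstring addition only states the analogy. As a rough usage sketch of the two multiprocess collectors it points to (the environment choice, batch sizes, and random-policy fallback below are illustrative assumptions, not taken from this commit):

from torchrl.collectors import MultiAsyncDataCollector, MultiSyncDataCollector
from torchrl.envs import GymEnv

make_env = lambda: GymEnv("CartPole-v1")

# Like sync_iter=True: every worker contributes to each yielded batch, and
# iteration blocks until all workers have delivered their share.
sync_collector = MultiSyncDataCollector(
    [make_env, make_env], policy=None, frames_per_batch=64, total_frames=256
)

# Like sync_iter=False: a batch is yielded as soon as any worker finishes,
# while the remaining workers keep collecting in the background.
async_collector = MultiAsyncDataCollector(
    [make_env, make_env], policy=None, frames_per_batch=64, total_frames=256
)

for data in sync_collector:
    ...  # training step would go here
sync_collector.shutdown()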
