Merge pull request #49 from ruby/mmtk-fork-ractor

peterzhu2118 · web-flow · commit 1794ea058471 · 2025-11-18T18:08:25.000-08:00
Fix forking with multiple Ractors
diff --git a/gc/mmtk/mmtk.c b/gc/mmtk/mmtk.c
@@ -32,6 +32,7 @@ struct objspace {
     unsigned long live_ractor_cache_count;
 
     pthread_mutex_t mutex;
+    rb_atomic_t mutator_blocking_count;
     bool world_stopped;
     pthread_cond_t cond_world_stopped;
     pthread_cond_t cond_world_started;
@@ -131,7 +132,9 @@ rb_mmtk_block_for_gc(MMTk_VMMutatorThread mutator)
     struct objspace *objspace = rb_gc_get_objspace();
 
     size_t starting_gc_count = objspace->gc_count;
+    RUBY_ATOMIC_INC(objspace->mutator_blocking_count);
     int lock_lev = RB_GC_VM_LOCK();
+    RUBY_ATOMIC_DEC(objspace->mutator_blocking_count);
     int err;
     if ((err = pthread_mutex_lock(&objspace->mutex)) != 0) {
         rb_bug("ERROR: cannot lock objspace->mutex: %s", strerror(err));
@@ -1049,7 +1052,25 @@ rb_gc_impl_before_fork(void *objspace_ptr)
 {
     struct objspace *objspace = objspace_ptr;
 
+  retry:
     objspace->fork_hook_vm_lock_lev = RB_GC_VM_LOCK();
+    rb_gc_vm_barrier();
+
+    /* At this point, we know that all the Ractors are paused because of the
+     * rb_gc_vm_barrier above. Since rb_mmtk_block_for_gc is a barrier point,
+     * one or more Ractors could be paused there. However, mmtk_before_fork is
+     * not compatible with that because it assumes that the MMTk workers are idle,
+     * but the workers are not idle because they are busy working on a GC.
+     *
+     * This essentially implements a trylock. It will optimistically lock but will
+     * release the lock if it detects that any other Ractors are waiting in
+     * rb_mmtk_block_for_gc.
+     */
+    rb_atomic_t mutator_blocking_count = RUBY_ATOMIC_LOAD(objspace->mutator_blocking_count);
+    if (mutator_blocking_count != 0) {
+        RB_GC_VM_UNLOCK(objspace->fork_hook_vm_lock_lev);
+        goto retry;
+    }
 
     mmtk_before_fork();
 }