diff --git a/verl/trainer/ppo/ray_trainer.py b/verl/trainer/ppo/ray_trainer.py
index 02ac4d67185..5f33f6e3af3 100644
--- a/verl/trainer/ppo/ray_trainer.py
+++ b/verl/trainer/ppo/ray_trainer.py
@@ -1105,9 +1105,7 @@ def fit(self):
                         batch = batch.union(reward_tensor)
 
                     if self.config.reward_model.launch_reward_fn_async:
-                        future_reward = compute_reward_async.remote(
-                            data=batch, config=self.config, tokenizer=self.tokenizer
-                        )
+                        future_reward = compute_reward_async.remote(data=batch, reward_fn=self.reward_fn)
                     else:
                         reward_tensor, reward_extra_infos_dict = compute_reward(batch, self.reward_fn)
 
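For reference, a minimal sketch of the callee side this change implies. It assumes `compute_reward_async` is the Ray remote task exported from `verl.trainer.ppo.reward`, that it previously rebuilt the reward manager inside the task from `config` and `tokenizer`, and that after this PR it can simply forward the trainer's already-constructed `reward_fn` to the synchronous `compute_reward` (the helper called in the diff's non-async branch). The `num_cpus=1` resource hint and the docstring are illustrative, not taken from the source.

```python
import ray

# Assumed import paths: `DataProto` is verl's batch container, and
# `compute_reward` is the synchronous helper used in the diff's
# else-branch (in the real module the two would live side by side).
from verl import DataProto
from verl.trainer.ppo.reward import compute_reward


@ray.remote(num_cpus=1)  # resource hint is an assumption, not from the diff
def compute_reward_async(data: DataProto, reward_fn):
    """Score a rollout batch off the trainer's critical path.

    Sketch: with this PR the trainer passes its already-built
    `reward_fn`, so the task no longer needs to reconstruct a reward
    manager from `config` and `tokenizer`; it just delegates.
    """
    return compute_reward(data, reward_fn)
```

Passing `reward_fn` keeps a single construction path for both branches of the `if` and avoids re-instantiating the reward manager inside the Ray worker; note that Ray serializes task arguments, so this assumes `reward_fn` is picklable. Elsewhere in `fit`, the trainer presumably resolves the returned `ObjectRef` with something like `reward_tensor, reward_extra_infos_dict = ray.get(future_reward)`, letting reward scoring overlap with the next pipeline stage.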