12 changes: 8 additions & 4 deletions baselines/ppo/config/ppo_base_puffer.yaml
@@ -38,13 +38,13 @@ environment: # Overrides default environment configs (see pygpudrive/env/config.

 wandb:
   entity: ""
-  project: "gpudrive"
-  group: "test"
+  project: "adv_filter"
+  group: "testing"
   mode: "online" # Options: online, offline, disabled
   tags: ["ppo", "ff"]

 train:
-  exp_id: PPO # Set dynamically in the script if needed
+  exp_id: adv_filter # Set dynamically in the script if needed
   seed: 42
   cpu_offload: false
   device: "cuda" # Dynamically set to cuda if available, else cpu
@@ -63,7 +63,7 @@ train:
   torch_deterministic: false
   total_timesteps: 1_000_000_000
   batch_size: 131_072
-  minibatch_size: 8192
+  num_minibatches: 16
   learning_rate: 3e-4
   anneal_lr: false
   gamma: 0.99
@@ -78,6 +78,10 @@ train:
   max_grad_norm: 0.5
   target_kl: null
   log_window: 1000
+  # Advantage filtering
+  apply_advantage_filter: true
+  initial_th_factor: 0.01
+  beta: 0.25

 # # # Network # # #
 network:
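The switch from a hard-coded minibatch_size: 8192 to num_minibatches: 16 presumably lets the trainer derive the minibatch size from the rollout batch size at runtime. A minimal sketch of that arithmetic, assuming a simple integer division (the helper name is hypothetical and not part of this PR):

```python
def derive_minibatch_size(batch_size: int, num_minibatches: int) -> int:
    """Hypothetical helper: split the rollout batch into equally sized minibatches."""
    assert batch_size % num_minibatches == 0, "batch_size must divide evenly into minibatches"
    return batch_size // num_minibatches


# With the values in this config the effective minibatch size is unchanged:
# 131_072 / 16 = 8_192, i.e. the same 8192 that was previously hard-coded.
print(derive_minibatch_size(131_072, 16))  # 8192
```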
13 changes: 10 additions & 3 deletions baselines/ppo/ppo_pufferlib.py
@@ -144,7 +144,7 @@ def sweep(args, project="PPO", sweep_name="my_sweep"):
                 "max": 1e-1,
             },
             "batch_size": {"values": [512, 1024, 2048]},
-            "minibatch_size": {"values": [128, 256, 512]},
+            "num_minibatches": {"values": [4, 8, 16]},
         },
     ),
     project=project,
@@ -186,9 +186,13 @@ def run(
     ent_coef: Annotated[Optional[float], typer.Option(help="Entropy coefficient")] = None,
     update_epochs: Annotated[Optional[int], typer.Option(help="The number of epochs for updating the policy")] = None,
     batch_size: Annotated[Optional[int], typer.Option(help="The batch size for training")] = None,
-    minibatch_size: Annotated[Optional[int], typer.Option(help="The minibatch size for training")] = None,
+    num_minibatches: Annotated[Optional[int], typer.Option(help="The number of minibatches for training")] = None,
     gamma: Annotated[Optional[float], typer.Option(help="The discount factor for rewards")] = None,
     vf_coef: Annotated[Optional[float], typer.Option(help="Weight for vf_loss")] = None,
+    # Advantage filtering
+    apply_advantage_filter: Annotated[Optional[int], typer.Option(help="Whether to use advantage filter; 0 or 1")] = None,
+    initial_th_factor: Annotated[Optional[float], typer.Option(help="Initial threshold factor for training")] = None,
+    beta: Annotated[Optional[float], typer.Option(help="Beta parameter for training")] = None,
     # Wandb logging options
     project: Annotated[Optional[str], typer.Option(help="WandB project name")] = None,
     entity: Annotated[Optional[str], typer.Option(help="WandB entity name")] = None,
@@ -238,10 +242,13 @@ def run(
         "ent_coef": ent_coef,
         "update_epochs": update_epochs,
         "batch_size": batch_size,
-        "minibatch_size": minibatch_size,
+        "num_minibatches": num_minibatches,
         "render": None if render is None else bool(render),
         "gamma": gamma,
         "vf_coef": vf_coef,
+        "apply_advantage_filter": apply_advantage_filter,
+        "initial_th_factor": initial_th_factor,
+        "beta": beta,
     }
     config.train.update(
         {k: v for k, v in train_config.items() if v is not None}
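The new apply_advantage_filter, initial_th_factor, and beta options are only threaded through to config.train in this file; the diff shown here does not include the code that actually applies the filter. One plausible reading, sketched below purely for illustration, is that samples whose advantage magnitude falls below a running threshold are masked out, with initial_th_factor seeding the threshold and beta acting as an exponential-moving-average coefficient. The function name, the masking rule, and the EMA update are all assumptions, not the PR's implementation:

```python
import torch


def filter_advantages(advantages: torch.Tensor,
                      threshold: float,
                      beta: float = 0.25) -> tuple[torch.Tensor, float]:
    """Hypothetical advantage filter: suppress low-magnitude advantages.

    threshold could be seeded as initial_th_factor * advantages.abs().mean()
    on the first batch; beta controls how fast the threshold tracks new batches.
    """
    mask = advantages.abs() >= threshold   # keep only "informative" samples
    filtered = advantages * mask           # masked samples contribute no policy gradient
    # EMA update of the threshold toward the current mean advantage magnitude
    new_threshold = (1.0 - beta) * threshold + beta * advantages.abs().mean().item()
    return filtered, new_threshold
```

On the command line, Typer's default option naming should expose the new parameters as --num-minibatches, --apply-advantage-filter, --initial-th-factor, and --beta, mirroring the keys added to ppo_base_puffer.yaml.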