6 changes: 5 additions & 1 deletion .gitignore
@@ -27,11 +27,15 @@ data/raw/*
data/processed/validation/*
data/processed/training/*
data/processed/testing/*
data/processed/validation_interactive/*
data/processed/pop_play/*
data/processed/hand_designed/*
analyze/figures/*
figures/
checkpoints/
figures_data/
data/other/*
wosac/
data/processed/validation_random/*

# Logging
/wandb
6 changes: 1 addition & 5 deletions baselines/ppo/config/ppo_base_puffer.yaml
@@ -15,7 +15,7 @@ environment: # Overrides default environment configs (see pygpudrive/env/config.
road_map_obs: true
partner_obs: true
norm_obs: true
add_reference_path: false
add_reference_pos_xy: false
remove_non_vehicles: false # If false, all agents are included (vehicles, pedestrians, cyclists)
lidar_obs: false # NOTE: Setting this to true currently turns off the other observation types
reward_type: "weighted_combination" # Options: "weighted_combination", "reward_conditioned"
@@ -42,11 +42,7 @@ environment: # Overrides default environment configs (see pygpudrive/env/config.
obs_radius: 50.0 # Visibility radius of the agents
action_space_steer_disc: 13
action_space_accel_disc: 7
# Versatile Behavior Diffusion (VBD): This will slow down training
use_vbd: false
init_steps: 0
vbd_trajectory_weight: 0.1 # Importance of distance to the vbd trajectories in the reward function
vbd_in_obs: false

wandb:
entity: ""
@@ -2,59 +2,66 @@ mode: "train"
use_rnn: false
eval_model_path: null
baseline: false
data_dir: data/processed/wosac/validation_json_1
data_dir: data/processed/wosac/validation_interactive/json
continue_training: false
model_cpt: null
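
The `environment` block below overrides the library defaults (see `pygpudrive/env/config.py`). A minimal sketch of how such a YAML might be loaded and overlaid on a defaults object is shown here; the `EnvConfig` dataclass and its fields are illustrative assumptions, not the actual pygpudrive API.

```python
# Hedged sketch: load the YAML and overlay its `environment` block on defaults.
# The EnvConfig fields below are placeholders, not the real pygpudrive config.
from dataclasses import dataclass, fields, replace
import yaml

@dataclass
class EnvConfig:
    num_worlds: int = 100
    max_controlled_agents: int = 64
    ego_state: bool = True
    road_map_obs: bool = True
    partner_obs: bool = True
    norm_obs: bool = True

def load_env_config(path: str) -> EnvConfig:
    with open(path) as f:
        cfg = yaml.safe_load(f)
    overrides = cfg.get("environment", {})
    known = {f.name for f in fields(EnvConfig)}
    return replace(EnvConfig(), **{k: v for k, v in overrides.items() if k in known})

# env_cfg = load_env_config("path/to/this_config.yaml")
```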

environment: # Overrides default environment configs (see pygpudrive/env/config.py)
name: "gpudrive"
num_worlds: 100 # Number of parallel environments
num_worlds: 10 # Number of parallel environments
k_unique_scenes: 1 # Number of unique scenes to sample from
max_controlled_agents: 64 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
max_controlled_agents: 32 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
ego_state: true
road_map_obs: true
partner_obs: true
norm_obs: true
add_previous_action: true

# Guidance through expert suggestions
guidance: true # If true, the agent will be guided by expert suggestions
guidance_mode: "log_replay" # Options: "log_replay", "vbd_amortized", "vbd_online"
add_reference_pos_xy: true # If true, the reference (x, y) positions are added to the ego observation
add_reference_speed: true # If true, the reference speeds are added to the ego observation
add_reference_heading: true # If true, the reference headings are added to the ego observation
smoothen_trajectory: true # If true, the velocities and headings are smoothed
guidance_dropout_prob: 0.0 # Probability of dropping out individual guidance points
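
A rough sketch of how the guidance flags above could translate into extra ego-observation features, including dropout of guidance points; the tensor layout and function name are assumptions for illustration, not the actual GPUDrive observation code.

```python
# Illustrative only: assemble guidance features from reference trajectories.
# Shapes and names are assumptions, not the real observation layout.
import torch

def build_guidance_obs(ref_xy, ref_speed, ref_heading, cfg, generator=None):
    """ref_xy: (A, K, 2), ref_speed: (A, K), ref_heading: (A, K) for A agents
    and K guidance points (e.g., from log replay or VBD)."""
    feats = []
    if cfg["add_reference_pos_xy"]:
        feats.append(ref_xy.flatten(start_dim=1))   # (A, 2K)
    if cfg["add_reference_speed"]:
        feats.append(ref_speed)                     # (A, K)
    if cfg["add_reference_heading"]:
        feats.append(ref_heading)                   # (A, K)
    obs = torch.cat(feats, dim=-1)
    p = cfg.get("guidance_dropout_prob", 0.0)
    if p > 0:
        # Per-feature Bernoulli mask; the real code may drop whole guidance points.
        keep = (torch.rand(obs.shape, generator=generator) >= p).float()
        obs = obs * keep
    return obs
```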

# Reward function
reward_type: "guided_autonomy"
collision_weight: -0.1
off_road_weight: -0.1
guidance_speed_weight: 0.005
guidance_heading_weight: 0.005
smoothness_weight: 0.0001
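
The weights above suggest a per-step reward that penalizes collisions and off-road events, rewards tracking the guidance speed and heading, and lightly penalizes jerky control. The exact functional form of `guided_autonomy` is not shown in this diff, so the sketch below is an assumption with the config's weights plugged in.

```python
# Assumed functional form for illustration; the actual "guided_autonomy"
# reward in GPUDrive may differ.
import torch

def guided_autonomy_reward(collided, off_road, speed_err, heading_err, jerk, w):
    """All inputs are per-agent tensors; w holds the weights from this config."""
    r = w["collision_weight"] * collided.float()        # -0.1 per collision step
    r += w["off_road_weight"] * off_road.float()        # -0.1 while off-road
    # Closer tracking of the guidance speed/heading -> larger reward.
    r += w["guidance_speed_weight"] * torch.exp(-speed_err.abs())
    r += w["guidance_heading_weight"] * torch.exp(-heading_err.abs())
    r -= w["smoothness_weight"] * jerk.abs()            # discourage jerky control
    return r
```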

init_mode: wosac_train
dynamics_model: "classic"
remove_non_vehicles: false
collision_behavior: "ignore"
goal_behavior: "ignore"
reward_type: "follow_waypoints"
waypoint_distance_scale: 0.01
speed_distance_scale: 0.01
jerk_smoothness_scale: 0.001

init_mode: all_non_trivial #womd_tracks_to_predict
dynamics_model: "classic"
polyline_reduction_threshold: 0.1 # Rate at which to subsample points from the polyline (0 = use all closest points, 1 = maximum sparsity); needs to be balanced with kMaxAgentMapObservationsCount
sampling_seed: 42 # If given, the set of scenes to sample from is deterministic; if None, the set of scenes is random
obs_radius: 50.0 # Visibility radius of the agents
action_space_steer_disc: 15
action_space_accel_disc: 11
view_cone_half_angle: 3.14159 # -> 360° total view field
view_occlude_objects: false
action_space_steer_disc: 13
action_space_accel_disc: 7
action_space_head_tilt_disc: 1
vehicle_steer_range: [-1.57, 1.57] # pi/2 = 1.57, pi/3 = 1.05
vehicle_accel_range: [-4.0, 4.0]
head_tilt_action_range: [-0.7854, 0.7854] # radians (±45°)
init_steps: 0 # Warmup steps
goal_achieved_weight: 0.0
collision_weight: -0.2
off_road_weight: -0.2
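
The discretization above (13 steering x 7 acceleration x 1 head-tilt bins over the listed ranges) implies a joint discrete action space of 91 actions. A hedged sketch of building such an action table follows; the construction itself is an assumption, only the numbers come from the config.

```python
# Sketch of a joint discrete action grid implied by the config above.
import torch

steer = torch.linspace(-1.57, 1.57, steps=13)   # vehicle_steer_range, 13 bins
accel = torch.linspace(-4.0, 4.0, steps=7)      # vehicle_accel_range, 7 bins
tilt = torch.zeros(1)                           # single head-tilt bin

# Cartesian product: one row of (steer, accel, tilt) per discrete action index.
action_table = torch.cartesian_prod(steer, accel, tilt)
assert action_table.shape == (13 * 7 * 1, 3)    # 91 discrete actions
```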

# Versatile Behavior Diffusion (VBD)
use_vbd: false
init_steps: 0
vbd_trajectory_weight: 0.1 # Importance of distance to the vbd trajectories in the reward function
vbd_in_obs: false

# Planning guidance
add_reference_path: true # If true, a reference path is added to the ego observation
add_reference_speed: true # If true, the reference speed (scalar) is added to the ego observation
prob_reference_dropout: 0.0 # Value between 0 and 1; probability that a reference point is zeroed out

wandb:
entity: ""
project: "humanlike"
group: "debug"
group: ""
mode: "online" # Options: online, offline, disabled
tags: ["ppo", "ff"]

train:
exp_id: waypoint_rs # Set dynamically in the script if needed
exp_id: guidance_logs # Set dynamically in the script if needed
seed: 42
cpu_offload: false
device: "cuda" # Dynamically set to cuda if available, else cpu
@@ -64,56 +64,58 @@ train:

# # # Data sampling # # #
resample_scenes: false
resample_dataset_size: 500 # Number of unique scenes to sample from
resample_interval: 2_000_000
resample_dataset_size: 10_000 # Number of unique scenes to sample from
resample_interval: 5_000_000
sample_with_replacement: true
shuffle_dataset: true
file_prefix: ""
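
With `resample_scenes` enabled, these settings describe drawing a fresh working set of scenes every `resample_interval` steps, sampled with replacement from the full dataset. A minimal sketch under that reading is below; the real dataloader logic in the repository may differ.

```python
# Illustrative resampling schedule; not the repository's exact dataloader.
import random

def maybe_resample(pool, current, global_step, last_resample_step, cfg):
    """Return the (possibly refreshed) scene set and the step it was drawn at."""
    if not cfg["resample_scenes"]:
        return current, last_resample_step
    if global_step - last_resample_step < cfg["resample_interval"]:
        return current, last_resample_step
    k = cfg["resample_dataset_size"]
    if cfg["sample_with_replacement"]:
        new_set = random.choices(pool, k=k)
    else:
        new_set = random.sample(pool, k=min(k, len(pool)))
    if cfg["shuffle_dataset"]:
        random.shuffle(new_set)
    return new_set, global_step
```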

# # # PPO # # #
torch_deterministic: false
total_timesteps: 2_000_000_000
batch_size: 131072
total_timesteps: 4_000_000_000
batch_size: 65536
minibatch_size: 8192
learning_rate: 3e-4
anneal_lr: true
gamma: 0.99
gamma: 1.0
gae_lambda: 0.95
update_epochs: 4
norm_adv: true
clip_coef: 0.2
clip_vloss: false
vf_clip_coef: 0.2
ent_coef: 0.005
ent_coef: 0.01
vf_coef: 0.5
max_grad_norm: 0.5
target_kl: null
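
These hyperparameters imply rollout batches of 65,536 agent steps split into eight minibatches of 8,192 for four epochs per update, with `gamma: 1.0` (undiscounted returns) and GAE lambda 0.95. The loss below is the standard PPO clipped-surrogate formulation with this config's coefficients plugged in, not the repository's exact code.

```python
# Standard PPO clipped-surrogate loss with the coefficients above
# (clip_coef=0.2, ent_coef=0.01, vf_coef=0.5); a generic sketch.
import torch

def ppo_loss(new_logprob, old_logprob, advantages, new_value, returns, entropy,
             clip_coef=0.2, ent_coef=0.01, vf_coef=0.5):
    ratio = (new_logprob - old_logprob).exp()
    adv = (advantages - advantages.mean()) / (advantages.std() + 1e-8)  # norm_adv: true
    pg_loss = torch.max(-adv * ratio,
                        -adv * torch.clamp(ratio, 1 - clip_coef, 1 + clip_coef)).mean()
    v_loss = 0.5 * (new_value - returns).pow(2).mean()   # clip_vloss: false
    return pg_loss + vf_coef * v_loss - ent_coef * entropy.mean()

# batch_size 65_536 / minibatch_size 8_192 = 8 minibatches, repeated for 4 epochs.
```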

# # # Logging # # #
log_window: 500
log_window: 100
track_realism_metrics: true # Log human-like metrics
track_n_worlds: 3 # Number of worlds to track

# # # Network # # #
network:
embed_dim: 64 # Embedding of the input features
embed_dim: 256 # Embedding of the input features
dropout: 0.01
class_name: "Agent"
num_parameters: 0 # Total trainable parameters, to be filled at runtime
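
A minimal feed-forward embedding consistent with `embed_dim: 256` and `dropout: 0.01` is sketched below; the repository's actual `Agent` network is likely more involved (e.g., separate encoders per observation type), so this is only an illustration.

```python
# Minimal sketch consistent with embed_dim=256, dropout=0.01; not the real Agent class.
import torch
import torch.nn as nn

class ObsEmbed(nn.Module):
    def __init__(self, obs_dim: int, embed_dim: int = 256, dropout: float = 0.01):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(obs_dim, embed_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(embed_dim, embed_dim),
        )

    def forward(self, obs: torch.Tensor) -> torch.Tensor:
        return self.net(obs)

# num_parameters can be filled at runtime, e.g.:
# sum(p.numel() for p in ObsEmbed(obs_dim=128).parameters() if p.requires_grad)
```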

# # # Checkpointing # # #
checkpoint_interval: 250 # Save policy every k iterations
checkpoint_interval: 200 # Save policy every k iterations
checkpoint_path: "./runs"

# # # Rendering # # #
render: true # Determines whether to render the environment (note: will slow down training)
render_3d: false # Render simulator state in 3d or 2d
render_interval: 200 # Render every k iterations
render_interval: 10 # Render every k iterations
render_every_t: 5 # Render every k timesteps
render_k_scenarios: 1 # Number of scenarios to render
render_agent_idx: [0] # Agent observations to render
render_format: "mp4" # Options: gif, mp4
render_fps: 20 # Frames per second
render_fps: 5 # Frames per second
zoom_radius: 100
plot_waypoints: true
plot_guidance_pos_xy: true
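
The rendering knobs above suggest simple gating: render only every `render_interval` iterations and, within a rendered iteration, capture a frame every `render_every_t` timesteps for `render_k_scenarios` scenarios. A hedged sketch of that gating (hook names are assumptions):

```python
# Illustrative gating for the rendering settings; hook names are assumptions.
def should_render(iteration: int, t: int, cfg) -> bool:
    if not cfg["render"]:
        return False
    if iteration % cfg["render_interval"] != 0:
        return False
    return t % cfg["render_every_t"] == 0

# Captured frames would then be written as mp4 at render_fps=5 for the
# agent indices listed in render_agent_idx.
```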

vec:
backend: "native" # Only native is currently supported
15 changes: 5 additions & 10 deletions baselines/ppo/config/ppo_population.yaml
@@ -8,16 +8,16 @@ model_cpt: null

environment: # Overrides default environment configs (see pygpudrive/env/config.py)
name: "gpudrive"
num_worlds: 100 # Number of parallel environments
k_unique_scenes: 100 # Number of unique scenes to sample from
num_worlds: 10 # Number of parallel environments
k_unique_scenes: 10 # Number of unique scenes to sample from
max_controlled_agents: 64 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
ego_state: true
road_map_obs: true
partner_obs: true
norm_obs: true
remove_non_vehicles: false # If false, all agents are included (vehicles, pedestrians, cyclists)
lidar_obs: false # NOTE: Setting this to true currently turns off the other observation types
reward_type: "reward_conditioned" # Options: "weighted_combination", "reward_conditioned", "follow_waypoints"
reward_type: "reward_conditioned" # Options: "weighted_combination", "reward_conditioned", "guided_autonomy"
collision_weight: -0.75
off_road_weight: -0.75
goal_achieved_weight: 1.0
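
For this population config, `reward_type: "reward_conditioned"` suggests each agent is conditioned on its own reward-weight vector rather than a single shared one. The sampling scheme below is purely an assumption meant to illustrate the idea, with this config's weights as the anchor values.

```python
# Hypothetical sketch of reward conditioning for a population of agents;
# the sampling scheme is an assumption, only the base weights come from the config.
import torch

def sample_reward_weights(num_agents: int, generator=None) -> torch.Tensor:
    """One (collision, off_road, goal_achieved) weight vector per agent."""
    base = torch.tensor([-0.75, -0.75, 1.0])               # defaults from this config
    jitter = (torch.rand((num_agents, 3), generator=generator) - 0.5) * 0.5
    return base + jitter

weights = sample_reward_weights(num_agents=64)
# Per-agent reward: r_i = w_i[0]*collided_i + w_i[1]*off_road_i + w_i[2]*goal_i,
# with w_i also fed to the policy so one network can cover diverse behaviors.
```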
@@ -42,16 +42,11 @@ environment: # Overrides default environment configs (see pygpudrive/env/config.
action_space_steer_disc: 13
action_space_accel_disc: 7
init_steps: 0 # Warmup steps
# Versatile Behavior Diffusion (VBD): This will slow down training
use_vbd: false
vbd_model_path: "gpudrive/integrations/vbd/weights/epoch=18.ckpt"
vbd_trajectory_weight: 0.1 # Importance of distance to the vbd trajectories in the reward function
vbd_in_obs: false

wandb:
entity: ""
project: "kshotagents"
group: "separate_actor_critic"
group: "debug_mini"
mode: "online" # Options: online, offline, disabled
tags: ["ppo", "ff"]

@@ -110,7 +105,7 @@ train:
render_format: "mp4" # Options: gif, mp4
render_fps: 20 # Frames per second
zoom_radius: 100
plot_waypoints: true
plot_guidance_pos_xy: true

vec:
backend: "native" # Only native is currently supported