6 changes: 5 additions & 1 deletion .gitignore
@@ -27,11 +27,15 @@ data/raw/*
data/processed/validation/*
data/processed/training/*
data/processed/testing/*
data/processed/validation_interactive/*
data/processed/pop_play/*
data/processed/hand_designed/*
analyze/figures/*
figures/
checkpoints/
figures_data/
data/other/*
wosac/
data/processed/validation_random/*

# Logging
/wandb
6 changes: 1 addition & 5 deletions baselines/ppo/config/ppo_base_puffer.yaml
@@ -15,7 +15,7 @@ environment: # Overrides default environment configs (see pygpudrive/env/config.
road_map_obs: true
partner_obs: true
norm_obs: true
add_reference_path: false
add_reference_pos_xy: false
remove_non_vehicles: false # If false, all agents are included (vehicles, pedestrians, cyclists)
lidar_obs: false # NOTE: Setting this to true currently turns off the other observation types
reward_type: "weighted_combination" # Options: "weighted_combination", "reward_conditioned"
@@ -42,11 +42,7 @@ environment: # Overrides default environment configs (see pygpudrive/env/config.
obs_radius: 50.0 # Visibility radius of the agents
action_space_steer_disc: 13
action_space_accel_disc: 7
# Versatile Behavior Diffusion (VBD): This will slow down training
use_vbd: false
init_steps: 0
vbd_trajectory_weight: 0.1 # Importance of distance to the vbd trajectories in the reward function
vbd_in_obs: false

wandb:
entity: ""
@@ -2,59 +2,66 @@ mode: "train"
use_rnn: false
eval_model_path: null
baseline: false
data_dir: data/processed/wosac/validation_json_1
data_dir: data/processed/wosac/validation_interactive/json
continue_training: false
model_cpt: null
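
The `environment` block below overrides the library defaults (see `pygpudrive/env/config.py`). A minimal sketch of how such a YAML might be loaded and overlaid on a defaults object is shown here; the `EnvConfig` dataclass and its fields are illustrative assumptions, not the actual pygpudrive API.

```python
# Hedged sketch: load the YAML and overlay its `environment` block on defaults.
# The EnvConfig fields below are placeholders, not the real pygpudrive config.
from dataclasses import dataclass, fields, replace
import yaml

@dataclass
class EnvConfig:
    num_worlds: int = 100
    max_controlled_agents: int = 64
    ego_state: bool = True
    road_map_obs: bool = True
    partner_obs: bool = True
    norm_obs: bool = True

def load_env_config(path: str) -> EnvConfig:
    with open(path) as f:
        cfg = yaml.safe_load(f)
    overrides = cfg.get("environment", {})
    known = {f.name for f in fields(EnvConfig)}
    return replace(EnvConfig(), **{k: v for k, v in overrides.items() if k in known})

# env_cfg = load_env_config("path/to/this_config.yaml")
```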

environment: # Overrides default environment configs (see pygpudrive/env/config.py)
name: "gpudrive"
num_worlds: 100 # Number of parallel environments
num_worlds: 10 # Number of parallel environments
k_unique_scenes: 1 # Number of unique scenes to sample from
max_controlled_agents: 64 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
max_controlled_agents: 32 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
ego_state: true
road_map_obs: true
partner_obs: true
norm_obs: true
add_previous_action: true

# Guidance through expert suggestions
guidance: true # If true, the agent will be guided by expert suggestions
guidance_mode: "log_replay" # Options: "log_replay", "vbd_amortized", "vbd_online"
add_reference_pos_xy: true # If true, the reference (x, y) positions are added to the ego observation
add_reference_speed: true # If true, the reference speeds are added to the ego observation
add_reference_heading: true # If true, the reference headings are added to the ego observation
smoothen_trajectory: true # If true, the velocities and headings are smoothed
guidance_dropout_prob: 0.0 # Probability of dropping out individual guidance points
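
A rough sketch of how the guidance flags above could translate into extra ego-observation features, including dropout of guidance points; the tensor layout and function name are assumptions for illustration, not the actual GPUDrive observation code.

```python
# Illustrative only: assemble guidance features from reference trajectories.
# Shapes and names are assumptions, not the real observation layout.
import torch

def build_guidance_obs(ref_xy, ref_speed, ref_heading, cfg, generator=None):
    """ref_xy: (A, K, 2), ref_speed: (A, K), ref_heading: (A, K) for A agents
    and K guidance points (e.g., from log replay or VBD)."""
    feats = []
    if cfg["add_reference_pos_xy"]:
        feats.append(ref_xy.flatten(start_dim=1))   # (A, 2K)
    if cfg["add_reference_speed"]:
        feats.append(ref_speed)                     # (A, K)
    if cfg["add_reference_heading"]:
        feats.append(ref_heading)                   # (A, K)
    obs = torch.cat(feats, dim=-1)
    p = cfg.get("guidance_dropout_prob", 0.0)
    if p > 0:
        # Per-feature Bernoulli mask; the real code may drop whole guidance points.
        keep = (torch.rand(obs.shape, generator=generator) >= p).float()
        obs = obs * keep
    return obs
```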

# Reward function
reward_type: "guided_autonomy"
collision_weight: -0.1
off_road_weight: -0.1
guidance_speed_weight: 0.005
guidance_heading_weight: 0.005
smoothness_weight: 0.0001
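
The weights above suggest a per-step reward that penalizes collisions and off-road events, rewards tracking the guidance speed and heading, and lightly penalizes jerky control. The exact functional form of `guided_autonomy` is not shown in this diff, so the sketch below is an assumption with the config's weights plugged in.

```python
# Assumed functional form for illustration; the actual "guided_autonomy"
# reward in GPUDrive may differ.
import torch

def guided_autonomy_reward(collided, off_road, speed_err, heading_err, jerk, w):
    """All inputs are per-agent tensors; w holds the weights from this config."""
    r = w["collision_weight"] * collided.float()        # -0.1 per collision step
    r += w["off_road_weight"] * off_road.float()        # -0.1 while off-road
    # Closer tracking of the guidance speed/heading -> larger reward.
    r += w["guidance_speed_weight"] * torch.exp(-speed_err.abs())
    r += w["guidance_heading_weight"] * torch.exp(-heading_err.abs())
    r -= w["smoothness_weight"] * jerk.abs()            # discourage jerky control
    return r
```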

init_mode: wosac_train
dynamics_model: "classic"
remove_non_vehicles: false
collision_behavior: "ignore"
goal_behavior: "ignore"
reward_type: "follow_waypoints"
waypoint_distance_scale: 0.01
speed_distance_scale: 0.01
jerk_smoothness_scale: 0.001

init_mode: all_non_trivial #womd_tracks_to_predict
dynamics_model: "classic"
polyline_reduction_threshold: 0.1 # Rate at which to subsample points from the polyline (0 = use all closest points, 1 = maximum sparsity); needs to be balanced with kMaxAgentMapObservationsCount
sampling_seed: 42 # If given, the set of scenes to sample from is deterministic; if None, the set of scenes is random
obs_radius: 50.0 # Visibility radius of the agents
action_space_steer_disc: 15
action_space_accel_disc: 11
view_cone_half_angle: 3.14159 # -> 360° total view field
view_occlude_objects: false
action_space_steer_disc: 13
action_space_accel_disc: 7
action_space_head_tilt_disc: 1
vehicle_steer_range: [-1.57, 1.57] # pi/2 = 1.57, pi/3 = 1.05
vehicle_accel_range: [-4.0, 4.0]
head_tilt_action_range: [-0.7854, 0.7854] # radians (±45°)
init_steps: 0 # Warmup steps
goal_achieved_weight: 0.0
collision_weight: -0.2
off_road_weight: -0.2
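
The discretization above (13 steering x 7 acceleration x 1 head-tilt bins over the listed ranges) implies a joint discrete action space of 91 actions. A hedged sketch of building such an action table follows; the construction itself is an assumption, only the numbers come from the config.

```python
# Sketch of a joint discrete action grid implied by the config above.
import torch

steer = torch.linspace(-1.57, 1.57, steps=13)   # vehicle_steer_range, 13 bins
accel = torch.linspace(-4.0, 4.0, steps=7)      # vehicle_accel_range, 7 bins
tilt = torch.zeros(1)                           # single head-tilt bin

# Cartesian product: one row of (steer, accel, tilt) per discrete action index.
action_table = torch.cartesian_prod(steer, accel, tilt)
assert action_table.shape == (13 * 7 * 1, 3)    # 91 discrete actions
```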

# Versatile Behavior Diffusion (VBD)
use_vbd: false
init_steps: 0
vbd_trajectory_weight: 0.1 # Importance of distance to the vbd trajectories in the reward function
vbd_in_obs: false

# Planning guidance
add_reference_path: true # If true, a reference path is added to the ego observation
add_reference_speed: true # If true, the reference speed (scalar) is added to the ego observation
prob_reference_dropout: 0.0 # Value between 0 and 1; probability that a reference point is zeroed out

wandb:
entity: ""
project: "humanlike"
group: "debug"
group: ""
mode: "online" # Options: online, offline, disabled
tags: ["ppo", "ff"]

train:
exp_id: waypoint_rs # Set dynamically in the script if needed
exp_id: guidance_logs # Set dynamically in the script if needed
seed: 42
cpu_offload: false
device: "cuda" # Dynamically set to cuda if available, else cpu
@@ -64,56 +64,58 @@ train:

# # # Data sampling # # #
resample_scenes: false
resample_dataset_size: 500 # Number of unique scenes to sample from
resample_interval: 2_000_000
resample_dataset_size: 10_000 # Number of unique scenes to sample from
resample_interval: 5_000_000
sample_with_replacement: true
shuffle_dataset: true
file_prefix: ""
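
With `resample_scenes` enabled, these settings describe drawing a fresh working set of scenes every `resample_interval` steps, sampled with replacement from the full dataset. A minimal sketch under that reading is below; the real dataloader logic in the repository may differ.

```python
# Illustrative resampling schedule; not the repository's exact dataloader.
import random

def maybe_resample(pool, current, global_step, last_resample_step, cfg):
    """Return the (possibly refreshed) scene set and the step it was drawn at."""
    if not cfg["resample_scenes"]:
        return current, last_resample_step
    if global_step - last_resample_step < cfg["resample_interval"]:
        return current, last_resample_step
    k = cfg["resample_dataset_size"]
    if cfg["sample_with_replacement"]:
        new_set = random.choices(pool, k=k)
    else:
        new_set = random.sample(pool, k=min(k, len(pool)))
    if cfg["shuffle_dataset"]:
        random.shuffle(new_set)
    return new_set, global_step
```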

# # # PPO # # #
torch_deterministic: false
total_timesteps: 2_000_000_000
batch_size: 131072
total_timesteps: 4_000_000_000
batch_size: 65536
minibatch_size: 8192
learning_rate: 3e-4
anneal_lr: true
gamma: 0.99
gamma: 1.0
gae_lambda: 0.95
update_epochs: 4
norm_adv: true
clip_coef: 0.2
clip_vloss: false
vf_clip_coef: 0.2
ent_coef: 0.005
ent_coef: 0.01
vf_coef: 0.5
max_grad_norm: 0.5
target_kl: null
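
These hyperparameters imply rollout batches of 65,536 agent steps split into eight minibatches of 8,192 for four epochs per update, with `gamma: 1.0` (undiscounted returns) and GAE lambda 0.95. The loss below is the standard PPO clipped-surrogate formulation with this config's coefficients plugged in, not the repository's exact code.

```python
# Standard PPO clipped-surrogate loss with the coefficients above
# (clip_coef=0.2, ent_coef=0.01, vf_coef=0.5); a generic sketch.
import torch

def ppo_loss(new_logprob, old_logprob, advantages, new_value, returns, entropy,
             clip_coef=0.2, ent_coef=0.01, vf_coef=0.5):
    ratio = (new_logprob - old_logprob).exp()
    adv = (advantages - advantages.mean()) / (advantages.std() + 1e-8)  # norm_adv: true
    pg_loss = torch.max(-adv * ratio,
                        -adv * torch.clamp(ratio, 1 - clip_coef, 1 + clip_coef)).mean()
    v_loss = 0.5 * (new_value - returns).pow(2).mean()   # clip_vloss: false
    return pg_loss + vf_coef * v_loss - ent_coef * entropy.mean()

# batch_size 65_536 / minibatch_size 8_192 = 8 minibatches, repeated for 4 epochs.
```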

# # # Logging # # #
log_window: 500
log_window: 100
track_realism_metrics: true # Log human-like metrics
track_n_worlds: 3 # Number of worlds to track

# # # Network # # #
network:
embed_dim: 64 # Embedding of the input features
embed_dim: 256 # Embedding of the input features
dropout: 0.01
class_name: "Agent"
num_parameters: 0 # Total trainable parameters, to be filled at runtime
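
A minimal feed-forward embedding consistent with `embed_dim: 256` and `dropout: 0.01` is sketched below; the repository's actual `Agent` network is likely more involved (e.g., separate encoders per observation type), so this is only an illustration.

```python
# Minimal sketch consistent with embed_dim=256, dropout=0.01; not the real Agent class.
import torch
import torch.nn as nn

class ObsEmbed(nn.Module):
    def __init__(self, obs_dim: int, embed_dim: int = 256, dropout: float = 0.01):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(obs_dim, embed_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(embed_dim, embed_dim),
        )

    def forward(self, obs: torch.Tensor) -> torch.Tensor:
        return self.net(obs)

# num_parameters can be filled at runtime, e.g.:
# sum(p.numel() for p in ObsEmbed(obs_dim=128).parameters() if p.requires_grad)
```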

# # # Checkpointing # # #
checkpoint_interval: 250 # Save policy every k iterations
checkpoint_interval: 200 # Save policy every k iterations
checkpoint_path: "./runs"

# # # Rendering # # #
render: true # Determines whether to render the environment (note: will slow down training)
render_3d: false # Render simulator state in 3d or 2d
render_interval: 200 # Render every k iterations
render_interval: 10 # Render every k iterations
render_every_t: 5 # Render every k timesteps
render_k_scenarios: 1 # Number of scenarios to render
render_agent_idx: [0] # Agent observations to render
render_format: "mp4" # Options: gif, mp4
render_fps: 20 # Frames per second
render_fps: 5 # Frames per second
zoom_radius: 100
plot_waypoints: true
plot_guidance_pos_xy: true
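
The rendering knobs above suggest simple gating: render only every `render_interval` iterations and, within a rendered iteration, capture a frame every `render_every_t` timesteps for `render_k_scenarios` scenarios. A hedged sketch of that gating (hook names are assumptions):

```python
# Illustrative gating for the rendering settings; hook names are assumptions.
def should_render(iteration: int, t: int, cfg) -> bool:
    if not cfg["render"]:
        return False
    if iteration % cfg["render_interval"] != 0:
        return False
    return t % cfg["render_every_t"] == 0

# Captured frames would then be written as mp4 at render_fps=5 for the
# agent indices listed in render_agent_idx.
```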

vec:
backend: "native" # Only native is currently supported
15 changes: 5 additions & 10 deletions baselines/ppo/config/ppo_population.yaml
@@ -8,16 +8,16 @@ model_cpt: null

environment: # Overrides default environment configs (see pygpudrive/env/config.py)
name: "gpudrive"
num_worlds: 100 # Number of parallel environments
k_unique_scenes: 100 # Number of unique scenes to sample from
num_worlds: 10 # Number of parallel environments
k_unique_scenes: 10 # Number of unique scenes to sample from
max_controlled_agents: 64 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
ego_state: true
road_map_obs: true
partner_obs: true
norm_obs: true
remove_non_vehicles: false # If false, all agents are included (vehicles, pedestrians, cyclists)
lidar_obs: false # NOTE: Setting this to true currently turns off the other observation types
reward_type: "reward_conditioned" # Options: "weighted_combination", "reward_conditioned", "follow_waypoints"
reward_type: "reward_conditioned" # Options: "weighted_combination", "reward_conditioned", "guided_autonomy"
collision_weight: -0.75
off_road_weight: -0.75
goal_achieved_weight: 1.0
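
For this population config, `reward_type: "reward_conditioned"` suggests each agent is conditioned on its own reward-weight vector rather than a single shared one. The sampling scheme below is purely an assumption meant to illustrate the idea, with this config's weights as the anchor values.

```python
# Hypothetical sketch of reward conditioning for a population of agents;
# the sampling scheme is an assumption, only the base weights come from the config.
import torch

def sample_reward_weights(num_agents: int, generator=None) -> torch.Tensor:
    """One (collision, off_road, goal_achieved) weight vector per agent."""
    base = torch.tensor([-0.75, -0.75, 1.0])               # defaults from this config
    jitter = (torch.rand((num_agents, 3), generator=generator) - 0.5) * 0.5
    return base + jitter

weights = sample_reward_weights(num_agents=64)
# Per-agent reward: r_i = w_i[0]*collided_i + w_i[1]*off_road_i + w_i[2]*goal_i,
# with w_i also fed to the policy so one network can cover diverse behaviors.
```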
@@ -42,16 +42,11 @@ environment: # Overrides default environment configs (see pygpudrive/env/config.
action_space_steer_disc: 13
action_space_accel_disc: 7
init_steps: 0 # Warmup steps
# Versatile Behavior Diffusion (VBD): This will slow down training
use_vbd: false
vbd_model_path: "gpudrive/integrations/vbd/weights/epoch=18.ckpt"
vbd_trajectory_weight: 0.1 # Importance of distance to the vbd trajectories in the reward function
vbd_in_obs: false

wandb:
entity: ""
project: "kshotagents"
group: "separate_actor_critic"
group: "debug_mini"
mode: "online" # Options: online, offline, disabled
tags: ["ppo", "ff"]

@@ -110,7 +105,7 @@ train:
render_format: "mp4" # Options: gif, mp4
render_fps: 20 # Frames per second
zoom_radius: 100
plot_waypoints: true
plot_guidance_pos_xy: true

vec:
backend: "native" # Only native is currently supported