57 commits
901723f  rfc (Jun 4, 2025)
ab02d75  Revert "rfc" (Jun 4, 2025)
1c0117b  Merge branch 'main' of https://github.com/pytorch/torchtune (Jun 6, 2025)
1cc6946  Merge branch 'main' of https://github.com/pytorch/torchtune (Jun 11, 2025)
2a2efa2  add packed functions (felipemello1, Jun 11, 2025)
6e14b06  enable on full recipe (felipemello1, Jun 11, 2025)
f9db469  fix imports + formatting (Jun 11, 2025)
ff6fdbe  add max_steps_per_epoch requirement (felipemello1, Jun 11, 2025)
5e447ab  address blockers (Jun 11, 2025)
7a1dfa5  Merge branch 'main' into online_packing (Jun 11, 2025)
1ffd459  Merge branch 'main' into online_packing (Jun 11, 2025)
13cda28  small fixes (felipemello1, Jun 12, 2025)
d26769c  add md doc (felipemello1, Jun 12, 2025)
20cfa80  Merge remote-tracking branch 'refs/remotes/origin/online_packing' int… (felipemello1, Jun 12, 2025)
59b8cab  update comments (felipemello1, Jun 12, 2025)
5d7d496  update comments (felipemello1, Jun 12, 2025)
e193926  update comment (felipemello1, Jun 12, 2025)
40d79f4  update comment (felipemello1, Jun 12, 2025)
3cab533  first commit (felipemello1, Jun 25, 2025)
2212b19  update tests (felipemello1, Jun 25, 2025)
4345832  Merge remote-tracking branch 'joecummings/impl-step-based-ckpt' into … (felipemello1, Jun 25, 2025)
2eb68b6  linter (Jun 25, 2025)
2e51e04  tests pass (Jun 25, 2025)
93fa743  it works (Jun 26, 2025)
aa9e6f4  remove code (Jun 26, 2025)
a5e7234  Merge branch 'iterable_dataset_final' into online_packing (Jun 26, 2025)
55be775  adjust pack to have metrics (Jun 26, 2025)
382c4e9  remove comment (Jun 26, 2025)
5b188ed  update metrics to use handlers (felipemello1, Jul 2, 2025)
2eab08d  remove file after refactoring (felipemello1, Jul 2, 2025)
58491f1  add distributed tsts (felipemello1, Jul 2, 2025)
da7245d  Merge branch 'iterable_dataset_final' of github.com:felipemello1/torc… (Jul 2, 2025)
96424d0  tests pass (Jul 2, 2025)
853147b  optimize SFTOutputTransform (Jul 2, 2025)
96bc317  use ds.sampling_weight (felipemello1, Jul 2, 2025)
3c9d161  add sampling log to interlead dataset (felipemello1, Jul 2, 2025)
4804663  fix nested interleave (felipemello1, Jul 3, 2025)
2fe4b40  changes to TuneIterableDataset (felipemello1, Jul 3, 2025)
72211c9  add IterableDataset back (Jul 3, 2025)
b350ac7  nested interleaved + dataset.info (felipemello1, Jul 6, 2025)
f9a1aec  nits hf_iterable (felipemello1, Jul 6, 2025)
f7a3aa7  update readme (felipemello1, Jul 6, 2025)
17878bf  make metric dataset name explicit (felipemello1, Jul 6, 2025)
101e96e  update recipe to share log freq + validagtion msg (felipemello1, Jul 6, 2025)
1b3f3fc  update interleaved tests to do nesting (Jul 6, 2025)
fac3fd5  lint (Jul 6, 2025)
29ba1cb  error if duplicated metric name (Jul 7, 2025)
f89eefe  improve docs (Jul 7, 2025)
de942bf  Merge branch 'iterable_dataset_final' into online_packing (felipemello1, Jul 7, 2025)
d6680b7  rename from strategy to packer (felipemello1, Jul 7, 2025)
d3be015  tensors instead of lists (felipemello1, Jul 7, 2025)
c8bfbb2  tests (felipemello1, Jul 7, 2025)
fd41842  docs (felipemello1, Jul 7, 2025)
734128e  tests + lint pass (Jul 7, 2025)
23bd9fb  test collate + dataloader (felipemello1, Jul 7, 2025)
fb7b9aa  clean up (Jul 7, 2025)
4c505e0  improve packed testing (Jul 8, 2025)
50 changes: 30 additions & 20 deletions recipes/configs/llama3_2/3B_full.yaml
@@ -24,23 +24,32 @@ output_dir: /tmp/torchtune/llama3_2_3B/full # /tmp may be deleted by your system
 tokenizer:
   _component_: torchtune.models.llama3.llama3_tokenizer
   path: /tmp/Llama-3.2-3B-Instruct/original/tokenizer.model
-  max_seq_len: null
+  max_seq_len: 4096
 
-# Dataset and Sampler
+# Dataloader
+dataloader:
+  batch_size: 16
+  # num_workers and pin_memory can be added here if needed
+
+# Dataset - now a list to support multiple weighted sources
 dataset:
-  _component_: torchtune.datasets.alpaca_cleaned_dataset
-  packed: False # True increases speed
-  split: train[:95%]
-seed: null
-shuffle: True
-batch_size: 4
-
-# Validation
-run_val_every_n_steps: null # Change to an integer to enable validation every N steps
-dataset_val:
-  _component_: torchtune.datasets.alpaca_cleaned_dataset
-  split: train[95%:]
-batch_size_val: ${batch_size}
+  - _component_: torchtune.datasets.slimorca_iterable_dataset
+    shuffle_buffer_size: 1000
+    weight: 0.8
+    split: train[:5%] # simulate 1 epoch quickly
+  - _component_: torchtune.datasets.alpaca_iterable_dataset
+    shuffle_buffer_size: 1000
+    weight: 0.2
+    split: train[:5%] # simulate 1 epoch quickly
+
+# On-the-fly packing
+# Set packing_strategy: null to disable packing
+packing_strategy:
+  _component_: torchtune.datasets.TextPacker
+
+seed: 42
+
+# Validation not supported yet with iterable datasets
 
 # Model Arguments
 model:
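For readers new to the config shape above: the `dataset` list feeds an interleaver that draws each sample from a source chosen in proportion to its `weight`, and `packing_strategy` concatenates the resulting samples into fixed-length sequences on the fly. A minimal sketch of the idea follows; this is not torchtune's actual implementation, and all names and signatures are illustrative:

```python
# Minimal sketch (not torchtune's implementation) of weighted interleaving
# plus on-the-fly packing, as wired up in the config above.
import random
from typing import Dict, Iterator, List


def interleave(
    streams: Dict[str, Iterator[List[int]]],
    weights: Dict[str, float],
    seed: int = 42,
) -> Iterator[List[int]]:
    """Yield the next sample from a source chosen by its sampling weight."""
    rng = random.Random(seed)
    names = list(streams)
    probs = [weights[n] for n in names]
    while True:
        name = rng.choices(names, weights=probs, k=1)[0]
        try:
            yield next(streams[name])
        except StopIteration:
            return  # a source ran out; real iterable datasets can re-epoch


def pack(samples: Iterator[List[int]], max_seq_len: int) -> Iterator[List[int]]:
    """Greedily concatenate token lists until a pack of max_seq_len is full."""
    buffer: List[int] = []
    for tokens in samples:
        tokens = tokens[:max_seq_len]  # truncate oversized samples
        if buffer and len(buffer) + len(tokens) > max_seq_len:
            yield buffer  # flush the current pack before it overflows
            buffer = []
        buffer.extend(tokens)
    if buffer:
        yield buffer


# Usage with the weights from the config (0.8 slimorca, 0.2 alpaca):
slimorca = iter([[1, 2, 3]] * 100)  # stand-ins for tokenized samples
alpaca = iter([[4, 5]] * 100)
mixed = interleave({"slimorca": slimorca, "alpaca": alpaca},
                   {"slimorca": 0.8, "alpaca": 0.2})
packs = list(pack(mixed, max_seq_len=4096))  # each pack holds many samples
```

The real packer also has to track sample boundaries so attention masks and loss masks do not leak across documents, which is why it is a configurable component (`TextPacker`) rather than a simple concatenation.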
@@ -65,10 +74,11 @@ optimizer:
 loss:
   _component_: torchtune.modules.loss.LinearCrossEntropyLoss
 
-# Training
-epochs: 1
-max_steps_per_epoch: null
-gradient_accumulation_steps: 8 # Use to increase effective batch size
+# Training - now step-based
+num_training_steps: 100 # Total number of training steps to run
+save_every_n_steps: 200 # Save a checkpoint every N steps. Using 200 to avoid ckpt.
+gradient_accumulation_steps: 1
+dataset_metrics_log_freq: 5 # Log dataset-specific metrics every N steps
 
 # Environment
 device: cuda
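The training section moves from epoch-based to step-based control: with infinite iterable datasets there is no natural epoch boundary, so the loop simply counts optimizer steps. Note that `save_every_n_steps: 200` exceeds `num_training_steps: 100`, so no mid-run checkpoint fires in this quick-test config. A rough sketch of the loop these keys imply, using toy stand-ins for the model and data (only the loop structure mirrors the config):

```python
# Rough sketch of a step-based loop; model and data are toy stand-ins.
import torch

model = torch.nn.Linear(8, 8)
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4)

num_training_steps = 100
gradient_accumulation_steps = 1
save_every_n_steps = 200  # > num_training_steps, so it never fires here


def data_stream():  # stand-in for the iterable-dataset dataloader
    while True:
        yield torch.randn(16, 8)  # batch_size 16, as in the config


data_iter = data_stream()
for step in range(1, num_training_steps + 1):
    for _ in range(gradient_accumulation_steps):
        batch = next(data_iter)
        loss = model(batch).pow(2).mean() / gradient_accumulation_steps
        loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    if step % save_every_n_steps == 0:  # step-based, not epoch-based
        torch.save(model.state_dict(), f"ckpt_step_{step}.pt")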
@@ -83,7 +93,7 @@ optimizer_in_bwd: False # True saves memory. Requires gradient_accumulation_ste
 
 # Logging
 metric_logger:
-  _component_: torchtune.training.metric_logging.DiskLogger
+  _component_: torchtune.training.metric_logging.WandBLogger
   log_dir: ${output_dir}/logs
 log_every_n_steps: 1
 log_peak_memory_stats: True