57 commits
901723f  rfc (Jun 4, 2025)
ab02d75  Revert "rfc" (Jun 4, 2025)
1c0117b  Merge branch 'main' of https://github.com/pytorch/torchtune (Jun 6, 2025)
1cc6946  Merge branch 'main' of https://github.com/pytorch/torchtune (Jun 11, 2025)
2a2efa2  add packed functions (felipemello1, Jun 11, 2025)
6e14b06  enable on full recipe (felipemello1, Jun 11, 2025)
f9db469  fix imports + formatting (Jun 11, 2025)
ff6fdbe  add max_steps_per_epoch requirement (felipemello1, Jun 11, 2025)
5e447ab  address blockers (Jun 11, 2025)
7a1dfa5  Merge branch 'main' into online_packing (Jun 11, 2025)
1ffd459  Merge branch 'main' into online_packing (Jun 11, 2025)
13cda28  small fixes (felipemello1, Jun 12, 2025)
d26769c  add md doc (felipemello1, Jun 12, 2025)
20cfa80  Merge remote-tracking branch 'refs/remotes/origin/online_packing' int… (felipemello1, Jun 12, 2025)
59b8cab  update comments (felipemello1, Jun 12, 2025)
5d7d496  update comments (felipemello1, Jun 12, 2025)
e193926  update comment (felipemello1, Jun 12, 2025)
40d79f4  update comment (felipemello1, Jun 12, 2025)
3cab533  first commit (felipemello1, Jun 25, 2025)
2212b19  update tests (felipemello1, Jun 25, 2025)
4345832  Merge remote-tracking branch 'joecummings/impl-step-based-ckpt' into … (felipemello1, Jun 25, 2025)
2eb68b6  linter (Jun 25, 2025)
2e51e04  tests pass (Jun 25, 2025)
93fa743  it works (Jun 26, 2025)
aa9e6f4  remove code (Jun 26, 2025)
a5e7234  Merge branch 'iterable_dataset_final' into online_packing (Jun 26, 2025)
55be775  adjust pack to have metrics (Jun 26, 2025)
382c4e9  remove comment (Jun 26, 2025)
5b188ed  update metrics to use handlers (felipemello1, Jul 2, 2025)
2eab08d  remove file after refactoring (felipemello1, Jul 2, 2025)
58491f1  add distributed tsts (felipemello1, Jul 2, 2025)
da7245d  Merge branch 'iterable_dataset_final' of github.com:felipemello1/torc… (Jul 2, 2025)
96424d0  tests pass (Jul 2, 2025)
853147b  optimize SFTOutputTransform (Jul 2, 2025)
96bc317  use ds.sampling_weight (felipemello1, Jul 2, 2025)
3c9d161  add sampling log to interlead dataset (felipemello1, Jul 2, 2025)
4804663  fix nested interleave (felipemello1, Jul 3, 2025)
2fe4b40  changes to TuneIterableDataset (felipemello1, Jul 3, 2025)
72211c9  add IterableDataset back (Jul 3, 2025)
b350ac7  nested interleaved + dataset.info (felipemello1, Jul 6, 2025)
f9a1aec  nits hf_iterable (felipemello1, Jul 6, 2025)
f7a3aa7  update readme (felipemello1, Jul 6, 2025)
17878bf  make metric dataset name explicit (felipemello1, Jul 6, 2025)
101e96e  update recipe to share log freq + validagtion msg (felipemello1, Jul 6, 2025)
1b3f3fc  update interleaved tests to do nesting (Jul 6, 2025)
fac3fd5  lint (Jul 6, 2025)
29ba1cb  error if duplicated metric name (Jul 7, 2025)
f89eefe  improve docs (Jul 7, 2025)
de942bf  Merge branch 'iterable_dataset_final' into online_packing (felipemello1, Jul 7, 2025)
d6680b7  rename from strategy to packer (felipemello1, Jul 7, 2025)
d3be015  tensors instead of lists (felipemello1, Jul 7, 2025)
c8bfbb2  tests (felipemello1, Jul 7, 2025)
fd41842  docs (felipemello1, Jul 7, 2025)
734128e  tests + lint pass (Jul 7, 2025)
23bd9fb  test collate + dataloader (felipemello1, Jul 7, 2025)
fb7b9aa  clean up (Jul 7, 2025)
4c505e0  improve packed testing (Jul 8, 2025)
50 changes: 30 additions & 20 deletions recipes/configs/llama3_2/3B_full.yaml
@@ -24,23 +24,32 @@ output_dir: /tmp/torchtune/llama3_2_3B/full # /tmp may be deleted by your system
 tokenizer:
   _component_: torchtune.models.llama3.llama3_tokenizer
   path: /tmp/Llama-3.2-3B-Instruct/original/tokenizer.model
-  max_seq_len: null
+  max_seq_len: 4096
 
-# Dataset and Sampler
+# Dataloader
+dataloader:
+  batch_size: 16
+  # num_workers and pin_memory can be added here if needed
+
+# Dataset - now a list to support multiple weighted sources
 dataset:
-  _component_: torchtune.datasets.alpaca_cleaned_dataset
-  packed: False # True increases speed
-  split: train[:95%]
-seed: null
-shuffle: True
-batch_size: 4
-
-# Validation
-run_val_every_n_steps: null # Change to an integer to enable validation every N steps
-dataset_val:
-  _component_: torchtune.datasets.alpaca_cleaned_dataset
-  split: train[95%:]
-batch_size_val: ${batch_size}
+  - _component_: torchtune.datasets.slimorca_iterable_dataset
+    shuffle_buffer_size: 1000
+    weight: 0.8
+    split: train[:5%] # simulate 1 epoch quickly
+  - _component_: torchtune.datasets.alpaca_iterable_dataset
+    shuffle_buffer_size: 1000
+    weight: 0.2
+    split: train[:5%] # simulate 1 epoch quickly
+
+# On-the-fly packing
+# Set packing_strategy: null to disable packing
+packing_strategy:
+  _component_: torchtune.datasets.TextPacker
+
+seed: 42
+
+# Validation not supported yet with iterable datasets
 
 # Model Arguments
 model:
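For readers new to the config shape above: the `dataset` list feeds an interleaver that draws each sample from a source chosen in proportion to its `weight`, and `packing_strategy` concatenates the resulting samples into fixed-length sequences on the fly. A minimal sketch of the idea follows; this is not torchtune's actual implementation, and all names and signatures are illustrative:

```python
# Minimal sketch (not torchtune's implementation) of weighted interleaving
# plus on-the-fly packing, as wired up in the config above.
import random
from typing import Dict, Iterator, List


def interleave(
    streams: Dict[str, Iterator[List[int]]],
    weights: Dict[str, float],
    seed: int = 42,
) -> Iterator[List[int]]:
    """Yield the next sample from a source chosen by its sampling weight."""
    rng = random.Random(seed)
    names = list(streams)
    probs = [weights[n] for n in names]
    while True:
        name = rng.choices(names, weights=probs, k=1)[0]
        try:
            yield next(streams[name])
        except StopIteration:
            return  # a source ran out; real iterable datasets can re-epoch


def pack(samples: Iterator[List[int]], max_seq_len: int) -> Iterator[List[int]]:
    """Greedily concatenate token lists until a pack of max_seq_len is full."""
    buffer: List[int] = []
    for tokens in samples:
        tokens = tokens[:max_seq_len]  # truncate oversized samples
        if buffer and len(buffer) + len(tokens) > max_seq_len:
            yield buffer  # flush the current pack before it overflows
            buffer = []
        buffer.extend(tokens)
    if buffer:
        yield buffer


# Usage with the weights from the config (0.8 slimorca, 0.2 alpaca):
slimorca = iter([[1, 2, 3]] * 100)  # stand-ins for tokenized samples
alpaca = iter([[4, 5]] * 100)
mixed = interleave({"slimorca": slimorca, "alpaca": alpaca},
                   {"slimorca": 0.8, "alpaca": 0.2})
packs = list(pack(mixed, max_seq_len=4096))  # each pack holds many samples
```

The real packer also has to track sample boundaries so attention masks and loss masks do not leak across documents, which is why it is a configurable component (`TextPacker`) rather than a simple concatenation.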
@@ -65,10 +74,11 @@ optimizer:
 loss:
   _component_: torchtune.modules.loss.LinearCrossEntropyLoss
 
-# Training
-epochs: 1
-max_steps_per_epoch: null
-gradient_accumulation_steps: 8 # Use to increase effective batch size
+# Training - now step-based
+num_training_steps: 100 # Total number of training steps to run
+save_every_n_steps: 200 # Save a checkpoint every N steps. Using 200 to avoid ckpt.
+gradient_accumulation_steps: 1
+dataset_metrics_log_freq: 5 # Log dataset-specific metrics every N steps
 
 # Environment
 device: cuda
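The training section moves from epoch-based to step-based control: with infinite iterable datasets there is no natural epoch boundary, so the loop simply counts optimizer steps. Note that `save_every_n_steps: 200` exceeds `num_training_steps: 100`, so no mid-run checkpoint fires in this quick-test config. A rough sketch of the loop these keys imply, using toy stand-ins for the model and data (only the loop structure mirrors the config):

```python
# Rough sketch of a step-based loop; model and data are toy stand-ins.
import torch

model = torch.nn.Linear(8, 8)
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4)

num_training_steps = 100
gradient_accumulation_steps = 1
save_every_n_steps = 200  # > num_training_steps, so it never fires here


def data_stream():  # stand-in for the iterable-dataset dataloader
    while True:
        yield torch.randn(16, 8)  # batch_size 16, as in the config


data_iter = data_stream()
for step in range(1, num_training_steps + 1):
    for _ in range(gradient_accumulation_steps):
        batch = next(data_iter)
        loss = model(batch).pow(2).mean() / gradient_accumulation_steps
        loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    if step % save_every_n_steps == 0:  # step-based, not epoch-based
        torch.save(model.state_dict(), f"ckpt_step_{step}.pt")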
@@ -83,7 +93,7 @@ optimizer_in_bwd: False # True saves memory. Requires gradient_accumulation_ste
 
 # Logging
 metric_logger:
-  _component_: torchtune.training.metric_logging.DiskLogger
+  _component_: torchtune.training.metric_logging.WandBLogger
   log_dir: ${output_dir}/logs
 log_every_n_steps: 1
 log_peak_memory_stats: True