1 change: 1 addition & 0 deletions .python-version
@@ -0,0 +1 @@
+3.10
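For reviewers new to uv: the `.python-version` file is how uv pins the project interpreter, replacing the manual `conda create -n rllm python=3.10` step from the old README. A minimal check, assuming a recent uv is on PATH:

```bash
# uv reads .python-version when it creates the project venv,
# so the pinned 3.10 interpreter is picked up automatically.
uv python install 3.10     # fetch the interpreter if it is missing
uv run python --version    # should report Python 3.10.x
```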
25 changes: 16 additions & 9 deletions README.md
@@ -54,26 +54,33 @@ rLLM is an open-source project to fully democratize reinforcement learning (RL)

 ## Getting Started 🎯
 ### Installation
-```bash
-# Installing Python 3.10 Environment.
-conda create -n rllm python=3.10 -y
-conda activate rllm
-
-# Installing RLLM dependencies.
+First, make sure [uv](https://docs.astral.sh/uv/getting-started/installation/) is installed.
+
+Then, clone the repository and install dependencies with `uv`:
+
+```bash
 git clone [email protected]:agentica-project/rllm.git
 cd rllm
-pip install -e ./verl
-pip install -e .
+uv sync --extra build
+uv sync --extra build --extra compile
 ```
 
+Next, log into wandb:
+
+```bash
+uv run wandb login
+```
+
 ### Data
 Our raw training data is in `rllm/data/[train|test]/[code|math]/`, along with preprocessing scripts in `rllm/data/preprocess`. To convert the raw data into Parquet files for training, run:
 
 ```bash
 # Download datasets from GDrive, populates rllm/data/[train|test]/[math|code]/*.json
-python scripts/data/download_datasets.py
+uv run scripts/data/download_datasets.py
 
 # Generate parquet files for Deepcoder/DeepscaleR in data/*.parquet
-python scripts/data/[deepcoder|deepscaler]_dataset.py
+uv run scripts/data/[deepcoder|deepscaler]_dataset.py
 ```
 
 ### Training Scripts
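Worth noting for the README changes above: every `uv run <cmd>` replaces a bare `python <cmd>` and executes inside the project's virtual environment, so the old `conda activate rllm` step disappears. A sketch of the equivalence (the `.venv` location is uv's default, assumed here):

```bash
# These two should behave the same once dependencies are synced:
uv run scripts/data/download_datasets.py

# ...is the uv-managed analogue of activating the venv by hand:
source .venv/bin/activate
python scripts/data/download_datasets.py
```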
57 changes: 57 additions & 0 deletions pyproject.toml
@@ -0,0 +1,57 @@
+[project]
+name = "rllm"
+version = "0.0.0"
+description = "Distributed Post-Training RL Library for LLMs"
+authors = [{ name = "Agentica Team", email = "[email protected]" }]
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    'torch==2.4.0',
+    'torchaudio',
+    'torchvision',
+    'hydra-core==1.3.2',
+    'deepspeed',
+    'dm-tree',
+    'gdown',
+    'google-cloud-aiplatform',
+    'latex2sympy2',
+    'pybind11',
+    'pylatexenc',
+    'pytest',
+    'sentence_transformers',
+    'sortedcontainers',
+    'tabulate',
+    'torchmetrics',
+    'gym',
+    'selenium',
+    'PyMuPDF',
+    'nltk',
+    'browsergym',
+    'gradio',
+    'fire',
+    'vertexai',
+    'e2b_code_interpreter',
+    'firecrawl',
+    'wandb',
+    'verl',
+    'datasets==3.5.0'
+]
+
+[project.optional-dependencies]
+build = ["torch", "setuptools", "packaging"]
+compile = ["flash-attn"]
+
+[tool.uv]
+no-build-isolation-package = ["flash-attn"]
+
+[tool.uv.sources]
+verl = { path = "verl", editable = true }
+
+[[tool.uv.dependency-metadata]]
+name = "flash-attn"
+version = "2.7.4.post1"
+requires-dist = ["torch", "einops"]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
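The `flash-attn` plumbing is the subtle part of this file: `no-build-isolation-package` makes uv build flash-attn inside the project environment (its build imports torch, which an isolated build would not see), and the `[[tool.uv.dependency-metadata]]` entry declares flash-attn's dependencies up front so resolution does not require building the package first. That is presumably why the README installs in two passes:

```bash
# Pass 1: install torch, setuptools, and packaging (the "build" extra)
# so they are importable when flash-attn compiles.
uv sync --extra build

# Pass 2: add the "compile" extra; flash-attn now builds without
# isolation against the torch installed in pass 1.
uv sync --extra build --extra compile
```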
2 changes: 1 addition & 1 deletion scripts/deepscaler/train/deepscaler_1.5b_16k.sh
@@ -24,7 +24,7 @@ if [ -z "$MODEL_PATH" ]; then
 fi
 
 # Train over 4 nodes, 8 A100-80GB GPUs per node.
-python3 -m verl.trainer.main_ppo \
+uv run -m verl.trainer.main_ppo \
 algorithm.adv_estimator=grpo \
 data.train_files=$HOME/rllm/data/deepscaler_train.parquet \
 data.val_files=$HOME/rllm/data/aime.parquet \
2 changes: 1 addition & 1 deletion scripts/deepscaler/train/deepscaler_1.5b_24k.sh
@@ -24,7 +24,7 @@ if [ -z "$MODEL_PATH" ]; then
 fi
 
 # Train over 4 nodes, 8 A100-80GB GPUs per node.
-python3 -m verl.trainer.main_ppo \
+uv run -m verl.trainer.main_ppo \
 algorithm.adv_estimator=grpo \
 data.train_files=$HOME/rllm/data/deepscaler_train.parquet \
 data.val_files=$HOME/rllm/data/aime.parquet \
2 changes: 1 addition & 1 deletion scripts/deepscaler/train/deepscaler_1.5b_8k.sh
@@ -24,7 +24,7 @@ if [ -z "$MODEL_PATH" ]; then
 fi
 
 # Train over a single node, 8 A100-80GB GPUs.
-python3 -m verl.trainer.main_ppo \
+uv run -m verl.trainer.main_ppo \
 algorithm.adv_estimator=grpo \
 data.train_files=$HOME/rllm/data/deepscaler_train.parquet \
 data.val_files=$HOME/rllm/data/aime.parquet \
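The script changes above are mechanical: `uv run -m` is the drop-in analogue of `python3 -m`, and `verl` now resolves through the editable path source in `pyproject.toml` rather than the old `pip install -e ./verl`. A quick sanity check that the editable install is wired up (a sketch, not part of this PR):

```bash
uv run python -c "import verl; print(verl.__file__)"
# should print a path under the repo's verl/ checkout
```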