diff --git a/.github/actions/action.yml b/.github/actions/action.yml index b9a02e1e3f..b15e49666e 100644 --- a/.github/actions/action.yml +++ b/.github/actions/action.yml @@ -15,6 +15,9 @@ name: "Test Template" description: "Template for running NeMo tests in a containerized environment" inputs: + container-image: + description: "Container image to use for test" + required: true timeout: description: "Max runtime of test in minutes" required: false @@ -46,83 +49,44 @@ inputs: runs: using: "composite" steps: - - name: Copy data - shell: bash - if: inputs.is_unit_test == 'false' - env: - SOURCE_DIR: /mnt/datadrive/TestData/megatron-lm/artifacts - TARGET_DIR: /home/runner/_work/TestData/megatron-lm/artifacts - MODEL: ${{ inputs.model }} - run: | - mkdir -p $TARGET_DIR/text/data/ - - if [[ "$MODEL" == "bert" ]]; then - mkdir -p $TARGET_DIR/text/the_pile/bert_shard00/ - cp -a $SOURCE_DIR/text/the_pile/bert_shard00/. $TARGET_DIR/text/data/ - elif [[ "$MODEL" == "gpt" ]] || [[ "$MODEL" == "moe" ]]; then - cp -a $SOURCE_DIR/text/the_pile/shard00/. 
$TARGET_DIR/text/data/ - fi - - - name: Install curl, sudo - shell: bash - run: | - sudo apt-get update - sudo apt-get install -y curl uuid-runtime - - name: Checkout repository uses: actions/checkout@v2 - with: - path: ${{ github.workspace }}/Megatron-LM - - - name: Cache uv - uses: actions/cache@v4 - id: cache - with: - path: cache-mount - key: ${{ runner.os }}-uv-${{ hashFiles('**/uv.lock') }} - restore-keys: | - ${{ runner.os }}-uv- - - name: Restore Docker cache mounts - uses: reproducible-containers/buildkit-cache-dance@5b81f4d29dc8397a7d341dba3aeecc7ec54d6361 - with: - cache-dir: cache-mount - dockerfile: docker/Dockerfile.ci.dev - skip-extraction: ${{ steps.cache.outputs.cache-hit }} + - name: Change ownership of /home/runner/ + shell: bash + run: sudo chown -R $(whoami) /home/runner/ - name: Setup python uses: actions/setup-python@v5 with: python-version: 3.12 - - name: Download test data - shell: bash - env: - GH_TOKEN: ${{ inputs.PAT }} - TIMEOUT: ${{ inputs.timeout }} - IS_UNIT_TEST: ${{ inputs.is_unit_test == 'true' }} + - name: Install uuidgen + shell: bash -x -e -u -o pipefail {0} run: | - echo "::group::Download test data" - pip install --no-cache-dir pygithub click - python tests/test_utils/python_scripts/download_unit_tests_dataset.py --assets-dir ./assets - echo "::endgroup::" + apt-get update + apt-get install -y uuid-runtime - name: Create run-script (unit test) - shell: bash + shell: bash -x -e -u -o pipefail {0} if: inputs.is_unit_test == 'true' run: | echo "::group::Create run-script" cmd=$(cat <<'RUN_TEST_EOF' #!/bin/bash - docker exec -t test_container_${{ github.run_id }} bash -c ' - set -e - bash /opt/megatron-lm/tests/unit_tests/run_ci_test.sh \ - --tag ${{ inputs.tag }} \ - --environment dev \ - --bucket '\''${{ inputs.test_case }}'\'' \ - --log-dir /opt/megatron-lm/outputs/logs - ' + export PYTHONPATH=$(pwd) + export NEMORUN_HOME=$(pwd) + pip install --no-cache-dir uv + uv sync --only-group test + uv run python 
tests/test_utils/python_scripts/launch_nemo_run_workload.py \ + --scope unit-tests \ + --model unit-tests \ + --test-case '${{ inputs.test_case }}' \ + --environment dev \ + --platform dgx_h100 \ + --tag ${{ inputs.tag }} \ + --container-image ${{ inputs.container-image }} RUN_TEST_EOF ) @@ -130,7 +94,7 @@ runs: echo "::endgroup::" - name: Create run-script (e2e test) - shell: bash + shell: bash -x -e -u -o pipefail {0} if: inputs.is_unit_test == 'false' env: MODEL: ${{ inputs.model }} @@ -138,118 +102,65 @@ runs: echo "::group::Create run-script" cmd=$(cat <<'RUN_TEST_EOF' #!/bin/bash - - - - docker exec -t test_container_${{ github.run_id }} bash -c ' - - set -e - ls -al /workspace/data - - if [[ "${{ inputs.model }}" == "bert" ]]; then - TRAINING_SCRIPT_PATH=pretrain_bert.py - elif [[ "${{ inputs.model }}" == "gpt" ]] || [[ "${{ inputs.model }}" == "moe" ]]; then - TRAINING_SCRIPT_PATH=pretrain_gpt.py - fi - - ARGUMENTS=( - "DATA_PATH=/workspace/data" - "DATA_CACHE_PATH=/workspace/data/cache" - "OUTPUT_PATH=$(pwd)/outputs/" - "TENSORBOARD_PATH=$(pwd)/tensorboard" - "CHECKPOINT_SAVE_PATH=$(pwd)/checkpoints" - "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/$NAME" - "TRAINING_SCRIPT_PATH=$TRAINING_SCRIPT_PATH" - "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/${{inputs.model}}/${{inputs.test_case}}/model_config.yaml" - "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/${{inputs.model}}/${{inputs.test_case}}/golden_values_dev_dgx_h100.json" - "N_REPEAT=5" - "ENABLE_LIGHTWEIGHT_MODE=false" - "RECORD_CHECKPOINTS=false" - ) - - bash ./tests/functional_tests/shell_test_utils/run_ci_test.sh ${ARGUMENTS[@]} - ' + set -euxo pipefail + + export PYTHONPATH=$(pwd) + export NEMORUN_HOME=$(pwd) + pip install --no-cache-dir uv + uv sync --only-group test + uv run python tests/test_utils/python_scripts/launch_nemo_run_workload.py \ + --scope mr \ + --model ${{ inputs.model }} \ + --test-case ${{ inputs.test_case }} \ + --environment dev \ + --platform dgx_h100 \ + 
--container-image ${{ inputs.container-image }} \ + --data-dir /mnt/datadrive/TestData/megatron-lm/artifacts RUN_TEST_EOF ) echo "$cmd" | tee "job.sh" echo "::endgroup::" - - name: Build container - shell: bash - env: - GH_TOKEN: ${{ inputs.PAT }} - run: | - echo "::group::Build test container" - docker build -f docker/Dockerfile.ci.dev --build-arg FROM_IMAGE_NAME="nvcr.io/nvidia/pytorch:25.06-py3" --target=main -t megatron-core . - echo "::endgroup::" - - - name: Start container - shell: bash - run: | - echo "::group::Start test container" - set -x - - cmd=$(cat < functional-tests.json + + echo "functional-tests=$(cat functional-tests.json)" | tee -a "$GITHUB_OUTPUT" cicd-functional-tests-latest: strategy: fail-fast: false matrix: - include: - - model: "gpt" - test_case: "gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G" - - model: "gpt" - test_case: "gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G" - - model: "moe" - test_case: "gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer" - - model: "moe" - test_case: "gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed" + include: ${{ fromJson(needs.cicd-parse-functional-tests.outputs.functional-tests) }} needs: - pre-flight - cicd-wait-in-queue - - cicd-unit-tests-latest + - cicd-parse-functional-tests + # - cicd-unit-tests-latest runs-on: nvidia-ci-aws-gpu-x8 name: "${{ matrix.model }}/${{ matrix.test_case }} - latest" environment: nemo-ci + env: + PIP_DISABLE_PIP_VERSION_CHECK: 1 + PIP_NO_PYTHON_VERSION_WARNING: 1 + PIP_ROOT_USER_ACTION: ignore if: | ( success() || needs.pre-flight.outputs.is_ci_workload == 'true' || needs.pre-flight.outputs.force_run_all == 'true' ) - && !needs.pre-flight.outputs.is_merge_group == 'true' + && needs.pre-flight.outputs.is_merge_group == 'false' && !cancelled() steps: - name: Checkout @@ -163,6 +271,7 @@ jobs: timeout: ${{ matrix.timeout || 30 }} is_unit_test: 
"false" PAT: ${{ secrets.PAT }} + container-image: ${{ env.container-registry }}/megatron-lm:1864 # ${{ github.sha }} Nemo_CICD_Test: needs: @@ -243,7 +352,7 @@ jobs: && !cancelled() strategy: matrix: - flag: [unit-test, e2e] + flag: [unit-test] steps: - name: Checkout uses: actions/checkout@v4 diff --git a/pyproject.toml b/pyproject.toml index 052f5f5c1d..2fe684b496 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -167,6 +167,7 @@ flash_mla = [ { git = "https://github.com/deepseek-ai/FlashMLA", rev = "9edee0c022cd0938148a18e334203b0aab43aa19" }, ] transformer-engine = { git = "https://github.com/NVIDIA/TransformerEngine.git", rev = "release_v2.8" } # on `release_v2.8` +nemo-run = { git = "https://github.com/NVIDIA-NeMo/Run.git", rev = "01a9a8ba360f7b2908728ad0516e0ad9d936966d" } [tool.isort] profile = "black" # black-compatible diff --git a/tests/functional_tests/python_test_utils/common.py b/tests/functional_tests/python_test_utils/common.py index ccae60cc3f..8fb3ebc511 100644 --- a/tests/functional_tests/python_test_utils/common.py +++ b/tests/functional_tests/python_test_utils/common.py @@ -19,14 +19,6 @@ logger = logging.getLogger(__name__) -def approximate_threshold(rtol: float) -> Callable: - def _func(y_pred: List[Union[float, int]], y_true: List[Union[float, int]]): - - return np.mean([np.mean(y_pred), np.mean(y_true)]) * rtol - - return _func - - class TypeOfTestResult(enum.Enum): APPROXIMATE = 1 DETERMINISTIC = 2 @@ -46,7 +38,6 @@ class NotDeterminsticError(Exception): class ApproximateTest(Test): atol: Union[int, float] = 0 - atol_func: Optional[Callable] = None rtol: float = 1e-5 @property @@ -58,16 +49,14 @@ def error_message(self, metric_name: str) -> NotApproximateError: class DeterministicTest(Test): - @property - def atol(self) -> Union[int, float]: - return 0 - - atol_func: Optional[Callable] = None - @property def rtol(self) -> float: return 0.0 + @property + def atol(self) -> Union[int, float]: + return 0 + @property def 
type_of_test_result(self) -> TypeOfTestResult: return TypeOfTestResult.DETERMINISTIC @@ -235,8 +224,7 @@ def pipeline( golden = np.array(golden_value_list) # Tolerance check - rtol = 0 if test.type_of_test_result == TypeOfTestResult.DETERMINISTIC else 0.10 - is_close = np.isclose(actual, golden, rtol=rtol, atol=0) + is_close = np.isclose(actual, golden, rtol=test.rtol, atol=test.atol) num_failing_steps_allowed = min(max(total_steps_evaluated // 100, 1), 50) passing = np.mean(is_close) >= (num_failing_steps_allowed / total_steps_evaluated) diff --git a/tests/functional_tests/python_test_utils/test_pretraining_regular_pipeline.py b/tests/functional_tests/python_test_utils/test_pretraining_regular_pipeline.py index a38016d1e5..db03d30f65 100644 --- a/tests/functional_tests/python_test_utils/test_pretraining_regular_pipeline.py +++ b/tests/functional_tests/python_test_utils/test_pretraining_regular_pipeline.py @@ -9,35 +9,14 @@ logger = logging.getLogger(__name__) CHECK_THRESHOLDS = { - "iteration-time": [ - common.ApproximateTest(atol_func=common.approximate_threshold(rtol=0.05), rtol=0) - ], - "mem-allocated-bytes": [ - common.ApproximateTest(atol_func=common.approximate_threshold(rtol=0.05), rtol=0) - ], - "mem-max-allocated-bytes": [ - common.ApproximateTest(atol_func=common.approximate_threshold(rtol=0.05), rtol=0) - ], - "lm loss": [ - common.DeterministicTest(), - common.ApproximateTest(atol_func=common.approximate_threshold(rtol=0.05), rtol=0), - ], - "mtp_1 loss": [ - common.DeterministicTest(), - common.ApproximateTest(atol_func=common.approximate_threshold(rtol=0.05), rtol=0), - ], - "num-zeros": [ - common.DeterministicTest(), - common.ApproximateTest(atol_func=common.approximate_threshold(rtol=0.20), rtol=0), - ], - "generated_tokens": [ - common.DeterministicTest(), - common.ApproximateTest(atol_func=common.approximate_threshold(rtol=0.05), rtol=0), - ], - "logprobs": [ - common.DeterministicTest(), - 
common.ApproximateTest(atol_func=common.approximate_threshold(rtol=0.05), rtol=0), - ], + "iteration-time": [common.ApproximateTest(atol=0, rtol=0.25)], + "mem-allocated-bytes": [common.ApproximateTest(atol=0, rtol=0.05)], + "mem-max-allocated-bytes": [common.ApproximateTest(atol=0, rtol=0.05)], + "lm loss": [common.DeterministicTest(), common.ApproximateTest(atol=0, rtol=0.05)], + "mtp_1 loss": [common.DeterministicTest(), common.ApproximateTest(atol=0, rtol=0.05)], + "num-zeros": [common.DeterministicTest(), common.ApproximateTest(atol=0, rtol=0.05)], + "generated_tokens": [common.DeterministicTest(), common.ApproximateTest(atol=0, rtol=0.05)], + "logprobs": [common.DeterministicTest(), common.ApproximateTest(atol=0, rtol=0.05)], } diff --git a/tests/functional_tests/shell_test_utils/_run_training.sh b/tests/functional_tests/shell_test_utils/_run_training.sh index 5179c02c3b..1d0e77a347 100644 --- a/tests/functional_tests/shell_test_utils/_run_training.sh +++ b/tests/functional_tests/shell_test_utils/_run_training.sh @@ -8,7 +8,7 @@ set -euxo pipefail -echo "------ARGUMENTS LIST --------" +set +x for ARGUMENT in "$@"; do KEY=$(echo $ARGUMENT | cut -f1 -d=) @@ -18,7 +18,7 @@ for ARGUMENT in "$@"; do export "$KEY"="$VALUE" echo "$KEY=$VALUE" done -echo "---------------------------------" +set -x # Check that mandatory vars are set MANDATORY_VARS=( @@ -39,9 +39,11 @@ for mandatory_var in "${MANDATORY_VARS[@]}"; do fi done +set +x # Envsubst model_params cat $TRAINING_PARAMS_PATH | envsubst "$(env | cut -d= -f1 | sed -e 's/^/$/')" >$TRAINING_PARAMS_PATH.tmp TRAINING_PARAMS_PATH="$TRAINING_PARAMS_PATH.tmp" +set -x # Pull env vars to export ENV_VARS=$(/usr/local/bin/yq '... 
comments="" | .ENV_VARS | to_entries | .[] | [.key + "=" + .value] | join(" ")' "$TRAINING_PARAMS_PATH") diff --git a/tests/functional_tests/shell_test_utils/run_ci_test.sh b/tests/functional_tests/shell_test_utils/run_ci_test.sh index b24423773e..5a6ea64f42 100644 --- a/tests/functional_tests/shell_test_utils/run_ci_test.sh +++ b/tests/functional_tests/shell_test_utils/run_ci_test.sh @@ -8,9 +8,7 @@ ulimit -Sn $(ulimit -Hn) # Increase soft limit for number of processes to match hard limit ulimit -Su $(ulimit -Hu) -echo "------ARGUMENTS LIST --------" -# Use eval to properly handle quoted arguments -eval "set -- $@" +set +x for ARGUMENT in "$@"; do # Split on first = only, preserving any subsequent = signs in the value KEY="${ARGUMENT%%=*}" @@ -26,7 +24,7 @@ for ARGUMENT in "$@"; do export "$KEY"="$(eval echo $VALUE)" echo "$KEY=$VALUE" done -echo "---------------------------------" +set -x # Check that mandatory vars are set MANDATORY_VARS=( @@ -48,6 +46,8 @@ for mandatory_var in "${MANDATORY_VARS[@]}"; do fi done +set -exo pipefail + # Extract settings from params file TEST_TYPE=$(cat $TRAINING_PARAMS_PATH | /usr/local/bin/yq '.TEST_TYPE') @@ -64,7 +64,7 @@ else fi mkdir -p $CHECKPOINT_SAVE_PATH -mkdir -p $CHECKPOINT_LOAD_PATH +mkdir -p $CHECKPOINT_LOAD_PATH || true _CHECKPOINT_LOAD_PATH=$CHECKPOINT_LOAD_PATH _CHECKPOINT_SAVE_PATH=$CHECKPOINT_SAVE_PATH @@ -103,6 +103,10 @@ if [[ "$MODE" == "pretraining" && "$TEST_TYPE" != "release" ]]; then TRAIN_ITERS=$(cat $TRAINING_PARAMS_PATH | /usr/local/bin/yq '.MODEL_ARGS."--exit-interval" // "100"') fi +elif [[ "$MODE" == "inference" && "$TEST_TYPE" != "release" ]]; then + if [[ "$ENABLE_LIGHTWEIGHT_MODE" == "true" && "$IS_NEMO_TEST" == "false" ]]; then + /usr/local/bin/yq -i '.ENV_VARS."SKIP_PYTEST" = 1' $TRAINING_PARAMS_PATH + fi fi if [[ "$MODE" == "pretraining" && "$TEST_TYPE" = "release" ]]; then @@ -300,7 +304,7 @@ for i in $(seq 1 $N_REPEAT); do fi # For inference jobs - if [[ "$MODE" == "inference" ]]; then + if 
[[ "$MODE" == "inference" && ("$TRAINING_EXIT_CODE" -eq 0 || "$TEST_TYPE" == "release") ]]; then if [[ "$TEST_TYPE" == "frozen-start" ]]; then uv run --no-sync pytest -s -o log_cli=true --log-cli-level=info $ROOT_DIR/tests/functional_tests/python_test_utils/test_inference_regular_pipeline.py \ --golden-values-path $GOLDEN_VALUES_PATH \ diff --git a/tests/functional_tests/shell_test_utils/start_interactive_job.sh b/tests/functional_tests/shell_test_utils/start_interactive_job.sh index d3b6055e55..0b30fc0128 100644 --- a/tests/functional_tests/shell_test_utils/start_interactive_job.sh +++ b/tests/functional_tests/shell_test_utils/start_interactive_job.sh @@ -78,56 +78,8 @@ if [ -z "$PARTITION" ] || [ -z "$SLURM_ACCOUNT" ] || [ -z "$IMAGE" ] || [ -z "$D exit 1 fi -# Check if recipes directory exists -if [ ! -d "$RECIPES_DIR" ]; then - echo "Error: Recipes directory '$RECIPES_DIR' does not exist" - exit 1 -fi - -# Create copy of recipes with interpolated artifacts -python -m tests.test_utils.python_scripts.common --recipes-dir $RECIPES_DIR --output-dir $RECIPES_DIR/interpolated - # Add current directory to container mounts -CONTAINER_MOUNTS="$(pwd):/opt/megatron-lm" - -# Process each YAML file in the recipes directory -if [ ! -f "$YAML_FILE" ]; then - continue -fi - -echo "Processing $(basename "$YAML_FILE")..." -YAML_FILE=workflows.yaml -# Extract artifacts from YAML file -while IFS=: read -r value key; do - # Skip empty or malformed entries - if [ -z "$value" ] || [ -z "$key" ] || [ "$value" = "/data/" ] || [ "$key" = "/data/" ]; then - continue - fi - - # Skip entries that don't start with a forward slash - if [[ ! 
"$key" =~ ^/ ]]; then - continue - fi - - # Create the mount string - mount="${DATASET_DIR}/${value}:${key}" - - # Skip if we've seen this mount before - if [ "${seen_mounts[$mount]}" = "1" ]; then - echo "Skipping duplicate mount: $mount" - continue - fi - - # Mark this mount as seen - seen_mounts[$mount]=1 - - if [ -z "$CONTAINER_MOUNTS" ]; then - CONTAINER_MOUNTS="$mount" - else - CONTAINER_MOUNTS="${CONTAINER_MOUNTS},$mount" - fi -done < <(yq eval '.[].spec.artifacts | to_entries | .[] | "\(.value):\(.key)"' "$YAML_FILE") -rm $YAML_FILE +CONTAINER_MOUNTS="$DATASET_DIR:/mnt/artifacts,$(pwd):/opt/megatron-lm" # Build the final srun command SRUN_CMD="srun \ diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_coreweave.json rename to 
tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/model_config.yaml similarity index 86% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/model_config.yaml rename to tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/model_config.yaml index 7ccfd215dc..60537ce877 100644 --- a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/model_config.yaml @@ -18,12 +18,12 @@ MODEL_ARGS: --seq-length: 512 --max-position-embeddings: 512 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-bert_00_text_sentence - --vocab-file: ${DATA_PATH}/vocab.txt + 
--data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence + --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.0001 diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgxh100_eos.json similarity index 100% rename from 
tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/model_config.yaml similarity index 87% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/model_config.yaml rename to tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/model_config.yaml index b4c5decf82..0e90838145 100644 --- a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/model_config.yaml @@ -18,12 +18,12 @@ MODEL_ARGS: --seq-length: 512 --max-position-embeddings: 512 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-bert_00_text_sentence - --vocab-file: ${DATA_PATH}/vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence + --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.0001 diff --git 
a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_lts_dgx_a100.json 
b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/model_config.yaml similarity index 86% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/model_config.yaml index 11909062fb..f965ee1d9e 100644 --- a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/model_config.yaml @@ -18,12 +18,12 @@ MODEL_ARGS: --seq-length: 512 --max-position-embeddings: 512 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-bert_00_text_sentence - --vocab-file: ${DATA_PATH}/vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence + --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.0001 diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to 
tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/model_config.yaml similarity index 88% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/model_config.yaml index 09864ee106..fc4c836c98 100644 --- a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/model_config.yaml @@ -18,12 
+18,12 @@ MODEL_ARGS: --seq-length: 512 --max-position-embeddings: 512 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-bert_00_text_sentence - --vocab-file: ${DATA_PATH}/vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence + --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.0001 diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to 
tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/model_config.yaml similarity index 86% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/model_config.yaml index 7eeac331ad..8974bc1ea2 100644 --- a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/model_config.yaml @@ -18,12 +18,12 @@ MODEL_ARGS: --seq-length: 512 --max-position-embeddings: 512 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} 
--load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-bert_00_text_sentence - --vocab-file: ${DATA_PATH}/vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence + --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.0001 diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json diff --git 
a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/model_config.yaml similarity index 83% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/model_config.yaml index 94d9cbfd83..4913568412 100644 --- a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/model_config.yaml @@ -18,12 +18,12 @@ MODEL_ARGS: --seq-length: 512 --max-position-embeddings: 512 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: 
${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-bert_00_text_sentence - --vocab-file: ${DATA_PATH}/vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence + --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.0001 @@ -42,6 +42,6 @@ MODEL_ARGS: --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --ckpt-format: torch - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --attention-backend: unfused TEST_TYPE: ckpt-resume diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/model_config.yaml similarity index 83% rename from tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/model_config.yaml 
rename to tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/model_config.yaml index c496f84f19..6c0dc55051 100644 --- a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/model_config.yaml @@ -18,12 +18,12 @@ MODEL_ARGS: --seq-length: 512 --max-position-embeddings: 512 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-bert_00_text_sentence - --vocab-file: ${DATA_PATH}/vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence + --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.0001 @@ -40,7 +40,7 @@ MODEL_ARGS: --use-checkpoint-args: true --use-checkpoint-opt_param-scheduler: true --no-gradient-accumulation-fusion: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --ckpt-format: torch diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgx_h100.json similarity index 100% rename from 
tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/model_config.yaml similarity index 86% rename from tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/model_config.yaml rename to 
tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/model_config.yaml index 59607ba28d..e001ea4dc0 100644 --- a/tests/functional_tests/test_cases/bert/bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/model_config.yaml @@ -18,12 +18,12 @@ MODEL_ARGS: --seq-length: 512 --max-position-embeddings: 512 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-bert_00_text_sentence - --vocab-file: ${DATA_PATH}/vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence + --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.0001 diff --git a/tests/functional_tests/test_cases/bert/bert_release/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_release/model_config.yaml index c4b80767c6..68cbb23099 100644 --- a/tests/functional_tests/test_cases/bert/bert_release/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_release/model_config.yaml @@ -27,7 +27,7 @@ MODEL_ARGS: --pipeline-model-parallel-size: 8 # Data args --data-path: ${DATA_BLEND} - --vocab-file: ${DATA_PATH}/vocab.txt + --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt --split: 949,50,1 --data-cache-path: ${DATA_CACHE_PATH} # EVAL_AND_LOGGING_ARGS diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/model_config.yaml similarity index 98% rename from tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/model_config.yaml index 48cf5e1cfa..c2a26a070f 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/model_config.yaml @@ -58,6 +58,7 @@ MODEL_ARGS: --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true + --exit-interval: 25 TEST_TYPE: regular METRICS: - "iteration-time" diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgxh100_coreweave.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/model_config.yaml similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/model_config.yaml diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_disable/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_disable/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_disable/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_disable/model_config.yaml index 9b641b68d7..2026f11ade 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_disable/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_disable/model_config.yaml @@ -63,12 +63,12 @@ MODEL_ARGS: --load: ${CHECKPOINT_LOAD_PATH} # data settings --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt # logging settings --tensorboard-dir: ${TENSORBOARD_PATH} - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --log-params-norm: true --log-num-zeros-in-grad: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_enable/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_enable/model_config.yaml similarity index 90% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_enable/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_enable/model_config.yaml index d18a37d782..41cb656142 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_enable/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_enable/model_config.yaml @@ -62,12 +62,12 @@ MODEL_ARGS: --load: ${CHECKPOINT_LOAD_PATH} # data settings --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt # logging settings --tensorboard-dir: ${TENSORBOARD_PATH} - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --log-params-norm: true --log-num-zeros-in-grad: true diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_persistent_1/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_persistent_1/model_config.yaml similarity index 92% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_persistent_1/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_persistent_1/model_config.yaml index 3258e398b1..9cd921e983 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_persistent_1/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_persistent_1/model_config.yaml @@ -63,12 +63,12 @@ MODEL_ARGS: --load: ${CHECKPOINT_LOAD_PATH} # data settings --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt # logging settings --tensorboard-dir: ${TENSORBOARD_PATH} - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --log-params-norm: true --log-num-zeros-in-grad: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_persistent_2/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_persistent_2/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_persistent_2/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_persistent_2/model_config.yaml index 5fd21f6175..f902393d04 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_persistent_2/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_persistent_2/model_config.yaml @@ -62,12 +62,12 @@ MODEL_ARGS: --load: ${CHECKPOINT_LOAD_PATH} # data settings 
--data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt # logging settings --tensorboard-dir: ${TENSORBOARD_PATH} - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --log-params-norm: true --log-num-zeros-in-grad: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_reshard/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_reshard/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_reshard/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_reshard/model_config.yaml index 65bdc72348..2e82cad10a 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_reshard/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_reshard/model_config.yaml @@ -63,12 +63,12 @@ MODEL_ARGS: --load: ${CHECKPOINT_LOAD_PATH} # data settings --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt # logging settings --tensorboard-dir: ${TENSORBOARD_PATH} - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --log-params-norm: true --log-num-zeros-in-grad: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_resume/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume/model_config.yaml 
similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_resume/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume/model_config.yaml index fd313d7a95..0abd4db698 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_resume/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume/model_config.yaml @@ -62,12 +62,12 @@ MODEL_ARGS: --load: ${CHECKPOINT_LOAD_PATH} # data settings --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt # logging settings --tensorboard-dir: ${TENSORBOARD_PATH} - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --log-params-norm: true --log-num-zeros-in-grad: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_resume_check_grads/README.md b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume_check_grads/README.md similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_resume_check_grads/README.md rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume_check_grads/README.md diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_resume_check_grads/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume_check_grads/model_config.yaml similarity index 93% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_resume_check_grads/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume_check_grads/model_config.yaml index 476d0e08cf..582c9523f7 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_resume_check_grads/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume_check_grads/model_config.yaml @@ -59,7 +59,6 @@ BASE_MODEL_ARGS: &BASE_MODEL_ARGS --num-query-groups: 8 --seq-length: 512 --kv-channels: 128 - --ffn-hidden-size: 8192 --group-query-attention: true --normalization: RMSNorm --swiglu: true @@ -87,15 +86,15 @@ BASE_MODEL_ARGS: &BASE_MODEL_ARGS --ckpt-format: torch_dist --dist-ckpt-strictness: log_all # backward compatibility for TE changes --save: ${CHECKPOINT_SAVE_PATH} - --load: ${CHECKPOINT_LOAD_PATH} + --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_gpt/gpt3_4b_pyt/25.03.05_bf16_rerun-enabled_v2 # data settings --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt # logging settings --tensorboard-dir: ${TENSORBOARD_PATH} - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --log-params-norm: true --log-num-zeros-in-grad: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_transient/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_transient/model_config.yaml similarity index 91% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_transient/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_transient/model_config.yaml index 48d188d81c..59a57e2212 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_reruns_transient/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_transient/model_config.yaml @@ -64,12 +64,12 @@ MODEL_ARGS: --exit-interval: 4 # data settings --data-cache-path: 
${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt # logging settings --tensorboard-dir: ${TENSORBOARD_PATH} - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --log-params-norm: true --log-num-zeros-in-grad: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename 
from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/model_config.yaml index fd43e99211..2d5e340fa6 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - 
--data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -46,7 +46,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json diff 
--git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/model_config.yaml similarity index 86% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/model_config.yaml index 1e11b3ff94..c7b46ff9b8 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json diff 
--git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/model_config.yaml index efe469636e..8250611596 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -48,7 +48,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: 
true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files/model_config.yaml similarity index 82% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files/model_config.yaml index a0785630f3..4a5bf3d8fc 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -48,7 +48,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgxh100_eos.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/model_config.yaml index ff347789ff..bb0708b11e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -49,7 +49,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward 
compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_uniform_full_recompute/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_uniform_full_recompute/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_uniform_full_recompute/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_uniform_full_recompute/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_uniform_full_recompute/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_uniform_full_recompute/model_config.yaml index e09ac1ce49..a5dbe2157e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_uniform_full_recompute/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -48,7 +48,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic/golden_values_lts_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic/model_config.yaml index af2f93042e..4aeea406fb 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -48,7 +48,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: 
${DATA_CACHE_PATH} --bf16: true --attention-backend: flash diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic/model_config.yaml similarity index 86% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic/model_config.yaml index 3f6379f90f..6d6bf2b5b9 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/model_config.yaml index c49288bf93..5e4131a43c 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -47,7 +47,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgxh100_coreweave.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/model_config.yaml index ef2d6010e6..c75d099790 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 
--lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -49,7 +49,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings/model_config.yaml index 4f3560b8c3..ffabf9583f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -47,7 +47,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_a100.json 
similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion/model_config.yaml index cb4e11e3d3..b391387f9f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} 
--load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -48,7 +48,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_disable_bias_linear/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_disable_bias_linear/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_disable_bias_linear/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_disable_bias_linear/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_disable_bias_linear/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_disable_bias_linear/model_config.yaml index 388afdaed4..5415e3de96 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_disable_bias_linear/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -46,7 +46,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/model_config.yaml similarity index 86% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/model_config.yaml index 4defebeac3..8d372f5539 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear/model_config.yaml similarity index 86% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear/model_config.yaml index 47ec5c2bdd..d91e9be4f5 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgxh100_coreweave.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/model_config.yaml index 89ff19ad1e..7d069ce9ec 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -47,7 +47,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgxh100_eos.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/model_config.yaml similarity index 86% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/model_config.yaml index 58554cc112..ea882318c7 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel/model_config.yaml index a63a24f6aa..d67dd6af76 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - 
--timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -47,7 +47,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/model_config.yaml index 7281f21ce9..1e25f4bd4e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -48,7 +48,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgxh100_eos.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_lts_dgxa100_dracooci.json similarity index 
100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/model_config.yaml index b6527f0f7c..2d73490808 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -46,7 +46,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true 
--ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_sequence_parallel/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_sequence_parallel/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_sequence_parallel/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_sequence_parallel/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_sequence_parallel/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_sequence_parallel/model_config.yaml index f7822d5c5d..319164782f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_sequence_parallel/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -47,7 +47,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_swiglu/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_swiglu/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_swiglu/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_swiglu/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_swiglu/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_swiglu/model_config.yaml index d4fb79b2be..a3a1a45873 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_swiglu/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -48,7 +48,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs/model_config.yaml index ac8332843f..ea8f4bb71d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: 
${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -45,7 +45,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_lts_dgxa100_dracooci-ord.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/model_config.yaml similarity index 86% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/model_config.yaml index 3dece98a52..ea869246a7 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss/model_config.yaml index 2a13801a9d..767283cf2a 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: 
${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -47,7 +47,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr/model_config.yaml similarity index 85% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr/model_config.yaml index 4a235266b1..46ff13cb9a 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce/model_config.yaml index fbb85c1a7d..5a1b1ce289 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + 
--merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -47,7 +47,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml similarity index 84% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml index b0fd77bb76..31ffc9c811 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -52,7 +52,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/model_config.yaml similarity index 87% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/model_config.yaml rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/model_config.yaml index 170c1397ba..0bd25e7973 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_lts_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied/model_config.yaml similarity index 83% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied/model_config.yaml index 9473172d43..778e7d361b 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 
--distributed-backend: nccl --lr: 0.00015 @@ -49,7 +49,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/model_config.yaml similarity index 86% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/model_config.yaml index e64e70ae04..d502c3e1fe 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: 
${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/model_config.yaml similarity index 87% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/model_config.yaml index efbe0f3d7c..edc9bc1ff2 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/model_config.yaml index e28ce4aea7..1b9c96b3f7 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -48,7 +48,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - 
--dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/model_config.yaml index 835e017ccc..fed75814df 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -49,7 +49,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml similarity index 88% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml index f9b7400006..af06fe0690 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_dev_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/model_config.yaml similarity index 88% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/model_config.yaml index 13a92a6133..035549f8fb 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/model_config.yaml similarity index 87% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/model_config.yaml index 89d3d84146..ef758e5639 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgxh100_coreweave.json new file mode 100644 index 0000000000..0c1ce6fced --- /dev/null +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgxh100_coreweave.json @@ -0,0 +1,287 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 10.83936, + "2": 10.8442, + "3": 10.86813, + "4": 10.86022, + "5": 10.87939, + "6": 10.85969, + "7": 10.86386, + "8": 10.8444, + "9": 10.88995, + "10": 10.8926, + "11": 10.89136, + "12": 10.85312, + "13": 10.87319, + "14": 10.83805, + "15": 10.83088, + "16": 10.82011, + "17": 10.79138, + "18": 10.81055, + "19": 10.77977, + "20": 10.6635, + "21": 10.69765, + "22": 10.67421, + "23": 10.77344, + "24": 10.63919, + "25": 10.50497, + "26": 10.61911, + "27": 10.56921, + "28": 10.46859, + "29": 10.41119, + "30": 10.42916, + "31": 10.52553, + "32": 10.34942, + "33": 10.2967, + "34": 10.46909, + "35": 9.99632, + "36": 10.13945, + "37": 10.0434, 
+ "38": 10.4139, + "39": 9.80941, + "40": 10.12495, + "41": 10.14883, + "42": 10.04042, + "43": 10.22142, + "44": 10.07348, + "45": 9.71369, + "46": 10.00449, + "47": 9.94758, + "48": 9.68856, + "49": 9.93637, + "50": 9.96042 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 1026.0, + "2": 1184.0, + "3": 1226.0, + "4": 1248.0, + "5": 1259.0, + "6": 1421.0, + "7": 1182.0, + "8": 1036.0, + "9": 1293.0, + "10": 1319.0, + "11": 1212.0, + "12": 1373.0, + "13": 1327.0, + "14": 1121.0, + "15": 1217.0, + "16": 1163.0, + "17": 1246.0, + "18": 1280.0, + "19": 1128.0, + "20": 1019.0, + "21": 1147.0, + "22": 1156.0, + "23": 1341.0, + "24": 1312.0, + "25": 1066.0, + "26": 1138.0, + "27": 1270.0, + "28": 1260.0, + "29": 1292.0, + "30": 1532.0, + "31": 1477.0, + "32": 1460.0, + "33": 1537.0, + "34": 1513.0, + "35": 1235.0, + "36": 1316.0, + "37": 1466.0, + "38": 1564.0, + "39": 1380.0, + "40": 1513.0, + "41": 1633.0, + "42": 1509.0, + "43": 1731.0, + "44": 1636.0, + "45": 1501.0, + "46": 1884.0, + "47": 1567.0, + "48": 1631.0, + "49": 1825.0, + "50": 1639.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 759682560.0, + "2": 759682560.0, + "3": 759682560.0, + "4": 759682560.0, + "5": 759682560.0, + "6": 759682560.0, + "7": 759682560.0, + "8": 759682560.0, + "9": 759682560.0, + "10": 759682560.0, + "11": 759682560.0, + "12": 759682560.0, + "13": 759682560.0, + "14": 759682560.0, + "15": 759682560.0, + "16": 759682560.0, + "17": 759682560.0, + "18": 759682560.0, + "19": 759682560.0, + "20": 759682560.0, + "21": 759682560.0, + "22": 759682560.0, + "23": 759682560.0, + "24": 759682560.0, + "25": 759682560.0, + "26": 759682560.0, + "27": 759682560.0, + "28": 759682560.0, + "29": 759682560.0, + "30": 759682560.0, + "31": 759682560.0, + "32": 759682560.0, + "33": 759682560.0, + "34": 759682560.0, + "35": 759682560.0, + "36": 759682560.0, + "37": 759682560.0, + "38": 
759682560.0, + "39": 759682560.0, + "40": 759682560.0, + "41": 759682560.0, + "42": 759682560.0, + "43": 759682560.0, + "44": 759682560.0, + "45": 759682560.0, + "46": 759682560.0, + "47": 759682560.0, + "48": 759682560.0, + "49": 759682560.0, + "50": 759682560.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 4340903936.0, + "2": 4622615552.0, + "3": 4622615552.0, + "4": 4622615552.0, + "5": 4622615552.0, + "6": 4622615552.0, + "7": 4622615552.0, + "8": 4622615552.0, + "9": 4622615552.0, + "10": 4622615552.0, + "11": 4622615552.0, + "12": 4622615552.0, + "13": 4622615552.0, + "14": 4622615552.0, + "15": 4622615552.0, + "16": 4622615552.0, + "17": 4622615552.0, + "18": 4622615552.0, + "19": 4622615552.0, + "20": 4622615552.0, + "21": 4622615552.0, + "22": 4622615552.0, + "23": 4622615552.0, + "24": 4622615552.0, + "25": 4622615552.0, + "26": 4622615552.0, + "27": 4622615552.0, + "28": 4622615552.0, + "29": 4622615552.0, + "30": 4622615552.0, + "31": 4622615552.0, + "32": 4622615552.0, + "33": 4622615552.0, + "34": 4622615552.0, + "35": 4622615552.0, + "36": 4622615552.0, + "37": 4622615552.0, + "38": 4622615552.0, + "39": 4622615552.0, + "40": 4622615552.0, + "41": 4622615552.0, + "42": 4622615552.0, + "43": 4622615552.0, + "44": 4622615552.0, + "45": 4622615552.0, + "46": 4622615552.0, + "47": 4622615552.0, + "48": 4622615552.0, + "49": 4622615552.0, + "50": 4622615552.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 50, + "step_interval": 1, + "values": { + "1": 14.98171, + "2": 0.13344, + "3": 0.10755, + "4": 0.10562, + "5": 0.10638, + "6": 0.10549, + "7": 0.10612, + "8": 0.10814, + "9": 0.10654, + "10": 0.10633, + "11": 0.10725, + "12": 0.10667, + "13": 0.10769, + "14": 0.10593, + "15": 0.10694, + "16": 0.10715, + "17": 0.1064, + "18": 0.10706, + "19": 0.10964, + "20": 0.1054, + "21": 0.10752, + "22": 0.10979, + "23": 0.10834, + "24": 0.10667, + "25": 0.10762, + "26": 
0.10605, + "27": 0.10756, + "28": 0.1059, + "29": 0.10662, + "30": 0.10738, + "31": 0.1065, + "32": 0.1074, + "33": 0.10712, + "34": 0.10631, + "35": 0.10672, + "36": 0.10785, + "37": 0.10664, + "38": 0.1064, + "39": 0.10666, + "40": 0.10518, + "41": 0.10655, + "42": 0.10605, + "43": 0.10563, + "44": 0.1064, + "45": 0.10629, + "46": 0.10691, + "47": 0.10711, + "48": 0.10618, + "49": 0.10991, + "50": 0.10529 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json 
rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/model_config.yaml similarity index 87% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/model_config.yaml index 4fba5fca3a..0654517964 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/model_config.yaml index 9e8d9b8746..8710e92a13 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -45,7 +45,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --decoder-first-pipeline-num-layers: 2 diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgxh100_coreweave.json 
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/model_config.yaml similarity index 83% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/model_config.yaml index dd5d83e060..dea9b4aad9 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} 
- --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -47,7 +47,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --account-for-embedding-in-pipeline-split: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_cp2_nondeterministic/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_cp2_nondeterministic/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_cp2_nondeterministic/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_cp2_nondeterministic/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_cp2_nondeterministic/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_cp2_nondeterministic/model_config.yaml index 325268c5a9..5394f9d007 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_cp2_nondeterministic/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -46,7 +46,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: flash diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/model_config.yaml similarity index 86% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/model_config.yaml index 85ff6feb92..4bd321b43d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_fsdp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_fsdp2_resume_torch_dist/model_config.yaml similarity index 86% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_fsdp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_fsdp2_resume_torch_dist/model_config.yaml index 0ef2b56600..1229288b9b 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_fsdp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_fsdp2_resume_torch_dist/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_modelopt_distill_resume/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_modelopt_distill_resume/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_modelopt_distill_resume/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_modelopt_distill_resume/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_modelopt_distill_resume/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_modelopt_distill_resume/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_modelopt_distill_resume/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_modelopt_distill_resume/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_modelopt_distill_resume/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_modelopt_distill_resume/model_config.yaml similarity index 84% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_modelopt_distill_resume/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_modelopt_distill_resume/model_config.yaml index b267aa17fd..556fcfbcf1 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_modelopt_distill_resume/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_modelopt_distill_resume/model_config.yaml @@ -10,7 +10,7 @@ BEFORE_SCRIPT: | mkdir -p ${DATA_CACHE_PATH}/distill && echo $DISTILL_CONFIG | yq -P > ${DATA_CACHE_PATH}/distill/distill_config.yaml MODEL_ARGS: --export-te-mcore-model: true - --export-kd-teacher-load: ${ARTIFACTS_ROOT}/gpt_teacher + --export-kd-teacher-load: ${DATA_PATH}/model/gpt_dummy_pyt/ckpt/24.10.0_bf16_teacher --export-kd-cfg: ${DATA_CACHE_PATH}/distill/distill_config.yaml --auto-detect-ckpt-format: true --num-layers: 12 @@ -33,13 +33,13 @@ MODEL_ARGS: --untie-embeddings-and-output-weights: true --disable-bias-linear: true --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -62,7 +62,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility 
for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances/model_config.yaml index 5f76e8f8b1..3175a07cc8 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -49,7 +49,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/model_config.yaml index c03a621f91..3f427a04f9 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -47,7 +47,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: flash diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgxh100_coreweave.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/model_config.yaml similarity index 83% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/model_config.yaml index d853b772bb..d3446e92c2 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} 
- --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -50,7 +50,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2/model_config.yaml similarity index 81% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2/model_config.yaml index e97bc5217c..05b166f0a7 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -45,7 +45,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/model_config.yaml index c28625ec1f..70155c2ff8 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -45,7 +45,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: flash diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/model_config.yaml similarity index 85% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/model_config.yaml index 8af4e99634..92f4bfb1cd 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_lts_dgxa100_dracooci.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/model_config.yaml similarity index 85% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/model_config.yaml index a168bf941f..b4d6376260 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgxh100_eos.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/model_config.yaml similarity index 86% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/model_config.yaml index 3a1f90a927..880d7fc7ce 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/model_config.yaml similarity index 86% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/model_config.yaml index d2e2e266ff..013569c588 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/model_config.yaml index 683a855ab8..6aad7304c1 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: 
${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -46,7 +46,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: flash diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgx_h100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/model_config.yaml index f35f4f3d99..8866fa6717 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 
--max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -45,7 +45,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: flash diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/model_config.yaml index 5a5d023dbf..f4649e2d30 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: 
${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -46,7 +46,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: flash diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgx_h100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/model_config.yaml similarity index 79% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/model_config.yaml index 98fca77b1b..a77cd63780 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 
320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -42,7 +42,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_ddp_average_in_collective/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_ddp_average_in_collective/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_ddp_average_in_collective/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_ddp_average_in_collective/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_ddp_average_in_collective/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_ddp_average_in_collective/model_config.yaml index f68e6657c2..9f416e7488 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_ddp_average_in_collective/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -46,7 +46,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute/model_config.yaml index e800a1bb0e..2622612205 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: 
${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -45,7 +45,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/model_config.yaml similarity index 86% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/model_config.yaml index 8fa925d715..00f01d3bac 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/model_config.yaml @@ -23,13 +23,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader/model_config.yaml index b0aa1f6623..3d1b350ced 
100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -46,7 +46,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_mmap_bin_files/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_mmap_bin_files/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_mmap_bin_files/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_mmap_bin_files/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_mmap_bin_files/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_mmap_bin_files/model_config.yaml index 8d7abbe27d..d4939a8c2c 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_mmap_bin_files/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -45,7 +45,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # 
backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/model_config.yaml index fcc217aa47..af4aa0bf4f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -45,7 +45,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgxh100_coreweave.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/model_config.yaml index b31c1bc3ef..9fbe95431e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -46,7 +46,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: flash diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/model_config.yaml index aac3d65eb8..54d49da6c1 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -44,7 +44,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/model_config.yaml index a12763a211..f906e5f843 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -46,7 +46,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_lts_dgxa100_dracooci-ord.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/model_config.yaml index 9d8400459f..7e2261ae51 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: 
${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -47,7 +47,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_dev_dgx_h100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/model_config.yaml similarity index 86% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/model_config.yaml index b9d5f466af..ea5523e1d2 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files/model_config.yaml index 5d78d653aa..afbc17a030 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - 
--timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -46,7 +46,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/model_config.yaml similarity index 80% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/model_config.yaml index b19f7ffcb9..bcbfdad661 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -43,7 +43,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_h100.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/model_config.yaml similarity index 86% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/model_config.yaml index ac68729bd5..ecc62315f9 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_a100.json diff 
--git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce/model_config.yaml index 6fee917227..89c6943100 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -46,7 +46,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml index dea5ced008..9d8de380f8 
100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -48,7 +48,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_lts_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode/model_config.yaml index 1c2e8ff630..18a7195b43 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -47,7 +47,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility 
for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/model_config.yaml index 73f311df45..fe8e0f493d 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -47,7 +47,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_eos.json 
similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml index 83a671b2c2..136c696ef2 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: 
${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -49,7 +49,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/model_config.yaml index eee1bb896f..0f842738f6 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -48,7 +48,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/model_config.yaml similarity index 85% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/model_config.yaml index 1c83796b11..4aa0b36a84 100644 
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/model_config.yaml index 8543a37af4..620eeaeff4 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -46,7 +46,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml index c2d1487092..b8a79c7a08 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt 
--split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -46,7 +46,7 @@ MODEL_ARGS: --no-gradient-accumulation-fusion: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_fsdp2_resume_torch_dist_te/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_fsdp2_resume_torch_dist_te/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_fsdp2_resume_torch_dist_te/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_fsdp2_resume_torch_dist_te/model_config.yaml index 3b8c3563f4..4febeeb3ac 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_fsdp2_resume_torch_dist_te/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_fsdp2_resume_torch_dist_te/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -47,7 +47,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - 
--dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --fp16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml index 4c7132e2d1..8793230c3c 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -48,7 +48,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --fp16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_lts_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/model_config.yaml similarity index 80% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/model_config.yaml index 0be73f09e6..4cc6e53b8c 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/model_config.yaml @@ 
-17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -44,7 +44,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_fp16/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_fp16/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_fp16/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_fp16/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_fp16/model_config.yaml similarity index 80% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_fp16/model_config.yaml index eac35eeb2a..f4c058fb0a 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_fp16/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -43,7 +43,7 @@ MODEL_ARGS: --no-gradient-accumulation-fusion: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --fp16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/model_config.yaml index d5960cff7a..e2a0f1f1f6 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - 
--timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -44,7 +44,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --fp16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/model_config.yaml similarity index 80% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/model_config.yaml index ee577dda37..b9b786ee24 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -44,7 +44,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true 
--apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgx_a100.json 
similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/model_config.yaml index 60bf33c7e7..b4991e3621 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -45,7 +45,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --fp16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/golden_values_lts_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te/model_config.yaml index 46dfa98592..cc6a76a97d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -46,7 +46,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - 
--dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --fp16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_uninstall_te/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_uninstall_te/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_uninstall_te/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_uninstall_te/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_uninstall_te/model_config.yaml similarity index 83% rename from tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_uninstall_te/model_config.yaml index 6f776fc09b..7601d0188a 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_uninstall_te/model_config.yaml @@ -21,13 +21,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 
1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -49,7 +49,7 @@ MODEL_ARGS: --no-gradient-accumulation-fusion: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --fp16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1/model_config.yaml similarity index 80% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1/model_config.yaml index 33da65bd2b..a365aae908 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -43,7 +43,7 @@ MODEL_ARGS: --no-gradient-accumulation-fusion: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_a100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_lts_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/model_config.yaml similarity index 84% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/model_config.yaml index b57638bcd8..c9473f99f9 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/model_config.yaml index 6070ad5e03..23b58cdc78 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -44,7 +44,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --fp16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json deleted file mode 100644 index 67c8ef8abf..0000000000 --- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json +++ /dev/null @@ -1,287 +0,0 @@ -{ - "lm loss": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 10.83936, - "2": 10.8442, - "3": 10.86813, - "4": 10.86022, - "5": 10.87939, - "6": 10.85969, - "7": 10.86386, - "8": 10.8444, - "9": 10.88995, - "10": 10.8926, - "11": 10.89136, - "12": 10.85312, - "13": 10.87319, - "14": 10.83805, - "15": 10.83088, - "16": 10.82011, - "17": 10.79138, - "18": 10.81055, - "19": 10.77977, - "20": 10.6635, - "21": 10.69765, - "22": 10.67421, - "23": 10.77344, - "24": 10.63919, - "25": 10.50497, - "26": 10.61911, - "27": 10.56921, - "28": 10.46859, - "29": 10.41119, - "30": 10.42916, - "31": 10.52553, - "32": 10.34942, - "33": 10.2967, - "34": 10.46909, - "35": 9.99632, - "36": 10.13945, - "37": 10.0434, - "38": 10.4139, - "39": 9.80941, - "40": 10.12495, - "41": 10.14883, - "42": 10.04042, - "43": 10.22142, - "44": 10.07348, - "45": 9.71369, - "46": 10.00449, - "47": 9.94758, - "48": 9.68856, - "49": 9.93637, - "50": 9.96042 - } - }, - "num-zeros": { - 
"start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 1026.0, - "2": 1184.0, - "3": 1226.0, - "4": 1248.0, - "5": 1259.0, - "6": 1421.0, - "7": 1182.0, - "8": 1036.0, - "9": 1293.0, - "10": 1319.0, - "11": 1212.0, - "12": 1373.0, - "13": 1327.0, - "14": 1121.0, - "15": 1217.0, - "16": 1163.0, - "17": 1246.0, - "18": 1280.0, - "19": 1128.0, - "20": 1019.0, - "21": 1147.0, - "22": 1156.0, - "23": 1341.0, - "24": 1312.0, - "25": 1066.0, - "26": 1138.0, - "27": 1270.0, - "28": 1260.0, - "29": 1292.0, - "30": 1532.0, - "31": 1477.0, - "32": 1460.0, - "33": 1537.0, - "34": 1513.0, - "35": 1235.0, - "36": 1316.0, - "37": 1466.0, - "38": 1564.0, - "39": 1380.0, - "40": 1513.0, - "41": 1633.0, - "42": 1509.0, - "43": 1731.0, - "44": 1636.0, - "45": 1501.0, - "46": 1884.0, - "47": 1567.0, - "48": 1631.0, - "49": 1825.0, - "50": 1639.0 - } - }, - "mem-allocated-bytes": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 759681536.0, - "2": 759681536.0, - "3": 759681536.0, - "4": 759681536.0, - "5": 759681536.0, - "6": 759681536.0, - "7": 759681536.0, - "8": 759681536.0, - "9": 759681536.0, - "10": 759681536.0, - "11": 759681536.0, - "12": 759681536.0, - "13": 759681536.0, - "14": 759681536.0, - "15": 759681536.0, - "16": 759681536.0, - "17": 759681536.0, - "18": 759681536.0, - "19": 759681536.0, - "20": 759681536.0, - "21": 759681536.0, - "22": 759681536.0, - "23": 759681536.0, - "24": 759681536.0, - "25": 759681536.0, - "26": 759681536.0, - "27": 759681536.0, - "28": 759681536.0, - "29": 759681536.0, - "30": 759681536.0, - "31": 759681536.0, - "32": 759681536.0, - "33": 759681536.0, - "34": 759681536.0, - "35": 759681536.0, - "36": 759681536.0, - "37": 759681536.0, - "38": 759681536.0, - "39": 759681536.0, - "40": 759681536.0, - "41": 759681536.0, - "42": 759681536.0, - "43": 759681536.0, - "44": 759681536.0, - "45": 759681536.0, - "46": 759681536.0, - "47": 759681536.0, - "48": 759681536.0, - "49": 759681536.0, - "50": 
759681536.0 - } - }, - "mem-max-allocated-bytes": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 4340902912.0, - "2": 4622614528.0, - "3": 4622614528.0, - "4": 4622614528.0, - "5": 4622614528.0, - "6": 4622614528.0, - "7": 4622614528.0, - "8": 4622614528.0, - "9": 4622614528.0, - "10": 4622614528.0, - "11": 4622614528.0, - "12": 4622614528.0, - "13": 4622614528.0, - "14": 4622614528.0, - "15": 4622614528.0, - "16": 4622614528.0, - "17": 4622614528.0, - "18": 4622614528.0, - "19": 4622614528.0, - "20": 4622614528.0, - "21": 4622614528.0, - "22": 4622614528.0, - "23": 4622614528.0, - "24": 4622614528.0, - "25": 4622614528.0, - "26": 4622614528.0, - "27": 4622614528.0, - "28": 4622614528.0, - "29": 4622614528.0, - "30": 4622614528.0, - "31": 4622614528.0, - "32": 4622614528.0, - "33": 4622614528.0, - "34": 4622614528.0, - "35": 4622614528.0, - "36": 4622614528.0, - "37": 4622614528.0, - "38": 4622614528.0, - "39": 4622614528.0, - "40": 4622614528.0, - "41": 4622614528.0, - "42": 4622614528.0, - "43": 4622614528.0, - "44": 4622614528.0, - "45": 4622614528.0, - "46": 4622614528.0, - "47": 4622614528.0, - "48": 4622614528.0, - "49": 4622614528.0, - "50": 4622614528.0 - } - }, - "iteration-time": { - "start_step": 1, - "end_step": 50, - "step_interval": 1, - "values": { - "1": 13.91724, - "2": 0.27573, - "3": 0.23467, - "4": 0.23594, - "5": 0.23302, - "6": 0.23216, - "7": 0.23399, - "8": 0.23423, - "9": 0.23365, - "10": 0.23211, - "11": 0.2332, - "12": 0.23283, - "13": 0.23445, - "14": 0.23405, - "15": 0.23349, - "16": 0.23298, - "17": 0.23305, - "18": 0.23251, - "19": 0.23322, - "20": 0.23348, - "21": 0.23189, - "22": 0.23316, - "23": 0.2316, - "24": 0.23233, - "25": 0.23512, - "26": 0.23232, - "27": 0.23306, - "28": 0.23244, - "29": 0.23331, - "30": 0.23258, - "31": 0.23311, - "32": 0.23326, - "33": 0.23418, - "34": 0.23411, - "35": 0.23489, - "36": 0.2317, - "37": 0.23483, - "38": 0.23235, - "39": 0.23511, - "40": 0.23413, - "41": 
0.23395, - "42": 0.23405, - "43": 0.23331, - "44": 0.23297, - "45": 0.23473, - "46": 0.23192, - "47": 0.23377, - "48": 0.23322, - "49": 0.23042, - "50": 0.23263 - } - } -} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml similarity index 87% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml index 387f03d450..f6892ae5c2 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/tp_comm_overlap_cfg.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/tp_comm_overlap_cfg.yaml similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/tp_comm_overlap_cfg.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/tp_comm_overlap_cfg.yaml diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml similarity index 86% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml index 967567958f..9c23cb7938 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 
--distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml index 1b5de4373f..4727007ffe 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -49,7 +49,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true 
--log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_mxfp8_tp_sp_cp/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp4_cp2_mxfp8_tp_sp_cp/model_config.yaml similarity index 85% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_mxfp8_tp_sp_cp/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp4_cp2_mxfp8_tp_sp_cp/model_config.yaml index ccff1cf44f..bba1f1ad19 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_mxfp8_tp_sp_cp/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp4_cp2_mxfp8_tp_sp_cp/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml rename to 
tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml index 7fe999b2a6..5ac9b7b470 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_b200_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml @@ -16,13 +16,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -49,7 +49,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_nondet_tp1_pp1_fp8_no_model_parallel/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_nondet_tp1_pp1_fp8_no_model_parallel/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_nondet_tp1_pp1_fp8_no_model_parallel/golden_values_dev_dgx_a100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_nondet_tp1_pp1_fp8_no_model_parallel/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_nondet_tp1_pp1_fp8_no_model_parallel/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_nondet_tp1_pp1_fp8_no_model_parallel/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_nondet_tp1_pp1_fp8_no_model_parallel/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_nondet_tp1_pp1_fp8_no_model_parallel/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_nondet_tp1_pp1_fp8_no_model_parallel/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_nondet_tp1_pp1_fp8_no_model_parallel/model_config.yaml similarity index 80% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_nondet_tp1_pp1_fp8_no_model_parallel/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_nondet_tp1_pp1_fp8_no_model_parallel/model_config.yaml index 0e243b6113..0e70965cb2 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_nondet_tp1_pp1_fp8_no_model_parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_nondet_tp1_pp1_fp8_no_model_parallel/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + 
--data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -44,7 +44,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_bf16_baseline/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_bf16_baseline/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_bf16_baseline/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_bf16_baseline/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_bf16_baseline/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_bf16_baseline/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_bf16_baseline/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_bf16_baseline/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_bf16_baseline/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_bf16_baseline/model_config.yaml similarity index 80% 
rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_bf16_baseline/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_bf16_baseline/model_config.yaml index 453c506742..db5dea3ae6 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_bf16_baseline/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_bf16_baseline/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -42,7 +42,7 @@ MODEL_ARGS: --deterministic-mode: true --no-gradient-accumulation-fusion: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --fp16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_fp8_no_model_parallel/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_fp8_no_model_parallel/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_fp8_no_model_parallel/golden_values_dev_dgx_a100.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_fp8_no_model_parallel/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_fp8_no_model_parallel/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_fp8_no_model_parallel/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_fp8_no_model_parallel/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_fp8_no_model_parallel/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_fp8_no_model_parallel/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_fp8_no_model_parallel/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_fp8_no_model_parallel/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_fp8_no_model_parallel/model_config.yaml index 8211c7f40f..12063418ad 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_fp8_no_model_parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp1_fp8_no_model_parallel/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -47,7 +47,7 @@ MODEL_ARGS: --fp8-amax-compute-algo: max --attention-softmax-in-fp32: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp2_fp8_pp/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp2_fp8_pp/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp2_fp8_pp/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp2_fp8_pp/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp2_fp8_pp/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp2_fp8_pp/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp2_fp8_pp/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp2_fp8_pp/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp2_fp8_pp/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp2_fp8_pp/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp2_fp8_pp/model_config.yaml rename to 
tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp2_fp8_pp/model_config.yaml index cf4fe01721..51a2f6cfc7 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp2_fp8_pp/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp1_pp2_fp8_pp/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -48,7 +48,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/golden_values_dev_dgxh100_coreweave.json rename to 
tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml similarity index 88% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml index 51475b1a65..5668a7575e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/tp_comm_overlap_cfg.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/tp_comm_overlap_cfg.yaml similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/tp_comm_overlap_cfg.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/tp_comm_overlap_cfg.yaml diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp/model_config.yaml rename to 
tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp/model_config.yaml index 02db21e947..66c9f171be 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -48,7 +48,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp_sp/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp_sp/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp_sp/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp_sp/golden_values_dev_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp_sp/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp_sp/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp_sp/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp_sp/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp_sp/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp_sp/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp_sp/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp_sp/model_config.yaml index 3f650edfa8..5423730964 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp_sp/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp_sp/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -48,7 +48,7 @@ MODEL_ARGS: 
--fp8-amax-compute-algo: max --attention-softmax-in-fp32: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_native_fp8_tp_pp_sp/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_native_fp8_tp_pp_sp/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_native_fp8_tp_pp_sp/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_native_fp8_tp_pp_sp/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_native_fp8_tp_pp_sp/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_native_fp8_tp_pp_sp/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_native_fp8_tp_pp_sp/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_native_fp8_tp_pp_sp/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_native_fp8_tp_pp_sp/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_native_fp8_tp_pp_sp/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_native_fp8_tp_pp_sp/model_config.yaml rename to 
tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_native_fp8_tp_pp_sp/model_config.yaml index 95e4fd5b48..a5903e51b6 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_native_fp8_tp_pp_sp/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_native_fp8_tp_pp_sp/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -51,7 +51,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml similarity index 86% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml index 
a38d289752..ac5ff6cfbb 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp/model_config.yaml @@ -16,13 +16,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml similarity index 82% rename from 
tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml index bbbcf96b67..3963a359ea 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -49,7 +49,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from 
tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml similarity index 87% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml index 01736c6899..ddb34ad850 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_pp2_fp8_tp_pp/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_pp2_fp8_tp_pp/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_pp2_fp8_tp_pp/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_pp2_fp8_tp_pp/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_pp2_fp8_tp_pp/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_pp2_fp8_tp_pp/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_pp2_fp8_tp_pp/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_pp2_fp8_tp_pp/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_pp2_fp8_tp_pp/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_pp2_fp8_tp_pp/model_config.yaml similarity index 81% rename from tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_pp2_fp8_tp_pp/model_config.yaml rename to tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_pp2_fp8_tp_pp/model_config.yaml index 9bd15f9887..cf9f6b6ceb 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_pp2_fp8_tp_pp/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_pp2_fp8_tp_pp/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 2000 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: 
${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -47,7 +47,7 @@ MODEL_ARGS: --fp8-amax-compute-algo: max --attention-softmax-in-fp32: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --log-memory-to-tensorboard: true diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/model_config.yaml index 363f31519f..0675b04746 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/model_config.yaml @@ -9,7 +9,7 @@ MODEL_ARGS: --tiktoken-pattern: v2 --use-mcore-models: true --tokenizer-type: TikTokenizer - --tokenizer-model: ${DATA_PATH}/mcore_mistral/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --auto-detect-ckpt-format: true --max-tokens-to-oom: 3600000 --inference-max-seq-length: 4096 @@ -18,8 +18,8 @@ MODEL_ARGS: --micro-batch-size: 1 --no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true - --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/mcore_mistral/model + --timing-log-level: 0 + --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1 --distributed-backend: nccl 
--log-interval: 1 --transformer-impl: transformer_engine diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/model_config.yaml index 67c9de2080..2ba9050cea 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/model_config.yaml @@ -9,7 +9,7 @@ MODEL_ARGS: --tiktoken-pattern: v2 --use-mcore-models: true --tokenizer-type: TikTokenizer - --tokenizer-model: ${DATA_PATH}/mcore_mistral/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --auto-detect-ckpt-format: true --max-tokens-to-oom: 3600000 --inference-max-seq-length: 4096 @@ -18,8 +18,8 @@ MODEL_ARGS: --micro-batch-size: 1 --no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true - --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/mcore_mistral/model + --timing-log-level: 0 + --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1 --distributed-backend: nccl --log-interval: 1 --transformer-impl: transformer_engine diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/model_config.yaml index c0b563c663..96ada2bf1e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/model_config.yaml @@ -9,7 +9,7 @@ MODEL_ARGS: 
--tiktoken-pattern: v2 --use-mcore-models: true --tokenizer-type: TikTokenizer - --tokenizer-model: ${DATA_PATH}/mcore_mistral/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --auto-detect-ckpt-format: true --max-tokens-to-oom: 3600000 --inference-max-seq-length: 4096 @@ -18,8 +18,8 @@ MODEL_ARGS: --micro-batch-size: 1 --no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true - --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/mcore_mistral/model + --timing-log-level: 0 + --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1 --distributed-backend: nccl --log-interval: 1 --transformer-impl: transformer_engine @@ -49,7 +49,7 @@ MODEL_ARGS: --inference-ckpt-non-strict: true # To handle the extra_state errors --output-path: ${TENSORBOARD_PATH} --output-every-n-results: 32 - --prompt-file: ${DATA_PATH}/sharegpt/filtered-benchmark/processed.jsonl + --prompt-file: ${DATA_PATH}/text/sharegpt-vicuna/filtered/processed.jsonl --prompt-file-num-truncate: 128 # originally 1024 --num-tokens-to-generate: 128 # originally 512 --incoming-requests-per-step: 32 diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq/model_config.yaml index 024d2ede3d..a4f47d3705 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq/model_config.yaml @@ -9,7 +9,7 @@ MODEL_ARGS: --tiktoken-pattern: v2 --use-mcore-models: true --tokenizer-type: TikTokenizer - --tokenizer-model: ${DATA_PATH}/mcore_mistral/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --tokenizer-model: 
${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --auto-detect-ckpt-format: true --max-tokens-to-oom: 3600000 --inference-max-seq-length: 4096 @@ -18,8 +18,8 @@ MODEL_ARGS: --micro-batch-size: 1 --no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true - --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/mcore_mistral/model + --timing-log-level: 0 + --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ --distributed-backend: nccl --log-interval: 1 --transformer-impl: transformer_engine @@ -51,7 +51,7 @@ MODEL_ARGS: --prompts: "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies." --incoming-requests-per-step: 32 --use-flashinfer-fused-rope: true - + METRICS: - "generated_tokens" - "logprobs" diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/model_config.yaml index f2d3dee390..59186f8d53 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/model_config.yaml @@ -9,7 +9,7 @@ MODEL_ARGS: --tiktoken-pattern: v2 --use-mcore-models: true --tokenizer-type: TikTokenizer - --tokenizer-model: ${DATA_PATH}/mcore_mistral/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --auto-detect-ckpt-format: true --max-tokens-to-oom: 3600000 
--inference-max-seq-length: 4096 @@ -18,8 +18,8 @@ MODEL_ARGS: --micro-batch-size: 1 --no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true - --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/mcore_mistral/model + --timing-log-level: 0 + --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ --distributed-backend: nccl --log-interval: 1 --transformer-impl: transformer_engine diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/model_config.yaml index 5fe1ecf5c8..612e621534 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/model_config.yaml @@ -9,7 +9,7 @@ MODEL_ARGS: --tiktoken-pattern: v2 --use-mcore-models: true --tokenizer-type: TikTokenizer - --tokenizer-model: ${DATA_PATH}/mcore_mistral/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --auto-detect-ckpt-format: true --max-tokens-to-oom: 3600000 --inference-max-seq-length: 4096 @@ -18,8 +18,8 @@ MODEL_ARGS: --micro-batch-size: 1 --no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true - --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/mcore_mistral/model + --timing-log-level: 0 + --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ --distributed-backend: nccl --log-interval: 1 --transformer-impl: transformer_engine diff --git a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/model_config.yaml 
index 90e93dfdcd..cb06eae2e7 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/model_config.yaml @@ -10,10 +10,10 @@ MODEL_ARGS: --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true --log-memory-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 # See the mount paths defined in the top level tests/test_utils/recipes/gpt-static-inference.yaml - --load: ${CHECKPOINT_LOAD_PATH}/deepseek_16b_pyt/model/checkpoints - --tokenizer-model: ${DATA_PATH}/deepseek_16b_pyt/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --load: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/checkpoints + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --tokenizer-type: TikTokenizer --tiktoken-pattern: v2 --distributed-backend: nccl diff --git a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_cudagraphs/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_cudagraphs/model_config.yaml index 18fe5beff9..c080cd5f5a 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_cudagraphs/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_cudagraphs/model_config.yaml @@ -9,7 +9,7 @@ MODEL_ARGS: --tiktoken-pattern: v2 --use-mcore-models: true --tokenizer-type: TikTokenizer - --tokenizer-model: ${DATA_PATH}/mcore_mistral/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --auto-detect-ckpt-format: true --max-tokens-to-oom: 3600000 --inference-max-seq-length: 4096 @@ -18,8 +18,8 @@ MODEL_ARGS: --micro-batch-size: 1 
--no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true - --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/mcore_mistral/model + --timing-log-level: 0 + --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ --distributed-backend: nccl --log-interval: 1 --transformer-impl: transformer_engine diff --git a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs/model_config.yaml index d03c69f832..e3a4d695ea 100644 --- a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs/model_config.yaml @@ -9,7 +9,7 @@ MODEL_ARGS: --tiktoken-pattern: v2 --use-mcore-models: true --tokenizer-type: TikTokenizer - --tokenizer-model: ${DATA_PATH}/mcore_mistral/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --auto-detect-ckpt-format: true --max-tokens-to-oom: 3600000 --inference-max-seq-length: 4096 @@ -18,8 +18,8 @@ MODEL_ARGS: --micro-batch-size: 1 --no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true - --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/mcore_mistral/model + --timing-log-level: 0 + --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ --distributed-backend: nccl --log-interval: 1 --transformer-impl: transformer_engine diff --git a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_logitsmatch/model_config.yaml index d78c45e380..90a1836347 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_logitsmatch/model_config.yaml @@ -9,7 +9,7 @@ MODEL_ARGS: --tiktoken-pattern: v2 --use-mcore-models: true --tokenizer-type: TikTokenizer - --tokenizer-model: ${DATA_PATH}/mcore_mistral/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --auto-detect-ckpt-format: true --max-tokens-to-oom: 3600000 --inference-max-seq-length: 4096 @@ -18,8 +18,8 @@ MODEL_ARGS: --micro-batch-size: 1 --no-load-optim: true --no-use-tokenizer-model-from-checkpoint-args: true - --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/mcore_mistral/model + --timing-log-level: 0 + --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_mistral/nemo_minitron-0.5b/v1/ --distributed-backend: nccl --log-interval: 1 --transformer-impl: transformer_engine diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/model_config.yaml index 3de471e8f8..199cf809ba 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/model_config.yaml @@ -22,13 +22,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp4_cp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp4_cp1_dgx_a100_1N8G/model_config.yaml index 21fa690e66..0983337bec 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp4_cp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp4_cp1_dgx_a100_1N8G/model_config.yaml @@ -22,13 +22,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/model_config.yaml index f3942d7ae4..7f7aac5d78 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/model_config.yaml @@ -22,13 +22,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: 
${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/model_config.yaml index 76891deaa8..93418f580f 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/model_config.yaml @@ -22,13 +22,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_cudagraphs/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_cudagraphs/model_config.yaml index 4e55935511..7702274db5 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_cudagraphs/model_config.yaml +++ 
b/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_cudagraphs/model_config.yaml @@ -10,9 +10,9 @@ MODEL_ARGS: --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true --log-memory-to-tensorboard: true - --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/mamba_hybrid_2b/checkpoint - --tokenizer-model: ${DATA_PATH}/mamba_hybrid_2b/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --timing-log-level: 0 + --load: ${CHECKPOINT_LOAD_PATH}/model/mamba_hybrid_2b/dcp/mcore-v1_bf16/checkpoint + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mamba_hybrid_2b/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --tokenizer-type: TikTokenizer --tiktoken-pattern: v2 --distributed-backend: nccl diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_logitsmatch/model_config.yaml index 2af1fa222c..9a7769eb43 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_logitsmatch/model_config.yaml @@ -10,9 +10,9 @@ MODEL_ARGS: --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true --log-memory-to-tensorboard: true - --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/mamba_hybrid_2b/checkpoint - --tokenizer-model: ${DATA_PATH}/mamba_hybrid_2b/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --timing-log-level: 0 + --load: ${CHECKPOINT_LOAD_PATH}/model/mamba_hybrid_2b/dcp/mcore-v1_bf16/checkpoint + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/mamba_hybrid_2b/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --tokenizer-type: TikTokenizer --tiktoken-pattern: v2 --distributed-backend: nccl diff --git a/tests/functional_tests/test_cases/mimo/mimo_vlm_pretrain_convergence_tp1_pp1_cp1_dp8/model_config.yaml 
b/tests/functional_tests/test_cases/mimo/mimo_vlm_pretrain_convergence_tp1_pp1_cp1_dp8/model_config.yaml index 447b5a094e..2daf74b89a 100644 --- a/tests/functional_tests/test_cases/mimo/mimo_vlm_pretrain_convergence_tp1_pp1_cp1_dp8/model_config.yaml +++ b/tests/functional_tests/test_cases/mimo/mimo_vlm_pretrain_convergence_tp1_pp1_cp1_dp8/model_config.yaml @@ -3,7 +3,6 @@ ENV_VARS: NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 NCCL_ALGO: Ring CUBLAS_WORKSPACE_CONFIG: :4096:8 - ARTIFACTS_ROOT: /workspace/checkpoints MODEL_ARGS: --num-layers: 32 --hidden-size: 4096 @@ -20,7 +19,7 @@ MODEL_ARGS: --seq-length: 4096 --max-position-embeddings: 4096 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 2200 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} @@ -48,8 +47,8 @@ MODEL_ARGS: --deterministic-mode: true --log-memory-to-tensorboard: true --dataloader-type: external - --data-path: ${DATA_PATH} - --language-model-checkpoint: ${ARTIFACTS_ROOT}/vicuna_7b_pyt/dcp/mcore-v1.5_fp32/weights + --data-path: ${DATA_PATH}/mixed/mcore_mimo_vlm/llava_pretrain_energon + --language-model-checkpoint: ${CHECKPOINT_LOAD_PATH}/model/vicuna_7b_pyt/dcp/mcore-v1.5_fp32/weights --auto-detect-ckpt-format: true --accumulate-allreduce-grads-in-fp32: true --position-embedding-type: rope diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/model_config.yaml similarity index 87% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/model_config.yaml index f955dbf17a..cdabc4b622 100644 --- 
a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_h100.json similarity index 100% rename from 
tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/model_config.yaml similarity index 87% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/model_config.yaml rename to 
tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/model_config.yaml index f5014a23b5..731ff82d8d 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgxh100_eos.json similarity index 100% rename from 
tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/model_config.yaml similarity index 83% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/model_config.yaml index 8e98f65315..f7fd8b2963 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/model_config.yaml @@ -15,13 +15,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -50,7 +50,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --disable-bias-linear: true diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/model_config.yaml similarity index 88% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml rename to 
tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/model_config.yaml index 27b2db92ca..61b5c9339b 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/golden_values_dev_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/model_config.yaml similarity index 87% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml rename to 
tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/model_config.yaml index e5dd41580d..a3995df962 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgxh100_eos.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/model_config.yaml similarity index 84% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/model_config.yaml index f78250b86e..8672163186 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + 
--data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -53,7 +53,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --ckpt-assume-constant-structure: true --data-cache-path: ${DATA_CACHE_PATH} --bf16: true diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM/model_config.yaml similarity index 87% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM/model_config.yaml index e970e1e020..4ed0bb8900 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgxh100_coreweave.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/model_config.yaml similarity index 84% rename from 
tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/model_config.yaml index be2a2cb6a6..8e267b178b 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -52,7 +52,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --ckpt-assume-constant-structure: true --data-cache-path: ${DATA_CACHE_PATH} --bf16: true diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml similarity index 84% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml index 0888531f33..9490d832f7 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -54,7 +54,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --no-bias-gelu-fusion: true diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgx_a100.json 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_lts_dgxa100_dracooci.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/model_config.yaml similarity index 88% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/model_config.yaml index 19a8b4fc63..b84bf45b89 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer/model_config.yaml similarity index 83% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer/model_config.yaml index 12c43095c4..b5c774d4d3 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: 
${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -51,7 +51,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_h100.json similarity index 100% rename from 
tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgxh100_eos.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/model_config.yaml similarity index 84% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/model_config.yaml index a88a8b74b9..d02951177b 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: 
${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -56,7 +56,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router/model_config.yaml similarity index 83% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router/model_config.yaml index b22cd9ba9b..8c75b0a2e7 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -52,7 +52,7 @@ MODEL_ARGS: --attention-softmax-in-fp32: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci-ord.json rename to 
tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/model_config.yaml similarity index 84% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/model_config.yaml index 91a908a4fc..978babb72f 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -55,7 +55,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgxh100_eos.json similarity index 100% rename from 
tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/model_config.yaml similarity index 88% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/model_config.yaml index f27db4a802..b6a7c223ac 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml index 5390afcd09..4c991767ca 100644 --- 
a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml @@ -38,9 +38,9 @@ MODEL_ARGS: # Data args --seq-length: 4096 --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 # Add network size args --num-layers: 16 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml similarity index 94% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml index 7ebd9f0d1a..a1a5219ecb 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml +++ 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml @@ -38,9 +38,9 @@ MODEL_ARGS: # Data args --seq-length: 4096 --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 # Add network size args --num-layers: 16 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_coreweave.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml similarity index 95% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml index 11d62eb149..bd56583097 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml @@ -38,9 +38,9 @@ MODEL_ARGS: # Data args --seq-length: 4096 --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 # Add network size args --num-layers: 16 diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml similarity index 95% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml index 0a37ee0849..efb1fedf93 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml @@ -39,9 +39,9 @@ MODEL_ARGS: # Data args --seq-length: 4096 --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 # Add network size args --num-layers: 16 diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_lts_dgx_a100.json 
diff --git a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml similarity index 85% rename from tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml index e46fc9246b..3ecd68b984 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -58,7 +58,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --no-bias-gelu-fusion: true diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/model_config.yaml similarity index 83% rename from 
tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/model_config.yaml index 7cb050257a..c147b689e7 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -50,7 +50,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: flash diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from 
tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml similarity index 83% rename from 
tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml index 2354ecd7fd..f77c2a41f6 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -49,7 +49,7 @@ MODEL_ARGS: --no-gradient-accumulation-fusion: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: flash diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json rename to 
tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml similarity index 83% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml index 7c0a103200..12e6698a5f 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml +++ 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -49,7 +49,7 @@ MODEL_ARGS: --no-gradient-accumulation-fusion: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_coreweave.json 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci-ord.json 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/model_config.yaml similarity index 83% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/model_config.yaml index a01439c83c..c714e05865 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/model_config.yaml +++ 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -50,7 +50,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/model_config.yaml 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/model_config.yaml similarity index 82% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/model_config.yaml index 984e8bd51f..86a05a9356 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -49,7 +49,7 @@ MODEL_ARGS: --use-checkpoint-opt_param-scheduler: true --use-mcore-models: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --disable-bias-linear: true diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from 
tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgx_a100.json rename to 
tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/model_config.yaml similarity index 83% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/model_config.yaml index 617d2a70b5..5020d9d939 100644 --- 
a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -49,7 +49,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-optim-fully-reshardable: true - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --bf16: true --attention-backend: unfused diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts/golden_values_dev.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts/golden_values_dev.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts/golden_values_dev.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts/golden_values_dev.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts/golden_values_lts.json 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts/golden_values_lts.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts/golden_values_lts.json rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts/golden_values_lts.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts/model_config.yaml similarity index 83% rename from tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts/model_config.yaml index 34070006ad..d763069b56 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts/model_config.yaml @@ -17,13 +17,13 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 @@ -49,7 +49,7 @@ MODEL_ARGS: --use-mcore-models: true --ckpt-format: torch_dist --dist-ckpt-save-pre-mcore-014: true - 
--dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --data-cache-path: ${DATA_CACHE_PATH} --fp16: true --apply-query-key-layer-scaling: true diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/model_config.yaml similarity index 88% rename from tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/model_config.yaml index 8dcf744be8..cd7656d240 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --max-position-embeddings: 1024 --disable-bias-linear: 
true --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json 
b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/model_config.yaml similarity index 88% rename from tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/model_config.yaml rename to tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/model_config.yaml index 2dd0fda1c2..fb438f0edd 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/model_config.yaml @@ -18,13 +18,13 @@ MODEL_ARGS: --max-position-embeddings: 1024 --disable-bias-linear: true --train-iters: 100 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/bpe/vocab.json - --merge-file: ${DATA_PATH}/bpe/merges.txt + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git 
a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml index c6e7916ea7..0e1f911079 100644 --- a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml @@ -10,9 +10,9 @@ MODEL_ARGS: --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true --log-memory-to-tensorboard: true - --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/deepseek_16b_pyt/model/checkpoints - --tokenizer-model: ${DATA_PATH}/deepseek_16b_pyt/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --timing-log-level: 0 + --load: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/checkpoints + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --tokenizer-type: TikTokenizer --tiktoken-pattern: v2 --distributed-backend: nccl @@ -82,4 +82,4 @@ MODEL_ARGS: --inference-repeat-n: 8 METRICS: - "generated_tokens" - - "logprobs" \ No newline at end of file + - "logprobs" diff --git a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml index df6ca00d00..1b9eaaf1f6 100644 --- a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml @@ -10,9 +10,9 @@ MODEL_ARGS: --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true --log-memory-to-tensorboard: true - --timing-log-level: 2 - 
--load: ${CHECKPOINT_LOAD_PATH}/deepseek_16b_pyt/model/checkpoints - --tokenizer-model: ${DATA_PATH}/deepseek_16b_pyt/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --timing-log-level: 0 + --load: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/checkpoints + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --tokenizer-type: TikTokenizer --tiktoken-pattern: v2 --distributed-backend: nccl diff --git a/tests/functional_tests/test_cases/moe/gpt_static_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt_static_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml index 2289078dd5..3ba1205619 100644 --- a/tests/functional_tests/test_cases/moe/gpt_static_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt_static_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml @@ -10,9 +10,9 @@ MODEL_ARGS: --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true --log-memory-to-tensorboard: true - --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/deepseek_16b_pyt/model/checkpoints - --tokenizer-model: ${DATA_PATH}/deepseek_16b_pyt/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --timing-log-level: 0 + --load: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/checkpoints + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --tokenizer-type: TikTokenizer --tiktoken-pattern: v2 --distributed-backend: nccl diff --git a/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/model_config.yaml index 3f09b79d8e..569eb969d7 100644 --- 
a/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/model_config.yaml @@ -10,9 +10,9 @@ MODEL_ARGS: --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true --log-memory-to-tensorboard: true - --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/deepseek_16b_pyt/model/checkpoints - --tokenizer-model: ${DATA_PATH}/deepseek_16b_pyt/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --timing-log-level: 0 + --load: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/checkpoints + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --tokenizer-type: TikTokenizer --tiktoken-pattern: v2 --distributed-backend: nccl diff --git a/tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml index 26a9f7afc1..366d2f2357 100644 --- a/tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml @@ -10,9 +10,9 @@ MODEL_ARGS: --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true --log-memory-to-tensorboard: true - --timing-log-level: 2 - --load: ${CHECKPOINT_LOAD_PATH}/deepseek_16b_pyt/model/checkpoints - --tokenizer-model: ${DATA_PATH}/deepseek_16b_pyt/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + --timing-log-level: 0 + --load: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/checkpoints + --tokenizer-model: ${CHECKPOINT_LOAD_PATH}/model/deepseek_16b_pyt/dcp/mcore-v1_bf16/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json --tokenizer-type: TikTokenizer --tiktoken-pattern: 
v2 --distributed-backend: nccl diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json 
b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/model_config.yaml similarity index 98% rename from tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/model_config.yaml index 4b59ffaca8..2898070f95 100644 --- a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/model_config.yaml @@ -19,7 +19,7 @@ MODEL_ARGS: --seq-length: 1024 --max-position-embeddings: 1024 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} diff 
--git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json 
b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/model_config.yaml similarity index 98% rename from tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/model_config.yaml index a13b09397e..23bdaac501 100644 --- a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/model_config.yaml @@ -21,7 +21,7 @@ MODEL_ARGS: --disable-vision-class-token: true --max-position-embeddings: 4096 --train-iters: 50 - --timing-log-level: 2 + --timing-log-level: 0 --lr-decay-iters: 320000 --save: 
${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} diff --git a/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgxh100_eos.json diff --git 
a/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/model_config.yaml similarity index 89% rename from tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/model_config.yaml index e9556f5f36..c2798ecf6a 100644 --- a/tests/functional_tests/test_cases/t5/t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: transformer_engine - --data-path: ${DATA_PATH}/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 @@ -41,7 +41,7 @@ MODEL_ARGS: --log-num-zeros-in-grad: true --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --save-interval: 10000 --eval-interval: 1000 diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from 
tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/model_config.yaml similarity index 88% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/model_config.yaml rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/model_config.yaml index ae465aecc6..aa0f67ff31 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch/model_config.yaml +++ 
b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: transformer_engine - --data-path: ${DATA_PATH}/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 @@ -41,7 +41,7 @@ MODEL_ARGS: --log-num-zeros-in-grad: true --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --save-interval: 50 --eval-interval: 1000 diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json 
b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxh100_eos.json diff --git 
a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/model_config.yaml similarity index 88% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/model_config.yaml rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/model_config.yaml index 4df31e32ed..59c1d0f280 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: transformer_engine - --data-path: ${DATA_PATH}/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 @@ -41,7 +41,7 @@ MODEL_ARGS: --log-num-zeros-in-grad: true --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --save-interval: 10000 --eval-interval: 1000 diff --git 
a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci.json similarity 
index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_lts_dgx_a100.json rename to 
tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/model_config.yaml similarity index 89% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/model_config.yaml rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/model_config.yaml index 6a5a701a77..80a84a26e0 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: transformer_engine - --data-path: ${DATA_PATH}/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 @@ -41,7 +41,7 @@ MODEL_ARGS: --log-num-zeros-in-grad: true --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --save-interval: 10000 --eval-interval: 1000 diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to 
tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_lts_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/model_config.yaml similarity index 84% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/model_config.yaml index 48f79ab997..047280dec3 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: transformer_engine - --data-path: ${DATA_PATH}/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 @@ -41,7 +41,7 @@ MODEL_ARGS: --log-num-zeros-in-grad: true --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --save-interval: 10000 --eval-interval: 1000 @@ -50,7 +50,7 @@ MODEL_ARGS: --data-cache-path: ${DATA_CACHE_PATH} --deterministic-mode: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --attention-backend: unfused --log-memory-to-tensorboard: true TEST_TYPE: regular diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_eos.json diff --git 
a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/model_config.yaml similarity index 84% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/model_config.yaml index 941f616134..1611c02251 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: transformer_engine - --data-path: ${DATA_PATH}/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 @@ -41,7 +41,7 @@ MODEL_ARGS: --log-num-zeros-in-grad: true --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --save-interval: 50 --eval-interval: 1000 @@ -50,7 +50,7 @@ MODEL_ARGS: 
--data-cache-path: ${DATA_CACHE_PATH} --deterministic-mode: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --attention-backend: unfused --log-memory-to-tensorboard: true TEST_TYPE: ckpt-resume diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci.json 
b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/model_config.yaml 
b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/model_config.yaml similarity index 88% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/model_config.yaml rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/model_config.yaml index 268cd275db..12ccecb588 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: local - --data-path: ${DATA_PATH}/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 @@ -41,7 +41,7 @@ MODEL_ARGS: --log-num-zeros-in-grad: true --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --save-interval: 10000 --eval-interval: 1000 diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json 
b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_coreweave.json rename to 
tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/model_config.yaml similarity index 88% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/model_config.yaml rename to tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/model_config.yaml index 8d87179647..8559fd587d 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: local 
- --data-path: ${DATA_PATH}/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 @@ -41,7 +41,7 @@ MODEL_ARGS: --log-num-zeros-in-grad: true --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --save-interval: 50 --eval-interval: 1000 diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json rename to 
tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/golden_values_lts_dgx_a100.json rename to 
tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/model_config.yaml similarity index 88% rename from tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/model_config.yaml rename to tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/model_config.yaml index d315b91295..9c6a835571 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: local - --data-path: ${DATA_PATH}/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 @@ -41,7 +41,7 @@ MODEL_ARGS: --log-num-zeros-in-grad: true --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --save-interval: 10000 --eval-interval: 1000 diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_dev_dgx_a100.json diff --git 
a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/model_config.yaml 
b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/model_config.yaml similarity index 84% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/model_config.yaml index 4a1f05c07a..dd3896ad88 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: local - --data-path: ${DATA_PATH}/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 @@ -41,7 +41,7 @@ MODEL_ARGS: --log-num-zeros-in-grad: true --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --save-interval: 10000 --eval-interval: 1000 @@ -50,6 +50,6 @@ MODEL_ARGS: --data-cache-path: ${DATA_CACHE_PATH} --deterministic-mode: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --log-memory-to-tensorboard: true TEST_TYPE: regular diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_a100.json rename to 
tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgx_h100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_coreweave.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_coreweave.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_eos.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_dev_dgxh100_eos.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgxh100_eos.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100.json similarity index 100% rename from 
tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/model_config.yaml similarity index 84% rename from tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml rename to tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/model_config.yaml index 9bd3c8b887..4c955dd544 100644 --- a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: local - --data-path: ${DATA_PATH}/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 @@ -41,7 +41,7 @@ MODEL_ARGS: --log-num-zeros-in-grad: true --log-validation-ppl-to-tensorboard: true --log-timers-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --log-interval: 1 --save-interval: 50 --eval-interval: 1000 @@ -50,6 +50,6 @@ MODEL_ARGS: --data-cache-path: ${DATA_CACHE_PATH} --deterministic-mode: true --ckpt-format: torch_dist - --dist-ckpt-strictness: log_all # backward compatibility for TE changes + --dist-ckpt-strictness: log_all # backward compatibility for TE changes --log-memory-to-tensorboard: true TEST_TYPE: ckpt-resume diff --git 
a/tests/functional_tests/test_cases/t5/t5_release/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_release/model_config.yaml index de1e2d982e..964acdba5c 100644 --- a/tests/functional_tests/test_cases/t5/t5_release/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_release/model_config.yaml @@ -37,7 +37,7 @@ MODEL_ARGS: --pipeline-model-parallel-size: 1 # Data args --data-path: ${DATA_BLEND} - --vocab-file: ${DATA_PATH}/bert-large-cased-vocab.txt + --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --split: 99982,9,9 --data-cache-path: ${DATA_CACHE_PATH} @@ -55,7 +55,7 @@ MODEL_ARGS: --log-num-zeros-in-grad: true --log-params-norm: true --log-validation-ppl-to-tensorboard: true - --timing-log-level: 2 + --timing-log-level: 0 --wandb-project: megatron-core-release-runs --wandb-exp-name: ${WANDB_EXPERIMENT} METRICS: diff --git a/tests/functional_tests/test_cases/t5/t5_220m_weekly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_weekly_mcore_te_tp2_pp1_vp1/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_weekly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_weekly_mcore_te_tp2_pp1_vp1/golden_values_lts_dgx_a100.json diff --git a/tests/functional_tests/test_cases/t5/t5_220m_weekly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_lts_dgx_a100.json b/tests/functional_tests/test_cases/t5/t5_weekly_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_lts_dgx_a100.json similarity index 100% rename from tests/functional_tests/test_cases/t5/t5_220m_weekly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_lts_dgx_a100.json rename to tests/functional_tests/test_cases/t5/t5_weekly_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_lts_dgx_a100.json diff --git 
a/tests/test_utils/python_scripts/generate_jet_trigger_job.py b/tests/test_utils/python_scripts/generate_jet_trigger_job.py index 9c6edc0565..50d8598ae6 100644 --- a/tests/test_utils/python_scripts/generate_jet_trigger_job.py +++ b/tests/test_utils/python_scripts/generate_jet_trigger_job.py @@ -4,7 +4,7 @@ import click import yaml -from tests.test_utils.python_scripts import common +from tests.test_utils.python_scripts import recipe_parser BASE_PATH = pathlib.Path(__file__).parent.resolve() @@ -81,7 +81,7 @@ def main( ): list_of_test_cases = [ test_case - for test_case in common.load_workloads( + for test_case in recipe_parser.load_workloads( scope=scope, container_tag=container_tag, environment=environment, @@ -158,7 +158,7 @@ def main( for test_idx, test_case in enumerate(list_of_test_cases): job_tags = list(tags) - job_tags.append(f"cluster/{common.resolve_cluster_config(cluster)}") + job_tags.append(f"cluster/{recipe_parser.resolve_cluster_config(cluster)}") script = [ "export PYTHONPATH=$(pwd); " diff --git a/tests/test_utils/python_scripts/generate_local_jobs.py b/tests/test_utils/python_scripts/generate_local_jobs.py index 6a16af24a3..4a7cf2d7c1 100644 --- a/tests/test_utils/python_scripts/generate_local_jobs.py +++ b/tests/test_utils/python_scripts/generate_local_jobs.py @@ -11,7 +11,7 @@ import click import yaml -from tests.test_utils.python_scripts import common +from tests.test_utils.python_scripts import recipe_parser def load_script(config_path: str) -> str: @@ -68,7 +68,7 @@ def main( enable_lightweight_mode: bool = False, record_checkpoints: bool = False, ): - workloads = common.load_workloads( + workloads = recipe_parser.load_workloads( container_image="none", scope=scope, model=model, @@ -77,6 +77,8 @@ def main( container_tag="none", ) + print(workloads) + for workload in workloads: if workload.type == "build": continue diff --git a/tests/test_utils/python_scripts/launch_jet_workload.py b/tests/test_utils/python_scripts/launch_jet_workload.py index 
254f522c6f..7dc4a7357a 100644 --- a/tests/test_utils/python_scripts/launch_jet_workload.py +++ b/tests/test_utils/python_scripts/launch_jet_workload.py @@ -17,7 +17,7 @@ from jetclient.facades.objects import log as jet_log from jetclient.services.dtos.pipeline import PipelineStatus -from tests.test_utils.python_scripts import common +from tests.test_utils.python_scripts import recipe_parser BASE_PATH = pathlib.Path(__file__).parent.resolve() DASHBOARD_ENDPOINT = os.getenv("DASHBOARD_ENDPOINT") @@ -70,7 +70,7 @@ def launch_and_wait_for_completion( ).workloads.submit( workloads=[ jetclient.JETWorkloadManifest(**workload) - for workload in common.load_workloads( + for workload in recipe_parser.load_workloads( test_case=test_case, n_repeat=n_repeat, time_limit=(1200 if enable_lightweight_mode else time_limit), @@ -83,7 +83,7 @@ def launch_and_wait_for_completion( record_checkpoints=record_checkpoints, ) ], - config_id=f"mcore/{common.resolve_cluster_config(cluster)}", + config_id=f"mcore/{recipe_parser.resolve_cluster_config(cluster)}", custom_config={ "launchers": {cluster: cluster_config}, "executors": { @@ -116,7 +116,7 @@ def launch_and_wait_for_completion( }, "outputs": { "enabled": True, - "artifacts_storages": [common.resolve_artifact_config(cluster)], + "artifacts_storages": [recipe_parser.resolve_artifact_config(cluster)], }, }, wait_for_validation=True, @@ -288,6 +288,7 @@ def is_flaky_failure(concat_allranks_logs: str) -> bool: or "Unpack failed: incomplete input" in concat_allranks_logs or "unspecified launch failure" in concat_allranks_logs or "free(): corrupted unsorted chunks" in concat_allranks_logs + or "Segfault encountered" in concat_allranks_logs ) diff --git a/tests/test_utils/python_scripts/launch_nemo_run_workload.py b/tests/test_utils/python_scripts/launch_nemo_run_workload.py index d0ba6c4fe8..b068374e30 100644 --- a/tests/test_utils/python_scripts/launch_nemo_run_workload.py +++ b/tests/test_utils/python_scripts/launch_nemo_run_workload.py @@ 
-1,10 +1,44 @@ +import logging import os import pathlib +import sys +from typing import Optional import click import nemo_run as run -from tests.test_utils.python_scripts import common +from tests.test_utils.python_scripts import recipe_parser + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +def is_flaky_failure(concat_allranks_logs: str) -> bool: + """Assumes that certain keywords hint towards intermittent failures""" + + return ( + "The server socket has failed to listen on any local network address." + in concat_allranks_logs + or "Some NCCL operations have failed or timed out." in concat_allranks_logs + or "uncorrectable ECC error encountered" in concat_allranks_logs + or "illegal memory access" in concat_allranks_logs + or "illegal instruction" in concat_allranks_logs + or "torch.distributed.DistNetworkError" in concat_allranks_logs + or "Segmentation fault" in concat_allranks_logs + or "found NaN in" in concat_allranks_logs + or "For debugging consider passing CUDA_LAUNCH_BLOCKING=1" in concat_allranks_logs + or "double free or corruption" in concat_allranks_logs + or "Call to CUDA function failed." 
in concat_allranks_logs + or "Connection reset by peer" in concat_allranks_logs + or "invalid pointer" in concat_allranks_logs + or "malloc(): unaligned tcache chunk detected" in concat_allranks_logs + or "zmq.error.ZMQError: Address already in use" in concat_allranks_logs + or "We couldn't connect to 'https://huggingface.co'" in concat_allranks_logs + or "Unpack failed: incomplete input" in concat_allranks_logs + or "unspecified launch failure" in concat_allranks_logs + or "free(): corrupted unsorted chunks" in concat_allranks_logs + or "Segfault encountered" in concat_allranks_logs + ) @click.command() @@ -13,8 +47,20 @@ @click.option("--test-case", required=True, type=str, help="Test case of the workload") @click.option("--environment", required=True, type=str, help="Environment of the workload") @click.option("--platform", required=True, type=str, help="Platform of the workload") -def main(scope, model, test_case, environment, platform): - workloads = common.load_workloads( +@click.option("--container-image", required=True, type=str, help="Container image of the workload") +@click.option("--data-dir", required=False, type=str, help="Data directory of the workload") +@click.option("--tag", required=False, type=str, help="Tag of the workload") +def main( + scope, + model, + test_case, + environment, + platform, + container_image, + data_dir: Optional[str] = None, + tag: Optional[str] = None, +): + workloads = recipe_parser.load_workloads( container_image="none", scope=scope, model=model, @@ -22,17 +68,17 @@ def main(scope, model, test_case, environment, platform): environment=environment, container_tag="none", platform=platform, + tag=tag, ) workloads = [workload for workload in workloads if workload.type != "build"] - print(workloads) assert len(workloads) == 1, f"Expected exactly one workload, got {len(workloads)}" workload = workloads[0] magic_values = dict(workload.spec) - magic_values["assets_dir"] = "$OUTPUT_PATH" - magic_values["artifacts_dir"] = 
"$OUTPUT_PATH" + magic_values["assets_dir"] = "/opt/megatron-lm/assets_dir" + magic_values["artifacts_dir"] = "/opt/megatron-lm/artifacts_dir" magic_values["environment"] = environment magic_values["test_case"] = workload.spec["test_case"] magic_values["name"] = workload.spec["name"].format(**magic_values) @@ -40,17 +86,13 @@ def main(scope, model, test_case, environment, platform): inline_script = run.Script(inline=workload.spec["script"]) - artifacts = [ - "{host_path}:{mount_path}".format( - mount_path=mount_path, host_path=str(pathlib.Path("/root") / host_path) - ) - for mount_path, host_path in workload.spec["artifacts"].items() - ] + artifacts = [] artifacts.append(f"{os.getcwd()}:/opt/megatron-lm") - print(artifacts) + if data_dir: + artifacts.append(f"{pathlib.Path(data_dir)}:/mnt/artifacts") executor = run.DockerExecutor( - container_image="megatron-core", + container_image=container_image, num_gpus=-1, runtime="nvidia", ipc_mode="host", @@ -59,14 +101,44 @@ def main(scope, model, test_case, environment, platform): "PYTHONUNBUFFERED": "1", "OUTPUT_PATH": os.getcwd(), "ENABLE_LIGHTWEIGHT_MODE": "true", + "N_REPEAT": "1", }, packager=run.Packager(), volumes=artifacts, ) - with run.Experiment("docker-experiment", executor=executor, log_level="INFO") as exp: - _ = exp.add([inline_script], tail_logs=False, name="task-1") - exp.run(detach=False, tail_logs=True, sequential=False) + n_attempts = 0 + while n_attempts < 3: + with run.Experiment("mcore-ci-test", executor=executor, log_level="INFO") as exp: + _ = exp.add([inline_script], tail_logs=False, name="task-1") + + exp.dryrun(log=True) + exp.run(detach=False, tail_logs=True, sequential=False) + + result_dict = exp.status(return_dict=True) + _, job_dict = list(result_dict.items())[0] + succeeded = str(job_dict["status"]) == "SUCCEEDED" + + if succeeded: + logger.info(f"Job succeeded with status: {job_dict["status"]}") + sys.exit(0) + + logger.error(f"Job failed with status: {job_dict["status"]}") + 
log_file_paths = pathlib.Path(os.getcwd()).glob("assets_dir/logs/*/*/attempt_0/*/std*.log") + all_ranks_all_logs = [] + for log_file_path in log_file_paths: + with open(log_file_path, "r") as f: + all_logs = f.readlines() + all_ranks_all_logs.extend(all_logs) + all_ranks_all_logs_string = "\n".join(all_ranks_all_logs) + if is_flaky_failure(all_ranks_all_logs_string): + logger.warning("Detected flaky failure, attempt restart.") + n_attempts += 1 + continue + + sys.exit(1) + + sys.exit(1) if __name__ == "__main__": diff --git a/tests/test_utils/python_scripts/notify.py b/tests/test_utils/python_scripts/notify.py index 4cff0db7f6..7da00dc401 100644 --- a/tests/test_utils/python_scripts/notify.py +++ b/tests/test_utils/python_scripts/notify.py @@ -22,17 +22,6 @@ def get_gitlab_handle(): return gitlab.Gitlab(f"https://{GITLAB_ENDPOINT}", private_token=os.getenv("RO_API_TOKEN")) -def extract_surrounding_text(text, keyword="error", context=400, fallback_length=800): - index = text.rfind(keyword) # Find the last occurrence - if index == -1: - return text[-fallback_length:] # Return last 800 chars if keyword is not found - - start = max(0, index - context) # Ensure we don't go below 0 - end = min(len(text), index + len(keyword)) # Ensure we don't exceed the text length - - return text[start:end] - - def get_jobs_per_bridge(pipeline_id: int, type_of_job: str): bridge = {} for pipeline_bridge in ( diff --git a/tests/test_utils/python_scripts/common.py b/tests/test_utils/python_scripts/recipe_parser.py similarity index 89% rename from tests/test_utils/python_scripts/common.py rename to tests/test_utils/python_scripts/recipe_parser.py index 23c191cc39..e26d04d6f2 100644 --- a/tests/test_utils/python_scripts/common.py +++ b/tests/test_utils/python_scripts/recipe_parser.py @@ -1,12 +1,16 @@ import copy import itertools +import logging import pathlib from typing import List, Optional +import click import yaml BASE_PATH = pathlib.Path(__file__).parent.resolve() +logger = 
logging.getLogger(__name__) + class dotdict(dict): """dot.notation access to dictionary attributes""" @@ -25,6 +29,8 @@ def resolve_cluster_config(cluster: str) -> str: return "draco-oci-ord" if cluster == "dgxh100_coreweave": return "coreweave" + if cluster == "ghci": + return "ghci" raise ValueError(f"Unknown cluster {cluster} provided.") @@ -95,15 +101,15 @@ def filter_by_test_case(workload_manifests: List[dotdict], test_case: str) -> Op workload_manifests = list( workload_manifest for workload_manifest in workload_manifests - if workload_manifest.spec["test_case"] == test_case + if workload_manifest["spec"]["test_case"] == test_case ) if len(workload_manifests) > 1: - print("Duplicate test_case found!") + logger.info("Duplicate test_case found!") return None if len(workload_manifests) == 0: - print("No test_case found!") + logger.info("No test_case found!") return None return workload_manifests[0] @@ -118,7 +124,7 @@ def filter_by_scope(workload_manifests: List[dotdict], scope: str) -> List[dotdi ) if len(workload_manifests) == 0: - print("No test_case found!") + logger.info("No test_case found!") return [] return workload_manifests @@ -136,7 +142,7 @@ def filter_by_environment(workload_manifests: List[dotdict], environment: str) - ) if len(workload_manifests_copy) == 0: - print("No test_case found!") + logger.info("No test_case found!") return [] return workload_manifests_copy @@ -153,7 +159,7 @@ def filter_by_platform(workload_manifests: List[dotdict], platform: str) -> List ) if len(workload_manifests) == 0: - print("No test_case found!") + logger.info("No test_case found!") return [] return workload_manifests @@ -168,7 +174,7 @@ def filter_by_model(workload_manifests: List[dotdict], model: str) -> List[dotdi ) if len(workload_manifests) == 0: - print("No test_case found!") + logger.info("No test_case found!") return [] return workload_manifests @@ -184,7 +190,7 @@ def filter_by_tag(workload_manifests: List[dotdict], tag: str) -> List[dotdict]: ) if 
len(workload_manifests) == 0: - print("No test_case found!") + logger.info("No test_case found!") return [] return workload_manifests @@ -200,7 +206,7 @@ def filter_by_test_cases(workload_manifests: List[dotdict], test_cases: str) -> ) if len(workload_manifests) == 0: - print("No test_case found!") + logger.info("No test_case found!") return [] return workload_manifests @@ -269,7 +275,9 @@ def load_workloads( workload.spec["artifacts"] = { key: value.replace(r"{platforms}", workload.spec["platforms"]) for key, value in ( - workload.spec["artifacts"].items() if "artifacts" in workload.spec else {} + workload.spec["artifacts"].items() + if "artifacts" in workload.spec and workload.spec["artifacts"] is not None + else {} ) } @@ -288,9 +296,16 @@ def load_workloads( return workloads -if __name__ == "__main__": - workflows = load_workloads(container_tag="main") +@click.command() +@click.option("--model", required=False, type=str, default=None, help="Model to select") +@click.option("--test-case", required=False, type=str, default=None, help="Test case to select") +def main(model: Optional[str], test_case: Optional[str]): + workflows = load_workloads(container_tag="main", model=model, test_case=test_case) # Save workflows to YAML file output_file = "workflows.yaml" with open(output_file, "w") as f: yaml.dump([dict(workflow) for workflow in workflows], f) + + +if __name__ == "__main__": + main() diff --git a/tests/test_utils/recipes/bert.yaml b/tests/test_utils/recipes/bert.yaml index f0be62e470..e6d59f67fb 100644 --- a/tests/test_utils/recipes/bert.yaml +++ b/tests/test_utils/recipes/bert.yaml @@ -11,9 +11,6 @@ spec: platforms: dgx_a100 time_limit: n_repeat: - artifacts: - /workspace/data/bert_data: text/the_pile/bert_shard00 - /workspace/checkpoints/bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G_dev: model/mcore_bert/bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_{platforms}_1N8G_dev/28359448 script_setup: | unset https_proxy echo "machine 
gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc @@ -42,12 +39,12 @@ spec: cd /opt/megatron-lm NAME=$(echo {test_case}_{environment} | sed 's/dgx_h100/dgx_a100/g') ARGUMENTS=( - "DATA_PATH=/workspace/data/bert_data" + "DATA_PATH=/mnt/artifacts" "DATA_CACHE_PATH=/workspace/data/cache" "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/tensorboard" "CHECKPOINT_SAVE_PATH={artifacts_dir}/checkpoints" - "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/$NAME" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts" "TRAINING_SCRIPT_PATH=pretrain_bert.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" @@ -59,43 +56,38 @@ spec: bash ./tests/functional_tests/shell_test_utils/run_ci_test.sh ${{ARGUMENTS[@]}} products: - - test_case: [bert_mr_mcore_tp2_pp2_dgx_a100_1N8G] + - test_case: [bert_mcore_tp2_pp2] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - - test_case: [bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G] + - test_case: [bert_mcore_tp2_pp2_local_spec] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - - test_case: [bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G] + - test_case: [bert_mcore_tp2_pp2_resume_torch_dist] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - - test_case: [bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G] + - test_case: [bert_mcore_tp2_pp2_resume_torch_dist_local_spec] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - - test_case: [bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2] + - test_case: [bert_mcore_tp1_pp2] products: - environment: [dev] scope: [nightly] platforms: [dgx_h100] - - test_case: [bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2] + - test_case: [bert_mcore_tp1_pp4_vp2] products: - environment: [dev] scope: 
[nightly] platforms: [dgx_h100] - - test_case: [bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1] + - test_case: [bert_mcore_tp4_pp1] products: - environment: [dev] scope: [nightly] platforms: [dgx_h100] - # - test_case: [bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G] - # products: - # - environment: [dev] - # scope: [mr] - # platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/common.yaml b/tests/test_utils/recipes/ckpt_converter.yaml similarity index 99% rename from tests/test_utils/recipes/common.yaml rename to tests/test_utils/recipes/ckpt_converter.yaml index 5d70586995..f78f184a32 100644 --- a/tests/test_utils/recipes/common.yaml +++ b/tests/test_utils/recipes/ckpt_converter.yaml @@ -34,6 +34,7 @@ spec: rm -rf megatron; cp -a /opt/megatron-lm/megatron ./ script: |- ls + cd /opt/megatron-lm torchrun \ diff --git a/tests/test_utils/recipes/gpt-dynamic-inference-cuda-graphs.yaml b/tests/test_utils/recipes/gpt-dynamic-inference-cuda-graphs.yaml index e96bcaa4ee..47b8d34615 100644 --- a/tests/test_utils/recipes/gpt-dynamic-inference-cuda-graphs.yaml +++ b/tests/test_utils/recipes/gpt-dynamic-inference-cuda-graphs.yaml @@ -10,9 +10,6 @@ spec: gpus: 1 n_repeat: 1 platforms: dgx_a100 - artifacts: - /workspace/data/mcore_mistral/model: model/mcore_mistral/nemo_minitron-0.5b/v1 - /workspace/data/mcore_mistral/tokenizer: model/mcore_mistral/nemo_minitron-0.5b/v1 script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc @@ -44,7 +41,7 @@ spec: --tee "0:3,7:3" \ --redirects "3" \ --nproc_per_node 1 \ - tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_validation/cuda_graphs.py --checkpoint-dir /workspace/data/mcore_mistral/model --tokenizer-model /workspace/data/mcore_mistral/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json + tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_validation/cuda_graphs.py 
--checkpoint-dir /workspace/data/model/mcore_mistral --tokenizer-model /workspace/data/model/mcore_mistral/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json products: - test_case: [gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_validation] diff --git a/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml b/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml index b276ac66d8..dd8cf6b945 100644 --- a/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml +++ b/tests/test_utils/recipes/gpt-dynamic-inference-with-coordinator.yaml @@ -10,9 +10,6 @@ spec: gpus: 1 n_repeat: 1 platforms: dgx_a100 - artifacts: - /workspace/data/mcore_mistral/model: model/mcore_mistral/nemo_minitron-0.5b/v1 - /workspace/data/mcore_mistral/tokenizer: model/mcore_mistral/nemo_minitron-0.5b/v1 script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc @@ -40,9 +37,9 @@ spec: cd /opt/megatron-lm ARGUMENTS=( - "CHECKPOINT_LOAD_PATH=/workspace/data" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts" "CHECKPOINT_SAVE_PATH=/tmp/checkpoints" - "DATA_PATH=/workspace/data" + "DATA_PATH=null" "DATA_CACHE_PATH=/workspace/data/cache" "TRAINING_SCRIPT_PATH=examples/inference/gpt/gpt_dynamic_inference_with_coordinator.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" @@ -50,7 +47,7 @@ spec: "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/generations_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" - "ENABLE_LIGHTWEIGHT_MODE=false" + "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" ) @@ -65,5 +62,5 @@ products: - test_case: [gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/gpt-dynamic-inference.yaml b/tests/test_utils/recipes/gpt-dynamic-inference.yaml index 
cd7bfd3fbe..914d3c0a75 100644 --- a/tests/test_utils/recipes/gpt-dynamic-inference.yaml +++ b/tests/test_utils/recipes/gpt-dynamic-inference.yaml @@ -10,10 +10,6 @@ spec: gpus: 1 n_repeat: 1 platforms: dgx_a100 - artifacts: - /workspace/data/mcore_mistral/model: model/mcore_mistral/nemo_minitron-0.5b/v1 - /workspace/data/mcore_mistral/tokenizer: model/mcore_mistral/nemo_minitron-0.5b/v1 - /workspace/data/sharegpt/filtered-benchmark: text/sharegpt-vicuna/filtered script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc @@ -41,17 +37,17 @@ spec: cd /opt/megatron-lm ARGUMENTS=( - "CHECKPOINT_LOAD_PATH=/workspace/data" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts" "CHECKPOINT_SAVE_PATH=/tmp/checkpoints" - "DATA_PATH=/workspace/data" - "DATA_CACHE_PATH=/workspace/data/cache" + "DATA_PATH=/mnt/artifacts/" + "DATA_CACHE_PATH=/workspace/data/cache" "TRAINING_SCRIPT_PATH=examples/inference/gpt/gpt_dynamic_inference.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json" "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/generations_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" - "ENABLE_LIGHTWEIGHT_MODE=false" + "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" ) @@ -61,17 +57,17 @@ products: - test_case: [gpt_dynamic_inference_tp1_pp1_583m_logitsmatch] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt_dynamic_inference_tp8_pp1_583m_logitsmatch] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: 
[gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only] products: diff --git a/tests/test_utils/recipes/gpt-grads.yaml b/tests/test_utils/recipes/gpt-grads.yaml index ea56936231..cdd3a050ff 100644 --- a/tests/test_utils/recipes/gpt-grads.yaml +++ b/tests/test_utils/recipes/gpt-grads.yaml @@ -10,11 +10,6 @@ spec: gpus: 8 n_repeat: 1 platforms: dgx_h100 - artifacts: - /workspace/data/gpt3_data: text/the_pile/shard00 - /workspace/checkpoints/gpt3_mr_mcore_reruns_resume_check_grads_dev: model/mcore_gpt/gpt3_4b_pyt/25.03.05_bf16_rerun-enabled_v2 - /workspace/checkpoints/gpt3_mr_mcore_reruns_resume_check_grads_lts: model/mcore_gpt/gpt3_4b_pyt/25.03.05_bf16_rerun-enabled_v2 - /workspace/checkpoints/gpt_teacher: model/gpt_dummy_pyt/ckpt/24.10.0_bf16_teacher script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc @@ -47,12 +42,12 @@ spec: # Note: This test is very expensive, so we hardcode N_REPEAT=1 ARGUMENTS=( - "DATA_PATH=/workspace/data/gpt3_data" + "DATA_PATH=/mnt/artifacts" "DATA_CACHE_PATH=/workspace/data/cache" "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/tensorboard" "CHECKPOINT_SAVE_PATH={artifacts_dir}/checkpoints" - "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/$NAME" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts" "TRAINING_SCRIPT_PATH=pretrain_gpt.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" @@ -64,8 +59,8 @@ spec: bash ./tests/functional_tests/shell_test_utils/run_ci_test.sh ${{ARGUMENTS[@]}} products: - - test_case: [gpt3_mr_mcore_reruns_resume_check_grads] + - test_case: [gpt3_mcore_reruns_resume_check_grads] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/gpt-nemo.yaml 
b/tests/test_utils/recipes/gpt-nemo.yaml index 848c1a5607..14c2106ed3 100644 --- a/tests/test_utils/recipes/gpt-nemo.yaml +++ b/tests/test_utils/recipes/gpt-nemo.yaml @@ -44,7 +44,7 @@ spec: "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/tensorboard" "CHECKPOINT_SAVE_PATH={artifacts_dir}/checkpoints" - "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/{name}" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts/model/{name}" "TRAINING_SCRIPT_PATH=\"nemo llm pretrain -y --factory {nemo_model}\"" "TRAINING_PARAMS_PATH=/opt/megatron-lm/tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" "GOLDEN_VALUES_PATH=/opt/megatron-lm/tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" @@ -57,36 +57,36 @@ products: - test_case: [llama3-nemo_8b_mr_mbs1_gbs8_mcore_te_8experts_tp2_ep2_pp2_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [deprecated] platforms: [dgx_h100] nemo_model: [llama3_8b] - test_case: [llama3-nemo_8b_mr_mbs4_gbs64_mcore_te_tp1_pp1_cp2_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [deprecated] platforms: [dgx_h100] nemo_model: [llama3_8b] - test_case: [mixtral-nemo_8x7b_mr_mbs1_gbs8_mcore_te_tp2_pp1_ep2_1N8G] products: - environment: [dev] - scope: [mr] + scope: [deprecated] platforms: [dgx_h100] nemo_model: [mixtral_8x7b] - test_case: [gemma2-nemo_2b_mr_mbs1_gbs8_mcore_te_tp4_pp1_cp1_1N8G] products: - environment: [dev] - scope: [mr] + scope: [deprecated] platforms: [dgx_h100] nemo_model: [gemma2_2b] - test_case: [bert-nemo_340m_mr_mbs2_gbs32_mcore_te_tp2_pp2_1N8G] products: - environment: [dev] - scope: [mr] + scope: [deprecated] platforms: [dgx_h100] nemo_model: [bert_340m] - test_case: [t5-nemo_220m_mr_mbs4_gbs64_te_tp1_pp1_1N8G] products: - environment: [dev] - scope: [mr] + scope: [deprecated] platforms: [dgx_h100] nemo_model: [t5_220m] diff --git a/tests/test_utils/recipes/gpt-static-inference.yaml b/tests/test_utils/recipes/gpt-static-inference.yaml 
index 424c424bbb..9ed7f6c09f 100644 --- a/tests/test_utils/recipes/gpt-static-inference.yaml +++ b/tests/test_utils/recipes/gpt-static-inference.yaml @@ -10,11 +10,6 @@ spec: gpus: 1 n_repeat: 1 platforms: dgx_a100 - artifacts: - /workspace/data/mcore_mistral/model: model/mcore_mistral/nemo_minitron-0.5b/v1 - /workspace/data/mcore_mistral/tokenizer: model/mcore_mistral/nemo_minitron-0.5b/v1 - /workspace/data/deepseek_16b_pyt/model: model/deepseek_16b_pyt/dcp/mcore-v1_bf16 - /workspace/data/deepseek_16b_pyt/tokenizer: model/deepseek_16b_pyt/dcp/mcore-v1_bf16 script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc @@ -42,17 +37,17 @@ spec: cd /opt/megatron-lm ARGUMENTS=( - "CHECKPOINT_LOAD_PATH=/workspace/data" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts/" "CHECKPOINT_SAVE_PATH=/tmp/checkpoints" - "DATA_PATH=/workspace/data" - "DATA_CACHE_PATH=/workspace/data/cache" + "DATA_PATH=null" + "DATA_CACHE_PATH=/workspace/data/cache" "TRAINING_SCRIPT_PATH=examples/inference/gpt/gpt_static_inference.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/generations_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" - "ENABLE_LIGHTWEIGHT_MODE=false" + "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" ) @@ -62,20 +57,20 @@ products: - test_case: [gpt_static_inference_tp1_pp1_583m_logitsmatch] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt_static_inference_tp1_pp1_583m_cudagraphs] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs] products: - environment: [dev] - scope: [mr] 
+ scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/gpt.yaml b/tests/test_utils/recipes/gpt.yaml index b29fc21e87..cef0745f12 100644 --- a/tests/test_utils/recipes/gpt.yaml +++ b/tests/test_utils/recipes/gpt.yaml @@ -10,19 +10,6 @@ spec: gpus: 8 n_repeat: 5 platforms: dgx_a100 - artifacts: - /workspace/data/gpt3_data: text/the_pile/shard00 - /workspace/checkpoints/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G_dev: model/mcore_gpt/gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_{platforms}_1N8G_dev/24475828 - /workspace/checkpoints/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G_dev: model/mcore_gpt/gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_{platforms}_1N8G_dev/28359448 - /workspace/checkpoints/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G_dev: model/mcore_gpt/gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_{platforms}_1N8G_dev/28359448 - /workspace/checkpoints/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G_dev: model/mcore_gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_{platforms}_1N8G_dev/28359448 - /workspace/checkpoints/gpt3_mr_mcore_reruns_resume_dev: model/mcore_gpt/gpt3_4b_pyt/25.03.05_bf16_rerun-enabled_v2 - /workspace/checkpoints/gpt3_mr_mcore_reruns_resume_lts: model/mcore_gpt/gpt3_4b_pyt/25.03.05_bf16_rerun-enabled_v2 - /workspace/checkpoints/gpt3_mr_mcore_reruns_reshard_dev: model/mcore_gpt/gpt3_4b_pyt/25.03.05_bf16_rerun-enabled_v2 - /workspace/checkpoints/gpt3_mr_mcore_reruns_reshard_lts: model/mcore_gpt/gpt3_4b_pyt/25.03.05_bf16_rerun-enabled_v2 - /workspace/checkpoints/gpt3_mr_mcore_reruns_persistent_2_dev: 
model/mcore_gpt/gpt3_4b_pyt/25.03.05_bf16_rerun-persistent_v2 - /workspace/checkpoints/gpt3_mr_mcore_reruns_persistent_2_lts: model/mcore_gpt/gpt3_4b_pyt/25.03.05_bf16_rerun-persistent_v2 - /workspace/checkpoints/gpt_teacher: model/gpt_dummy_pyt/ckpt/24.10.0_bf16_teacher script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc @@ -53,12 +40,12 @@ spec: NAME=$(echo {test_case}_{environment} | sed 's/dgx_h100/dgx_a100/g') ARGUMENTS=( - "DATA_PATH=/workspace/data/gpt3_data" + "DATA_PATH=/mnt/artifacts" "DATA_CACHE_PATH=/workspace/data/cache" "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/tensorboard" "CHECKPOINT_SAVE_PATH={artifacts_dir}/checkpoints" - "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/$NAME" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts/" "TRAINING_SCRIPT_PATH=pretrain_gpt.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" @@ -73,43 +60,43 @@ products: ####################################################################### # Nightly tests: Run both DEV and LTS unless something is flaky # ####################################################################### - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather] + - test_case: [gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather] products: - environment: [dev] scope: [nightly] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2] + - test_case: [gpt3_mcore_tp1_pp2] products: - environment: [lts] scope: [nightly] - environment: [dev] scope: [nightly] platforms: [dgx_h100] - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist] + - test_case: 
[gpt3_mcore_tp1_pp2_resume_torch_dist] products: - environment: [dev, lts] scope: [nightly] - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4] + - test_case: [gpt3_mcore_tp1_pp4] products: - environment: [lts] scope: [nightly] - environment: [dev] scope: [nightly] platforms: [dgx_h100] - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist] + - test_case: [gpt3_mcore_tp1_pp4_resume_torch_dist] products: - environment: [dev, lts] scope: [nightly] - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch] + - test_case: [gpt3_mcore_tp4_pp1_resume_torch] products: - environment: [lts] scope: [nightly] - environment: [dev] scope: [nightly] platforms: [dgx_h100] - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist] + - test_case: [gpt3_mcore_tp4_pp1_resume_torch_dist] products: - environment: [lts] scope: [nightly] @@ -120,326 +107,326 @@ products: # MR tests: Mostly DEV on MR, and LTS on nightly cadence, except for # # some very important tests. 
# ####################################################################### - - test_case: [gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - # - test_case: [gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G] + # - test_case: [gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic] # products: # - environment: [dev] # scope: [mr] # - environment: [lts] # scope: [nightly] # Non-deterministic: #487 - - test_case: [gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # outdated TE: #501 - - test_case: [gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear] products: - environment: [dev] 
scope: [mr] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # non-determinism: #436 - - test_case: [gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # non-determinism: #437 - - test_case: [gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - # - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G] + # - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist] # products: # - environment: [dev] # scope: [mr] # platforms: [dgx_h100] # Hangs: #513 # - environment: [lts] 
# scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather] products: # - environment: [dev] # scope: [mr] # platforms: [dgx_h100] # Hangs: #513 - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied] products: # - environment: [dev] # scope: [mr] # Hangs: #513 # platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap] products: # - environment: [dev] # scope: [mr] # Hangs: #513 # platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split_dgx_a100_1N8G] + - test_case: 
[gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_cp2_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_cp2] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_dp_last_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_cp2_nondeterministic] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: 
[gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_mla_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_mla] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - - test_case: 
[gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader] products: # - environment: [dev] # scope: [mr] # Hangs: #513 # platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: 
[gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G] + - test_case: [gpt3_mcore_tp2_pp2_uninstall_te] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed] + - test_case: [gpt3_7b_tp1_pp4_memory_speed] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # OOM: #434 - - test_case: [gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed] + - test_case: [gpt3_7b_tp4_pp1_memory_speed] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # OOM: #434 - - test_case: [gpt3_mr_mcore_te_tp2_zp_z3_resume_fsdp_dtensor_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp1_modelopt_distill_resume] + - test_case: [gpt3_mcore_te_tp2_pp1_modelopt_distill_resume] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] # Outdated: #502 - # - test_case: [gpt3_mr_mcore_te_tp2_pp1_fsdp2_resume_torch_dist_dgx_a100_1N8G] + # - test_case: [gpt3_mcore_te_tp2_pp1_fsdp2_resume_torch_dist] # products: # - environment: [dev] # scope: [mr] # Broken: #484 @@ -448,120 +435,120 @@ products: ####################################################################### # Super important MR tests that run for both DEV and LTS per MR # ####################################################################### - - test_case: [gpt3_mr_mcore_reruns_persistent_1] + - test_case: [gpt3_mcore_reruns_persistent_1] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] - # - test_case: [gpt3_mr_mcore_reruns_persistent_2] + # - test_case: 
[gpt3_mcore_reruns_persistent_2] # products: # - environment: [dev] # scope: [mr] # platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer] products: - environment: [lts] scope: [mr] - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [dev] scope: [mr-slim] platforms: [dgx_h100] - - test_case: [gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [mr] - - test_case: [gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather] products: - environment: [lts] scope: [mr] - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [dev] scope: [mr-slim] platforms: [dgx_h100] - - test_case: [gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - environment: [lts] scope: [mr] - # - test_case: [gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G] + # - test_case: [gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone] # products: # - environment: [dev] # scope: [mr] # platforms: [dgx_h100] - # - test_case: [gpt3_mr_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer_dgx_a100_1N8G] + # - test_case: [gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer] # 
products: # - environment: [dev] # scope: [mr] # platforms: [dgx_h100] - # - test_case: [gpt3_mr_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu_dgx_a100_1N8G] + # - test_case: [gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu] # products: # - environment: [dev] # scope: [mr] # platforms: [dgx_h100] - # - test_case: [gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G] + # - test_case: [gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic] # products: # - environment: [dev] # scope: [mr] # platforms: [dgx_a100, dgx_h100] - # - test_case: [gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap] + # - test_case: [gpt3_weekly_dgx_b200_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap] # products: # - environment: [dev] # scope: [weekly] # platforms: [dgx_b200] - # - test_case: [gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp] + # - test_case: [gpt3_weekly_dgx_b200_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp] # products: # - environment: [dev] # scope: [weekly] # platforms: [dgx_b200] - # - test_case: [gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap] + # - test_case: [gpt3_weekly_dgx_b200_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap] # products: # - environment: [dev] # scope: [weekly] # platforms: [dgx_b200] - # - test_case: [gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_mxfp8_tp_sp_cp] + # - test_case: [gpt3_weekly_dgx_b200_mcore_tp4_cp2_mxfp8_tp_sp_cp] # products: # - environment: [dev] # scope: [weekly] # platforms: [dgx_b200] - # - test_case: [gpt3_345m_weekly_dgx_b200_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap] + # - test_case: [gpt3_weekly_dgx_b200_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap] # products: # - environment: [dev] # scope: [weekly] # platforms: [dgx_b200] - - test_case: 
[gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap] + - test_case: [gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap] products: - environment: [dev] scope: [weekly] platforms: [dgx_h100] - # - test_case: [gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp] + # - test_case: [gpt3_weekly_dgx_h100_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp] # products: # - environment: [dev] # scope: [weekly] # platforms: [dgx_h100] - # - test_case: [gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap] + # - test_case: [gpt3_weekly_dgx_h100_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap] # products: # - environment: [dev] # scope: [weekly] # platforms: [dgx_h100] - - test_case: [gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap] + - test_case: [gpt3_weekly_dgx_h100_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap] products: - environment: [dev] scope: [weekly] platforms: [dgx_h100] - # - test_case: [gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G] + # - test_case: [gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te] # products: # - environment: [dev, lts] # scope: [mr] # Non-deterministic: #483 diff --git a/tests/test_utils/recipes/mamba-static-inference.yaml b/tests/test_utils/recipes/mamba-static-inference.yaml index f0e29999d4..e727c4db5e 100644 --- a/tests/test_utils/recipes/mamba-static-inference.yaml +++ b/tests/test_utils/recipes/mamba-static-inference.yaml @@ -10,8 +10,6 @@ spec: gpus: 1 n_repeat: 1 platforms: dgx_a100 - artifacts: - /workspace/data/mamba_hybrid_2b: model/mamba_hybrid_2b/dcp/mcore-v1_bf16 script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc @@ -39,9 +37,9 @@ spec: cd /opt/megatron-lm ARGUMENTS=( - "CHECKPOINT_LOAD_PATH=/workspace/data" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts" 
"CHECKPOINT_SAVE_PATH=/tmp/checkpoints" - "DATA_PATH=/workspace/data" + "DATA_PATH=null" "DATA_CACHE_PATH=/workspace/data/cache" "TRAINING_SCRIPT_PATH=examples/inference/gpt/gpt_static_inference.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" @@ -49,7 +47,7 @@ spec: "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/generations_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" - "ENABLE_LIGHTWEIGHT_MODE=false" + "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" ) @@ -59,10 +57,10 @@ products: - test_case: [hybrid_static_inference_tp1_pp1_2B_logitsmatch] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [hybrid_static_inference_tp1_pp1_2B_cudagraphs] products: - environment: [dev] scope: [mr] - platforms: [dgx_h100] + platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/mamba.yaml b/tests/test_utils/recipes/mamba.yaml index 7c1f9a3627..0f8a4085ea 100644 --- a/tests/test_utils/recipes/mamba.yaml +++ b/tests/test_utils/recipes/mamba.yaml @@ -10,8 +10,6 @@ spec: gpus: 8 n_repeat: 5 platforms: dgx_a100 - artifacts: - /workspace/data/gpt3_data: text/the_pile/shard00 script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc @@ -40,12 +38,12 @@ spec: cd /opt/megatron-lm ARGUMENTS=( - "DATA_PATH=/workspace/data/gpt3_data" + "DATA_PATH=/mnt/artifacts" "DATA_CACHE_PATH=/workspace/data/cache" "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/tensorboard" "CHECKPOINT_SAVE_PATH={artifacts_dir}/checkpoints" - "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/{name}" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts/model/{name}" "TRAINING_SCRIPT_PATH=pretrain_mamba.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" 
"GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" @@ -60,7 +58,7 @@ products: - test_case: [hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # disabled until triton is bumped # scope: [nightly] @@ -76,7 +74,7 @@ products: - test_case: [hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # disabled until triton is bumped # scope: [nightly] @@ -84,7 +82,7 @@ products: - test_case: [hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # - environment: [lts] # disabled until triton is bumped # scope: [nightly] diff --git a/tests/test_utils/recipes/mimo.yaml b/tests/test_utils/recipes/mimo.yaml index dfde82656d..41e735776f 100644 --- a/tests/test_utils/recipes/mimo.yaml +++ b/tests/test_utils/recipes/mimo.yaml @@ -11,7 +11,7 @@ spec: platforms: dgx_h100 artifacts: /workspace/data/llava_pretrain_energon: mixed/mcore_mimo_vlm/llava_pretrain_energon - /workspace/checkpoints/vicuna_7b_pyt/dcp/mcore-v1.5_fp32: model/vicuna_7b_pyt/dcp/mcore-v1.5_fp32 + /mnt/artifacts/model/vicuna_7b_pyt/dcp/mcore-v1.5_fp32: model/vicuna_7b_pyt/dcp/mcore-v1.5_fp32 time_limit: n_repeat: test_case: @@ -44,12 +44,12 @@ spec: cd /opt/megatron-lm NAME=$(echo {test_case}_{environment} | sed 's/dgx_h100/dgx_a100/g') ARGUMENTS=( - "DATA_PATH='/workspace/data/llava_pretrain_energon/'" - "DATA_CACHE_PATH='-'" + "DATA_PATH=/mnt/artifacts" + "DATA_CACHE_PATH=/workspace/data/cache" "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/tensorboard" "CHECKPOINT_SAVE_PATH={artifacts_dir}/checkpoints" - "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/{name}/checkpoints" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts" 
"TRAINING_SCRIPT_PATH=./examples/mimo/train.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" diff --git a/tests/test_utils/recipes/moe-dynamic-inference.yaml b/tests/test_utils/recipes/moe-dynamic-inference.yaml index 36d09cb36c..c9d1be57ad 100644 --- a/tests/test_utils/recipes/moe-dynamic-inference.yaml +++ b/tests/test_utils/recipes/moe-dynamic-inference.yaml @@ -10,9 +10,6 @@ spec: gpus: 8 n_repeat: 1 platforms: dgx_a100 - artifacts: - /workspace/data/deepseek_16b_pyt/model: model/deepseek_16b_pyt/dcp/mcore-v1_bf16 - /workspace/data/deepseek_16b_pyt/tokenizer: model/deepseek_16b_pyt/dcp/mcore-v1_bf16 script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc @@ -40,9 +37,9 @@ spec: cd /opt/megatron-lm ARGUMENTS=( - "CHECKPOINT_LOAD_PATH=/workspace/data" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts" "CHECKPOINT_SAVE_PATH=/tmp/checkpoints" - "DATA_PATH=/workspace/data" + "DATA_PATH=null" "DATA_CACHE_PATH=/workspace/data/cache" "TRAINING_SCRIPT_PATH=examples/inference/gpt/gpt_dynamic_inference.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" @@ -50,7 +47,7 @@ spec: "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/generations_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" - "ENABLE_LIGHTWEIGHT_MODE=false" + "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" ) @@ -60,7 +57,7 @@ products: - test_case: [gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch] products: diff --git a/tests/test_utils/recipes/moe-static-inference.yaml 
b/tests/test_utils/recipes/moe-static-inference.yaml index c1411283ad..c11cd29459 100644 --- a/tests/test_utils/recipes/moe-static-inference.yaml +++ b/tests/test_utils/recipes/moe-static-inference.yaml @@ -10,9 +10,6 @@ spec: gpus: 8 n_repeat: 1 platforms: dgx_a100 - artifacts: - /workspace/data/deepseek_16b_pyt/model: model/deepseek_16b_pyt/dcp/mcore-v1_bf16 - /workspace/data/deepseek_16b_pyt/tokenizer: model/deepseek_16b_pyt/dcp/mcore-v1_bf16 script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc @@ -40,9 +37,9 @@ spec: cd /opt/megatron-lm ARGUMENTS=( - "CHECKPOINT_LOAD_PATH=/workspace/data" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts" "CHECKPOINT_SAVE_PATH=/tmp/checkpoints" - "DATA_PATH=/workspace/data" + "DATA_PATH=null" "DATA_CACHE_PATH=/workspace/data/cache" "TRAINING_SCRIPT_PATH=examples/inference/gpt/gpt_static_inference.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" @@ -50,7 +47,7 @@ spec: "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/generations_{environment}_$CLUSTER.json" "N_REPEAT={n_repeat}" - "ENABLE_LIGHTWEIGHT_MODE=false" + "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" ) diff --git a/tests/test_utils/recipes/moe.yaml b/tests/test_utils/recipes/moe.yaml index 972288bd90..c26faf9109 100644 --- a/tests/test_utils/recipes/moe.yaml +++ b/tests/test_utils/recipes/moe.yaml @@ -10,10 +10,6 @@ spec: gpus: 8 n_repeat: 5 platforms: dgx_a100 - artifacts: - /workspace/data/gpt3_data: text/the_pile/shard00 - /workspace/checkpoints/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G_dev: model/mcore_gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_{platforms}_1N8G_dev/28359448 - 
/workspace/checkpoints/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G_dev: model/mcore_gpt/gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_{platforms}_1N8G_dev/28359448 script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc @@ -44,12 +40,12 @@ spec: NAME=$(echo {test_case}_{environment} | sed 's/dgx_h100/dgx_a100/g') ARGUMENTS=( - "DATA_PATH=/workspace/data/gpt3_data" + "DATA_PATH=/mnt/artifacts" "DATA_CACHE_PATH=/workspace/data/cache" "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/tensorboard" "CHECKPOINT_SAVE_PATH={artifacts_dir}/checkpoints" - "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/$NAME" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts" "TRAINING_SCRIPT_PATH=pretrain_gpt.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" @@ -64,52 +60,52 @@ products: ####################################################################### # Nightly tests: Run both DEV and LTS unless something is flaky # ####################################################################### - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel] + - test_case: [gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel] products: - environment: [dev] scope: [nightly] platforms: [dgx_a100, dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel] + - test_case: [gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel] products: - environment: [dev] scope: [nightly] platforms: [dgx_a100, dgx_h100] - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel] + - test_case: [gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel] products: - 
environment: [dev] scope: [nightly] platforms: [dgx_a100, dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last] + - test_case: [gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last] products: - environment: [dev] scope: [nightly] platforms: [dgx_a100, dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last] + - test_case: [gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last] products: - environment: [dev] scope: [nightly] platforms: [dgx_a100, dgx_h100] - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic] + - test_case: [gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic] products: - environment: [dev] scope: [nightly] platforms: [dgx_a100, dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last] + - test_case: [gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last] products: - environment: [dev] scope: [nightly] platforms: [dgx_a100, dgx_h100] - environment: [lts] scope: [nightly] - # - test_case: [gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts] + # - test_case: [gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts] # products: # non-determinism: #478 # - environment: [dev, lts] # scope: [nightly] @@ -120,69 +116,69 @@ products: # MR tests: Mostly DEV on MR, and LTS on nightly cadence, except for # # some very important tests. 
# ####################################################################### - - test_case: [gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - # - test_case: [gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8] + # - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8] # products: # - environment: [dev] # scope: [mr] # platforms: [dgx_h100] # hang: #513 - - test_case: [gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph] + - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] # hang: #513 - # - test_case: [gpt3_mr_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental] + # - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental] # products: # - environment: [dev] # scope: [mr] # platforms: [dgx_h100] # hang: #513 - - test_case: [gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - - test_case: [gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - - test_case: [gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - - 
test_case: [gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4] products: # - environment: [dev] # scope: [mr] # platforms: [dgx_h100] # hang: #513 - environment: [lts] scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] # TODO: The migration of custom fsdp causes EP + FSDP to be temporarily unavailable, which will be fixed in a subsequent MR. - # - test_case: [gpt3_mr_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G] + # - test_case: [gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router] # products: # - environment: [dev] # scope: [mr] # platforms: [dgx_h100] # - environment: [lts] # scope: [nightly] - - test_case: [gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G] + - test_case: [gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [lts] scope: [nightly] - - test_case: [gpt3_moe_mr_mcore_te_ep8_resume_torch_dist_dist_optimizer] + - test_case: [gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer] products: - environment: [dev] scope: [mr] @@ -190,12 +186,12 @@ products: ####################################################################### # Super important MR tests that run for both DEV and LTS per MR # ####################################################################### - # - test_case: [gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G] + # - test_case: [gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer] # products: # - environment: [dev] # scope: [mr] # platforms: [dgx_h100] - # - 
test_case: [gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G] + # - test_case: [gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM] # products: # - environment: [dev] # scope: [mr] @@ -203,7 +199,7 @@ products: ########################### # Merge train tests # ########################### - - test_case: [gpt3_moe_mr_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer] + - test_case: [gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer] products: - environment: [dev] scope: [mr] @@ -214,7 +210,7 @@ products: - test_case: [gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - environment: [dev] scope: [mr-slim] diff --git a/tests/test_utils/recipes/multimodal-llava.yaml b/tests/test_utils/recipes/multimodal-llava.yaml index 4de7f0a9c0..80a30f050b 100644 --- a/tests/test_utils/recipes/multimodal-llava.yaml +++ b/tests/test_utils/recipes/multimodal-llava.yaml @@ -46,7 +46,7 @@ spec: "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/tensorboard" "CHECKPOINT_SAVE_PATH={artifacts_dir}/checkpoints" - "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/{name}/checkpoints" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts/model/{name}/checkpoints" "TRAINING_SCRIPT_PATH=pretrain_vlm.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" @@ -58,13 +58,13 @@ spec: bash ./tests/functional_tests/shell_test_utils/run_ci_test.sh ${{ARGUMENTS[@]}} products: - - test_case: [multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G] + - test_case: [multimodal_llava_mcore_te_tp1_pp1] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] - - test_case: [multimodal_llava_mr_mcore_te_tp4_sp_cp2_dgx_a100_1N8G] + - test_case: 
[multimodal_llava_mcore_te_tp4_sp_cp2] products: - environment: [dev] - scope: [mr] + scope: [mr, mr-github] platforms: [dgx_h100] diff --git a/tests/test_utils/recipes/t5.yaml b/tests/test_utils/recipes/t5.yaml index 31a72e9b5a..c6c4fc7cb1 100644 --- a/tests/test_utils/recipes/t5.yaml +++ b/tests/test_utils/recipes/t5.yaml @@ -9,9 +9,6 @@ spec: nodes: 1 gpus: 8 platforms: dgx_a100 - artifacts: - /workspace/data/t5_data: text/the_pile/t5_shard00 - /workspace/checkpoints/t5_220m_mr_mcore_te_tp2_pp2_frozen_resume_torch_dgx_a100_1N8G_dev: model/mcore_t5/t5_220m_mr_mcore_te_tp2_pp2_frozen_resume_torch_{platforms}_1N8G_dev/28359448 script_setup: | unset https_proxy echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc @@ -42,12 +39,12 @@ spec: NAME=$(echo {test_case}_{environment} | sed 's/dgx_h100/dgx_a100/g') ARGUMENTS=( - "DATA_PATH=/workspace/data/t5_data" + "DATA_PATH=/mnt/artifacts" "DATA_CACHE_PATH=/workspace/data/cache" "OUTPUT_PATH={assets_dir}" "TENSORBOARD_PATH={assets_dir}/tensorboard" "CHECKPOINT_SAVE_PATH={artifacts_dir}/checkpoints" - "CHECKPOINT_LOAD_PATH=/workspace/checkpoints/$NAME" + "CHECKPOINT_LOAD_PATH=/mnt/artifacts" "TRAINING_SCRIPT_PATH=pretrain_t5.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_$CLUSTER.json" @@ -59,57 +56,57 @@ spec: bash ./tests/functional_tests/shell_test_utils/run_ci_test.sh ${{ARGUMENTS[@]}} products: - - test_case: [t5_11b_mr_mcore_tp4_pp1_dgx_a100_1N8G] + - test_case: [t5_11b_mcore_tp4_pp1] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - - test_case: [t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G] + - test_case: [t5_mcore_te_tp4_pp1] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - - test_case: [t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G] + - test_case: 
[t5_mcore_te_tp4_pp1_resume_torch_dist] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - - test_case: [t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G] + - test_case: [t5_mcore_tp4_pp1] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - - test_case: [t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G] + - test_case: [t5_mcore_tp4_pp1_resume_torch_dist] products: - environment: [dev] scope: [mr] platforms: [dgx_h100] - - test_case: [t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch] + - test_case: [t5_mcore_te_tp1_pp1_vp1_resume_torch] products: - environment: [dev] scope: [nightly] platforms: [dgx_a100, dgx_h100] - - test_case: [t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1] + - test_case: [t5_mcore_te_tp2_pp1_vp1] products: - environment: [dev] scope: [nightly] platforms: [dgx_a100, dgx_h100] - - test_case: [t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel] + - test_case: [t5_mcore_te_tp2_pp1_vp1_sequence_parallel] products: - environment: [dev] scope: [nightly] platforms: [dgx_a100, dgx_h100] - - test_case: [t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1] + - test_case: [t5_mcore_tp1_pp1_vp1] products: - environment: [dev] scope: [nightly] platforms: [dgx_a100, dgx_h100] - - test_case: [t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch] + - test_case: [t5_mcore_tp1_pp1_vp1_resume_torch] products: - environment: [dev] scope: [nightly] platforms: [dgx_a100, dgx_h100] - - test_case: [t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1] + - test_case: [t5_mcore_tp2_pp1_vp1] products: - environment: [dev] scope: [nightly] diff --git a/uv.lock b/uv.lock index 20e4ab8afd..f3db23e7e1 100644 --- a/uv.lock +++ b/uv.lock @@ -898,101 +898,101 @@ wheels = [ [[package]] name = "coverage" -version = "7.10.7" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/51/26/d22c300112504f5f9a9fd2297ce33c35f3d353e4aeb987c8419453b2a7c2/coverage-7.10.7.tar.gz", hash = "sha256:f4ab143ab113be368a3e9b795f9cd7906c5ef407d6173fe9675a902e1fffc239", size = 827704, upload-time = "2025-09-21T20:03:56.815Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e5/6c/3a3f7a46888e69d18abe3ccc6fe4cb16cccb1e6a2f99698931dafca489e6/coverage-7.10.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fc04cc7a3db33664e0c2d10eb8990ff6b3536f6842c9590ae8da4c614b9ed05a", size = 217987, upload-time = "2025-09-21T20:00:57.218Z" }, - { url = "https://files.pythonhosted.org/packages/03/94/952d30f180b1a916c11a56f5c22d3535e943aa22430e9e3322447e520e1c/coverage-7.10.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e201e015644e207139f7e2351980feb7040e6f4b2c2978892f3e3789d1c125e5", size = 218388, upload-time = "2025-09-21T20:01:00.081Z" }, - { url = "https://files.pythonhosted.org/packages/50/2b/9e0cf8ded1e114bcd8b2fd42792b57f1c4e9e4ea1824cde2af93a67305be/coverage-7.10.7-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:240af60539987ced2c399809bd34f7c78e8abe0736af91c3d7d0e795df633d17", size = 245148, upload-time = "2025-09-21T20:01:01.768Z" }, - { url = "https://files.pythonhosted.org/packages/19/20/d0384ac06a6f908783d9b6aa6135e41b093971499ec488e47279f5b846e6/coverage-7.10.7-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8421e088bc051361b01c4b3a50fd39a4b9133079a2229978d9d30511fd05231b", size = 246958, upload-time = "2025-09-21T20:01:03.355Z" }, - { url = "https://files.pythonhosted.org/packages/60/83/5c283cff3d41285f8eab897651585db908a909c572bdc014bcfaf8a8b6ae/coverage-7.10.7-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6be8ed3039ae7f7ac5ce058c308484787c86e8437e72b30bf5e88b8ea10f3c87", size = 248819, upload-time = "2025-09-21T20:01:04.968Z" }, - { url = 
"https://files.pythonhosted.org/packages/60/22/02eb98fdc5ff79f423e990d877693e5310ae1eab6cb20ae0b0b9ac45b23b/coverage-7.10.7-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e28299d9f2e889e6d51b1f043f58d5f997c373cc12e6403b90df95b8b047c13e", size = 245754, upload-time = "2025-09-21T20:01:06.321Z" }, - { url = "https://files.pythonhosted.org/packages/b4/bc/25c83bcf3ad141b32cd7dc45485ef3c01a776ca3aa8ef0a93e77e8b5bc43/coverage-7.10.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c4e16bd7761c5e454f4efd36f345286d6f7c5fa111623c355691e2755cae3b9e", size = 246860, upload-time = "2025-09-21T20:01:07.605Z" }, - { url = "https://files.pythonhosted.org/packages/3c/b7/95574702888b58c0928a6e982038c596f9c34d52c5e5107f1eef729399b5/coverage-7.10.7-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b1c81d0e5e160651879755c9c675b974276f135558cf4ba79fee7b8413a515df", size = 244877, upload-time = "2025-09-21T20:01:08.829Z" }, - { url = "https://files.pythonhosted.org/packages/47/b6/40095c185f235e085df0e0b158f6bd68cc6e1d80ba6c7721dc81d97ec318/coverage-7.10.7-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:606cc265adc9aaedcc84f1f064f0e8736bc45814f15a357e30fca7ecc01504e0", size = 245108, upload-time = "2025-09-21T20:01:10.527Z" }, - { url = "https://files.pythonhosted.org/packages/c8/50/4aea0556da7a4b93ec9168420d170b55e2eb50ae21b25062513d020c6861/coverage-7.10.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:10b24412692df990dbc34f8fb1b6b13d236ace9dfdd68df5b28c2e39cafbba13", size = 245752, upload-time = "2025-09-21T20:01:11.857Z" }, - { url = "https://files.pythonhosted.org/packages/6a/28/ea1a84a60828177ae3b100cb6723838523369a44ec5742313ed7db3da160/coverage-7.10.7-cp310-cp310-win32.whl", hash = "sha256:b51dcd060f18c19290d9b8a9dd1e0181538df2ce0717f562fff6cf74d9fc0b5b", size = 220497, upload-time = "2025-09-21T20:01:13.459Z" }, - { url = 
"https://files.pythonhosted.org/packages/fc/1a/a81d46bbeb3c3fd97b9602ebaa411e076219a150489bcc2c025f151bd52d/coverage-7.10.7-cp310-cp310-win_amd64.whl", hash = "sha256:3a622ac801b17198020f09af3eaf45666b344a0d69fc2a6ffe2ea83aeef1d807", size = 221392, upload-time = "2025-09-21T20:01:14.722Z" }, - { url = "https://files.pythonhosted.org/packages/d2/5d/c1a17867b0456f2e9ce2d8d4708a4c3a089947d0bec9c66cdf60c9e7739f/coverage-7.10.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a609f9c93113be646f44c2a0256d6ea375ad047005d7f57a5c15f614dc1b2f59", size = 218102, upload-time = "2025-09-21T20:01:16.089Z" }, - { url = "https://files.pythonhosted.org/packages/54/f0/514dcf4b4e3698b9a9077f084429681bf3aad2b4a72578f89d7f643eb506/coverage-7.10.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:65646bb0359386e07639c367a22cf9b5bf6304e8630b565d0626e2bdf329227a", size = 218505, upload-time = "2025-09-21T20:01:17.788Z" }, - { url = "https://files.pythonhosted.org/packages/20/f6/9626b81d17e2a4b25c63ac1b425ff307ecdeef03d67c9a147673ae40dc36/coverage-7.10.7-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5f33166f0dfcce728191f520bd2692914ec70fac2713f6bf3ce59c3deacb4699", size = 248898, upload-time = "2025-09-21T20:01:19.488Z" }, - { url = "https://files.pythonhosted.org/packages/b0/ef/bd8e719c2f7417ba03239052e099b76ea1130ac0cbb183ee1fcaa58aaff3/coverage-7.10.7-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:35f5e3f9e455bb17831876048355dca0f758b6df22f49258cb5a91da23ef437d", size = 250831, upload-time = "2025-09-21T20:01:20.817Z" }, - { url = "https://files.pythonhosted.org/packages/a5/b6/bf054de41ec948b151ae2b79a55c107f5760979538f5fb80c195f2517718/coverage-7.10.7-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4da86b6d62a496e908ac2898243920c7992499c1712ff7c2b6d837cc69d9467e", size = 252937, upload-time = "2025-09-21T20:01:22.171Z" }, - { url = 
"https://files.pythonhosted.org/packages/0f/e5/3860756aa6f9318227443c6ce4ed7bf9e70bb7f1447a0353f45ac5c7974b/coverage-7.10.7-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6b8b09c1fad947c84bbbc95eca841350fad9cbfa5a2d7ca88ac9f8d836c92e23", size = 249021, upload-time = "2025-09-21T20:01:23.907Z" }, - { url = "https://files.pythonhosted.org/packages/26/0f/bd08bd042854f7fd07b45808927ebcce99a7ed0f2f412d11629883517ac2/coverage-7.10.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:4376538f36b533b46f8971d3a3e63464f2c7905c9800db97361c43a2b14792ab", size = 250626, upload-time = "2025-09-21T20:01:25.721Z" }, - { url = "https://files.pythonhosted.org/packages/8e/a7/4777b14de4abcc2e80c6b1d430f5d51eb18ed1d75fca56cbce5f2db9b36e/coverage-7.10.7-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:121da30abb574f6ce6ae09840dae322bef734480ceafe410117627aa54f76d82", size = 248682, upload-time = "2025-09-21T20:01:27.105Z" }, - { url = "https://files.pythonhosted.org/packages/34/72/17d082b00b53cd45679bad682fac058b87f011fd8b9fe31d77f5f8d3a4e4/coverage-7.10.7-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:88127d40df529336a9836870436fc2751c339fbaed3a836d42c93f3e4bd1d0a2", size = 248402, upload-time = "2025-09-21T20:01:28.629Z" }, - { url = "https://files.pythonhosted.org/packages/81/7a/92367572eb5bdd6a84bfa278cc7e97db192f9f45b28c94a9ca1a921c3577/coverage-7.10.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ba58bbcd1b72f136080c0bccc2400d66cc6115f3f906c499013d065ac33a4b61", size = 249320, upload-time = "2025-09-21T20:01:30.004Z" }, - { url = "https://files.pythonhosted.org/packages/2f/88/a23cc185f6a805dfc4fdf14a94016835eeb85e22ac3a0e66d5e89acd6462/coverage-7.10.7-cp311-cp311-win32.whl", hash = "sha256:972b9e3a4094b053a4e46832b4bc829fc8a8d347160eb39d03f1690316a99c14", size = 220536, upload-time = "2025-09-21T20:01:32.184Z" }, - { url = 
"https://files.pythonhosted.org/packages/fe/ef/0b510a399dfca17cec7bc2f05ad8bd78cf55f15c8bc9a73ab20c5c913c2e/coverage-7.10.7-cp311-cp311-win_amd64.whl", hash = "sha256:a7b55a944a7f43892e28ad4bc0561dfd5f0d73e605d1aa5c3c976b52aea121d2", size = 221425, upload-time = "2025-09-21T20:01:33.557Z" }, - { url = "https://files.pythonhosted.org/packages/51/7f/023657f301a276e4ba1850f82749bc136f5a7e8768060c2e5d9744a22951/coverage-7.10.7-cp311-cp311-win_arm64.whl", hash = "sha256:736f227fb490f03c6488f9b6d45855f8e0fd749c007f9303ad30efab0e73c05a", size = 220103, upload-time = "2025-09-21T20:01:34.929Z" }, - { url = "https://files.pythonhosted.org/packages/13/e4/eb12450f71b542a53972d19117ea5a5cea1cab3ac9e31b0b5d498df1bd5a/coverage-7.10.7-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7bb3b9ddb87ef7725056572368040c32775036472d5a033679d1fa6c8dc08417", size = 218290, upload-time = "2025-09-21T20:01:36.455Z" }, - { url = "https://files.pythonhosted.org/packages/37/66/593f9be12fc19fb36711f19a5371af79a718537204d16ea1d36f16bd78d2/coverage-7.10.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:18afb24843cbc175687225cab1138c95d262337f5473512010e46831aa0c2973", size = 218515, upload-time = "2025-09-21T20:01:37.982Z" }, - { url = "https://files.pythonhosted.org/packages/66/80/4c49f7ae09cafdacc73fbc30949ffe77359635c168f4e9ff33c9ebb07838/coverage-7.10.7-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:399a0b6347bcd3822be369392932884b8216d0944049ae22925631a9b3d4ba4c", size = 250020, upload-time = "2025-09-21T20:01:39.617Z" }, - { url = "https://files.pythonhosted.org/packages/a6/90/a64aaacab3b37a17aaedd83e8000142561a29eb262cede42d94a67f7556b/coverage-7.10.7-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:314f2c326ded3f4b09be11bc282eb2fc861184bc95748ae67b360ac962770be7", size = 252769, upload-time = "2025-09-21T20:01:41.341Z" }, - { url = 
"https://files.pythonhosted.org/packages/98/2e/2dda59afd6103b342e096f246ebc5f87a3363b5412609946c120f4e7750d/coverage-7.10.7-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c41e71c9cfb854789dee6fc51e46743a6d138b1803fab6cb860af43265b42ea6", size = 253901, upload-time = "2025-09-21T20:01:43.042Z" }, - { url = "https://files.pythonhosted.org/packages/53/dc/8d8119c9051d50f3119bb4a75f29f1e4a6ab9415cd1fa8bf22fcc3fb3b5f/coverage-7.10.7-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc01f57ca26269c2c706e838f6422e2a8788e41b3e3c65e2f41148212e57cd59", size = 250413, upload-time = "2025-09-21T20:01:44.469Z" }, - { url = "https://files.pythonhosted.org/packages/98/b3/edaff9c5d79ee4d4b6d3fe046f2b1d799850425695b789d491a64225d493/coverage-7.10.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a6442c59a8ac8b85812ce33bc4d05bde3fb22321fa8294e2a5b487c3505f611b", size = 251820, upload-time = "2025-09-21T20:01:45.915Z" }, - { url = "https://files.pythonhosted.org/packages/11/25/9a0728564bb05863f7e513e5a594fe5ffef091b325437f5430e8cfb0d530/coverage-7.10.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:78a384e49f46b80fb4c901d52d92abe098e78768ed829c673fbb53c498bef73a", size = 249941, upload-time = "2025-09-21T20:01:47.296Z" }, - { url = "https://files.pythonhosted.org/packages/e0/fd/ca2650443bfbef5b0e74373aac4df67b08180d2f184b482c41499668e258/coverage-7.10.7-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:5e1e9802121405ede4b0133aa4340ad8186a1d2526de5b7c3eca519db7bb89fb", size = 249519, upload-time = "2025-09-21T20:01:48.73Z" }, - { url = "https://files.pythonhosted.org/packages/24/79/f692f125fb4299b6f963b0745124998ebb8e73ecdfce4ceceb06a8c6bec5/coverage-7.10.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d41213ea25a86f69efd1575073d34ea11aabe075604ddf3d148ecfec9e1e96a1", size = 251375, upload-time = "2025-09-21T20:01:50.529Z" }, - { url = 
"https://files.pythonhosted.org/packages/5e/75/61b9bbd6c7d24d896bfeec57acba78e0f8deac68e6baf2d4804f7aae1f88/coverage-7.10.7-cp312-cp312-win32.whl", hash = "sha256:77eb4c747061a6af8d0f7bdb31f1e108d172762ef579166ec84542f711d90256", size = 220699, upload-time = "2025-09-21T20:01:51.941Z" }, - { url = "https://files.pythonhosted.org/packages/ca/f3/3bf7905288b45b075918d372498f1cf845b5b579b723c8fd17168018d5f5/coverage-7.10.7-cp312-cp312-win_amd64.whl", hash = "sha256:f51328ffe987aecf6d09f3cd9d979face89a617eacdaea43e7b3080777f647ba", size = 221512, upload-time = "2025-09-21T20:01:53.481Z" }, - { url = "https://files.pythonhosted.org/packages/5c/44/3e32dbe933979d05cf2dac5e697c8599cfe038aaf51223ab901e208d5a62/coverage-7.10.7-cp312-cp312-win_arm64.whl", hash = "sha256:bda5e34f8a75721c96085903c6f2197dc398c20ffd98df33f866a9c8fd95f4bf", size = 220147, upload-time = "2025-09-21T20:01:55.2Z" }, - { url = "https://files.pythonhosted.org/packages/9a/94/b765c1abcb613d103b64fcf10395f54d69b0ef8be6a0dd9c524384892cc7/coverage-7.10.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:981a651f543f2854abd3b5fcb3263aac581b18209be49863ba575de6edf4c14d", size = 218320, upload-time = "2025-09-21T20:01:56.629Z" }, - { url = "https://files.pythonhosted.org/packages/72/4f/732fff31c119bb73b35236dd333030f32c4bfe909f445b423e6c7594f9a2/coverage-7.10.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:73ab1601f84dc804f7812dc297e93cd99381162da39c47040a827d4e8dafe63b", size = 218575, upload-time = "2025-09-21T20:01:58.203Z" }, - { url = "https://files.pythonhosted.org/packages/87/02/ae7e0af4b674be47566707777db1aa375474f02a1d64b9323e5813a6cdd5/coverage-7.10.7-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a8b6f03672aa6734e700bbcd65ff050fd19cddfec4b031cc8cf1c6967de5a68e", size = 249568, upload-time = "2025-09-21T20:01:59.748Z" }, - { url = 
"https://files.pythonhosted.org/packages/a2/77/8c6d22bf61921a59bce5471c2f1f7ac30cd4ac50aadde72b8c48d5727902/coverage-7.10.7-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10b6ba00ab1132a0ce4428ff68cf50a25efd6840a42cdf4239c9b99aad83be8b", size = 252174, upload-time = "2025-09-21T20:02:01.192Z" }, - { url = "https://files.pythonhosted.org/packages/b1/20/b6ea4f69bbb52dac0aebd62157ba6a9dddbfe664f5af8122dac296c3ee15/coverage-7.10.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c79124f70465a150e89340de5963f936ee97097d2ef76c869708c4248c63ca49", size = 253447, upload-time = "2025-09-21T20:02:02.701Z" }, - { url = "https://files.pythonhosted.org/packages/f9/28/4831523ba483a7f90f7b259d2018fef02cb4d5b90bc7c1505d6e5a84883c/coverage-7.10.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:69212fbccdbd5b0e39eac4067e20a4a5256609e209547d86f740d68ad4f04911", size = 249779, upload-time = "2025-09-21T20:02:04.185Z" }, - { url = "https://files.pythonhosted.org/packages/a7/9f/4331142bc98c10ca6436d2d620c3e165f31e6c58d43479985afce6f3191c/coverage-7.10.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7ea7c6c9d0d286d04ed3541747e6597cbe4971f22648b68248f7ddcd329207f0", size = 251604, upload-time = "2025-09-21T20:02:06.034Z" }, - { url = "https://files.pythonhosted.org/packages/ce/60/bda83b96602036b77ecf34e6393a3836365481b69f7ed7079ab85048202b/coverage-7.10.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b9be91986841a75042b3e3243d0b3cb0b2434252b977baaf0cd56e960fe1e46f", size = 249497, upload-time = "2025-09-21T20:02:07.619Z" }, - { url = "https://files.pythonhosted.org/packages/5f/af/152633ff35b2af63977edd835d8e6430f0caef27d171edf2fc76c270ef31/coverage-7.10.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:b281d5eca50189325cfe1f365fafade89b14b4a78d9b40b05ddd1fc7d2a10a9c", size = 249350, upload-time = "2025-09-21T20:02:10.34Z" }, - { url = 
"https://files.pythonhosted.org/packages/9d/71/d92105d122bd21cebba877228990e1646d862e34a98bb3374d3fece5a794/coverage-7.10.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:99e4aa63097ab1118e75a848a28e40d68b08a5e19ce587891ab7fd04475e780f", size = 251111, upload-time = "2025-09-21T20:02:12.122Z" }, - { url = "https://files.pythonhosted.org/packages/a2/9e/9fdb08f4bf476c912f0c3ca292e019aab6712c93c9344a1653986c3fd305/coverage-7.10.7-cp313-cp313-win32.whl", hash = "sha256:dc7c389dce432500273eaf48f410b37886be9208b2dd5710aaf7c57fd442c698", size = 220746, upload-time = "2025-09-21T20:02:13.919Z" }, - { url = "https://files.pythonhosted.org/packages/b1/b1/a75fd25df44eab52d1931e89980d1ada46824c7a3210be0d3c88a44aaa99/coverage-7.10.7-cp313-cp313-win_amd64.whl", hash = "sha256:cac0fdca17b036af3881a9d2729a850b76553f3f716ccb0360ad4dbc06b3b843", size = 221541, upload-time = "2025-09-21T20:02:15.57Z" }, - { url = "https://files.pythonhosted.org/packages/14/3a/d720d7c989562a6e9a14b2c9f5f2876bdb38e9367126d118495b89c99c37/coverage-7.10.7-cp313-cp313-win_arm64.whl", hash = "sha256:4b6f236edf6e2f9ae8fcd1332da4e791c1b6ba0dc16a2dc94590ceccb482e546", size = 220170, upload-time = "2025-09-21T20:02:17.395Z" }, - { url = "https://files.pythonhosted.org/packages/bb/22/e04514bf2a735d8b0add31d2b4ab636fc02370730787c576bb995390d2d5/coverage-7.10.7-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a0ec07fd264d0745ee396b666d47cef20875f4ff2375d7c4f58235886cc1ef0c", size = 219029, upload-time = "2025-09-21T20:02:18.936Z" }, - { url = "https://files.pythonhosted.org/packages/11/0b/91128e099035ece15da3445d9015e4b4153a6059403452d324cbb0a575fa/coverage-7.10.7-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:dd5e856ebb7bfb7672b0086846db5afb4567a7b9714b8a0ebafd211ec7ce6a15", size = 219259, upload-time = "2025-09-21T20:02:20.44Z" }, - { url = 
"https://files.pythonhosted.org/packages/8b/51/66420081e72801536a091a0c8f8c1f88a5c4bf7b9b1bdc6222c7afe6dc9b/coverage-7.10.7-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f57b2a3c8353d3e04acf75b3fed57ba41f5c0646bbf1d10c7c282291c97936b4", size = 260592, upload-time = "2025-09-21T20:02:22.313Z" }, - { url = "https://files.pythonhosted.org/packages/5d/22/9b8d458c2881b22df3db5bb3e7369e63d527d986decb6c11a591ba2364f7/coverage-7.10.7-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1ef2319dd15a0b009667301a3f84452a4dc6fddfd06b0c5c53ea472d3989fbf0", size = 262768, upload-time = "2025-09-21T20:02:24.287Z" }, - { url = "https://files.pythonhosted.org/packages/f7/08/16bee2c433e60913c610ea200b276e8eeef084b0d200bdcff69920bd5828/coverage-7.10.7-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83082a57783239717ceb0ad584de3c69cf581b2a95ed6bf81ea66034f00401c0", size = 264995, upload-time = "2025-09-21T20:02:26.133Z" }, - { url = "https://files.pythonhosted.org/packages/20/9d/e53eb9771d154859b084b90201e5221bca7674ba449a17c101a5031d4054/coverage-7.10.7-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:50aa94fb1fb9a397eaa19c0d5ec15a5edd03a47bf1a3a6111a16b36e190cff65", size = 259546, upload-time = "2025-09-21T20:02:27.716Z" }, - { url = "https://files.pythonhosted.org/packages/ad/b0/69bc7050f8d4e56a89fb550a1577d5d0d1db2278106f6f626464067b3817/coverage-7.10.7-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2120043f147bebb41c85b97ac45dd173595ff14f2a584f2963891cbcc3091541", size = 262544, upload-time = "2025-09-21T20:02:29.216Z" }, - { url = "https://files.pythonhosted.org/packages/ef/4b/2514b060dbd1bc0aaf23b852c14bb5818f244c664cb16517feff6bb3a5ab/coverage-7.10.7-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2fafd773231dd0378fdba66d339f84904a8e57a262f583530f4f156ab83863e6", size = 260308, upload-time = 
"2025-09-21T20:02:31.226Z" }, - { url = "https://files.pythonhosted.org/packages/54/78/7ba2175007c246d75e496f64c06e94122bdb914790a1285d627a918bd271/coverage-7.10.7-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:0b944ee8459f515f28b851728ad224fa2d068f1513ef6b7ff1efafeb2185f999", size = 258920, upload-time = "2025-09-21T20:02:32.823Z" }, - { url = "https://files.pythonhosted.org/packages/c0/b3/fac9f7abbc841409b9a410309d73bfa6cfb2e51c3fada738cb607ce174f8/coverage-7.10.7-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4b583b97ab2e3efe1b3e75248a9b333bd3f8b0b1b8e5b45578e05e5850dfb2c2", size = 261434, upload-time = "2025-09-21T20:02:34.86Z" }, - { url = "https://files.pythonhosted.org/packages/ee/51/a03bec00d37faaa891b3ff7387192cef20f01604e5283a5fabc95346befa/coverage-7.10.7-cp313-cp313t-win32.whl", hash = "sha256:2a78cd46550081a7909b3329e2266204d584866e8d97b898cd7fb5ac8d888b1a", size = 221403, upload-time = "2025-09-21T20:02:37.034Z" }, - { url = "https://files.pythonhosted.org/packages/53/22/3cf25d614e64bf6d8e59c7c669b20d6d940bb337bdee5900b9ca41c820bb/coverage-7.10.7-cp313-cp313t-win_amd64.whl", hash = "sha256:33a5e6396ab684cb43dc7befa386258acb2d7fae7f67330ebb85ba4ea27938eb", size = 222469, upload-time = "2025-09-21T20:02:39.011Z" }, - { url = "https://files.pythonhosted.org/packages/49/a1/00164f6d30d8a01c3c9c48418a7a5be394de5349b421b9ee019f380df2a0/coverage-7.10.7-cp313-cp313t-win_arm64.whl", hash = "sha256:86b0e7308289ddde73d863b7683f596d8d21c7d8664ce1dee061d0bcf3fbb4bb", size = 220731, upload-time = "2025-09-21T20:02:40.939Z" }, - { url = "https://files.pythonhosted.org/packages/23/9c/5844ab4ca6a4dd97a1850e030a15ec7d292b5c5cb93082979225126e35dd/coverage-7.10.7-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b06f260b16ead11643a5a9f955bd4b5fd76c1a4c6796aeade8520095b75de520", size = 218302, upload-time = "2025-09-21T20:02:42.527Z" }, - { url = 
"https://files.pythonhosted.org/packages/f0/89/673f6514b0961d1f0e20ddc242e9342f6da21eaba3489901b565c0689f34/coverage-7.10.7-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:212f8f2e0612778f09c55dd4872cb1f64a1f2b074393d139278ce902064d5b32", size = 218578, upload-time = "2025-09-21T20:02:44.468Z" }, - { url = "https://files.pythonhosted.org/packages/05/e8/261cae479e85232828fb17ad536765c88dd818c8470aca690b0ac6feeaa3/coverage-7.10.7-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3445258bcded7d4aa630ab8296dea4d3f15a255588dd535f980c193ab6b95f3f", size = 249629, upload-time = "2025-09-21T20:02:46.503Z" }, - { url = "https://files.pythonhosted.org/packages/82/62/14ed6546d0207e6eda876434e3e8475a3e9adbe32110ce896c9e0c06bb9a/coverage-7.10.7-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bb45474711ba385c46a0bfe696c695a929ae69ac636cda8f532be9e8c93d720a", size = 252162, upload-time = "2025-09-21T20:02:48.689Z" }, - { url = "https://files.pythonhosted.org/packages/ff/49/07f00db9ac6478e4358165a08fb41b469a1b053212e8a00cb02f0d27a05f/coverage-7.10.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:813922f35bd800dca9994c5971883cbc0d291128a5de6b167c7aa697fcf59360", size = 253517, upload-time = "2025-09-21T20:02:50.31Z" }, - { url = "https://files.pythonhosted.org/packages/a2/59/c5201c62dbf165dfbc91460f6dbbaa85a8b82cfa6131ac45d6c1bfb52deb/coverage-7.10.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:93c1b03552081b2a4423091d6fb3787265b8f86af404cff98d1b5342713bdd69", size = 249632, upload-time = "2025-09-21T20:02:51.971Z" }, - { url = "https://files.pythonhosted.org/packages/07/ae/5920097195291a51fb00b3a70b9bbd2edbfe3c84876a1762bd1ef1565ebc/coverage-7.10.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:cc87dd1b6eaf0b848eebb1c86469b9f72a1891cb42ac7adcfbce75eadb13dd14", size = 251520, upload-time = 
"2025-09-21T20:02:53.858Z" }, - { url = "https://files.pythonhosted.org/packages/b9/3c/a815dde77a2981f5743a60b63df31cb322c944843e57dbd579326625a413/coverage-7.10.7-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:39508ffda4f343c35f3236fe8d1a6634a51f4581226a1262769d7f970e73bffe", size = 249455, upload-time = "2025-09-21T20:02:55.807Z" }, - { url = "https://files.pythonhosted.org/packages/aa/99/f5cdd8421ea656abefb6c0ce92556709db2265c41e8f9fc6c8ae0f7824c9/coverage-7.10.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:925a1edf3d810537c5a3abe78ec5530160c5f9a26b1f4270b40e62cc79304a1e", size = 249287, upload-time = "2025-09-21T20:02:57.784Z" }, - { url = "https://files.pythonhosted.org/packages/c3/7a/e9a2da6a1fc5d007dd51fca083a663ab930a8c4d149c087732a5dbaa0029/coverage-7.10.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2c8b9a0636f94c43cd3576811e05b89aa9bc2d0a85137affc544ae5cb0e4bfbd", size = 250946, upload-time = "2025-09-21T20:02:59.431Z" }, - { url = "https://files.pythonhosted.org/packages/ef/5b/0b5799aa30380a949005a353715095d6d1da81927d6dbed5def2200a4e25/coverage-7.10.7-cp314-cp314-win32.whl", hash = "sha256:b7b8288eb7cdd268b0304632da8cb0bb93fadcfec2fe5712f7b9cc8f4d487be2", size = 221009, upload-time = "2025-09-21T20:03:01.324Z" }, - { url = "https://files.pythonhosted.org/packages/da/b0/e802fbb6eb746de006490abc9bb554b708918b6774b722bb3a0e6aa1b7de/coverage-7.10.7-cp314-cp314-win_amd64.whl", hash = "sha256:1ca6db7c8807fb9e755d0379ccc39017ce0a84dcd26d14b5a03b78563776f681", size = 221804, upload-time = "2025-09-21T20:03:03.4Z" }, - { url = "https://files.pythonhosted.org/packages/9e/e8/71d0c8e374e31f39e3389bb0bd19e527d46f00ea8571ec7ec8fd261d8b44/coverage-7.10.7-cp314-cp314-win_arm64.whl", hash = "sha256:097c1591f5af4496226d5783d036bf6fd6cd0cbc132e071b33861de756efb880", size = 220384, upload-time = "2025-09-21T20:03:05.111Z" }, - { url = 
"https://files.pythonhosted.org/packages/62/09/9a5608d319fa3eba7a2019addeacb8c746fb50872b57a724c9f79f146969/coverage-7.10.7-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:a62c6ef0d50e6de320c270ff91d9dd0a05e7250cac2a800b7784bae474506e63", size = 219047, upload-time = "2025-09-21T20:03:06.795Z" }, - { url = "https://files.pythonhosted.org/packages/f5/6f/f58d46f33db9f2e3647b2d0764704548c184e6f5e014bef528b7f979ef84/coverage-7.10.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9fa6e4dd51fe15d8738708a973470f67a855ca50002294852e9571cdbd9433f2", size = 219266, upload-time = "2025-09-21T20:03:08.495Z" }, - { url = "https://files.pythonhosted.org/packages/74/5c/183ffc817ba68e0b443b8c934c8795553eb0c14573813415bd59941ee165/coverage-7.10.7-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:8fb190658865565c549b6b4706856d6a7b09302c797eb2cf8e7fe9dabb043f0d", size = 260767, upload-time = "2025-09-21T20:03:10.172Z" }, - { url = "https://files.pythonhosted.org/packages/0f/48/71a8abe9c1ad7e97548835e3cc1adbf361e743e9d60310c5f75c9e7bf847/coverage-7.10.7-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:affef7c76a9ef259187ef31599a9260330e0335a3011732c4b9effa01e1cd6e0", size = 262931, upload-time = "2025-09-21T20:03:11.861Z" }, - { url = "https://files.pythonhosted.org/packages/84/fd/193a8fb132acfc0a901f72020e54be5e48021e1575bb327d8ee1097a28fd/coverage-7.10.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e16e07d85ca0cf8bafe5f5d23a0b850064e8e945d5677492b06bbe6f09cc699", size = 265186, upload-time = "2025-09-21T20:03:13.539Z" }, - { url = "https://files.pythonhosted.org/packages/b1/8f/74ecc30607dd95ad50e3034221113ccb1c6d4e8085cc761134782995daae/coverage-7.10.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:03ffc58aacdf65d2a82bbeb1ffe4d01ead4017a21bfd0454983b88ca73af94b9", size = 259470, upload-time = 
"2025-09-21T20:03:15.584Z" }, - { url = "https://files.pythonhosted.org/packages/0f/55/79ff53a769f20d71b07023ea115c9167c0bb56f281320520cf64c5298a96/coverage-7.10.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1b4fd784344d4e52647fd7857b2af5b3fbe6c239b0b5fa63e94eb67320770e0f", size = 262626, upload-time = "2025-09-21T20:03:17.673Z" }, - { url = "https://files.pythonhosted.org/packages/88/e2/dac66c140009b61ac3fc13af673a574b00c16efdf04f9b5c740703e953c0/coverage-7.10.7-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:0ebbaddb2c19b71912c6f2518e791aa8b9f054985a0769bdb3a53ebbc765c6a1", size = 260386, upload-time = "2025-09-21T20:03:19.36Z" }, - { url = "https://files.pythonhosted.org/packages/a2/f1/f48f645e3f33bb9ca8a496bc4a9671b52f2f353146233ebd7c1df6160440/coverage-7.10.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:a2d9a3b260cc1d1dbdb1c582e63ddcf5363426a1a68faa0f5da28d8ee3c722a0", size = 258852, upload-time = "2025-09-21T20:03:21.007Z" }, - { url = "https://files.pythonhosted.org/packages/bb/3b/8442618972c51a7affeead957995cfa8323c0c9bcf8fa5a027421f720ff4/coverage-7.10.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a3cc8638b2480865eaa3926d192e64ce6c51e3d29c849e09d5b4ad95efae5399", size = 261534, upload-time = "2025-09-21T20:03:23.12Z" }, - { url = "https://files.pythonhosted.org/packages/b2/dc/101f3fa3a45146db0cb03f5b4376e24c0aac818309da23e2de0c75295a91/coverage-7.10.7-cp314-cp314t-win32.whl", hash = "sha256:67f8c5cbcd3deb7a60b3345dffc89a961a484ed0af1f6f73de91705cc6e31235", size = 221784, upload-time = "2025-09-21T20:03:24.769Z" }, - { url = "https://files.pythonhosted.org/packages/4c/a1/74c51803fc70a8a40d7346660379e144be772bab4ac7bb6e6b905152345c/coverage-7.10.7-cp314-cp314t-win_amd64.whl", hash = "sha256:e1ed71194ef6dea7ed2d5cb5f7243d4bcd334bfb63e59878519be558078f848d", size = 222905, upload-time = "2025-09-21T20:03:26.93Z" }, - { url = 
"https://files.pythonhosted.org/packages/12/65/f116a6d2127df30bcafbceef0302d8a64ba87488bf6f73a6d8eebf060873/coverage-7.10.7-cp314-cp314t-win_arm64.whl", hash = "sha256:7fe650342addd8524ca63d77b2362b02345e5f1a093266787d210c70a50b471a", size = 220922, upload-time = "2025-09-21T20:03:28.672Z" }, - { url = "https://files.pythonhosted.org/packages/ec/16/114df1c291c22cac3b0c127a73e0af5c12ed7bbb6558d310429a0ae24023/coverage-7.10.7-py3-none-any.whl", hash = "sha256:f7941f6f2fe6dd6807a1208737b8a0cbcf1cc6d7b07d24998ad2d63590868260", size = 209952, upload-time = "2025-09-21T20:03:53.918Z" }, +version = "7.11.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1c/38/ee22495420457259d2f3390309505ea98f98a5eed40901cf62196abad006/coverage-7.11.0.tar.gz", hash = "sha256:167bd504ac1ca2af7ff3b81d245dfea0292c5032ebef9d66cc08a7d28c1b8050", size = 811905, upload-time = "2025-10-15T15:15:08.542Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/95/c49df0aceb5507a80b9fe5172d3d39bf23f05be40c23c8d77d556df96cec/coverage-7.11.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eb53f1e8adeeb2e78962bade0c08bfdc461853c7969706ed901821e009b35e31", size = 215800, upload-time = "2025-10-15T15:12:19.824Z" }, + { url = "https://files.pythonhosted.org/packages/dc/c6/7bb46ce01ed634fff1d7bb53a54049f539971862cc388b304ff3c51b4f66/coverage-7.11.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d9a03ec6cb9f40a5c360f138b88266fd8f58408d71e89f536b4f91d85721d075", size = 216198, upload-time = "2025-10-15T15:12:22.549Z" }, + { url = "https://files.pythonhosted.org/packages/94/b2/75d9d8fbf2900268aca5de29cd0a0fe671b0f69ef88be16767cc3c828b85/coverage-7.11.0-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0d7f0616c557cbc3d1c2090334eddcbb70e1ae3a40b07222d62b3aa47f608fab", size = 242953, upload-time = "2025-10-15T15:12:24.139Z" }, + { url = 
"https://files.pythonhosted.org/packages/65/ac/acaa984c18f440170525a8743eb4b6c960ace2dbad80dc22056a437fc3c6/coverage-7.11.0-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e44a86a47bbdf83b0a3ea4d7df5410d6b1a0de984fbd805fa5101f3624b9abe0", size = 244766, upload-time = "2025-10-15T15:12:25.974Z" }, + { url = "https://files.pythonhosted.org/packages/d8/0d/938d0bff76dfa4a6b228c3fc4b3e1c0e2ad4aa6200c141fcda2bd1170227/coverage-7.11.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:596763d2f9a0ee7eec6e643e29660def2eef297e1de0d334c78c08706f1cb785", size = 246625, upload-time = "2025-10-15T15:12:27.387Z" }, + { url = "https://files.pythonhosted.org/packages/38/54/8f5f5e84bfa268df98f46b2cb396b1009734cfb1e5d6adb663d284893b32/coverage-7.11.0-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ef55537ff511b5e0a43edb4c50a7bf7ba1c3eea20b4f49b1490f1e8e0e42c591", size = 243568, upload-time = "2025-10-15T15:12:28.799Z" }, + { url = "https://files.pythonhosted.org/packages/68/30/8ba337c2877fe3f2e1af0ed7ff4be0c0c4aca44d6f4007040f3ca2255e99/coverage-7.11.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:9cbabd8f4d0d3dc571d77ae5bdbfa6afe5061e679a9d74b6797c48d143307088", size = 244665, upload-time = "2025-10-15T15:12:30.297Z" }, + { url = "https://files.pythonhosted.org/packages/cc/fb/c6f1d6d9a665536b7dde2333346f0cc41dc6a60bd1ffc10cd5c33e7eb000/coverage-7.11.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e24045453384e0ae2a587d562df2a04d852672eb63051d16096d3f08aa4c7c2f", size = 242681, upload-time = "2025-10-15T15:12:32.326Z" }, + { url = "https://files.pythonhosted.org/packages/be/38/1b532319af5f991fa153c20373291dc65c2bf532af7dbcffdeef745c8f79/coverage-7.11.0-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:7161edd3426c8d19bdccde7d49e6f27f748f3c31cc350c5de7c633fea445d866", size = 242912, upload-time = "2025-10-15T15:12:34.079Z" }, + { url = 
"https://files.pythonhosted.org/packages/67/3d/f39331c60ef6050d2a861dc1b514fa78f85f792820b68e8c04196ad733d6/coverage-7.11.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3d4ed4de17e692ba6415b0587bc7f12bc80915031fc9db46a23ce70fc88c9841", size = 243559, upload-time = "2025-10-15T15:12:35.809Z" }, + { url = "https://files.pythonhosted.org/packages/4b/55/cb7c9df9d0495036ce582a8a2958d50c23cd73f84a23284bc23bd4711a6f/coverage-7.11.0-cp310-cp310-win32.whl", hash = "sha256:765c0bc8fe46f48e341ef737c91c715bd2a53a12792592296a095f0c237e09cf", size = 218266, upload-time = "2025-10-15T15:12:37.429Z" }, + { url = "https://files.pythonhosted.org/packages/68/a8/b79cb275fa7bd0208767f89d57a1b5f6ba830813875738599741b97c2e04/coverage-7.11.0-cp310-cp310-win_amd64.whl", hash = "sha256:24d6f3128f1b2d20d84b24f4074475457faedc3d4613a7e66b5e769939c7d969", size = 219169, upload-time = "2025-10-15T15:12:39.25Z" }, + { url = "https://files.pythonhosted.org/packages/49/3a/ee1074c15c408ddddddb1db7dd904f6b81bc524e01f5a1c5920e13dbde23/coverage-7.11.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3d58ecaa865c5b9fa56e35efc51d1014d4c0d22838815b9fce57a27dd9576847", size = 215912, upload-time = "2025-10-15T15:12:40.665Z" }, + { url = "https://files.pythonhosted.org/packages/70/c4/9f44bebe5cb15f31608597b037d78799cc5f450044465bcd1ae8cb222fe1/coverage-7.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b679e171f1c104a5668550ada700e3c4937110dbdd153b7ef9055c4f1a1ee3cc", size = 216310, upload-time = "2025-10-15T15:12:42.461Z" }, + { url = "https://files.pythonhosted.org/packages/42/01/5e06077cfef92d8af926bdd86b84fb28bf9bc6ad27343d68be9b501d89f2/coverage-7.11.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:ca61691ba8c5b6797deb221a0d09d7470364733ea9c69425a640f1f01b7c5bf0", size = 246706, upload-time = "2025-10-15T15:12:44.001Z" }, + { url = 
"https://files.pythonhosted.org/packages/40/b8/7a3f1f33b35cc4a6c37e759137533119560d06c0cc14753d1a803be0cd4a/coverage-7.11.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:aef1747ede4bd8ca9cfc04cc3011516500c6891f1b33a94add3253f6f876b7b7", size = 248634, upload-time = "2025-10-15T15:12:45.768Z" }, + { url = "https://files.pythonhosted.org/packages/7a/41/7f987eb33de386bc4c665ab0bf98d15fcf203369d6aacae74f5dd8ec489a/coverage-7.11.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a1839d08406e4cba2953dcc0ffb312252f14d7c4c96919f70167611f4dee2623", size = 250741, upload-time = "2025-10-15T15:12:47.222Z" }, + { url = "https://files.pythonhosted.org/packages/23/c1/a4e0ca6a4e83069fb8216b49b30a7352061ca0cb38654bd2dc96b7b3b7da/coverage-7.11.0-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e0eb0a2dcc62478eb5b4cbb80b97bdee852d7e280b90e81f11b407d0b81c4287", size = 246837, upload-time = "2025-10-15T15:12:48.904Z" }, + { url = "https://files.pythonhosted.org/packages/5d/03/ced062a17f7c38b4728ff76c3acb40d8465634b20b4833cdb3cc3a74e115/coverage-7.11.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bc1fbea96343b53f65d5351d8fd3b34fd415a2670d7c300b06d3e14a5af4f552", size = 248429, upload-time = "2025-10-15T15:12:50.73Z" }, + { url = "https://files.pythonhosted.org/packages/97/af/a7c6f194bb8c5a2705ae019036b8fe7f49ea818d638eedb15fdb7bed227c/coverage-7.11.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:214b622259dd0cf435f10241f1333d32caa64dbc27f8790ab693428a141723de", size = 246490, upload-time = "2025-10-15T15:12:52.646Z" }, + { url = "https://files.pythonhosted.org/packages/ab/c3/aab4df02b04a8fde79068c3c41ad7a622b0ef2b12e1ed154da986a727c3f/coverage-7.11.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:258d9967520cca899695d4eb7ea38be03f06951d6ca2f21fb48b1235f791e601", size = 246208, upload-time = "2025-10-15T15:12:54.586Z" }, + { url = 
"https://files.pythonhosted.org/packages/30/d8/e282ec19cd658238d60ed404f99ef2e45eed52e81b866ab1518c0d4163cf/coverage-7.11.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:cf9e6ff4ca908ca15c157c409d608da77a56a09877b97c889b98fb2c32b6465e", size = 247126, upload-time = "2025-10-15T15:12:56.485Z" }, + { url = "https://files.pythonhosted.org/packages/d1/17/a635fa07fac23adb1a5451ec756216768c2767efaed2e4331710342a3399/coverage-7.11.0-cp311-cp311-win32.whl", hash = "sha256:fcc15fc462707b0680cff6242c48625da7f9a16a28a41bb8fd7a4280920e676c", size = 218314, upload-time = "2025-10-15T15:12:58.365Z" }, + { url = "https://files.pythonhosted.org/packages/2a/29/2ac1dfcdd4ab9a70026edc8d715ece9b4be9a1653075c658ee6f271f394d/coverage-7.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:865965bf955d92790f1facd64fe7ff73551bd2c1e7e6b26443934e9701ba30b9", size = 219203, upload-time = "2025-10-15T15:12:59.902Z" }, + { url = "https://files.pythonhosted.org/packages/03/21/5ce8b3a0133179115af4c041abf2ee652395837cb896614beb8ce8ddcfd9/coverage-7.11.0-cp311-cp311-win_arm64.whl", hash = "sha256:5693e57a065760dcbeb292d60cc4d0231a6d4b6b6f6a3191561e1d5e8820b745", size = 217879, upload-time = "2025-10-15T15:13:01.35Z" }, + { url = "https://files.pythonhosted.org/packages/c4/db/86f6906a7c7edc1a52b2c6682d6dd9be775d73c0dfe2b84f8923dfea5784/coverage-7.11.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9c49e77811cf9d024b95faf86c3f059b11c0c9be0b0d61bc598f453703bd6fd1", size = 216098, upload-time = "2025-10-15T15:13:02.916Z" }, + { url = "https://files.pythonhosted.org/packages/21/54/e7b26157048c7ba555596aad8569ff903d6cd67867d41b75287323678ede/coverage-7.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a61e37a403a778e2cda2a6a39abcc895f1d984071942a41074b5c7ee31642007", size = 216331, upload-time = "2025-10-15T15:13:04.403Z" }, + { url = 
"https://files.pythonhosted.org/packages/b9/19/1ce6bf444f858b83a733171306134a0544eaddf1ca8851ede6540a55b2ad/coverage-7.11.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c79cae102bb3b1801e2ef1511fb50e91ec83a1ce466b2c7c25010d884336de46", size = 247825, upload-time = "2025-10-15T15:13:05.92Z" }, + { url = "https://files.pythonhosted.org/packages/71/0b/d3bcbbc259fcced5fb67c5d78f6e7ee965f49760c14afd931e9e663a83b2/coverage-7.11.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:16ce17ceb5d211f320b62df002fa7016b7442ea0fd260c11cec8ce7730954893", size = 250573, upload-time = "2025-10-15T15:13:07.471Z" }, + { url = "https://files.pythonhosted.org/packages/58/8d/b0ff3641a320abb047258d36ed1c21d16be33beed4152628331a1baf3365/coverage-7.11.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:80027673e9d0bd6aef86134b0771845e2da85755cf686e7c7c59566cf5a89115", size = 251706, upload-time = "2025-10-15T15:13:09.4Z" }, + { url = "https://files.pythonhosted.org/packages/59/c8/5a586fe8c7b0458053d9c687f5cff515a74b66c85931f7fe17a1c958b4ac/coverage-7.11.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4d3ffa07a08657306cd2215b0da53761c4d73cb54d9143b9303a6481ec0cd415", size = 248221, upload-time = "2025-10-15T15:13:10.964Z" }, + { url = "https://files.pythonhosted.org/packages/d0/ff/3a25e3132804ba44cfa9a778cdf2b73dbbe63ef4b0945e39602fc896ba52/coverage-7.11.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a3b6a5f8b2524fd6c1066bc85bfd97e78709bb5e37b5b94911a6506b65f47186", size = 249624, upload-time = "2025-10-15T15:13:12.5Z" }, + { url = "https://files.pythonhosted.org/packages/c5/12/ff10c8ce3895e1b17a73485ea79ebc1896a9e466a9d0f4aef63e0d17b718/coverage-7.11.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:fcc0a4aa589de34bc56e1a80a740ee0f8c47611bdfb28cd1849de60660f3799d", size = 247744, upload-time = 
"2025-10-15T15:13:14.554Z" }, + { url = "https://files.pythonhosted.org/packages/16/02/d500b91f5471b2975947e0629b8980e5e90786fe316b6d7299852c1d793d/coverage-7.11.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:dba82204769d78c3fd31b35c3d5f46e06511936c5019c39f98320e05b08f794d", size = 247325, upload-time = "2025-10-15T15:13:16.438Z" }, + { url = "https://files.pythonhosted.org/packages/77/11/dee0284fbbd9cd64cfce806b827452c6df3f100d9e66188e82dfe771d4af/coverage-7.11.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:81b335f03ba67309a95210caf3eb43bd6fe75a4e22ba653ef97b4696c56c7ec2", size = 249180, upload-time = "2025-10-15T15:13:17.959Z" }, + { url = "https://files.pythonhosted.org/packages/59/1b/cdf1def928f0a150a057cab03286774e73e29c2395f0d30ce3d9e9f8e697/coverage-7.11.0-cp312-cp312-win32.whl", hash = "sha256:037b2d064c2f8cc8716fe4d39cb705779af3fbf1ba318dc96a1af858888c7bb5", size = 218479, upload-time = "2025-10-15T15:13:19.608Z" }, + { url = "https://files.pythonhosted.org/packages/ff/55/e5884d55e031da9c15b94b90a23beccc9d6beee65e9835cd6da0a79e4f3a/coverage-7.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:d66c0104aec3b75e5fd897e7940188ea1892ca1d0235316bf89286d6a22568c0", size = 219290, upload-time = "2025-10-15T15:13:21.593Z" }, + { url = "https://files.pythonhosted.org/packages/23/a8/faa930cfc71c1d16bc78f9a19bb73700464f9c331d9e547bfbc1dbd3a108/coverage-7.11.0-cp312-cp312-win_arm64.whl", hash = "sha256:d91ebeac603812a09cf6a886ba6e464f3bbb367411904ae3790dfe28311b15ad", size = 217924, upload-time = "2025-10-15T15:13:23.39Z" }, + { url = "https://files.pythonhosted.org/packages/60/7f/85e4dfe65e400645464b25c036a26ac226cf3a69d4a50c3934c532491cdd/coverage-7.11.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:cc3f49e65ea6e0d5d9bd60368684fe52a704d46f9e7fc413918f18d046ec40e1", size = 216129, upload-time = "2025-10-15T15:13:25.371Z" }, + { url = 
"https://files.pythonhosted.org/packages/96/5d/dc5fa98fea3c175caf9d360649cb1aa3715e391ab00dc78c4c66fabd7356/coverage-7.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f39ae2f63f37472c17b4990f794035c9890418b1b8cca75c01193f3c8d3e01be", size = 216380, upload-time = "2025-10-15T15:13:26.976Z" }, + { url = "https://files.pythonhosted.org/packages/b2/f5/3da9cc9596708273385189289c0e4d8197d37a386bdf17619013554b3447/coverage-7.11.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7db53b5cdd2917b6eaadd0b1251cf4e7d96f4a8d24e174bdbdf2f65b5ea7994d", size = 247375, upload-time = "2025-10-15T15:13:28.923Z" }, + { url = "https://files.pythonhosted.org/packages/65/6c/f7f59c342359a235559d2bc76b0c73cfc4bac7d61bb0df210965cb1ecffd/coverage-7.11.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10ad04ac3a122048688387828b4537bc9cf60c0bf4869c1e9989c46e45690b82", size = 249978, upload-time = "2025-10-15T15:13:30.525Z" }, + { url = "https://files.pythonhosted.org/packages/e7/8c/042dede2e23525e863bf1ccd2b92689692a148d8b5fd37c37899ba882645/coverage-7.11.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4036cc9c7983a2b1f2556d574d2eb2154ac6ed55114761685657e38782b23f52", size = 251253, upload-time = "2025-10-15T15:13:32.174Z" }, + { url = "https://files.pythonhosted.org/packages/7b/a9/3c58df67bfa809a7bddd786356d9c5283e45d693edb5f3f55d0986dd905a/coverage-7.11.0-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7ab934dd13b1c5e94b692b1e01bd87e4488cb746e3a50f798cb9464fd128374b", size = 247591, upload-time = "2025-10-15T15:13:34.147Z" }, + { url = "https://files.pythonhosted.org/packages/26/5b/c7f32efd862ee0477a18c41e4761305de6ddd2d49cdeda0c1116227570fd/coverage-7.11.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:59a6e5a265f7cfc05f76e3bb53eca2e0dfe90f05e07e849930fecd6abb8f40b4", size = 249411, upload-time = 
"2025-10-15T15:13:38.425Z" }, + { url = "https://files.pythonhosted.org/packages/76/b5/78cb4f1e86c1611431c990423ec0768122905b03837e1b4c6a6f388a858b/coverage-7.11.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:df01d6c4c81e15a7c88337b795bb7595a8596e92310266b5072c7e301168efbd", size = 247303, upload-time = "2025-10-15T15:13:40.464Z" }, + { url = "https://files.pythonhosted.org/packages/87/c9/23c753a8641a330f45f221286e707c427e46d0ffd1719b080cedc984ec40/coverage-7.11.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:8c934bd088eed6174210942761e38ee81d28c46de0132ebb1801dbe36a390dcc", size = 247157, upload-time = "2025-10-15T15:13:42.087Z" }, + { url = "https://files.pythonhosted.org/packages/c5/42/6e0cc71dc8a464486e944a4fa0d85bdec031cc2969e98ed41532a98336b9/coverage-7.11.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5a03eaf7ec24078ad64a07f02e30060aaf22b91dedf31a6b24d0d98d2bba7f48", size = 248921, upload-time = "2025-10-15T15:13:43.715Z" }, + { url = "https://files.pythonhosted.org/packages/e8/1c/743c2ef665e6858cccb0f84377dfe3a4c25add51e8c7ef19249be92465b6/coverage-7.11.0-cp313-cp313-win32.whl", hash = "sha256:695340f698a5f56f795b2836abe6fb576e7c53d48cd155ad2f80fd24bc63a040", size = 218526, upload-time = "2025-10-15T15:13:45.336Z" }, + { url = "https://files.pythonhosted.org/packages/ff/d5/226daadfd1bf8ddbccefbd3aa3547d7b960fb48e1bdac124e2dd13a2b71a/coverage-7.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:2727d47fce3ee2bac648528e41455d1b0c46395a087a229deac75e9f88ba5a05", size = 219317, upload-time = "2025-10-15T15:13:47.401Z" }, + { url = "https://files.pythonhosted.org/packages/97/54/47db81dcbe571a48a298f206183ba8a7ba79200a37cd0d9f4788fcd2af4a/coverage-7.11.0-cp313-cp313-win_arm64.whl", hash = "sha256:0efa742f431529699712b92ecdf22de8ff198df41e43aeaaadf69973eb93f17a", size = 217948, upload-time = "2025-10-15T15:13:49.096Z" }, + { url = 
"https://files.pythonhosted.org/packages/e5/8b/cb68425420154e7e2a82fd779a8cc01549b6fa83c2ad3679cd6c088ebd07/coverage-7.11.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:587c38849b853b157706407e9ebdca8fd12f45869edb56defbef2daa5fb0812b", size = 216837, upload-time = "2025-10-15T15:13:51.09Z" }, + { url = "https://files.pythonhosted.org/packages/33/55/9d61b5765a025685e14659c8d07037247de6383c0385757544ffe4606475/coverage-7.11.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b971bdefdd75096163dd4261c74be813c4508477e39ff7b92191dea19f24cd37", size = 217061, upload-time = "2025-10-15T15:13:52.747Z" }, + { url = "https://files.pythonhosted.org/packages/52/85/292459c9186d70dcec6538f06ea251bc968046922497377bf4a1dc9a71de/coverage-7.11.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:269bfe913b7d5be12ab13a95f3a76da23cf147be7fa043933320ba5625f0a8de", size = 258398, upload-time = "2025-10-15T15:13:54.45Z" }, + { url = "https://files.pythonhosted.org/packages/1f/e2/46edd73fb8bf51446c41148d81944c54ed224854812b6ca549be25113ee0/coverage-7.11.0-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:dadbcce51a10c07b7c72b0ce4a25e4b6dcb0c0372846afb8e5b6307a121eb99f", size = 260574, upload-time = "2025-10-15T15:13:56.145Z" }, + { url = "https://files.pythonhosted.org/packages/07/5e/1df469a19007ff82e2ca8fe509822820a31e251f80ee7344c34f6cd2ec43/coverage-7.11.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9ed43fa22c6436f7957df036331f8fe4efa7af132054e1844918866cd228af6c", size = 262797, upload-time = "2025-10-15T15:13:58.635Z" }, + { url = "https://files.pythonhosted.org/packages/f9/50/de216b31a1434b94d9b34a964c09943c6be45069ec704bfc379d8d89a649/coverage-7.11.0-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9516add7256b6713ec08359b7b05aeff8850c98d357784c7205b2e60aa2513fa", size = 257361, upload-time = 
"2025-10-15T15:14:00.409Z" }, + { url = "https://files.pythonhosted.org/packages/82/1e/3f9f8344a48111e152e0fd495b6fff13cc743e771a6050abf1627a7ba918/coverage-7.11.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:eb92e47c92fcbcdc692f428da67db33337fa213756f7adb6a011f7b5a7a20740", size = 260349, upload-time = "2025-10-15T15:14:02.188Z" }, + { url = "https://files.pythonhosted.org/packages/65/9b/3f52741f9e7d82124272f3070bbe316006a7de1bad1093f88d59bfc6c548/coverage-7.11.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:d06f4fc7acf3cabd6d74941d53329e06bab00a8fe10e4df2714f0b134bfc64ef", size = 258114, upload-time = "2025-10-15T15:14:03.907Z" }, + { url = "https://files.pythonhosted.org/packages/0b/8b/918f0e15f0365d50d3986bbd3338ca01178717ac5678301f3f547b6619e6/coverage-7.11.0-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:6fbcee1a8f056af07ecd344482f711f563a9eb1c2cad192e87df00338ec3cdb0", size = 256723, upload-time = "2025-10-15T15:14:06.324Z" }, + { url = "https://files.pythonhosted.org/packages/44/9e/7776829f82d3cf630878a7965a7d70cc6ca94f22c7d20ec4944f7148cb46/coverage-7.11.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dbbf012be5f32533a490709ad597ad8a8ff80c582a95adc8d62af664e532f9ca", size = 259238, upload-time = "2025-10-15T15:14:08.002Z" }, + { url = "https://files.pythonhosted.org/packages/9a/b8/49cf253e1e7a3bedb85199b201862dd7ca4859f75b6cf25ffa7298aa0760/coverage-7.11.0-cp313-cp313t-win32.whl", hash = "sha256:cee6291bb4fed184f1c2b663606a115c743df98a537c969c3c64b49989da96c2", size = 219180, upload-time = "2025-10-15T15:14:09.786Z" }, + { url = "https://files.pythonhosted.org/packages/ac/e1/1a541703826be7ae2125a0fb7f821af5729d56bb71e946e7b933cc7a89a4/coverage-7.11.0-cp313-cp313t-win_amd64.whl", hash = "sha256:a386c1061bf98e7ea4758e4313c0ab5ecf57af341ef0f43a0bf26c2477b5c268", size = 220241, upload-time = "2025-10-15T15:14:11.471Z" }, + { url = 
"https://files.pythonhosted.org/packages/d5/d1/5ee0e0a08621140fd418ec4020f595b4d52d7eb429ae6a0c6542b4ba6f14/coverage-7.11.0-cp313-cp313t-win_arm64.whl", hash = "sha256:f9ea02ef40bb83823b2b04964459d281688fe173e20643870bb5d2edf68bc836", size = 218510, upload-time = "2025-10-15T15:14:13.46Z" }, + { url = "https://files.pythonhosted.org/packages/f4/06/e923830c1985ce808e40a3fa3eb46c13350b3224b7da59757d37b6ce12b8/coverage-7.11.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:c770885b28fb399aaf2a65bbd1c12bf6f307ffd112d6a76c5231a94276f0c497", size = 216110, upload-time = "2025-10-15T15:14:15.157Z" }, + { url = "https://files.pythonhosted.org/packages/42/82/cdeed03bfead45203fb651ed756dfb5266028f5f939e7f06efac4041dad5/coverage-7.11.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a3d0e2087dba64c86a6b254f43e12d264b636a39e88c5cc0a01a7c71bcfdab7e", size = 216395, upload-time = "2025-10-15T15:14:16.863Z" }, + { url = "https://files.pythonhosted.org/packages/fc/ba/e1c80caffc3199aa699813f73ff097bc2df7b31642bdbc7493600a8f1de5/coverage-7.11.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:73feb83bb41c32811973b8565f3705caf01d928d972b72042b44e97c71fd70d1", size = 247433, upload-time = "2025-10-15T15:14:18.589Z" }, + { url = "https://files.pythonhosted.org/packages/80/c0/5b259b029694ce0a5bbc1548834c7ba3db41d3efd3474489d7efce4ceb18/coverage-7.11.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c6f31f281012235ad08f9a560976cc2fc9c95c17604ff3ab20120fe480169bca", size = 249970, upload-time = "2025-10-15T15:14:20.307Z" }, + { url = "https://files.pythonhosted.org/packages/8c/86/171b2b5e1aac7e2fd9b43f7158b987dbeb95f06d1fbecad54ad8163ae3e8/coverage-7.11.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e9570ad567f880ef675673992222746a124b9595506826b210fbe0ce3f0499cd", size = 251324, upload-time = "2025-10-15T15:14:22.419Z" }, + { url = 
"https://files.pythonhosted.org/packages/1a/7e/7e10414d343385b92024af3932a27a1caf75c6e27ee88ba211221ff1a145/coverage-7.11.0-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8badf70446042553a773547a61fecaa734b55dc738cacf20c56ab04b77425e43", size = 247445, upload-time = "2025-10-15T15:14:24.205Z" }, + { url = "https://files.pythonhosted.org/packages/c4/3b/e4f966b21f5be8c4bf86ad75ae94efa0de4c99c7bbb8114476323102e345/coverage-7.11.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a09c1211959903a479e389685b7feb8a17f59ec5a4ef9afde7650bd5eabc2777", size = 249324, upload-time = "2025-10-15T15:14:26.234Z" }, + { url = "https://files.pythonhosted.org/packages/00/a2/8479325576dfcd909244d0df215f077f47437ab852ab778cfa2f8bf4d954/coverage-7.11.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:5ef83b107f50db3f9ae40f69e34b3bd9337456c5a7fe3461c7abf8b75dd666a2", size = 247261, upload-time = "2025-10-15T15:14:28.42Z" }, + { url = "https://files.pythonhosted.org/packages/7b/d8/3a9e2db19d94d65771d0f2e21a9ea587d11b831332a73622f901157cc24b/coverage-7.11.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:f91f927a3215b8907e214af77200250bb6aae36eca3f760f89780d13e495388d", size = 247092, upload-time = "2025-10-15T15:14:30.784Z" }, + { url = "https://files.pythonhosted.org/packages/b3/b1/bbca3c472544f9e2ad2d5116b2379732957048be4b93a9c543fcd0207e5f/coverage-7.11.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:cdbcd376716d6b7fbfeedd687a6c4be019c5a5671b35f804ba76a4c0a778cba4", size = 248755, upload-time = "2025-10-15T15:14:32.585Z" }, + { url = "https://files.pythonhosted.org/packages/89/49/638d5a45a6a0f00af53d6b637c87007eb2297042186334e9923a61aa8854/coverage-7.11.0-cp314-cp314-win32.whl", hash = "sha256:bab7ec4bb501743edc63609320aaec8cd9188b396354f482f4de4d40a9d10721", size = 218793, upload-time = "2025-10-15T15:14:34.972Z" }, + { url = 
"https://files.pythonhosted.org/packages/30/cc/b675a51f2d068adb3cdf3799212c662239b0ca27f4691d1fff81b92ea850/coverage-7.11.0-cp314-cp314-win_amd64.whl", hash = "sha256:3d4ba9a449e9364a936a27322b20d32d8b166553bfe63059bd21527e681e2fad", size = 219587, upload-time = "2025-10-15T15:14:37.047Z" }, + { url = "https://files.pythonhosted.org/packages/93/98/5ac886876026de04f00820e5094fe22166b98dcb8b426bf6827aaf67048c/coverage-7.11.0-cp314-cp314-win_arm64.whl", hash = "sha256:ce37f215223af94ef0f75ac68ea096f9f8e8c8ec7d6e8c346ee45c0d363f0479", size = 218168, upload-time = "2025-10-15T15:14:38.861Z" }, + { url = "https://files.pythonhosted.org/packages/14/d1/b4145d35b3e3ecf4d917e97fc8895bcf027d854879ba401d9ff0f533f997/coverage-7.11.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:f413ce6e07e0d0dc9c433228727b619871532674b45165abafe201f200cc215f", size = 216850, upload-time = "2025-10-15T15:14:40.651Z" }, + { url = "https://files.pythonhosted.org/packages/ca/d1/7f645fc2eccd318369a8a9948acc447bb7c1ade2911e31d3c5620544c22b/coverage-7.11.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:05791e528a18f7072bf5998ba772fe29db4da1234c45c2087866b5ba4dea710e", size = 217071, upload-time = "2025-10-15T15:14:42.755Z" }, + { url = "https://files.pythonhosted.org/packages/54/7d/64d124649db2737ceced1dfcbdcb79898d5868d311730f622f8ecae84250/coverage-7.11.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cacb29f420cfeb9283b803263c3b9a068924474ff19ca126ba9103e1278dfa44", size = 258570, upload-time = "2025-10-15T15:14:44.542Z" }, + { url = "https://files.pythonhosted.org/packages/6c/3f/6f5922f80dc6f2d8b2c6f974835c43f53eb4257a7797727e6ca5b7b2ec1f/coverage-7.11.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:314c24e700d7027ae3ab0d95fbf8d53544fca1f20345fd30cd219b737c6e58d3", size = 260738, upload-time = "2025-10-15T15:14:46.436Z" }, + { url = 
"https://files.pythonhosted.org/packages/0e/5f/9e883523c4647c860b3812b417a2017e361eca5b635ee658387dc11b13c1/coverage-7.11.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:630d0bd7a293ad2fc8b4b94e5758c8b2536fdf36c05f1681270203e463cbfa9b", size = 262994, upload-time = "2025-10-15T15:14:48.3Z" }, + { url = "https://files.pythonhosted.org/packages/07/bb/43b5a8e94c09c8bf51743ffc65c4c841a4ca5d3ed191d0a6919c379a1b83/coverage-7.11.0-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e89641f5175d65e2dbb44db15fe4ea48fade5d5bbb9868fdc2b4fce22f4a469d", size = 257282, upload-time = "2025-10-15T15:14:50.236Z" }, + { url = "https://files.pythonhosted.org/packages/aa/e5/0ead8af411411330b928733e1d201384b39251a5f043c1612970310e8283/coverage-7.11.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c9f08ea03114a637dab06cedb2e914da9dc67fa52c6015c018ff43fdde25b9c2", size = 260430, upload-time = "2025-10-15T15:14:52.413Z" }, + { url = "https://files.pythonhosted.org/packages/ae/66/03dd8bb0ba5b971620dcaac145461950f6d8204953e535d2b20c6b65d729/coverage-7.11.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:ce9f3bde4e9b031eaf1eb61df95c1401427029ea1bfddb8621c1161dcb0fa02e", size = 258190, upload-time = "2025-10-15T15:14:54.268Z" }, + { url = "https://files.pythonhosted.org/packages/45/ae/28a9cce40bf3174426cb2f7e71ee172d98e7f6446dff936a7ccecee34b14/coverage-7.11.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:e4dc07e95495923d6fd4d6c27bf70769425b71c89053083843fd78f378558996", size = 256658, upload-time = "2025-10-15T15:14:56.436Z" }, + { url = "https://files.pythonhosted.org/packages/5c/7c/3a44234a8599513684bfc8684878fd7b126c2760f79712bb78c56f19efc4/coverage-7.11.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:424538266794db2861db4922b05d729ade0940ee69dcf0591ce8f69784db0e11", size = 259342, upload-time = "2025-10-15T15:14:58.538Z" }, + { url = 
"https://files.pythonhosted.org/packages/e1/e6/0108519cba871af0351725ebdb8660fd7a0fe2ba3850d56d32490c7d9b4b/coverage-7.11.0-cp314-cp314t-win32.whl", hash = "sha256:4c1eeb3fb8eb9e0190bebafd0462936f75717687117339f708f395fe455acc73", size = 219568, upload-time = "2025-10-15T15:15:00.382Z" }, + { url = "https://files.pythonhosted.org/packages/c9/76/44ba876e0942b4e62fdde23ccb029ddb16d19ba1bef081edd00857ba0b16/coverage-7.11.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b56efee146c98dbf2cf5cffc61b9829d1e94442df4d7398b26892a53992d3547", size = 220687, upload-time = "2025-10-15T15:15:02.322Z" }, + { url = "https://files.pythonhosted.org/packages/b9/0c/0df55ecb20d0d0ed5c322e10a441775e1a3a5d78c60f0c4e1abfe6fcf949/coverage-7.11.0-cp314-cp314t-win_arm64.whl", hash = "sha256:b5c2705afa83f49bd91962a4094b6b082f94aef7626365ab3f8f4bd159c5acf3", size = 218711, upload-time = "2025-10-15T15:15:04.575Z" }, + { url = "https://files.pythonhosted.org/packages/5f/04/642c1d8a448ae5ea1369eac8495740a79eb4e581a9fb0cbdce56bbf56da1/coverage-7.11.0-py3-none-any.whl", hash = "sha256:4b7589765348d78fb4e5fb6ea35d07564e387da2fc5efff62e0222971f155f68", size = 207761, upload-time = "2025-10-15T15:15:06.439Z" }, ] [package.optional-dependencies] @@ -2473,7 +2473,7 @@ linting = [ ] test = [ { name = "coverage" }, - { name = "nemo-run" }, + { name = "nemo-run", git = "https://github.com/NVIDIA-NeMo/Run.git?rev=01a9a8ba360f7b2908728ad0516e0ad9d936966d" }, { name = "nltk" }, { name = "pydantic" }, { name = "pygithub" }, @@ -2872,8 +2872,8 @@ wheels = [ [[package]] name = "nemo-run" -version = "0.6.0" -source = { registry = "https://pypi.org/simple" } +version = "0.7.0rc0.dev0" +source = { git = "https://github.com/NVIDIA-NeMo/Run.git?rev=01a9a8ba360f7b2908728ad0516e0ad9d936966d#01a9a8ba360f7b2908728ad0516e0ad9d936966d" } dependencies = [ { name = "catalogue" }, { name = "cryptography" }, @@ -2891,10 +2891,6 @@ dependencies = [ { name = "torchx" }, { name = "typer" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/8b/0a/161c5f9534946f096d7ba16e40874cf9ebbff17d57c1f88173b4b32cf067/nemo_run-0.6.0.tar.gz", hash = "sha256:8c2ec0a87a0e4df799ee527422fd2df366926cdc4cc8e0b666df98b550cd9bb7", size = 2284395, upload-time = "2025-10-09T16:07:25.718Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ac/2e/56750d75ec35a692e9eb0ac0f780da9f12c8e599b8273b9eabc33ae0ca30/nemo_run-0.6.0-py3-none-any.whl", hash = "sha256:7b6473aded379e9c793b7f1f64c7f44ce3ef70b4ea27dad95fd84523531ac403", size = 235439, upload-time = "2025-10-09T16:07:24.46Z" }, -] [[package]] name = "networkx" @@ -3111,59 +3107,6 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/05/79/87c45f32e661b25e0aaa1e325ba166511f57be5dff8f0fcabc12d3e73b64/nv_grouped_gemm-1.1.4.post6.tar.gz", hash = "sha256:dad6115f4b4ff7ceb0bc40ad44e923c13a24fc88cfe1e20b1a6b4c9cf24c445c", size = 26508, upload-time = "2025-10-10T18:52:29.508Z" } -[[package]] -name = "nvidia-cublas-cu12" -version = "12.8.4.1" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/29/99/db44d685f0e257ff0e213ade1964fc459b4a690a73293220e98feb3307cf/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b86f6dd8935884615a0683b663891d43781b819ac4f2ba2b0c9604676af346d0", size = 590537124, upload-time = "2025-03-07T01:43:53.556Z" }, - { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" }, - { url = "https://files.pythonhosted.org/packages/70/61/7d7b3c70186fb651d0fbd35b01dbfc8e755f69fd58f817f3d0f642df20c3/nvidia_cublas_cu12-12.8.4.1-py3-none-win_amd64.whl", hash = "sha256:47e9b82132fa8d2b4944e708049229601448aaad7e6f296f630f2d1a32de35af", size = 
567544208, upload-time = "2025-03-07T01:53:30.535Z" }, -] - -[[package]] -name = "nvidia-cuda-cupti-cu12" -version = "12.8.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d5/1f/b3bd73445e5cb342727fd24fe1f7b748f690b460acadc27ea22f904502c8/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4412396548808ddfed3f17a467b104ba7751e6b58678a4b840675c56d21cf7ed", size = 9533318, upload-time = "2025-03-07T01:40:10.421Z" }, - { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" }, - { url = "https://files.pythonhosted.org/packages/41/bc/83f5426095d93694ae39fe1311431b5d5a9bb82e48bf0dd8e19be2765942/nvidia_cuda_cupti_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:bb479dcdf7e6d4f8b0b01b115260399bf34154a1a2e9fe11c85c517d87efd98e", size = 7015759, upload-time = "2025-03-07T01:51:11.355Z" }, -] - -[[package]] -name = "nvidia-cuda-nvrtc-cu12" -version = "12.8.93" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" }, - { url = "https://files.pythonhosted.org/packages/eb/d1/e50d0acaab360482034b84b6e27ee83c6738f7d32182b987f9c7a4e32962/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc1fec1e1637854b4c0a65fb9a8346b51dd9ee69e61ebaccc82058441f15bce8", size = 43106076, 
upload-time = "2025-03-07T01:41:59.817Z" }, - { url = "https://files.pythonhosted.org/packages/45/51/52a3d84baa2136cc8df15500ad731d74d3a1114d4c123e043cb608d4a32b/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:7a4b6b2904850fe78e0bd179c4b655c404d4bb799ef03ddc60804247099ae909", size = 73586838, upload-time = "2025-03-07T01:52:13.483Z" }, -] - -[[package]] -name = "nvidia-cuda-runtime-cu12" -version = "12.8.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7c/75/f865a3b236e4647605ea34cc450900854ba123834a5f1598e160b9530c3a/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:52bf7bbee900262ffefe5e9d5a2a69a30d97e2bc5bb6cc866688caa976966e3d", size = 965265, upload-time = "2025-03-07T01:39:43.533Z" }, - { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" }, - { url = "https://files.pythonhosted.org/packages/30/a5/a515b7600ad361ea14bfa13fb4d6687abf500adc270f19e89849c0590492/nvidia_cuda_runtime_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:c0c6027f01505bfed6c3b21ec546f69c687689aad5f1a377554bc6ca4aa993a8", size = 944318, upload-time = "2025-03-07T01:51:01.794Z" }, -] - -[[package]] -name = "nvidia-cudnn-cu12" -version = "9.10.2.21" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-cublas-cu12" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/41/e79269ce215c857c935fd86bcfe91a451a584dfc27f1e068f568b9ad1ab7/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8", size = 705026878, 
upload-time = "2025-06-06T21:52:51.348Z" }, - { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, - { url = "https://files.pythonhosted.org/packages/3d/90/0bd6e586701b3a890fd38aa71c387dab4883d619d6e5ad912ccbd05bfd67/nvidia_cudnn_cu12-9.10.2.21-py3-none-win_amd64.whl", hash = "sha256:c6288de7d63e6cf62988f0923f96dc339cea362decb1bf5b3141883392a7d65e", size = 692992268, upload-time = "2025-06-06T21:55:18.114Z" }, -] - [[package]] name = "nvidia-cudnn-frontend" version = "1.15.0" @@ -3183,76 +3126,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/04/519fd6e3ea12fe7fe98c497c4d51f6c5c87763d02e90ea3102cef32a6ef1/nvidia_cudnn_frontend-1.15.0-cp313-cp313-win_amd64.whl", hash = "sha256:7c8c6f12534b73b0cd55956c5e9419b7840a01e4c260837606112450ce1ca0d9", size = 1297324, upload-time = "2025-10-10T18:46:53.104Z" }, ] -[[package]] -name = "nvidia-cufft-cu12" -version = "11.3.3.83" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/60/bc/7771846d3a0272026c416fbb7e5f4c1f146d6d80704534d0b187dd6f4800/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a", size = 193109211, upload-time = "2025-03-07T01:44:56.873Z" }, - { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, - { url = 
"https://files.pythonhosted.org/packages/7d/ec/ce1629f1e478bb5ccd208986b5f9e0316a78538dd6ab1d0484f012f8e2a1/nvidia_cufft_cu12-11.3.3.83-py3-none-win_amd64.whl", hash = "sha256:7a64a98ef2a7c47f905aaf8931b69a3a43f27c55530c698bb2ed7c75c0b42cb7", size = 192216559, upload-time = "2025-03-07T01:53:57.106Z" }, -] - -[[package]] -name = "nvidia-cufile-cu12" -version = "1.13.1.3" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" }, - { url = "https://files.pythonhosted.org/packages/1e/f5/5607710447a6fe9fd9b3283956fceeee8a06cda1d2f56ce31371f595db2a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:4beb6d4cce47c1a0f1013d72e02b0994730359e17801d395bdcbf20cfb3bb00a", size = 1120705, upload-time = "2025-03-07T01:45:41.434Z" }, -] - -[[package]] -name = "nvidia-curand-cu12" -version = "10.3.9.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/45/5e/92aa15eca622a388b80fbf8375d4760738df6285b1e92c43d37390a33a9a/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dfab99248034673b779bc6decafdc3404a8a6f502462201f2f31f11354204acd", size = 63625754, upload-time = "2025-03-07T01:46:10.735Z" }, - { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" }, - { url = 
"https://files.pythonhosted.org/packages/b9/75/70c05b2f3ed5be3bb30b7102b6eb78e100da4bbf6944fd6725c012831cab/nvidia_curand_cu12-10.3.9.90-py3-none-win_amd64.whl", hash = "sha256:f149a8ca457277da854f89cf282d6ef43176861926c7ac85b2a0fbd237c587ec", size = 62765309, upload-time = "2025-03-07T01:54:20.478Z" }, -] - -[[package]] -name = "nvidia-cusolver-cu12" -version = "11.7.3.90" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-cublas-cu12" }, - { name = "nvidia-cusparse-cu12" }, - { name = "nvidia-nvjitlink-cu12" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/c8/32/f7cd6ce8a7690544d084ea21c26e910a97e077c9b7f07bf5de623ee19981/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0", size = 267229841, upload-time = "2025-03-07T01:46:54.356Z" }, - { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, - { url = "https://files.pythonhosted.org/packages/13/c0/76ca8551b8a84146ffa189fec81c26d04adba4bc0dbe09cd6e6fd9b7de04/nvidia_cusolver_cu12-11.7.3.90-py3-none-win_amd64.whl", hash = "sha256:4a550db115fcabc4d495eb7d39ac8b58d4ab5d8e63274d3754df1c0ad6a22d34", size = 256720438, upload-time = "2025-03-07T01:54:39.898Z" }, -] - -[[package]] -name = "nvidia-cusparse-cu12" -version = "12.5.8.93" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/bc/f7/cd777c4109681367721b00a106f491e0d0d15cfa1fd59672ce580ce42a97/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc", size = 288117129, upload-time = "2025-03-07T01:47:40.407Z" }, - { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, - { url = "https://files.pythonhosted.org/packages/62/07/f3b2ad63f8e3d257a599f422ae34eb565e70c41031aecefa3d18b62cabd1/nvidia_cusparse_cu12-12.5.8.93-py3-none-win_amd64.whl", hash = "sha256:9a33604331cb2cac199f2e7f5104dfbb8a5a898c367a53dfda9ff2acb6b6b4dd", size = 284937404, upload-time = "2025-03-07T01:55:07.742Z" }, -] - -[[package]] -name = "nvidia-cusparselt-cu12" -version = "0.7.1" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/73/b9/598f6ff36faaece4b3c50d26f50e38661499ff34346f00e057760b35cc9d/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5", size = 283835557, upload-time = "2025-02-26T00:16:54.265Z" }, - { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" }, - { url = "https://files.pythonhosted.org/packages/2f/d8/a6b0d0d0c2435e9310f3e2bb0d9c9dd4c33daef86aa5f30b3681defd37ea/nvidia_cusparselt_cu12-0.7.1-py3-none-win_amd64.whl", hash = "sha256:f67fbb5831940ec829c9117b7f33807db9f9678dc2a617fbe781cac17b4e1075", size = 271020911, upload-time = "2025-02-26T00:14:47.204Z" }, -] - [[package]] name = "nvidia-cutlass-dsl" version = "4.2.1" @@ -3330,35 +3203,6 @@ 
wheels = [ { url = "https://files.pythonhosted.org/packages/13/40/4427583475dfd8eb1b8c7522d75d4d059f0512ff03dcc62d6986a22ab918/nvidia_modelopt_core-0.33.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:69d5ace564f2b056c916117be2023f2b7fc01cd1501073915e6b2ced2b8a5394", size = 1363366, upload-time = "2025-08-12T18:39:28.854Z" }, ] -[[package]] -name = "nvidia-nccl-cu12" -version = "2.27.3" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4b/7b/8354b784cf73b0ba51e566b4baba3ddd44fe8288a3d39ef1e06cd5417226/nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9ddf1a245abc36c550870f26d537a9b6087fb2e2e3d6e0ef03374c6fd19d984f", size = 322397768, upload-time = "2025-06-03T21:57:30.234Z" }, - { url = "https://files.pythonhosted.org/packages/5c/5b/4e4fff7bad39adf89f735f2bc87248c81db71205b62bcc0d5ca5b606b3c3/nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adf27ccf4238253e0b826bce3ff5fa532d65fc42322c8bfdfaf28024c0fbe039", size = 322364134, upload-time = "2025-06-03T21:58:04.013Z" }, -] - -[[package]] -name = "nvidia-nvjitlink-cu12" -version = "12.8.93" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" }, - { url = "https://files.pythonhosted.org/packages/2a/a2/8cee5da30d13430e87bf99bb33455d2724d0a4a9cb5d7926d80ccb96d008/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7", size = 38386204, upload-time = "2025-03-07T01:49:43.612Z" }, - { url = 
"https://files.pythonhosted.org/packages/ed/d7/34f02dad2e30c31b10a51f6b04e025e5dd60e5f936af9045a9b858a05383/nvidia_nvjitlink_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:bd93fbeeee850917903583587f4fc3a4eafa022e34572251368238ab5e6bd67f", size = 268553710, upload-time = "2025-03-07T01:56:24.13Z" }, -] - -[[package]] -name = "nvidia-nvtx-cu12" -version = "12.8.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/10/c0/1b303feea90d296f6176f32a2a70b5ef230f9bdeb3a72bddb0dc922dc137/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7ad891da111ebafbf7e015d34879f7112832fc239ff0d7d776b6cb685274615", size = 91161, upload-time = "2025-03-07T01:42:23.922Z" }, - { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" }, - { url = "https://files.pythonhosted.org/packages/9f/99/4c9c0c329bf9fc125008c3b54c7c94c0023518d06fc025ae36431375e1fe/nvidia_nvtx_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:619c8304aedc69f02ea82dd244541a83c3d9d40993381b3b590f1adaed3db41e", size = 56492, upload-time = "2025-03-07T01:52:24.69Z" }, -] - [[package]] name = "nvidia-resiliency-ext" version = "0.4.1" @@ -3741,104 +3585,100 @@ wheels = [ [[package]] name = "pillow" -version = "11.3.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/d0d6dea55cd152ce3d6767bb38a8fc10e33796ba4ba210cbab9354b6d238/pillow-11.3.0.tar.gz", hash = "sha256:3828ee7586cd0b2091b6209e5ad53e20d0649bbe87164a459d0676e035e8f523", size = 47113069, upload-time = "2025-07-01T09:16:30.666Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/4c/5d/45a3553a253ac8763f3561371432a90bdbe6000fbdcf1397ffe502aa206c/pillow-11.3.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:1b9c17fd4ace828b3003dfd1e30bff24863e0eb59b535e8f80194d9cc7ecf860", size = 5316554, upload-time = "2025-07-01T09:13:39.342Z" }, - { url = "https://files.pythonhosted.org/packages/7c/c8/67c12ab069ef586a25a4a79ced553586748fad100c77c0ce59bb4983ac98/pillow-11.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:65dc69160114cdd0ca0f35cb434633c75e8e7fad4cf855177a05bf38678f73ad", size = 4686548, upload-time = "2025-07-01T09:13:41.835Z" }, - { url = "https://files.pythonhosted.org/packages/2f/bd/6741ebd56263390b382ae4c5de02979af7f8bd9807346d068700dd6d5cf9/pillow-11.3.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7107195ddc914f656c7fc8e4a5e1c25f32e9236ea3ea860f257b0436011fddd0", size = 5859742, upload-time = "2025-07-03T13:09:47.439Z" }, - { url = "https://files.pythonhosted.org/packages/ca/0b/c412a9e27e1e6a829e6ab6c2dca52dd563efbedf4c9c6aa453d9a9b77359/pillow-11.3.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cc3e831b563b3114baac7ec2ee86819eb03caa1a2cef0b481a5675b59c4fe23b", size = 7633087, upload-time = "2025-07-03T13:09:51.796Z" }, - { url = "https://files.pythonhosted.org/packages/59/9d/9b7076aaf30f5dd17e5e5589b2d2f5a5d7e30ff67a171eb686e4eecc2adf/pillow-11.3.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f1f182ebd2303acf8c380a54f615ec883322593320a9b00438eb842c1f37ae50", size = 5963350, upload-time = "2025-07-01T09:13:43.865Z" }, - { url = "https://files.pythonhosted.org/packages/f0/16/1a6bf01fb622fb9cf5c91683823f073f053005c849b1f52ed613afcf8dae/pillow-11.3.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4445fa62e15936a028672fd48c4c11a66d641d2c05726c7ec1f8ba6a572036ae", size = 6631840, upload-time = "2025-07-01T09:13:46.161Z" }, - { url = 
"https://files.pythonhosted.org/packages/7b/e6/6ff7077077eb47fde78739e7d570bdcd7c10495666b6afcd23ab56b19a43/pillow-11.3.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:71f511f6b3b91dd543282477be45a033e4845a40278fa8dcdbfdb07109bf18f9", size = 6074005, upload-time = "2025-07-01T09:13:47.829Z" }, - { url = "https://files.pythonhosted.org/packages/c3/3a/b13f36832ea6d279a697231658199e0a03cd87ef12048016bdcc84131601/pillow-11.3.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:040a5b691b0713e1f6cbe222e0f4f74cd233421e105850ae3b3c0ceda520f42e", size = 6708372, upload-time = "2025-07-01T09:13:52.145Z" }, - { url = "https://files.pythonhosted.org/packages/6c/e4/61b2e1a7528740efbc70b3d581f33937e38e98ef3d50b05007267a55bcb2/pillow-11.3.0-cp310-cp310-win32.whl", hash = "sha256:89bd777bc6624fe4115e9fac3352c79ed60f3bb18651420635f26e643e3dd1f6", size = 6277090, upload-time = "2025-07-01T09:13:53.915Z" }, - { url = "https://files.pythonhosted.org/packages/a9/d3/60c781c83a785d6afbd6a326ed4d759d141de43aa7365725cbcd65ce5e54/pillow-11.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:19d2ff547c75b8e3ff46f4d9ef969a06c30ab2d4263a9e287733aa8b2429ce8f", size = 6985988, upload-time = "2025-07-01T09:13:55.699Z" }, - { url = "https://files.pythonhosted.org/packages/9f/28/4f4a0203165eefb3763939c6789ba31013a2e90adffb456610f30f613850/pillow-11.3.0-cp310-cp310-win_arm64.whl", hash = "sha256:819931d25e57b513242859ce1876c58c59dc31587847bf74cfe06b2e0cb22d2f", size = 2422899, upload-time = "2025-07-01T09:13:57.497Z" }, - { url = "https://files.pythonhosted.org/packages/db/26/77f8ed17ca4ffd60e1dcd220a6ec6d71210ba398cfa33a13a1cd614c5613/pillow-11.3.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:1cd110edf822773368b396281a2293aeb91c90a2db00d78ea43e7e861631b722", size = 5316531, upload-time = "2025-07-01T09:13:59.203Z" }, - { url = "https://files.pythonhosted.org/packages/cb/39/ee475903197ce709322a17a866892efb560f57900d9af2e55f86db51b0a5/pillow-11.3.0-cp311-cp311-macosx_11_0_arm64.whl", 
hash = "sha256:9c412fddd1b77a75aa904615ebaa6001f169b26fd467b4be93aded278266b288", size = 4686560, upload-time = "2025-07-01T09:14:01.101Z" }, - { url = "https://files.pythonhosted.org/packages/d5/90/442068a160fd179938ba55ec8c97050a612426fae5ec0a764e345839f76d/pillow-11.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7d1aa4de119a0ecac0a34a9c8bde33f34022e2e8f99104e47a3ca392fd60e37d", size = 5870978, upload-time = "2025-07-03T13:09:55.638Z" }, - { url = "https://files.pythonhosted.org/packages/13/92/dcdd147ab02daf405387f0218dcf792dc6dd5b14d2573d40b4caeef01059/pillow-11.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:91da1d88226663594e3f6b4b8c3c8d85bd504117d043740a8e0ec449087cc494", size = 7641168, upload-time = "2025-07-03T13:10:00.37Z" }, - { url = "https://files.pythonhosted.org/packages/6e/db/839d6ba7fd38b51af641aa904e2960e7a5644d60ec754c046b7d2aee00e5/pillow-11.3.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:643f189248837533073c405ec2f0bb250ba54598cf80e8c1e043381a60632f58", size = 5973053, upload-time = "2025-07-01T09:14:04.491Z" }, - { url = "https://files.pythonhosted.org/packages/f2/2f/d7675ecae6c43e9f12aa8d58b6012683b20b6edfbdac7abcb4e6af7a3784/pillow-11.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:106064daa23a745510dabce1d84f29137a37224831d88eb4ce94bb187b1d7e5f", size = 6640273, upload-time = "2025-07-01T09:14:06.235Z" }, - { url = "https://files.pythonhosted.org/packages/45/ad/931694675ede172e15b2ff03c8144a0ddaea1d87adb72bb07655eaffb654/pillow-11.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cd8ff254faf15591e724dc7c4ddb6bf4793efcbe13802a4ae3e863cd300b493e", size = 6082043, upload-time = "2025-07-01T09:14:07.978Z" }, - { url = "https://files.pythonhosted.org/packages/3a/04/ba8f2b11fc80d2dd462d7abec16351b45ec99cbbaea4387648a44190351a/pillow-11.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:932c754c2d51ad2b2271fd01c3d121daaa35e27efae2a616f77bf164bc0b3e94", size = 6715516, upload-time = "2025-07-01T09:14:10.233Z" }, - { url = "https://files.pythonhosted.org/packages/48/59/8cd06d7f3944cc7d892e8533c56b0acb68399f640786313275faec1e3b6f/pillow-11.3.0-cp311-cp311-win32.whl", hash = "sha256:b4b8f3efc8d530a1544e5962bd6b403d5f7fe8b9e08227c6b255f98ad82b4ba0", size = 6274768, upload-time = "2025-07-01T09:14:11.921Z" }, - { url = "https://files.pythonhosted.org/packages/f1/cc/29c0f5d64ab8eae20f3232da8f8571660aa0ab4b8f1331da5c2f5f9a938e/pillow-11.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:1a992e86b0dd7aeb1f053cd506508c0999d710a8f07b4c791c63843fc6a807ac", size = 6986055, upload-time = "2025-07-01T09:14:13.623Z" }, - { url = "https://files.pythonhosted.org/packages/c6/df/90bd886fabd544c25addd63e5ca6932c86f2b701d5da6c7839387a076b4a/pillow-11.3.0-cp311-cp311-win_arm64.whl", hash = "sha256:30807c931ff7c095620fe04448e2c2fc673fcbb1ffe2a7da3fb39613489b1ddd", size = 2423079, upload-time = "2025-07-01T09:14:15.268Z" }, - { url = "https://files.pythonhosted.org/packages/40/fe/1bc9b3ee13f68487a99ac9529968035cca2f0a51ec36892060edcc51d06a/pillow-11.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdae223722da47b024b867c1ea0be64e0df702c5e0a60e27daad39bf960dd1e4", size = 5278800, upload-time = "2025-07-01T09:14:17.648Z" }, - { url = "https://files.pythonhosted.org/packages/2c/32/7e2ac19b5713657384cec55f89065fb306b06af008cfd87e572035b27119/pillow-11.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:921bd305b10e82b4d1f5e802b6850677f965d8394203d182f078873851dada69", size = 4686296, upload-time = "2025-07-01T09:14:19.828Z" }, - { url = "https://files.pythonhosted.org/packages/8e/1e/b9e12bbe6e4c2220effebc09ea0923a07a6da1e1f1bfbc8d7d29a01ce32b/pillow-11.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:eb76541cba2f958032d79d143b98a3a6b3ea87f0959bbe256c0b5e416599fd5d", size = 5871726, upload-time = "2025-07-03T13:10:04.448Z" }, - { 
url = "https://files.pythonhosted.org/packages/8d/33/e9200d2bd7ba00dc3ddb78df1198a6e80d7669cce6c2bdbeb2530a74ec58/pillow-11.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:67172f2944ebba3d4a7b54f2e95c786a3a50c21b88456329314caaa28cda70f6", size = 7644652, upload-time = "2025-07-03T13:10:10.391Z" }, - { url = "https://files.pythonhosted.org/packages/41/f1/6f2427a26fc683e00d985bc391bdd76d8dd4e92fac33d841127eb8fb2313/pillow-11.3.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97f07ed9f56a3b9b5f49d3661dc9607484e85c67e27f3e8be2c7d28ca032fec7", size = 5977787, upload-time = "2025-07-01T09:14:21.63Z" }, - { url = "https://files.pythonhosted.org/packages/e4/c9/06dd4a38974e24f932ff5f98ea3c546ce3f8c995d3f0985f8e5ba48bba19/pillow-11.3.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:676b2815362456b5b3216b4fd5bd89d362100dc6f4945154ff172e206a22c024", size = 6645236, upload-time = "2025-07-01T09:14:23.321Z" }, - { url = "https://files.pythonhosted.org/packages/40/e7/848f69fb79843b3d91241bad658e9c14f39a32f71a301bcd1d139416d1be/pillow-11.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3e184b2f26ff146363dd07bde8b711833d7b0202e27d13540bfe2e35a323a809", size = 6086950, upload-time = "2025-07-01T09:14:25.237Z" }, - { url = "https://files.pythonhosted.org/packages/0b/1a/7cff92e695a2a29ac1958c2a0fe4c0b2393b60aac13b04a4fe2735cad52d/pillow-11.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6be31e3fc9a621e071bc17bb7de63b85cbe0bfae91bb0363c893cbe67247780d", size = 6723358, upload-time = "2025-07-01T09:14:27.053Z" }, - { url = "https://files.pythonhosted.org/packages/26/7d/73699ad77895f69edff76b0f332acc3d497f22f5d75e5360f78cbcaff248/pillow-11.3.0-cp312-cp312-win32.whl", hash = "sha256:7b161756381f0918e05e7cb8a371fff367e807770f8fe92ecb20d905d0e1c149", size = 6275079, upload-time = "2025-07-01T09:14:30.104Z" }, - { url = 
"https://files.pythonhosted.org/packages/8c/ce/e7dfc873bdd9828f3b6e5c2bbb74e47a98ec23cc5c74fc4e54462f0d9204/pillow-11.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:a6444696fce635783440b7f7a9fc24b3ad10a9ea3f0ab66c5905be1c19ccf17d", size = 6986324, upload-time = "2025-07-01T09:14:31.899Z" }, - { url = "https://files.pythonhosted.org/packages/16/8f/b13447d1bf0b1f7467ce7d86f6e6edf66c0ad7cf44cf5c87a37f9bed9936/pillow-11.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:2aceea54f957dd4448264f9bf40875da0415c83eb85f55069d89c0ed436e3542", size = 2423067, upload-time = "2025-07-01T09:14:33.709Z" }, - { url = "https://files.pythonhosted.org/packages/1e/93/0952f2ed8db3a5a4c7a11f91965d6184ebc8cd7cbb7941a260d5f018cd2d/pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:1c627742b539bba4309df89171356fcb3cc5a9178355b2727d1b74a6cf155fbd", size = 2128328, upload-time = "2025-07-01T09:14:35.276Z" }, - { url = "https://files.pythonhosted.org/packages/4b/e8/100c3d114b1a0bf4042f27e0f87d2f25e857e838034e98ca98fe7b8c0a9c/pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:30b7c02f3899d10f13d7a48163c8969e4e653f8b43416d23d13d1bbfdc93b9f8", size = 2170652, upload-time = "2025-07-01T09:14:37.203Z" }, - { url = "https://files.pythonhosted.org/packages/aa/86/3f758a28a6e381758545f7cdb4942e1cb79abd271bea932998fc0db93cb6/pillow-11.3.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:7859a4cc7c9295f5838015d8cc0a9c215b77e43d07a25e460f35cf516df8626f", size = 2227443, upload-time = "2025-07-01T09:14:39.344Z" }, - { url = "https://files.pythonhosted.org/packages/01/f4/91d5b3ffa718df2f53b0dc109877993e511f4fd055d7e9508682e8aba092/pillow-11.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ec1ee50470b0d050984394423d96325b744d55c701a439d2bd66089bff963d3c", size = 5278474, upload-time = "2025-07-01T09:14:41.843Z" }, - { url = 
"https://files.pythonhosted.org/packages/f9/0e/37d7d3eca6c879fbd9dba21268427dffda1ab00d4eb05b32923d4fbe3b12/pillow-11.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7db51d222548ccfd274e4572fdbf3e810a5e66b00608862f947b163e613b67dd", size = 4686038, upload-time = "2025-07-01T09:14:44.008Z" }, - { url = "https://files.pythonhosted.org/packages/ff/b0/3426e5c7f6565e752d81221af9d3676fdbb4f352317ceafd42899aaf5d8a/pillow-11.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2d6fcc902a24ac74495df63faad1884282239265c6839a0a6416d33faedfae7e", size = 5864407, upload-time = "2025-07-03T13:10:15.628Z" }, - { url = "https://files.pythonhosted.org/packages/fc/c1/c6c423134229f2a221ee53f838d4be9d82bab86f7e2f8e75e47b6bf6cd77/pillow-11.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f0f5d8f4a08090c6d6d578351a2b91acf519a54986c055af27e7a93feae6d3f1", size = 7639094, upload-time = "2025-07-03T13:10:21.857Z" }, - { url = "https://files.pythonhosted.org/packages/ba/c9/09e6746630fe6372c67c648ff9deae52a2bc20897d51fa293571977ceb5d/pillow-11.3.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c37d8ba9411d6003bba9e518db0db0c58a680ab9fe5179f040b0463644bc9805", size = 5973503, upload-time = "2025-07-01T09:14:45.698Z" }, - { url = "https://files.pythonhosted.org/packages/d5/1c/a2a29649c0b1983d3ef57ee87a66487fdeb45132df66ab30dd37f7dbe162/pillow-11.3.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:13f87d581e71d9189ab21fe0efb5a23e9f28552d5be6979e84001d3b8505abe8", size = 6642574, upload-time = "2025-07-01T09:14:47.415Z" }, - { url = "https://files.pythonhosted.org/packages/36/de/d5cc31cc4b055b6c6fd990e3e7f0f8aaf36229a2698501bcb0cdf67c7146/pillow-11.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:023f6d2d11784a465f09fd09a34b150ea4672e85fb3d05931d89f373ab14abb2", size = 6084060, upload-time = "2025-07-01T09:14:49.636Z" }, - { url = 
"https://files.pythonhosted.org/packages/d5/ea/502d938cbaeec836ac28a9b730193716f0114c41325db428e6b280513f09/pillow-11.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:45dfc51ac5975b938e9809451c51734124e73b04d0f0ac621649821a63852e7b", size = 6721407, upload-time = "2025-07-01T09:14:51.962Z" }, - { url = "https://files.pythonhosted.org/packages/45/9c/9c5e2a73f125f6cbc59cc7087c8f2d649a7ae453f83bd0362ff7c9e2aee2/pillow-11.3.0-cp313-cp313-win32.whl", hash = "sha256:a4d336baed65d50d37b88ca5b60c0fa9d81e3a87d4a7930d3880d1624d5b31f3", size = 6273841, upload-time = "2025-07-01T09:14:54.142Z" }, - { url = "https://files.pythonhosted.org/packages/23/85/397c73524e0cd212067e0c969aa245b01d50183439550d24d9f55781b776/pillow-11.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0bce5c4fd0921f99d2e858dc4d4d64193407e1b99478bc5cacecba2311abde51", size = 6978450, upload-time = "2025-07-01T09:14:56.436Z" }, - { url = "https://files.pythonhosted.org/packages/17/d2/622f4547f69cd173955194b78e4d19ca4935a1b0f03a302d655c9f6aae65/pillow-11.3.0-cp313-cp313-win_arm64.whl", hash = "sha256:1904e1264881f682f02b7f8167935cce37bc97db457f8e7849dc3a6a52b99580", size = 2423055, upload-time = "2025-07-01T09:14:58.072Z" }, - { url = "https://files.pythonhosted.org/packages/dd/80/a8a2ac21dda2e82480852978416cfacd439a4b490a501a288ecf4fe2532d/pillow-11.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4c834a3921375c48ee6b9624061076bc0a32a60b5532b322cc0ea64e639dd50e", size = 5281110, upload-time = "2025-07-01T09:14:59.79Z" }, - { url = "https://files.pythonhosted.org/packages/44/d6/b79754ca790f315918732e18f82a8146d33bcd7f4494380457ea89eb883d/pillow-11.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5e05688ccef30ea69b9317a9ead994b93975104a677a36a8ed8106be9260aa6d", size = 4689547, upload-time = "2025-07-01T09:15:01.648Z" }, - { url = 
"https://files.pythonhosted.org/packages/49/20/716b8717d331150cb00f7fdd78169c01e8e0c219732a78b0e59b6bdb2fd6/pillow-11.3.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1019b04af07fc0163e2810167918cb5add8d74674b6267616021ab558dc98ced", size = 5901554, upload-time = "2025-07-03T13:10:27.018Z" }, - { url = "https://files.pythonhosted.org/packages/74/cf/a9f3a2514a65bb071075063a96f0a5cf949c2f2fce683c15ccc83b1c1cab/pillow-11.3.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f944255db153ebb2b19c51fe85dd99ef0ce494123f21b9db4877ffdfc5590c7c", size = 7669132, upload-time = "2025-07-03T13:10:33.01Z" }, - { url = "https://files.pythonhosted.org/packages/98/3c/da78805cbdbee9cb43efe8261dd7cc0b4b93f2ac79b676c03159e9db2187/pillow-11.3.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1f85acb69adf2aaee8b7da124efebbdb959a104db34d3a2cb0f3793dbae422a8", size = 6005001, upload-time = "2025-07-01T09:15:03.365Z" }, - { url = "https://files.pythonhosted.org/packages/6c/fa/ce044b91faecf30e635321351bba32bab5a7e034c60187fe9698191aef4f/pillow-11.3.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:05f6ecbeff5005399bb48d198f098a9b4b6bdf27b8487c7f38ca16eeb070cd59", size = 6668814, upload-time = "2025-07-01T09:15:05.655Z" }, - { url = "https://files.pythonhosted.org/packages/7b/51/90f9291406d09bf93686434f9183aba27b831c10c87746ff49f127ee80cb/pillow-11.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a7bc6e6fd0395bc052f16b1a8670859964dbd7003bd0af2ff08342eb6e442cfe", size = 6113124, upload-time = "2025-07-01T09:15:07.358Z" }, - { url = "https://files.pythonhosted.org/packages/cd/5a/6fec59b1dfb619234f7636d4157d11fb4e196caeee220232a8d2ec48488d/pillow-11.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:83e1b0161c9d148125083a35c1c5a89db5b7054834fd4387499e06552035236c", size = 6747186, upload-time = "2025-07-01T09:15:09.317Z" }, - { url = 
"https://files.pythonhosted.org/packages/49/6b/00187a044f98255225f172de653941e61da37104a9ea60e4f6887717e2b5/pillow-11.3.0-cp313-cp313t-win32.whl", hash = "sha256:2a3117c06b8fb646639dce83694f2f9eac405472713fcb1ae887469c0d4f6788", size = 6277546, upload-time = "2025-07-01T09:15:11.311Z" }, - { url = "https://files.pythonhosted.org/packages/e8/5c/6caaba7e261c0d75bab23be79f1d06b5ad2a2ae49f028ccec801b0e853d6/pillow-11.3.0-cp313-cp313t-win_amd64.whl", hash = "sha256:857844335c95bea93fb39e0fa2726b4d9d758850b34075a7e3ff4f4fa3aa3b31", size = 6985102, upload-time = "2025-07-01T09:15:13.164Z" }, - { url = "https://files.pythonhosted.org/packages/f3/7e/b623008460c09a0cb38263c93b828c666493caee2eb34ff67f778b87e58c/pillow-11.3.0-cp313-cp313t-win_arm64.whl", hash = "sha256:8797edc41f3e8536ae4b10897ee2f637235c94f27404cac7297f7b607dd0716e", size = 2424803, upload-time = "2025-07-01T09:15:15.695Z" }, - { url = "https://files.pythonhosted.org/packages/73/f4/04905af42837292ed86cb1b1dabe03dce1edc008ef14c473c5c7e1443c5d/pillow-11.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:d9da3df5f9ea2a89b81bb6087177fb1f4d1c7146d583a3fe5c672c0d94e55e12", size = 5278520, upload-time = "2025-07-01T09:15:17.429Z" }, - { url = "https://files.pythonhosted.org/packages/41/b0/33d79e377a336247df6348a54e6d2a2b85d644ca202555e3faa0cf811ecc/pillow-11.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0b275ff9b04df7b640c59ec5a3cb113eefd3795a8df80bac69646ef699c6981a", size = 4686116, upload-time = "2025-07-01T09:15:19.423Z" }, - { url = "https://files.pythonhosted.org/packages/49/2d/ed8bc0ab219ae8768f529597d9509d184fe8a6c4741a6864fea334d25f3f/pillow-11.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0743841cabd3dba6a83f38a92672cccbd69af56e3e91777b0ee7f4dba4385632", size = 5864597, upload-time = "2025-07-03T13:10:38.404Z" }, - { url = 
"https://files.pythonhosted.org/packages/b5/3d/b932bb4225c80b58dfadaca9d42d08d0b7064d2d1791b6a237f87f661834/pillow-11.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2465a69cf967b8b49ee1b96d76718cd98c4e925414ead59fdf75cf0fd07df673", size = 7638246, upload-time = "2025-07-03T13:10:44.987Z" }, - { url = "https://files.pythonhosted.org/packages/09/b5/0487044b7c096f1b48f0d7ad416472c02e0e4bf6919541b111efd3cae690/pillow-11.3.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:41742638139424703b4d01665b807c6468e23e699e8e90cffefe291c5832b027", size = 5973336, upload-time = "2025-07-01T09:15:21.237Z" }, - { url = "https://files.pythonhosted.org/packages/a8/2d/524f9318f6cbfcc79fbc004801ea6b607ec3f843977652fdee4857a7568b/pillow-11.3.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:93efb0b4de7e340d99057415c749175e24c8864302369e05914682ba642e5d77", size = 6642699, upload-time = "2025-07-01T09:15:23.186Z" }, - { url = "https://files.pythonhosted.org/packages/6f/d2/a9a4f280c6aefedce1e8f615baaa5474e0701d86dd6f1dede66726462bbd/pillow-11.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7966e38dcd0fa11ca390aed7c6f20454443581d758242023cf36fcb319b1a874", size = 6083789, upload-time = "2025-07-01T09:15:25.1Z" }, - { url = "https://files.pythonhosted.org/packages/fe/54/86b0cd9dbb683a9d5e960b66c7379e821a19be4ac5810e2e5a715c09a0c0/pillow-11.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:98a9afa7b9007c67ed84c57c9e0ad86a6000da96eaa638e4f8abe5b65ff83f0a", size = 6720386, upload-time = "2025-07-01T09:15:27.378Z" }, - { url = "https://files.pythonhosted.org/packages/e7/95/88efcaf384c3588e24259c4203b909cbe3e3c2d887af9e938c2022c9dd48/pillow-11.3.0-cp314-cp314-win32.whl", hash = "sha256:02a723e6bf909e7cea0dac1b0e0310be9d7650cd66222a5f1c571455c0a45214", size = 6370911, upload-time = "2025-07-01T09:15:29.294Z" }, - { url = 
"https://files.pythonhosted.org/packages/2e/cc/934e5820850ec5eb107e7b1a72dd278140731c669f396110ebc326f2a503/pillow-11.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:a418486160228f64dd9e9efcd132679b7a02a5f22c982c78b6fc7dab3fefb635", size = 7117383, upload-time = "2025-07-01T09:15:31.128Z" }, - { url = "https://files.pythonhosted.org/packages/d6/e9/9c0a616a71da2a5d163aa37405e8aced9a906d574b4a214bede134e731bc/pillow-11.3.0-cp314-cp314-win_arm64.whl", hash = "sha256:155658efb5e044669c08896c0c44231c5e9abcaadbc5cd3648df2f7c0b96b9a6", size = 2511385, upload-time = "2025-07-01T09:15:33.328Z" }, - { url = "https://files.pythonhosted.org/packages/1a/33/c88376898aff369658b225262cd4f2659b13e8178e7534df9e6e1fa289f6/pillow-11.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:59a03cdf019efbfeeed910bf79c7c93255c3d54bc45898ac2a4140071b02b4ae", size = 5281129, upload-time = "2025-07-01T09:15:35.194Z" }, - { url = "https://files.pythonhosted.org/packages/1f/70/d376247fb36f1844b42910911c83a02d5544ebd2a8bad9efcc0f707ea774/pillow-11.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f8a5827f84d973d8636e9dc5764af4f0cf2318d26744b3d902931701b0d46653", size = 4689580, upload-time = "2025-07-01T09:15:37.114Z" }, - { url = "https://files.pythonhosted.org/packages/eb/1c/537e930496149fbac69efd2fc4329035bbe2e5475b4165439e3be9cb183b/pillow-11.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ee92f2fd10f4adc4b43d07ec5e779932b4eb3dbfbc34790ada5a6669bc095aa6", size = 5902860, upload-time = "2025-07-03T13:10:50.248Z" }, - { url = "https://files.pythonhosted.org/packages/bd/57/80f53264954dcefeebcf9dae6e3eb1daea1b488f0be8b8fef12f79a3eb10/pillow-11.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c96d333dcf42d01f47b37e0979b6bd73ec91eae18614864622d9b87bbd5bbf36", size = 7670694, upload-time = "2025-07-03T13:10:56.432Z" }, - { url = 
"https://files.pythonhosted.org/packages/70/ff/4727d3b71a8578b4587d9c276e90efad2d6fe0335fd76742a6da08132e8c/pillow-11.3.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4c96f993ab8c98460cd0c001447bff6194403e8b1d7e149ade5f00594918128b", size = 6005888, upload-time = "2025-07-01T09:15:39.436Z" }, - { url = "https://files.pythonhosted.org/packages/05/ae/716592277934f85d3be51d7256f3636672d7b1abfafdc42cf3f8cbd4b4c8/pillow-11.3.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:41342b64afeba938edb034d122b2dda5db2139b9a4af999729ba8818e0056477", size = 6670330, upload-time = "2025-07-01T09:15:41.269Z" }, - { url = "https://files.pythonhosted.org/packages/e7/bb/7fe6cddcc8827b01b1a9766f5fdeb7418680744f9082035bdbabecf1d57f/pillow-11.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:068d9c39a2d1b358eb9f245ce7ab1b5c3246c7c8c7d9ba58cfa5b43146c06e50", size = 6114089, upload-time = "2025-07-01T09:15:43.13Z" }, - { url = "https://files.pythonhosted.org/packages/8b/f5/06bfaa444c8e80f1a8e4bff98da9c83b37b5be3b1deaa43d27a0db37ef84/pillow-11.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a1bc6ba083b145187f648b667e05a2534ecc4b9f2784c2cbe3089e44868f2b9b", size = 6748206, upload-time = "2025-07-01T09:15:44.937Z" }, - { url = "https://files.pythonhosted.org/packages/f0/77/bc6f92a3e8e6e46c0ca78abfffec0037845800ea38c73483760362804c41/pillow-11.3.0-cp314-cp314t-win32.whl", hash = "sha256:118ca10c0d60b06d006be10a501fd6bbdfef559251ed31b794668ed569c87e12", size = 6377370, upload-time = "2025-07-01T09:15:46.673Z" }, - { url = "https://files.pythonhosted.org/packages/4a/82/3a721f7d69dca802befb8af08b7c79ebcab461007ce1c18bd91a5d5896f9/pillow-11.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:8924748b688aa210d79883357d102cd64690e56b923a186f35a82cbc10f997db", size = 7121500, upload-time = "2025-07-01T09:15:48.512Z" }, - { url = 
"https://files.pythonhosted.org/packages/89/c7/5572fa4a3f45740eaab6ae86fcdf7195b55beac1371ac8c619d880cfe948/pillow-11.3.0-cp314-cp314t-win_arm64.whl", hash = "sha256:79ea0d14d3ebad43ec77ad5272e6ff9bba5b679ef73375ea760261207fa8e0aa", size = 2512835, upload-time = "2025-07-01T09:15:50.399Z" }, - { url = "https://files.pythonhosted.org/packages/6f/8b/209bd6b62ce8367f47e68a218bffac88888fdf2c9fcf1ecadc6c3ec1ebc7/pillow-11.3.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:3cee80663f29e3843b68199b9d6f4f54bd1d4a6b59bdd91bceefc51238bcb967", size = 5270556, upload-time = "2025-07-01T09:16:09.961Z" }, - { url = "https://files.pythonhosted.org/packages/2e/e6/231a0b76070c2cfd9e260a7a5b504fb72da0a95279410fa7afd99d9751d6/pillow-11.3.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:b5f56c3f344f2ccaf0dd875d3e180f631dc60a51b314295a3e681fe8cf851fbe", size = 4654625, upload-time = "2025-07-01T09:16:11.913Z" }, - { url = "https://files.pythonhosted.org/packages/13/f4/10cf94fda33cb12765f2397fc285fa6d8eb9c29de7f3185165b702fc7386/pillow-11.3.0-pp310-pypy310_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e67d793d180c9df62f1f40aee3accca4829d3794c95098887edc18af4b8b780c", size = 4874207, upload-time = "2025-07-03T13:11:10.201Z" }, - { url = "https://files.pythonhosted.org/packages/72/c9/583821097dc691880c92892e8e2d41fe0a5a3d6021f4963371d2f6d57250/pillow-11.3.0-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d000f46e2917c705e9fb93a3606ee4a819d1e3aa7a9b442f6444f07e77cf5e25", size = 6583939, upload-time = "2025-07-03T13:11:15.68Z" }, - { url = "https://files.pythonhosted.org/packages/3b/8e/5c9d410f9217b12320efc7c413e72693f48468979a013ad17fd690397b9a/pillow-11.3.0-pp310-pypy310_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:527b37216b6ac3a12d7838dc3bd75208ec57c1c6d11ef01902266a5a0c14fc27", size = 4957166, upload-time = "2025-07-01T09:16:13.74Z" }, - { url = 
"https://files.pythonhosted.org/packages/62/bb/78347dbe13219991877ffb3a91bf09da8317fbfcd4b5f9140aeae020ad71/pillow-11.3.0-pp310-pypy310_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:be5463ac478b623b9dd3937afd7fb7ab3d79dd290a28e2b6df292dc75063eb8a", size = 5581482, upload-time = "2025-07-01T09:16:16.107Z" }, - { url = "https://files.pythonhosted.org/packages/d9/28/1000353d5e61498aaeaaf7f1e4b49ddb05f2c6575f9d4f9f914a3538b6e1/pillow-11.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:8dc70ca24c110503e16918a658b869019126ecfe03109b754c402daff12b3d9f", size = 6984596, upload-time = "2025-07-01T09:16:18.07Z" }, - { url = "https://files.pythonhosted.org/packages/9e/e3/6fa84033758276fb31da12e5fb66ad747ae83b93c67af17f8c6ff4cc8f34/pillow-11.3.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7c8ec7a017ad1bd562f93dbd8505763e688d388cde6e4a010ae1486916e713e6", size = 5270566, upload-time = "2025-07-01T09:16:19.801Z" }, - { url = "https://files.pythonhosted.org/packages/5b/ee/e8d2e1ab4892970b561e1ba96cbd59c0d28cf66737fc44abb2aec3795a4e/pillow-11.3.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:9ab6ae226de48019caa8074894544af5b53a117ccb9d3b3dcb2871464c829438", size = 4654618, upload-time = "2025-07-01T09:16:21.818Z" }, - { url = "https://files.pythonhosted.org/packages/f2/6d/17f80f4e1f0761f02160fc433abd4109fa1548dcfdca46cfdadaf9efa565/pillow-11.3.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fe27fb049cdcca11f11a7bfda64043c37b30e6b91f10cb5bab275806c32f6ab3", size = 4874248, upload-time = "2025-07-03T13:11:20.738Z" }, - { url = "https://files.pythonhosted.org/packages/de/5f/c22340acd61cef960130585bbe2120e2fd8434c214802f07e8c03596b17e/pillow-11.3.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:465b9e8844e3c3519a983d58b80be3f668e2a7a5db97f2784e7079fbc9f9822c", size = 6583963, upload-time = "2025-07-03T13:11:26.283Z" }, - { url = 
"https://files.pythonhosted.org/packages/31/5e/03966aedfbfcbb4d5f8aa042452d3361f325b963ebbadddac05b122e47dd/pillow-11.3.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5418b53c0d59b3824d05e029669efa023bbef0f3e92e75ec8428f3799487f361", size = 4957170, upload-time = "2025-07-01T09:16:23.762Z" }, - { url = "https://files.pythonhosted.org/packages/cc/2d/e082982aacc927fc2cab48e1e731bdb1643a1406acace8bed0900a61464e/pillow-11.3.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:504b6f59505f08ae014f724b6207ff6222662aab5cc9542577fb084ed0676ac7", size = 5581505, upload-time = "2025-07-01T09:16:25.593Z" }, - { url = "https://files.pythonhosted.org/packages/34/e7/ae39f538fd6844e982063c3a5e4598b8ced43b9633baa3a85ef33af8c05c/pillow-11.3.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:c84d689db21a1c397d001aa08241044aa2069e7587b398c8cc63020390b1c1b8", size = 6984598, upload-time = "2025-07-01T09:16:27.732Z" }, +version = "12.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/cace85a1b0c9775a9f8f5d5423c8261c858760e2466c79b2dd184638b056/pillow-12.0.0.tar.gz", hash = "sha256:87d4f8125c9988bfbed67af47dd7a953e2fc7b0cc1e7800ec6d2080d490bb353", size = 47008828, upload-time = "2025-10-15T18:24:14.008Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/08/26e68b6b5da219c2a2cb7b563af008b53bb8e6b6fcb3fa40715fcdb2523a/pillow-12.0.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:3adfb466bbc544b926d50fe8f4a4e6abd8c6bffd28a26177594e6e9b2b76572b", size = 5289809, upload-time = "2025-10-15T18:21:27.791Z" }, + { url = "https://files.pythonhosted.org/packages/cb/e9/4e58fb097fb74c7b4758a680aacd558810a417d1edaa7000142976ef9d2f/pillow-12.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1ac11e8ea4f611c3c0147424eae514028b5e9077dd99ab91e1bd7bc33ff145e1", size = 4650606, upload-time = "2025-10-15T18:21:29.823Z" }, + { url = 
"https://files.pythonhosted.org/packages/4b/e0/1fa492aa9f77b3bc6d471c468e62bfea1823056bf7e5e4f1914d7ab2565e/pillow-12.0.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d49e2314c373f4c2b39446fb1a45ed333c850e09d0c59ac79b72eb3b95397363", size = 6221023, upload-time = "2025-10-15T18:21:31.415Z" }, + { url = "https://files.pythonhosted.org/packages/c1/09/4de7cd03e33734ccd0c876f0251401f1314e819cbfd89a0fcb6e77927cc6/pillow-12.0.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c7b2a63fd6d5246349f3d3f37b14430d73ee7e8173154461785e43036ffa96ca", size = 8024937, upload-time = "2025-10-15T18:21:33.453Z" }, + { url = "https://files.pythonhosted.org/packages/2e/69/0688e7c1390666592876d9d474f5e135abb4acb39dcb583c4dc5490f1aff/pillow-12.0.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d64317d2587c70324b79861babb9c09f71fbb780bad212018874b2c013d8600e", size = 6334139, upload-time = "2025-10-15T18:21:35.395Z" }, + { url = "https://files.pythonhosted.org/packages/ed/1c/880921e98f525b9b44ce747ad1ea8f73fd7e992bafe3ca5e5644bf433dea/pillow-12.0.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d77153e14b709fd8b8af6f66a3afbb9ed6e9fc5ccf0b6b7e1ced7b036a228782", size = 7026074, upload-time = "2025-10-15T18:21:37.219Z" }, + { url = "https://files.pythonhosted.org/packages/28/03/96f718331b19b355610ef4ebdbbde3557c726513030665071fd025745671/pillow-12.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:32ed80ea8a90ee3e6fa08c21e2e091bba6eda8eccc83dbc34c95169507a91f10", size = 6448852, upload-time = "2025-10-15T18:21:39.168Z" }, + { url = "https://files.pythonhosted.org/packages/3a/a0/6a193b3f0cc9437b122978d2c5cbce59510ccf9a5b48825096ed7472da2f/pillow-12.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c828a1ae702fc712978bda0320ba1b9893d99be0badf2647f693cc01cf0f04fa", size = 7117058, upload-time = "2025-10-15T18:21:40.997Z" }, + { url = 
"https://files.pythonhosted.org/packages/a7/c4/043192375eaa4463254e8e61f0e2ec9a846b983929a8d0a7122e0a6d6fff/pillow-12.0.0-cp310-cp310-win32.whl", hash = "sha256:bd87e140e45399c818fac4247880b9ce719e4783d767e030a883a970be632275", size = 6295431, upload-time = "2025-10-15T18:21:42.518Z" }, + { url = "https://files.pythonhosted.org/packages/92/c6/c2f2fc7e56301c21827e689bb8b0b465f1b52878b57471a070678c0c33cd/pillow-12.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:455247ac8a4cfb7b9bc45b7e432d10421aea9fc2e74d285ba4072688a74c2e9d", size = 7000412, upload-time = "2025-10-15T18:21:44.404Z" }, + { url = "https://files.pythonhosted.org/packages/b2/d2/5f675067ba82da7a1c238a73b32e3fd78d67f9d9f80fbadd33a40b9c0481/pillow-12.0.0-cp310-cp310-win_arm64.whl", hash = "sha256:6ace95230bfb7cd79ef66caa064bbe2f2a1e63d93471c3a2e1f1348d9f22d6b7", size = 2435903, upload-time = "2025-10-15T18:21:46.29Z" }, + { url = "https://files.pythonhosted.org/packages/0e/5a/a2f6773b64edb921a756eb0729068acad9fc5208a53f4a349396e9436721/pillow-12.0.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:0fd00cac9c03256c8b2ff58f162ebcd2587ad3e1f2e397eab718c47e24d231cc", size = 5289798, upload-time = "2025-10-15T18:21:47.763Z" }, + { url = "https://files.pythonhosted.org/packages/2e/05/069b1f8a2e4b5a37493da6c5868531c3f77b85e716ad7a590ef87d58730d/pillow-12.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3475b96f5908b3b16c47533daaa87380c491357d197564e0ba34ae75c0f3257", size = 4650589, upload-time = "2025-10-15T18:21:49.515Z" }, + { url = "https://files.pythonhosted.org/packages/61/e3/2c820d6e9a36432503ead175ae294f96861b07600a7156154a086ba7111a/pillow-12.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:110486b79f2d112cf6add83b28b627e369219388f64ef2f960fef9ebaf54c642", size = 6230472, upload-time = "2025-10-15T18:21:51.052Z" }, + { url = 
"https://files.pythonhosted.org/packages/4f/89/63427f51c64209c5e23d4d52071c8d0f21024d3a8a487737caaf614a5795/pillow-12.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5269cc1caeedb67e6f7269a42014f381f45e2e7cd42d834ede3c703a1d915fe3", size = 8033887, upload-time = "2025-10-15T18:21:52.604Z" }, + { url = "https://files.pythonhosted.org/packages/f6/1b/c9711318d4901093c15840f268ad649459cd81984c9ec9887756cca049a5/pillow-12.0.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa5129de4e174daccbc59d0a3b6d20eaf24417d59851c07ebb37aeb02947987c", size = 6343964, upload-time = "2025-10-15T18:21:54.619Z" }, + { url = "https://files.pythonhosted.org/packages/41/1e/db9470f2d030b4995083044cd8738cdd1bf773106819f6d8ba12597d5352/pillow-12.0.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bee2a6db3a7242ea309aa7ee8e2780726fed67ff4e5b40169f2c940e7eb09227", size = 7034756, upload-time = "2025-10-15T18:21:56.151Z" }, + { url = "https://files.pythonhosted.org/packages/cc/b0/6177a8bdd5ee4ed87cba2de5a3cc1db55ffbbec6176784ce5bb75aa96798/pillow-12.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:90387104ee8400a7b4598253b4c406f8958f59fcf983a6cea2b50d59f7d63d0b", size = 6458075, upload-time = "2025-10-15T18:21:57.759Z" }, + { url = "https://files.pythonhosted.org/packages/bc/5e/61537aa6fa977922c6a03253a0e727e6e4a72381a80d63ad8eec350684f2/pillow-12.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bc91a56697869546d1b8f0a3ff35224557ae7f881050e99f615e0119bf934b4e", size = 7125955, upload-time = "2025-10-15T18:21:59.372Z" }, + { url = "https://files.pythonhosted.org/packages/1f/3d/d5033539344ee3cbd9a4d69e12e63ca3a44a739eb2d4c8da350a3d38edd7/pillow-12.0.0-cp311-cp311-win32.whl", hash = "sha256:27f95b12453d165099c84f8a8bfdfd46b9e4bda9e0e4b65f0635430027f55739", size = 6298440, upload-time = "2025-10-15T18:22:00.982Z" }, + { url = 
"https://files.pythonhosted.org/packages/4d/42/aaca386de5cc8bd8a0254516957c1f265e3521c91515b16e286c662854c4/pillow-12.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:b583dc9070312190192631373c6c8ed277254aa6e6084b74bdd0a6d3b221608e", size = 6999256, upload-time = "2025-10-15T18:22:02.617Z" }, + { url = "https://files.pythonhosted.org/packages/ba/f1/9197c9c2d5708b785f631a6dfbfa8eb3fb9672837cb92ae9af812c13b4ed/pillow-12.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:759de84a33be3b178a64c8ba28ad5c135900359e85fb662bc6e403ad4407791d", size = 2436025, upload-time = "2025-10-15T18:22:04.598Z" }, + { url = "https://files.pythonhosted.org/packages/2c/90/4fcce2c22caf044e660a198d740e7fbc14395619e3cb1abad12192c0826c/pillow-12.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:53561a4ddc36facb432fae7a9d8afbfaf94795414f5cdc5fc52f28c1dca90371", size = 5249377, upload-time = "2025-10-15T18:22:05.993Z" }, + { url = "https://files.pythonhosted.org/packages/fd/e0/ed960067543d080691d47d6938ebccbf3976a931c9567ab2fbfab983a5dd/pillow-12.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:71db6b4c1653045dacc1585c1b0d184004f0d7e694c7b34ac165ca70c0838082", size = 4650343, upload-time = "2025-10-15T18:22:07.718Z" }, + { url = "https://files.pythonhosted.org/packages/e7/a1/f81fdeddcb99c044bf7d6faa47e12850f13cee0849537a7d27eeab5534d4/pillow-12.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2fa5f0b6716fc88f11380b88b31fe591a06c6315e955c096c35715788b339e3f", size = 6232981, upload-time = "2025-10-15T18:22:09.287Z" }, + { url = "https://files.pythonhosted.org/packages/88/e1/9098d3ce341a8750b55b0e00c03f1630d6178f38ac191c81c97a3b047b44/pillow-12.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:82240051c6ca513c616f7f9da06e871f61bfd7805f566275841af15015b8f98d", size = 8041399, upload-time = "2025-10-15T18:22:10.872Z" }, + { url = 
"https://files.pythonhosted.org/packages/a7/62/a22e8d3b602ae8cc01446d0c57a54e982737f44b6f2e1e019a925143771d/pillow-12.0.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:55f818bd74fe2f11d4d7cbc65880a843c4075e0ac7226bc1a23261dbea531953", size = 6347740, upload-time = "2025-10-15T18:22:12.769Z" }, + { url = "https://files.pythonhosted.org/packages/4f/87/424511bdcd02c8d7acf9f65caa09f291a519b16bd83c3fb3374b3d4ae951/pillow-12.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b87843e225e74576437fd5b6a4c2205d422754f84a06942cfaf1dc32243e45a8", size = 7040201, upload-time = "2025-10-15T18:22:14.813Z" }, + { url = "https://files.pythonhosted.org/packages/dc/4d/435c8ac688c54d11755aedfdd9f29c9eeddf68d150fe42d1d3dbd2365149/pillow-12.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c607c90ba67533e1b2355b821fef6764d1dd2cbe26b8c1005ae84f7aea25ff79", size = 6462334, upload-time = "2025-10-15T18:22:16.375Z" }, + { url = "https://files.pythonhosted.org/packages/2b/f2/ad34167a8059a59b8ad10bc5c72d4d9b35acc6b7c0877af8ac885b5f2044/pillow-12.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:21f241bdd5080a15bc86d3466a9f6074a9c2c2b314100dd896ac81ee6db2f1ba", size = 7134162, upload-time = "2025-10-15T18:22:17.996Z" }, + { url = "https://files.pythonhosted.org/packages/0c/b1/a7391df6adacf0a5c2cf6ac1cf1fcc1369e7d439d28f637a847f8803beb3/pillow-12.0.0-cp312-cp312-win32.whl", hash = "sha256:dd333073e0cacdc3089525c7df7d39b211bcdf31fc2824e49d01c6b6187b07d0", size = 6298769, upload-time = "2025-10-15T18:22:19.923Z" }, + { url = "https://files.pythonhosted.org/packages/a2/0b/d87733741526541c909bbf159e338dcace4f982daac6e5a8d6be225ca32d/pillow-12.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9fe611163f6303d1619bbcb653540a4d60f9e55e622d60a3108be0d5b441017a", size = 7001107, upload-time = "2025-10-15T18:22:21.644Z" }, + { url = 
"https://files.pythonhosted.org/packages/bc/96/aaa61ce33cc98421fb6088af2a03be4157b1e7e0e87087c888e2370a7f45/pillow-12.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:7dfb439562f234f7d57b1ac6bc8fe7f838a4bd49c79230e0f6a1da93e82f1fad", size = 2436012, upload-time = "2025-10-15T18:22:23.621Z" }, + { url = "https://files.pythonhosted.org/packages/62/f2/de993bb2d21b33a98d031ecf6a978e4b61da207bef02f7b43093774c480d/pillow-12.0.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:0869154a2d0546545cde61d1789a6524319fc1897d9ee31218eae7a60ccc5643", size = 4045493, upload-time = "2025-10-15T18:22:25.758Z" }, + { url = "https://files.pythonhosted.org/packages/0e/b6/bc8d0c4c9f6f111a783d045310945deb769b806d7574764234ffd50bc5ea/pillow-12.0.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:a7921c5a6d31b3d756ec980f2f47c0cfdbce0fc48c22a39347a895f41f4a6ea4", size = 4120461, upload-time = "2025-10-15T18:22:27.286Z" }, + { url = "https://files.pythonhosted.org/packages/5d/57/d60d343709366a353dc56adb4ee1e7d8a2cc34e3fbc22905f4167cfec119/pillow-12.0.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:1ee80a59f6ce048ae13cda1abf7fbd2a34ab9ee7d401c46be3ca685d1999a399", size = 3576912, upload-time = "2025-10-15T18:22:28.751Z" }, + { url = "https://files.pythonhosted.org/packages/a4/a4/a0a31467e3f83b94d37568294b01d22b43ae3c5d85f2811769b9c66389dd/pillow-12.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c50f36a62a22d350c96e49ad02d0da41dbd17ddc2e29750dbdba4323f85eb4a5", size = 5249132, upload-time = "2025-10-15T18:22:30.641Z" }, + { url = "https://files.pythonhosted.org/packages/83/06/48eab21dd561de2914242711434c0c0eb992ed08ff3f6107a5f44527f5e9/pillow-12.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5193fde9a5f23c331ea26d0cf171fbf67e3f247585f50c08b3e205c7aeb4589b", size = 4650099, upload-time = "2025-10-15T18:22:32.73Z" }, + { url = 
"https://files.pythonhosted.org/packages/fc/bd/69ed99fd46a8dba7c1887156d3572fe4484e3f031405fcc5a92e31c04035/pillow-12.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bde737cff1a975b70652b62d626f7785e0480918dece11e8fef3c0cf057351c3", size = 6230808, upload-time = "2025-10-15T18:22:34.337Z" }, + { url = "https://files.pythonhosted.org/packages/ea/94/8fad659bcdbf86ed70099cb60ae40be6acca434bbc8c4c0d4ef356d7e0de/pillow-12.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a6597ff2b61d121172f5844b53f21467f7082f5fb385a9a29c01414463f93b07", size = 8037804, upload-time = "2025-10-15T18:22:36.402Z" }, + { url = "https://files.pythonhosted.org/packages/20/39/c685d05c06deecfd4e2d1950e9a908aa2ca8bc4e6c3b12d93b9cafbd7837/pillow-12.0.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b817e7035ea7f6b942c13aa03bb554fc44fea70838ea21f8eb31c638326584e", size = 6345553, upload-time = "2025-10-15T18:22:38.066Z" }, + { url = "https://files.pythonhosted.org/packages/38/57/755dbd06530a27a5ed74f8cb0a7a44a21722ebf318edbe67ddbd7fb28f88/pillow-12.0.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f4f1231b7dec408e8670264ce63e9c71409d9583dd21d32c163e25213ee2a344", size = 7037729, upload-time = "2025-10-15T18:22:39.769Z" }, + { url = "https://files.pythonhosted.org/packages/ca/b6/7e94f4c41d238615674d06ed677c14883103dce1c52e4af16f000338cfd7/pillow-12.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e51b71417049ad6ab14c49608b4a24d8fb3fe605e5dfabfe523b58064dc3d27", size = 6459789, upload-time = "2025-10-15T18:22:41.437Z" }, + { url = "https://files.pythonhosted.org/packages/9c/14/4448bb0b5e0f22dd865290536d20ec8a23b64e2d04280b89139f09a36bb6/pillow-12.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d120c38a42c234dc9a8c5de7ceaaf899cf33561956acb4941653f8bdc657aa79", size = 7130917, upload-time = "2025-10-15T18:22:43.152Z" }, + { url = 
"https://files.pythonhosted.org/packages/dd/ca/16c6926cc1c015845745d5c16c9358e24282f1e588237a4c36d2b30f182f/pillow-12.0.0-cp313-cp313-win32.whl", hash = "sha256:4cc6b3b2efff105c6a1656cfe59da4fdde2cda9af1c5e0b58529b24525d0a098", size = 6302391, upload-time = "2025-10-15T18:22:44.753Z" }, + { url = "https://files.pythonhosted.org/packages/6d/2a/dd43dcfd6dae9b6a49ee28a8eedb98c7d5ff2de94a5d834565164667b97b/pillow-12.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:4cf7fed4b4580601c4345ceb5d4cbf5a980d030fd5ad07c4d2ec589f95f09905", size = 7007477, upload-time = "2025-10-15T18:22:46.838Z" }, + { url = "https://files.pythonhosted.org/packages/77/f0/72ea067f4b5ae5ead653053212af05ce3705807906ba3f3e8f58ddf617e6/pillow-12.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:9f0b04c6b8584c2c193babcccc908b38ed29524b29dd464bc8801bf10d746a3a", size = 2435918, upload-time = "2025-10-15T18:22:48.399Z" }, + { url = "https://files.pythonhosted.org/packages/f5/5e/9046b423735c21f0487ea6cb5b10f89ea8f8dfbe32576fe052b5ba9d4e5b/pillow-12.0.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7fa22993bac7b77b78cae22bad1e2a987ddf0d9015c63358032f84a53f23cdc3", size = 5251406, upload-time = "2025-10-15T18:22:49.905Z" }, + { url = "https://files.pythonhosted.org/packages/12/66/982ceebcdb13c97270ef7a56c3969635b4ee7cd45227fa707c94719229c5/pillow-12.0.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f135c702ac42262573fe9714dfe99c944b4ba307af5eb507abef1667e2cbbced", size = 4653218, upload-time = "2025-10-15T18:22:51.587Z" }, + { url = "https://files.pythonhosted.org/packages/16/b3/81e625524688c31859450119bf12674619429cab3119eec0e30a7a1029cb/pillow-12.0.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c85de1136429c524e55cfa4e033b4a7940ac5c8ee4d9401cc2d1bf48154bbc7b", size = 6266564, upload-time = "2025-10-15T18:22:53.215Z" }, + { url = 
"https://files.pythonhosted.org/packages/98/59/dfb38f2a41240d2408096e1a76c671d0a105a4a8471b1871c6902719450c/pillow-12.0.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:38df9b4bfd3db902c9c2bd369bcacaf9d935b2fff73709429d95cc41554f7b3d", size = 8069260, upload-time = "2025-10-15T18:22:54.933Z" }, + { url = "https://files.pythonhosted.org/packages/dc/3d/378dbea5cd1874b94c312425ca77b0f47776c78e0df2df751b820c8c1d6c/pillow-12.0.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7d87ef5795da03d742bf49439f9ca4d027cde49c82c5371ba52464aee266699a", size = 6379248, upload-time = "2025-10-15T18:22:56.605Z" }, + { url = "https://files.pythonhosted.org/packages/84/b0/d525ef47d71590f1621510327acec75ae58c721dc071b17d8d652ca494d8/pillow-12.0.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aff9e4d82d082ff9513bdd6acd4f5bd359f5b2c870907d2b0a9c5e10d40c88fe", size = 7066043, upload-time = "2025-10-15T18:22:58.53Z" }, + { url = "https://files.pythonhosted.org/packages/61/2c/aced60e9cf9d0cde341d54bf7932c9ffc33ddb4a1595798b3a5150c7ec4e/pillow-12.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8d8ca2b210ada074d57fcee40c30446c9562e542fc46aedc19baf758a93532ee", size = 6490915, upload-time = "2025-10-15T18:23:00.582Z" }, + { url = "https://files.pythonhosted.org/packages/ef/26/69dcb9b91f4e59f8f34b2332a4a0a951b44f547c4ed39d3e4dcfcff48f89/pillow-12.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:99a7f72fb6249302aa62245680754862a44179b545ded638cf1fef59befb57ef", size = 7157998, upload-time = "2025-10-15T18:23:02.627Z" }, + { url = "https://files.pythonhosted.org/packages/61/2b/726235842220ca95fa441ddf55dd2382b52ab5b8d9c0596fe6b3f23dafe8/pillow-12.0.0-cp313-cp313t-win32.whl", hash = "sha256:4078242472387600b2ce8d93ade8899c12bf33fa89e55ec89fe126e9d6d5d9e9", size = 6306201, upload-time = "2025-10-15T18:23:04.709Z" }, + { url = 
"https://files.pythonhosted.org/packages/c0/3d/2afaf4e840b2df71344ababf2f8edd75a705ce500e5dc1e7227808312ae1/pillow-12.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2c54c1a783d6d60595d3514f0efe9b37c8808746a66920315bfd34a938d7994b", size = 7013165, upload-time = "2025-10-15T18:23:06.46Z" }, + { url = "https://files.pythonhosted.org/packages/6f/75/3fa09aa5cf6ed04bee3fa575798ddf1ce0bace8edb47249c798077a81f7f/pillow-12.0.0-cp313-cp313t-win_arm64.whl", hash = "sha256:26d9f7d2b604cd23aba3e9faf795787456ac25634d82cd060556998e39c6fa47", size = 2437834, upload-time = "2025-10-15T18:23:08.194Z" }, + { url = "https://files.pythonhosted.org/packages/54/2a/9a8c6ba2c2c07b71bec92cf63e03370ca5e5f5c5b119b742bcc0cde3f9c5/pillow-12.0.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:beeae3f27f62308f1ddbcfb0690bf44b10732f2ef43758f169d5e9303165d3f9", size = 4045531, upload-time = "2025-10-15T18:23:10.121Z" }, + { url = "https://files.pythonhosted.org/packages/84/54/836fdbf1bfb3d66a59f0189ff0b9f5f666cee09c6188309300df04ad71fa/pillow-12.0.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:d4827615da15cd59784ce39d3388275ec093ae3ee8d7f0c089b76fa87af756c2", size = 4120554, upload-time = "2025-10-15T18:23:12.14Z" }, + { url = "https://files.pythonhosted.org/packages/0d/cd/16aec9f0da4793e98e6b54778a5fbce4f375c6646fe662e80600b8797379/pillow-12.0.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:3e42edad50b6909089750e65c91aa09aaf1e0a71310d383f11321b27c224ed8a", size = 3576812, upload-time = "2025-10-15T18:23:13.962Z" }, + { url = "https://files.pythonhosted.org/packages/f6/b7/13957fda356dc46339298b351cae0d327704986337c3c69bb54628c88155/pillow-12.0.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:e5d8efac84c9afcb40914ab49ba063d94f5dbdf5066db4482c66a992f47a3a3b", size = 5252689, upload-time = "2025-10-15T18:23:15.562Z" }, + { url = 
"https://files.pythonhosted.org/packages/fc/f5/eae31a306341d8f331f43edb2e9122c7661b975433de5e447939ae61c5da/pillow-12.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:266cd5f2b63ff316d5a1bba46268e603c9caf5606d44f38c2873c380950576ad", size = 4650186, upload-time = "2025-10-15T18:23:17.379Z" }, + { url = "https://files.pythonhosted.org/packages/86/62/2a88339aa40c4c77e79108facbd307d6091e2c0eb5b8d3cf4977cfca2fe6/pillow-12.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:58eea5ebe51504057dd95c5b77d21700b77615ab0243d8152793dc00eb4faf01", size = 6230308, upload-time = "2025-10-15T18:23:18.971Z" }, + { url = "https://files.pythonhosted.org/packages/c7/33/5425a8992bcb32d1cb9fa3dd39a89e613d09a22f2c8083b7bf43c455f760/pillow-12.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f13711b1a5ba512d647a0e4ba79280d3a9a045aaf7e0cc6fbe96b91d4cdf6b0c", size = 8039222, upload-time = "2025-10-15T18:23:20.909Z" }, + { url = "https://files.pythonhosted.org/packages/d8/61/3f5d3b35c5728f37953d3eec5b5f3e77111949523bd2dd7f31a851e50690/pillow-12.0.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6846bd2d116ff42cba6b646edf5bf61d37e5cbd256425fa089fee4ff5c07a99e", size = 6346657, upload-time = "2025-10-15T18:23:23.077Z" }, + { url = "https://files.pythonhosted.org/packages/3a/be/ee90a3d79271227e0f0a33c453531efd6ed14b2e708596ba5dd9be948da3/pillow-12.0.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c98fa880d695de164b4135a52fd2e9cd7b7c90a9d8ac5e9e443a24a95ef9248e", size = 7038482, upload-time = "2025-10-15T18:23:25.005Z" }, + { url = "https://files.pythonhosted.org/packages/44/34/a16b6a4d1ad727de390e9bd9f19f5f669e079e5826ec0f329010ddea492f/pillow-12.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:fa3ed2a29a9e9d2d488b4da81dcb54720ac3104a20bf0bd273f1e4648aff5af9", size = 6461416, upload-time = "2025-10-15T18:23:27.009Z" }, + { url = 
"https://files.pythonhosted.org/packages/b6/39/1aa5850d2ade7d7ba9f54e4e4c17077244ff7a2d9e25998c38a29749eb3f/pillow-12.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d034140032870024e6b9892c692fe2968493790dd57208b2c37e3fb35f6df3ab", size = 7131584, upload-time = "2025-10-15T18:23:29.752Z" }, + { url = "https://files.pythonhosted.org/packages/bf/db/4fae862f8fad0167073a7733973bfa955f47e2cac3dc3e3e6257d10fab4a/pillow-12.0.0-cp314-cp314-win32.whl", hash = "sha256:1b1b133e6e16105f524a8dec491e0586d072948ce15c9b914e41cdadd209052b", size = 6400621, upload-time = "2025-10-15T18:23:32.06Z" }, + { url = "https://files.pythonhosted.org/packages/2b/24/b350c31543fb0107ab2599464d7e28e6f856027aadda995022e695313d94/pillow-12.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:8dc232e39d409036af549c86f24aed8273a40ffa459981146829a324e0848b4b", size = 7142916, upload-time = "2025-10-15T18:23:34.71Z" }, + { url = "https://files.pythonhosted.org/packages/0f/9b/0ba5a6fd9351793996ef7487c4fdbde8d3f5f75dbedc093bb598648fddf0/pillow-12.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:d52610d51e265a51518692045e372a4c363056130d922a7351429ac9f27e70b0", size = 2523836, upload-time = "2025-10-15T18:23:36.967Z" }, + { url = "https://files.pythonhosted.org/packages/f5/7a/ceee0840aebc579af529b523d530840338ecf63992395842e54edc805987/pillow-12.0.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1979f4566bb96c1e50a62d9831e2ea2d1211761e5662afc545fa766f996632f6", size = 5255092, upload-time = "2025-10-15T18:23:38.573Z" }, + { url = "https://files.pythonhosted.org/packages/44/76/20776057b4bfd1aef4eeca992ebde0f53a4dce874f3ae693d0ec90a4f79b/pillow-12.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b2e4b27a6e15b04832fe9bf292b94b5ca156016bbc1ea9c2c20098a0320d6cf6", size = 4653158, upload-time = "2025-10-15T18:23:40.238Z" }, + { url = 
"https://files.pythonhosted.org/packages/82/3f/d9ff92ace07be8836b4e7e87e6a4c7a8318d47c2f1463ffcf121fc57d9cb/pillow-12.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fb3096c30df99fd01c7bf8e544f392103d0795b9f98ba71a8054bcbf56b255f1", size = 6267882, upload-time = "2025-10-15T18:23:42.434Z" }, + { url = "https://files.pythonhosted.org/packages/9f/7a/4f7ff87f00d3ad33ba21af78bfcd2f032107710baf8280e3722ceec28cda/pillow-12.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7438839e9e053ef79f7112c881cef684013855016f928b168b81ed5835f3e75e", size = 8071001, upload-time = "2025-10-15T18:23:44.29Z" }, + { url = "https://files.pythonhosted.org/packages/75/87/fcea108944a52dad8cca0715ae6247e271eb80459364a98518f1e4f480c1/pillow-12.0.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d5c411a8eaa2299322b647cd932586b1427367fd3184ffbb8f7a219ea2041ca", size = 6380146, upload-time = "2025-10-15T18:23:46.065Z" }, + { url = "https://files.pythonhosted.org/packages/91/52/0d31b5e571ef5fd111d2978b84603fce26aba1b6092f28e941cb46570745/pillow-12.0.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d7e091d464ac59d2c7ad8e7e08105eaf9dafbc3883fd7265ffccc2baad6ac925", size = 7067344, upload-time = "2025-10-15T18:23:47.898Z" }, + { url = "https://files.pythonhosted.org/packages/7b/f4/2dd3d721f875f928d48e83bb30a434dee75a2531bca839bb996bb0aa5a91/pillow-12.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:792a2c0be4dcc18af9d4a2dfd8a11a17d5e25274a1062b0ec1c2d79c76f3e7f8", size = 6491864, upload-time = "2025-10-15T18:23:49.607Z" }, + { url = "https://files.pythonhosted.org/packages/30/4b/667dfcf3d61fc309ba5a15b141845cece5915e39b99c1ceab0f34bf1d124/pillow-12.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:afbefa430092f71a9593a99ab6a4e7538bc9eabbf7bf94f91510d3503943edc4", size = 7158911, upload-time = "2025-10-15T18:23:51.351Z" }, + { url = 
"https://files.pythonhosted.org/packages/a2/2f/16cabcc6426c32218ace36bf0d55955e813f2958afddbf1d391849fee9d1/pillow-12.0.0-cp314-cp314t-win32.whl", hash = "sha256:3830c769decf88f1289680a59d4f4c46c72573446352e2befec9a8512104fa52", size = 6408045, upload-time = "2025-10-15T18:23:53.177Z" }, + { url = "https://files.pythonhosted.org/packages/35/73/e29aa0c9c666cf787628d3f0dcf379f4791fba79f4936d02f8b37165bdf8/pillow-12.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:905b0365b210c73afb0ebe9101a32572152dfd1c144c7e28968a331b9217b94a", size = 7148282, upload-time = "2025-10-15T18:23:55.316Z" }, + { url = "https://files.pythonhosted.org/packages/c1/70/6b41bdcddf541b437bbb9f47f94d2db5d9ddef6c37ccab8c9107743748a4/pillow-12.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:99353a06902c2e43b43e8ff74ee65a7d90307d82370604746738a1e0661ccca7", size = 2525630, upload-time = "2025-10-15T18:23:57.149Z" }, + { url = "https://files.pythonhosted.org/packages/1d/b3/582327e6c9f86d037b63beebe981425d6811104cb443e8193824ef1a2f27/pillow-12.0.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b22bd8c974942477156be55a768f7aa37c46904c175be4e158b6a86e3a6b7ca8", size = 5215068, upload-time = "2025-10-15T18:23:59.594Z" }, + { url = "https://files.pythonhosted.org/packages/fd/d6/67748211d119f3b6540baf90f92fae73ae51d5217b171b0e8b5f7e5d558f/pillow-12.0.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:805ebf596939e48dbb2e4922a1d3852cfc25c38160751ce02da93058b48d252a", size = 4614994, upload-time = "2025-10-15T18:24:01.669Z" }, + { url = "https://files.pythonhosted.org/packages/2d/e1/f8281e5d844c41872b273b9f2c34a4bf64ca08905668c8ae730eedc7c9fa/pillow-12.0.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cae81479f77420d217def5f54b5b9d279804d17e982e0f2fa19b1d1e14ab5197", size = 5246639, upload-time = "2025-10-15T18:24:03.403Z" }, + { url = 
"https://files.pythonhosted.org/packages/94/5a/0d8ab8ffe8a102ff5df60d0de5af309015163bf710c7bb3e8311dd3b3ad0/pillow-12.0.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:aeaefa96c768fc66818730b952a862235d68825c178f1b3ffd4efd7ad2edcb7c", size = 6986839, upload-time = "2025-10-15T18:24:05.344Z" }, + { url = "https://files.pythonhosted.org/packages/20/2e/3434380e8110b76cd9eb00a363c484b050f949b4bbe84ba770bb8508a02c/pillow-12.0.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09f2d0abef9e4e2f349305a4f8cc784a8a6c2f58a8c4892eea13b10a943bd26e", size = 5313505, upload-time = "2025-10-15T18:24:07.137Z" }, + { url = "https://files.pythonhosted.org/packages/57/ca/5a9d38900d9d74785141d6580950fe705de68af735ff6e727cb911b64740/pillow-12.0.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bdee52571a343d721fb2eb3b090a82d959ff37fc631e3f70422e0c2e029f3e76", size = 5963654, upload-time = "2025-10-15T18:24:09.579Z" }, + { url = "https://files.pythonhosted.org/packages/95/7e/f896623c3c635a90537ac093c6a618ebe1a90d87206e42309cb5d98a1b9e/pillow-12.0.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:b290fd8aa38422444d4b50d579de197557f182ef1068b75f5aa8558638b8d0a5", size = 6997850, upload-time = "2025-10-15T18:24:11.495Z" }, ] [[package]] @@ -5571,7 +5411,7 @@ name = "sympy" version = "1.14.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "mpmath" }, + { name = "mpmath", marker = "sys_platform != 'linux'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" } wheels = [ @@ -5852,54 +5692,48 @@ wheels = [ [[package]] name = "torch" -version = "2.8.0" +version = "2.9.0" source = { registry = "https://pypi.org/simple" } 
dependencies = [ - { name = "filelock" }, - { name = "fsspec" }, - { name = "jinja2" }, - { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 
'linux'" }, - { name = "setuptools", marker = "python_full_version >= '3.12'" }, - { name = "sympy" }, + { name = "filelock", marker = "sys_platform != 'linux'" }, + { name = "fsspec", marker = "sys_platform != 'linux'" }, + { name = "jinja2", marker = "sys_platform != 'linux'" }, + { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and sys_platform != 'linux') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and sys_platform != 'linux') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "setuptools", marker = "python_full_version >= '3.12' and sys_platform != 'linux'" }, + { name = "sympy", marker = "sys_platform != 'linux'" }, { name = "triton", marker = "sys_platform == 'never'" }, - { name = "typing-extensions" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/63/28/110f7274254f1b8476c561dada127173f994afa2b1ffc044efb773c15650/torch-2.8.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:0be92c08b44009d4131d1ff7a8060d10bafdb7ddcb7359ef8d8c5169007ea905", size = 102052793, upload-time = "2025-08-06T14:53:15.852Z" }, - { url = "https://files.pythonhosted.org/packages/70/1c/58da560016f81c339ae14ab16c98153d51c941544ae568da3cb5b1ceb572/torch-2.8.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:89aa9ee820bb39d4d72b794345cccef106b574508dd17dbec457949678c76011", size = 888025420, upload-time = "2025-08-06T14:54:18.014Z" }, - { url = 
"https://files.pythonhosted.org/packages/70/87/f69752d0dd4ba8218c390f0438130c166fa264a33b7025adb5014b92192c/torch-2.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:e8e5bf982e87e2b59d932769938b698858c64cc53753894be25629bdf5cf2f46", size = 241363614, upload-time = "2025-08-06T14:53:31.496Z" }, - { url = "https://files.pythonhosted.org/packages/ef/d6/e6d4c57e61c2b2175d3aafbfb779926a2cfd7c32eeda7c543925dceec923/torch-2.8.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:a3f16a58a9a800f589b26d47ee15aca3acf065546137fc2af039876135f4c760", size = 73611154, upload-time = "2025-08-06T14:53:10.919Z" }, - { url = "https://files.pythonhosted.org/packages/8f/c4/3e7a3887eba14e815e614db70b3b529112d1513d9dae6f4d43e373360b7f/torch-2.8.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:220a06fd7af8b653c35d359dfe1aaf32f65aa85befa342629f716acb134b9710", size = 102073391, upload-time = "2025-08-06T14:53:20.937Z" }, - { url = "https://files.pythonhosted.org/packages/5a/63/4fdc45a0304536e75a5e1b1bbfb1b56dd0e2743c48ee83ca729f7ce44162/torch-2.8.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:c12fa219f51a933d5f80eeb3a7a5d0cbe9168c0a14bbb4055f1979431660879b", size = 888063640, upload-time = "2025-08-06T14:55:05.325Z" }, - { url = "https://files.pythonhosted.org/packages/84/57/2f64161769610cf6b1c5ed782bd8a780e18a3c9d48931319f2887fa9d0b1/torch-2.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:8c7ef765e27551b2fbfc0f41bcf270e1292d9bf79f8e0724848b1682be6e80aa", size = 241366752, upload-time = "2025-08-06T14:53:38.692Z" }, - { url = "https://files.pythonhosted.org/packages/a4/5e/05a5c46085d9b97e928f3f037081d3d2b87fb4b4195030fc099aaec5effc/torch-2.8.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:5ae0524688fb6707c57a530c2325e13bb0090b745ba7b4a2cd6a3ce262572916", size = 73621174, upload-time = "2025-08-06T14:53:25.44Z" }, - { url = 
"https://files.pythonhosted.org/packages/49/0c/2fd4df0d83a495bb5e54dca4474c4ec5f9c62db185421563deeb5dabf609/torch-2.8.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e2fab4153768d433f8ed9279c8133a114a034a61e77a3a104dcdf54388838705", size = 101906089, upload-time = "2025-08-06T14:53:52.631Z" }, - { url = "https://files.pythonhosted.org/packages/99/a8/6acf48d48838fb8fe480597d98a0668c2beb02ee4755cc136de92a0a956f/torch-2.8.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b2aca0939fb7e4d842561febbd4ffda67a8e958ff725c1c27e244e85e982173c", size = 887913624, upload-time = "2025-08-06T14:56:44.33Z" }, - { url = "https://files.pythonhosted.org/packages/af/8a/5c87f08e3abd825c7dfecef5a0f1d9aa5df5dd0e3fd1fa2f490a8e512402/torch-2.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:2f4ac52f0130275d7517b03a33d2493bab3693c83dcfadf4f81688ea82147d2e", size = 241326087, upload-time = "2025-08-06T14:53:46.503Z" }, - { url = "https://files.pythonhosted.org/packages/be/66/5c9a321b325aaecb92d4d1855421e3a055abd77903b7dab6575ca07796db/torch-2.8.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:619c2869db3ada2c0105487ba21b5008defcc472d23f8b80ed91ac4a380283b0", size = 73630478, upload-time = "2025-08-06T14:53:57.144Z" }, - { url = "https://files.pythonhosted.org/packages/10/4e/469ced5a0603245d6a19a556e9053300033f9c5baccf43a3d25ba73e189e/torch-2.8.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:2b2f96814e0345f5a5aed9bf9734efa913678ed19caf6dc2cddb7930672d6128", size = 101936856, upload-time = "2025-08-06T14:54:01.526Z" }, - { url = "https://files.pythonhosted.org/packages/16/82/3948e54c01b2109238357c6f86242e6ecbf0c63a1af46906772902f82057/torch-2.8.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:65616ca8ec6f43245e1f5f296603e33923f4c30f93d65e103d9e50c25b35150b", size = 887922844, upload-time = "2025-08-06T14:55:50.78Z" }, - { url = 
"https://files.pythonhosted.org/packages/e3/54/941ea0a860f2717d86a811adf0c2cd01b3983bdd460d0803053c4e0b8649/torch-2.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:659df54119ae03e83a800addc125856effda88b016dfc54d9f65215c3975be16", size = 241330968, upload-time = "2025-08-06T14:54:45.293Z" }, - { url = "https://files.pythonhosted.org/packages/de/69/8b7b13bba430f5e21d77708b616f767683629fc4f8037564a177d20f90ed/torch-2.8.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:1a62a1ec4b0498930e2543535cf70b1bef8c777713de7ceb84cd79115f553767", size = 73915128, upload-time = "2025-08-06T14:54:34.769Z" }, - { url = "https://files.pythonhosted.org/packages/15/0e/8a800e093b7f7430dbaefa80075aee9158ec22e4c4fc3c1a66e4fb96cb4f/torch-2.8.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:83c13411a26fac3d101fe8035a6b0476ae606deb8688e904e796a3534c197def", size = 102020139, upload-time = "2025-08-06T14:54:39.047Z" }, - { url = "https://files.pythonhosted.org/packages/4a/15/5e488ca0bc6162c86a33b58642bc577c84ded17c7b72d97e49b5833e2d73/torch-2.8.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:8f0a9d617a66509ded240add3754e462430a6c1fc5589f86c17b433dd808f97a", size = 887990692, upload-time = "2025-08-06T14:56:18.286Z" }, - { url = "https://files.pythonhosted.org/packages/b4/a8/6a04e4b54472fc5dba7ca2341ab219e529f3c07b6941059fbf18dccac31f/torch-2.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:a7242b86f42be98ac674b88a4988643b9bc6145437ec8f048fea23f72feb5eca", size = 241603453, upload-time = "2025-08-06T14:55:22.945Z" }, - { url = "https://files.pythonhosted.org/packages/04/6e/650bb7f28f771af0cb791b02348db8b7f5f64f40f6829ee82aa6ce99aabe/torch-2.8.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:7b677e17f5a3e69fdef7eb3b9da72622f8d322692930297e4ccb52fefc6c8211", size = 73632395, upload-time = "2025-08-06T14:55:28.645Z" }, + { name = "typing-extensions", marker = "sys_platform != 'linux'" }, +] +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/bb/86/245c240d2138c17ed572c943c289056c2721abab70810d772c6bf5495b28/torch-2.9.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:030bbfe367379ae6a4ae4042b6c44da25383343b8b3c68abaa9c7231efbaf2dd", size = 104213554, upload-time = "2025-10-15T15:45:59.798Z" }, + { url = "https://files.pythonhosted.org/packages/58/1d/fd1e88ae0948825efcab7dd66d12bec23f05d4d38ed81573c8d453c14c06/torch-2.9.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:51cb63902182a78e90886e8068befd8ea102af4b00e420263591a3d70c7d3c6c", size = 899795167, upload-time = "2025-10-15T15:47:12.695Z" }, + { url = "https://files.pythonhosted.org/packages/63/5a/496197b45c14982bef4e079b24c61dc108e3ab0d0cc9718dba9f54f45a46/torch-2.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:3f6aad4d2f0ee2248bac25339d74858ff846c3969b27d14ac235821f055af83d", size = 109310314, upload-time = "2025-10-15T15:46:16.633Z" }, + { url = "https://files.pythonhosted.org/packages/58/b0/2b4e647b0fc706e88eb6c253d05511865578f5f67b55fad639bf3272a4a1/torch-2.9.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:413e1654c9203733138858780e184d9fc59442f0b3b209e16f39354eb893db9b", size = 74452019, upload-time = "2025-10-15T15:46:04.296Z" }, + { url = "https://files.pythonhosted.org/packages/58/fe/334225e6330e672b36aef23d77451fa906ea12881570c08638a91331a212/torch-2.9.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:c596708b5105d0b199215acf0c9be7c1db5f1680d88eddadf4b75a299259a677", size = 104230578, upload-time = "2025-10-15T15:46:08.182Z" }, + { url = "https://files.pythonhosted.org/packages/05/cc/49566caaa218872ec9a2912456f470ff92649894a4bc2e5274aa9ef87c4a/torch-2.9.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:51de31219c97c51cf4bf2be94d622e3deb5dcc526c6dc00e97c17eaec0fc1d67", size = 899815990, upload-time = "2025-10-15T15:48:03.336Z" }, + { url = 
"https://files.pythonhosted.org/packages/74/25/e9ab21d5925b642d008f139d4a3c9664fc9ee1faafca22913c080cc4c0a5/torch-2.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:dd515c70059afd95f48b8192733764c08ca37a1d19803af6401b5ecad7c8676e", size = 109313698, upload-time = "2025-10-15T15:46:12.425Z" }, + { url = "https://files.pythonhosted.org/packages/b3/b7/205ef3e94de636feffd64b28bb59a0dfac0771221201b9871acf9236f5ca/torch-2.9.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:614a185e4986326d526a91210c8fc1397e76e8cfafa78baf6296a790e53a9eec", size = 74463678, upload-time = "2025-10-15T15:46:29.779Z" }, + { url = "https://files.pythonhosted.org/packages/d1/d3/3985739f3b8e88675127bf70f82b3a48ae083e39cda56305dbd90398fec0/torch-2.9.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e5f7af1dc4c0a7c4a260c2534f41ddaf209714f7c89145e644c44712fbd6b642", size = 104107898, upload-time = "2025-10-15T15:46:20.883Z" }, + { url = "https://files.pythonhosted.org/packages/a5/4b/f4bb2e6c25d0272f798cd6d7a04ed315da76cec68c602d87040c7847287f/torch-2.9.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:01cff95ecd9a212ea2f141db28acccdceb6a4c54f64e6c51091146f5e2a772c6", size = 899738273, upload-time = "2025-10-15T15:50:04.188Z" }, + { url = "https://files.pythonhosted.org/packages/66/11/c1c5ba6691cda6279087c35bd626536e4fd29521fe740abf5008377a9a02/torch-2.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:4582b162f541651f0cb184d3e291c05c2f556c7117c64a9873e2ee158d40062b", size = 109280887, upload-time = "2025-10-15T15:46:26.228Z" }, + { url = "https://files.pythonhosted.org/packages/dd/5f/b85bd8c05312d71de9402bf5868d217c38827cfd09d8f8514e5be128a52b/torch-2.9.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:33f58e9a102a91259af289d50525c30323b5c9ae1d31322b6447c0814da68695", size = 74478983, upload-time = "2025-10-15T15:46:39.406Z" }, + { url = 
"https://files.pythonhosted.org/packages/c2/1c/90eb13833cdf4969ea9707586d7b57095c3b6e2b223a7256bf111689bcb8/torch-2.9.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:c30a17fc83eeab346913e237c64b15b5ba6407fff812f6c541e322e19bc9ea0e", size = 104111330, upload-time = "2025-10-15T15:46:35.238Z" }, + { url = "https://files.pythonhosted.org/packages/0e/21/2254c54b8d523592c25ef4434769aa23e29b1e6bf5f4c0ad9e27bf442927/torch-2.9.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8f25033b8667b57857dfd01458fbf2a9e6a6df1f8def23aef0dc46292f6aa642", size = 899750243, upload-time = "2025-10-15T15:48:57.459Z" }, + { url = "https://files.pythonhosted.org/packages/b7/a5/5cb94fa4fd1e78223455c23c200f30f6dc10c6d4a2bcc8f6e7f2a2588370/torch-2.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:d037f1b4ffd25013be4a7bf3651a0a910c68554956c7b2c92ebe87c76475dece", size = 109284513, upload-time = "2025-10-15T15:46:45.061Z" }, + { url = "https://files.pythonhosted.org/packages/66/e8/fc414d8656250ee46120b44836ffbb3266343db424b3e18ca79ebbf69d4f/torch-2.9.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e4e5b5cba837a2a8d1a497ba9a58dae46fa392593eaa13b871c42f71847503a5", size = 74830362, upload-time = "2025-10-15T15:46:48.983Z" }, + { url = "https://files.pythonhosted.org/packages/ed/5f/9474c98fc5ae0cd04b9466035428cd360e6611a86b8352a0fc2fa504acdc/torch-2.9.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:64693568f5dc4dbd5f880a478b1cea0201cc6b510d91d1bc54fea86ac5d1a637", size = 104144940, upload-time = "2025-10-15T15:47:29.076Z" }, + { url = "https://files.pythonhosted.org/packages/2d/5a/8e0c1cf57830172c109d4bd6be2708cabeaf550983eee7029291322447a0/torch-2.9.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:f8ed31ddd7d10bfb3fbe0b9fe01b1243577f13d75e6f4a0839a283915ce3791e", size = 899744054, upload-time = "2025-10-15T15:48:29.864Z" }, + { url = 
"https://files.pythonhosted.org/packages/6d/28/82c28b30fcb4b7c9cdd995763d18bbb830d6521356712faebbad92ffa61d/torch-2.9.0-cp313-cp313t-win_amd64.whl", hash = "sha256:eff527d4e4846e6f70d2afd8058b73825761203d66576a7e04ea2ecfebcb4ab8", size = 109517546, upload-time = "2025-10-15T15:47:33.395Z" }, + { url = "https://files.pythonhosted.org/packages/ff/c3/a91f96ec74347fa5fd24453fa514bc61c61ecc79196fa760b012a1873d96/torch-2.9.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:f8877779cf56d1ce431a7636703bdb13307f5960bb1af49716d8b179225e0e6a", size = 74480732, upload-time = "2025-10-15T15:47:38.002Z" }, + { url = "https://files.pythonhosted.org/packages/5c/73/9f70af34b334a7e0ef496ceec96b7ec767bd778ea35385ce6f77557534d1/torch-2.9.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7e614fae699838038d888729f82b687c03413c5989ce2a9481f9a7e7a396e0bb", size = 74433037, upload-time = "2025-10-15T15:47:41.894Z" }, + { url = "https://files.pythonhosted.org/packages/b7/84/37cf88625901934c97109e583ecc21777d21c6f54cda97a7e5bbad1ee2f2/torch-2.9.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:dfb5b8cd310ba3436c7e14e8b7833ef658cf3045e50d2bdaed23c8fc517065eb", size = 104116482, upload-time = "2025-10-15T15:47:46.266Z" }, + { url = "https://files.pythonhosted.org/packages/56/8e/ca8b17866943a8d4f4664d402ea84210aa274588b4c5d89918f5caa24eec/torch-2.9.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:b3d29524993a478e46f5d598b249cd824b7ed98d7fba538bd9c4cde6c803948f", size = 899746916, upload-time = "2025-10-15T15:50:40.294Z" }, + { url = "https://files.pythonhosted.org/packages/43/65/3b17c0fbbdab6501c5b320a52a648628d0d44e7379f64e27d9eef701b6bf/torch-2.9.0-cp314-cp314-win_amd64.whl", hash = "sha256:71c7578984f5ec0eb645eb4816ac8435fcf3e3e2ae1901bcd2f519a9cafb5125", size = 109275151, upload-time = "2025-10-15T15:49:20.715Z" }, + { url = 
"https://files.pythonhosted.org/packages/83/36/74f8c051f785500396e42f93542422422dfd874a174f21f8d955d36e5d64/torch-2.9.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:71d9309aee457bbe0b164bce2111cd911c4ed4e847e65d5077dbbcd3aba6befc", size = 74823353, upload-time = "2025-10-15T15:49:16.59Z" }, + { url = "https://files.pythonhosted.org/packages/62/51/dc3b4e2f9ba98ae27238f0153ca098bf9340b2dafcc67fde645d496dfc2a/torch-2.9.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:c08fb654d783899e204a32cca758a7ce8a45b2d78eeb89517cc937088316f78e", size = 104140340, upload-time = "2025-10-15T15:50:19.67Z" }, + { url = "https://files.pythonhosted.org/packages/c0/8d/b00657f8141ac16af7bb6cda2e67de18499a3263b78d516b9a93fcbc98e3/torch-2.9.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:ec8feb0099b2daa5728fbc7abb0b05730fd97e0f359ff8bda09865aaa7bd7d4b", size = 899731750, upload-time = "2025-10-15T15:49:36.673Z" }, + { url = "https://files.pythonhosted.org/packages/fc/29/bd361e0cbb2c79ce6450f42643aaf6919956f89923a50571b0ebfe92d142/torch-2.9.0-cp314-cp314t-win_amd64.whl", hash = "sha256:695ba920f234ad4170c9c50e28d56c848432f8f530e6bc7f88fcb15ddf338e75", size = 109503850, upload-time = "2025-10-15T15:50:24.118Z" }, ] [[package]] @@ -5918,34 +5752,42 @@ wheels = [ [[package]] name = "torchvision" -version = "0.23.0" +version = "0.24.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy" }, - { name = "pillow" }, + { name = "numpy", marker = "sys_platform != 'linux'" }, + { name = "pillow", marker = "sys_platform != 'linux'" }, { name = "torch", marker = "sys_platform == 'never'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/4d/49/5ad5c3ff4920be0adee9eb4339b4fb3b023a0fc55b9ed8dbc73df92946b8/torchvision-0.23.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7266871daca00ad46d1c073e55d972179d12a58fa5c9adec9a3db9bbed71284a", size = 1856885, upload-time = "2025-08-06T14:57:55.024Z" }, - { url = 
"https://files.pythonhosted.org/packages/25/44/ddd56d1637bac42a8c5da2c8c440d8a28c431f996dd9790f32dd9a96ca6e/torchvision-0.23.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:31c583ba27426a3a04eca8c05450524105c1564db41be6632f7536ef405a6de2", size = 2394251, upload-time = "2025-08-06T14:58:01.725Z" }, - { url = "https://files.pythonhosted.org/packages/93/f3/3cdf55bbf0f737304d997561c34ab0176222e0496b6743b0feab5995182c/torchvision-0.23.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:3932bf67256f2d095ce90a9f826f6033694c818856f4bb26794cf2ce64253e53", size = 8627497, upload-time = "2025-08-06T14:58:09.317Z" }, - { url = "https://files.pythonhosted.org/packages/97/90/02afe57c3ef4284c5cf89d3b7ae203829b3a981f72b93a7dd2a3fd2c83c1/torchvision-0.23.0-cp310-cp310-win_amd64.whl", hash = "sha256:83ee5bf827d61a8af14620c0a61d8608558638ac9c3bac8adb7b27138e2147d1", size = 1600760, upload-time = "2025-08-06T14:57:56.783Z" }, - { url = "https://files.pythonhosted.org/packages/f0/d7/15d3d7bd8d0239211b21673d1bac7bc345a4ad904a8e25bb3fd8a9cf1fbc/torchvision-0.23.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:49aa20e21f0c2bd458c71d7b449776cbd5f16693dd5807195a820612b8a229b7", size = 1856884, upload-time = "2025-08-06T14:58:00.237Z" }, - { url = "https://files.pythonhosted.org/packages/dd/14/7b44fe766b7d11e064c539d92a172fa9689a53b69029e24f2f1f51e7dc56/torchvision-0.23.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:01dc33ee24c79148aee7cdbcf34ae8a3c9da1674a591e781577b716d233b1fa6", size = 2395543, upload-time = "2025-08-06T14:58:04.373Z" }, - { url = "https://files.pythonhosted.org/packages/79/9c/fcb09aff941c8147d9e6aa6c8f67412a05622b0c750bcf796be4c85a58d4/torchvision-0.23.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:35c27941831b653f5101edfe62c03d196c13f32139310519e8228f35eae0e96a", size = 8628388, upload-time = "2025-08-06T14:58:07.802Z" }, - { url = 
"https://files.pythonhosted.org/packages/93/40/3415d890eb357b25a8e0a215d32365a88ecc75a283f75c4e919024b22d97/torchvision-0.23.0-cp311-cp311-win_amd64.whl", hash = "sha256:09bfde260e7963a15b80c9e442faa9f021c7e7f877ac0a36ca6561b367185013", size = 1600741, upload-time = "2025-08-06T14:57:59.158Z" }, - { url = "https://files.pythonhosted.org/packages/df/1d/0ea0b34bde92a86d42620f29baa6dcbb5c2fc85990316df5cb8f7abb8ea2/torchvision-0.23.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e0e2c04a91403e8dd3af9756c6a024a1d9c0ed9c0d592a8314ded8f4fe30d440", size = 1856885, upload-time = "2025-08-06T14:58:06.503Z" }, - { url = "https://files.pythonhosted.org/packages/e2/00/2f6454decc0cd67158c7890364e446aad4b91797087a57a78e72e1a8f8bc/torchvision-0.23.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:6dd7c4d329a0e03157803031bc856220c6155ef08c26d4f5bbac938acecf0948", size = 2396614, upload-time = "2025-08-06T14:58:03.116Z" }, - { url = "https://files.pythonhosted.org/packages/e4/b5/3e580dcbc16f39a324f3dd71b90edbf02a42548ad44d2b4893cc92b1194b/torchvision-0.23.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:4e7d31c43bc7cbecbb1a5652ac0106b436aa66e26437585fc2c4b2cf04d6014c", size = 8627108, upload-time = "2025-08-06T14:58:12.956Z" }, - { url = "https://files.pythonhosted.org/packages/82/c1/c2fe6d61e110a8d0de2f94276899a2324a8f1e6aee559eb6b4629ab27466/torchvision-0.23.0-cp312-cp312-win_amd64.whl", hash = "sha256:a2e45272abe7b8bf0d06c405e78521b5757be1bd0ed7e5cd78120f7fdd4cbf35", size = 1600723, upload-time = "2025-08-06T14:57:57.986Z" }, - { url = "https://files.pythonhosted.org/packages/91/37/45a5b9407a7900f71d61b2b2f62db4b7c632debca397f205fdcacb502780/torchvision-0.23.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1c37e325e09a184b730c3ef51424f383ec5745378dc0eca244520aca29722600", size = 1856886, upload-time = "2025-08-06T14:58:05.491Z" }, - { url = 
"https://files.pythonhosted.org/packages/ac/da/a06c60fc84fc849377cf035d3b3e9a1c896d52dbad493b963c0f1cdd74d0/torchvision-0.23.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:2f7fd6c15f3697e80627b77934f77705f3bc0e98278b989b2655de01f6903e1d", size = 2353112, upload-time = "2025-08-06T14:58:26.265Z" }, - { url = "https://files.pythonhosted.org/packages/a0/27/5ce65ba5c9d3b7d2ccdd79892ab86a2f87ac2ca6638f04bb0280321f1a9c/torchvision-0.23.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:a76fafe113b2977be3a21bf78f115438c1f88631d7a87203acb3dd6ae55889e6", size = 8627658, upload-time = "2025-08-06T14:58:15.999Z" }, - { url = "https://files.pythonhosted.org/packages/1f/e4/028a27b60aa578a2fa99d9d7334ff1871bb17008693ea055a2fdee96da0d/torchvision-0.23.0-cp313-cp313-win_amd64.whl", hash = "sha256:07d069cb29691ff566e3b7f11f20d91044f079e1dbdc9d72e0655899a9b06938", size = 1600749, upload-time = "2025-08-06T14:58:10.719Z" }, - { url = "https://files.pythonhosted.org/packages/05/35/72f91ad9ac7c19a849dedf083d347dc1123f0adeb401f53974f84f1d04c8/torchvision-0.23.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:2df618e1143805a7673aaf82cb5720dd9112d4e771983156aaf2ffff692eebf9", size = 2047192, upload-time = "2025-08-06T14:58:11.813Z" }, - { url = "https://files.pythonhosted.org/packages/1d/9d/406cea60a9eb9882145bcd62a184ee61e823e8e1d550cdc3c3ea866a9445/torchvision-0.23.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:2a3299d2b1d5a7aed2d3b6ffb69c672ca8830671967eb1cee1497bacd82fe47b", size = 2359295, upload-time = "2025-08-06T14:58:17.469Z" }, - { url = "https://files.pythonhosted.org/packages/2b/f4/34662f71a70fa1e59de99772142f22257ca750de05ccb400b8d2e3809c1d/torchvision-0.23.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:76bc4c0b63d5114aa81281390f8472a12a6a35ce9906e67ea6044e5af4cab60c", size = 8800474, upload-time = "2025-08-06T14:58:22.53Z" }, - { url = 
"https://files.pythonhosted.org/packages/6e/f5/b5a2d841a8d228b5dbda6d524704408e19e7ca6b7bb0f24490e081da1fa1/torchvision-0.23.0-cp313-cp313t-win_amd64.whl", hash = "sha256:b9e2dabf0da9c8aa9ea241afb63a8f3e98489e706b22ac3f30416a1be377153b", size = 1527667, upload-time = "2025-08-06T14:58:14.446Z" }, + { url = "https://files.pythonhosted.org/packages/63/5b/1404eeab00819df71a30e916c2081654366741f7838fcc4fff86b7bd9e7e/torchvision-0.24.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5e8d5e667deff87bd66d26df6d225f46224bb0782d4f3f8f5d2f3068b5fd4492", size = 1891723, upload-time = "2025-10-15T15:51:08.5Z" }, + { url = "https://files.pythonhosted.org/packages/88/e3/1b003ecd52bd721f8304aeb66691edfbc2002747ec83d36188ad6abab506/torchvision-0.24.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:a110a51c75e89807a8382b0d8034f5e180fb9319570be3389ffd3d4ac4fd57a9", size = 2418988, upload-time = "2025-10-15T15:51:25.195Z" }, + { url = "https://files.pythonhosted.org/packages/56/2e/3c19a35e62da0f606baf8f6e2ceeab1eb66aaa2f84c6528538b06b416d54/torchvision-0.24.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:81d5b12a6df1bb2cc8bdbad837b637d6ea446f2866e6d94f1b5d478856331be3", size = 8046769, upload-time = "2025-10-15T15:51:15.221Z" }, + { url = "https://files.pythonhosted.org/packages/e0/1d/e7ab614a1ace820a2366eab1532679fbe81bd9501ffd6a1b7be14936366d/torchvision-0.24.0-cp310-cp310-win_amd64.whl", hash = "sha256:0839dbb305d34671f5a64f558782095134b04bbeff8b90f11eb80515d7d50092", size = 3686529, upload-time = "2025-10-15T15:51:20.982Z" }, + { url = "https://files.pythonhosted.org/packages/a3/17/54ed2ec6944ea972b461a86424c8c7f98835982c90cbc45bf59bd962863a/torchvision-0.24.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f771cf918351ad509a28488be475f3e9cc71a750d6b1467842bfb64863a5e986", size = 1891719, upload-time = "2025-10-15T15:51:10.384Z" }, + { url = 
"https://files.pythonhosted.org/packages/f8/07/0cd6776eee784742ad3cb2bfd3295383d84cb2f9e87386119333d1587f0f/torchvision-0.24.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:bbd63bf4ebff84c48c50123eba90526cc9f794fe45bc9f5dd07cec19e8c62bce", size = 2420513, upload-time = "2025-10-15T15:51:18.087Z" }, + { url = "https://files.pythonhosted.org/packages/1a/f4/6026c08011ddcefcbc14161c5aa9dce55c35c6b045e04ef0952e88bf4594/torchvision-0.24.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:78fe414b3bb6dbf7e6f6da6f733ba96881f6b29a9b997228de7c5f603e5ed940", size = 8048018, upload-time = "2025-10-15T15:51:13.579Z" }, + { url = "https://files.pythonhosted.org/packages/2f/b4/362b4e67ed87cee0fb4f8f0363a852eaeef527968bf62c07ed56f764d729/torchvision-0.24.0-cp311-cp311-win_amd64.whl", hash = "sha256:629584b94e52f32a6278f2a35d85eeaae95fcc38730fcb765064f26c3c96df5d", size = 4027686, upload-time = "2025-10-15T15:51:19.189Z" }, + { url = "https://files.pythonhosted.org/packages/47/ef/81e4e69e02e2c4650b30e8c11c8974f946682a30e0ab7e9803a831beff76/torchvision-0.24.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c61d40bcd2e2451e932902a702ad495ba1ec6f279e90b1e15cef2bb55dc911e2", size = 1891726, upload-time = "2025-10-15T15:51:16.977Z" }, + { url = "https://files.pythonhosted.org/packages/00/7b/e3809b3302caea9a12c13f3adebe4fef127188438e719fd6c8dc93db1da6/torchvision-0.24.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:b0531d1483fc322d7da0d83be52f0df860a75114ab87dbeeb9de765feaeda843", size = 2419495, upload-time = "2025-10-15T15:51:11.885Z" }, + { url = "https://files.pythonhosted.org/packages/7e/e6/7324ead6793075a8c75c56abeed1236d1750de16a5613cfe2ddad164a92a/torchvision-0.24.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:26b9dd9c083f8e5f7ac827de6d5b88c615d9c582dc87666770fbdf16887e4c25", size = 8050480, upload-time = "2025-10-15T15:51:24.012Z" }, + { url = 
"https://files.pythonhosted.org/packages/3e/ad/3c56fcd2a0d6e8afa80e115b5ade4302232ec99655220a51d05709819523/torchvision-0.24.0-cp312-cp312-win_amd64.whl", hash = "sha256:060b7c50ed4b3fb0316b08e2e31bfd874ec2f63ef5ae02f81e54341ca4e88703", size = 4292225, upload-time = "2025-10-15T15:51:27.699Z" }, + { url = "https://files.pythonhosted.org/packages/4f/b5/b2008e4b77a8d6aada828dd0f6a438d8f94befa23fdd2d62fa0ac6e60113/torchvision-0.24.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:84d79cfc6457310107ce4d712de7a3d388b24484bc9aeded4a76d8f8e3a2813d", size = 1891722, upload-time = "2025-10-15T15:51:28.854Z" }, + { url = "https://files.pythonhosted.org/packages/8f/02/e2f6b0ff93ca4db5751ac9c5be43f13d5e53d9e9412324f464dca1775027/torchvision-0.24.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:fec12a269cf80f6b0b71471c8d498cd3bdd9d8e892c425bf39fecb604852c3b0", size = 2371478, upload-time = "2025-10-15T15:51:37.842Z" }, + { url = "https://files.pythonhosted.org/packages/77/85/42e5fc4f716ec7b73cf1f32eeb5c77961be4d4054b26cd6a5ff97f20c966/torchvision-0.24.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:7323a9be5e3da695605753f501cdc87824888c5655d27735cdeaa9986b45884c", size = 8050200, upload-time = "2025-10-15T15:51:46.276Z" }, + { url = "https://files.pythonhosted.org/packages/93/c2/48cb0b6b26276d2120b1e0dbc877579a748eae02b4091a7522ce54f6d5e1/torchvision-0.24.0-cp313-cp313-win_amd64.whl", hash = "sha256:08cad8b204196e945f0b2d73adee952d433db1c03645851d52b22a45f1015b13", size = 4309939, upload-time = "2025-10-15T15:51:39.002Z" }, + { url = "https://files.pythonhosted.org/packages/7d/d7/3dd10830b047eeb46ae6b465474258d7b4fbb7d8872dca69bd42449f5c82/torchvision-0.24.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:6ab956a6e588623353e0f20d4b03eb1656cb4a3c75ca4dd8b4e32e01bc43271a", size = 2028355, upload-time = "2025-10-15T15:51:22.384Z" }, + { url = 
"https://files.pythonhosted.org/packages/f7/cf/2d7e43409089ce7070f5336161f9216d58653ee1cb26bcb5d6c84cc2de36/torchvision-0.24.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:b1b3db80609c32a088554e8e94b4fc31f1033fe5bb4ac0673ec49c3eb03fb4da", size = 2374466, upload-time = "2025-10-15T15:51:35.382Z" }, + { url = "https://files.pythonhosted.org/packages/e9/30/8f7c328fd7e0a9665da4b6b56b1c627665c18470bfe62f3729ad3eda9aec/torchvision-0.24.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:e6635f100d455c80b43f297df4b8585a76c6a2e114802f6567ddd28d7b5479b0", size = 8217068, upload-time = "2025-10-15T15:51:36.623Z" }, + { url = "https://files.pythonhosted.org/packages/55/a2/b6f9e40e2904574c80b3bb872c66af20bbd642053e7c8e1b9e99ab396535/torchvision-0.24.0-cp313-cp313t-win_amd64.whl", hash = "sha256:4ce158bbdc3a9086034bced0b5212888bd5b251fee6d08a9eff151d30b4b228a", size = 4273912, upload-time = "2025-10-15T15:51:33.866Z" }, + { url = "https://files.pythonhosted.org/packages/1b/24/790a39645cc8c71bf442d54a76da9bda5caeb2a44c5f7e02498649cd99d4/torchvision-0.24.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4bdfc85a5ed706421555f32cdc5e3ddb6d40bf65ef03a274ce3c176393e2904b", size = 2028335, upload-time = "2025-10-15T15:51:26.252Z" }, + { url = "https://files.pythonhosted.org/packages/b0/d7/69479a066ea773653e88eda99031e38681e9094046f87cb957af5036db0e/torchvision-0.24.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:73576a9c4a593223fbae85a64e8bbd77049abd1101893ecf3c5e981284fd58b4", size = 2371609, upload-time = "2025-10-15T15:51:29.859Z" }, + { url = "https://files.pythonhosted.org/packages/46/64/3c7fdb3771ec992b9445a1f7a969466b23ce2cdb14e09303b3db351a0655/torchvision-0.24.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:dd565b1b06666ff399d0801d4d1824fa570c0167a179ca700a5be232527b3c62", size = 8214918, upload-time = "2025-10-15T15:51:41.465Z" }, + { url = 
"https://files.pythonhosted.org/packages/58/51/abc416bc34d574ad479af738e413d9ebf93027ee92d0f4ae38f966b818f7/torchvision-0.24.0-cp314-cp314-win_amd64.whl", hash = "sha256:eb45d12ac48d757738788fd3fb8e88e647d6b2ab2424134ca87556efc72d81b5", size = 4257776, upload-time = "2025-10-15T15:51:42.642Z" }, + { url = "https://files.pythonhosted.org/packages/08/f7/261d1353c611820541ecd43046b89da3f1ae998dc786e4288b890a009883/torchvision-0.24.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:68120e7e03c31900e499a10bb7fdd63cfd67f0054c9fa108e7e27f9cd372f315", size = 2028359, upload-time = "2025-10-15T15:51:32.119Z" }, + { url = "https://files.pythonhosted.org/packages/a2/fd/615d8a86db1578345de7fa1edaf476fbcf4f057bf7e4fd898306b620c487/torchvision-0.24.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:64e54494043eecf9f57a9881c6fdea49c62282782e737c002ae8b1639e6ea80e", size = 2374469, upload-time = "2025-10-15T15:51:40.19Z" }, + { url = "https://files.pythonhosted.org/packages/04/98/bac11e8fdbf00d6c398246ff2781370aa72c99f2ac685c01ce79354c9a32/torchvision-0.24.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:75ef9546323b321a451239d886f0cb528f7e98bb294da47a3200effd4e572064", size = 8217060, upload-time = "2025-10-15T15:51:45.033Z" }, + { url = "https://files.pythonhosted.org/packages/47/6f/9fba8abc468c904570699eceeb51588f9622172b8fffa4ab11bcf15598c2/torchvision-0.24.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2efb617667950814fc8bb9437e5893861b3616e214285be33cbc364a3f42c599", size = 4358490, upload-time = "2025-10-15T15:51:43.884Z" }, ] [[package]] @@ -6023,12 +5865,19 @@ name = "triton" version = "3.5.0" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/dd/22/507b6f58a35e05e84381630b2dc2a3cee1a7a2a7eaf4cba857c638a18a24/triton-3.5.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6f90de6a6566bb619b4c0adc9855729e1b1b5e26533fca1bf6206e96b6d277a3", size = 159827599, 
upload-time = "2025-10-15T19:15:43.87Z" }, { url = "https://files.pythonhosted.org/packages/0b/eb/09e31d107a5d00eb281aa7e6635ca463e9bca86515944e399480eadb71f8/triton-3.5.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5d3b3d480debf24eaa739623c9a42446b0b77f95593d30eb1f64cd2278cc1f0", size = 170333110, upload-time = "2025-10-13T16:37:49.588Z" }, + { url = "https://files.pythonhosted.org/packages/79/f9/b6f60f978397c616fd8dacca2305759fe4f80d397b20ef72534803244bd5/triton-3.5.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8457b22148defefdcb7fa8144b05ce211b9faefad650a1ce85b23df488d5549c", size = 159926731, upload-time = "2025-10-15T19:15:49.682Z" }, { url = "https://files.pythonhosted.org/packages/3d/78/949a04391c21956c816523678f0e5fa308eb5b1e7622d88c4e4ef5fceca0/triton-3.5.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f34bfa21c5b3a203c0f0eab28dcc1e49bd1f67d22724e77fb6665a659200a4ec", size = 170433488, upload-time = "2025-10-13T16:37:57.132Z" }, + { url = "https://files.pythonhosted.org/packages/87/9b/30988039e1e84df7554fba24e6a734d2d0e847af33cabdf9b532b3c51456/triton-3.5.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7da21fccceafc163e3a5e857abe34351ef76345af06cabf9637a914742671f0b", size = 159946647, upload-time = "2025-10-15T19:15:56.325Z" }, { url = "https://files.pythonhosted.org/packages/f5/3a/e991574f3102147b642e49637e0281e9bb7c4ba254edb2bab78247c85e01/triton-3.5.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9e71db82261c4ffa3921cd050cd5faa18322d2d405c30eb56084afaff3b0833", size = 170476535, upload-time = "2025-10-13T16:38:05.18Z" }, + { url = "https://files.pythonhosted.org/packages/cd/85/e37f1197acb04c8f3d83851d23d5d6ed5060ef74580668b112e23fdfa203/triton-3.5.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:188da5b81fa2f8322c27fec1627703eac24cb9bb7ab0dfbe9925973bc1b070d3", size = 159958970, upload-time = "2025-10-15T19:16:01.717Z" }, { url = "https://files.pythonhosted.org/packages/6c/29/10728de8a6e932e517c10773486b8e99f85d1b1d9dd87d9a9616e1fef4a1/triton-3.5.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e6bb9aa5519c084a333acdba443789e50012a4b851cd486c54f0b8dc2a8d3a12", size = 170487289, upload-time = "2025-10-13T16:38:11.662Z" }, + { url = "https://files.pythonhosted.org/packages/b8/1d/38258f05010ac17a7b058c022911c9cae6526e149b7397134a048cf5a6c2/triton-3.5.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:03127d9b33aaf979c856676b394bc059ec1d68cb6da68ae03f62dd8ad77a04ae", size = 160073012, upload-time = "2025-10-15T19:16:07.477Z" }, { url = "https://files.pythonhosted.org/packages/5c/38/db80e48b9220c9bce872b0f616ad0446cdf554a40b85c7865cbca99ab3c2/triton-3.5.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c83f2343e1a220a716c7b3ab9fccfcbe3ad4020d189549200e2d2e8d5868bed9", size = 170577179, upload-time = "2025-10-13T16:38:17.865Z" }, + { url = "https://files.pythonhosted.org/packages/91/fe/8f5771d00227f4eb1ee034f218ed427102b989366d2275fe3b3c105a3921/triton-3.5.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:468936651d383f4a6d10068d34a627505e13af55be5d002b9f27b987e7a5f0ac", size = 159957460, upload-time = "2025-10-15T19:16:12.626Z" }, { url = "https://files.pythonhosted.org/packages/ff/60/1810655d1d856c9a4fcc90ee8966d85f552d98c53a6589f95ab2cbe27bb8/triton-3.5.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da0fa67ccd76c3dcfb0bffe1b1c57c685136a6bd33d141c24d9655d4185b1289", size = 170487949, upload-time = "2025-10-13T16:38:24.881Z" }, + { url = 
"https://files.pythonhosted.org/packages/78/59/99edd103958fe6e42b50b9ad8ce4f223ddf4ccf475259cf7d2b53381dc6c/triton-3.5.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c7ceef21410229ac23173a28eee5cfc0e37c1dfdb8b4bc11ecda2e3ecec7c686", size = 160075629, upload-time = "2025-10-15T19:16:18.746Z" }, { url = "https://files.pythonhosted.org/packages/fb/b7/1dec8433ac604c061173d0589d99217fe7bf90a70bdc375e745d044b8aad/triton-3.5.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:317fe477ea8fd4524a6a8c499fb0a36984a56d0b75bf9c9cb6133a1c56d5a6e7", size = 170580176, upload-time = "2025-10-13T16:38:31.14Z" }, ]