diff --git a/ppfleetx/configs/nlp/gpt/auto/qat_generation_gpt_345M_single_card.yaml b/ppfleetx/configs/nlp/gpt/auto/qat_generation_gpt_345M_single_card.yaml new file mode 100644 index 000000000..aa9c80c5e --- /dev/null +++ b/ppfleetx/configs/nlp/gpt/auto/qat_generation_gpt_345M_single_card.yaml @@ -0,0 +1,53 @@ +_base_: ./pretrain_gpt_base.yaml + + +Engine: + mix_precision: + level: "o2" + scale_loss: 32768.0 + custom_black_list: ["reduce_sum", "c_softmax_with_cross_entropy", "elementwise_div", "where"] + custom_white_list: ["lookup_table", "lookup_table_v2"] + use_fp16_guard: False + + +Generation: + top_k: 50 + top_p: 0.75 + temperature: 1.0 + min_dec_len: 1 + max_dec_len: 200 + num_return_sequences: 1 + decode_strategy: "sampling" + + +Model: + module: GPTGenerationModuleAuto + vocab_size: 50304 + hidden_size: 1024 + num_layers: 24 + num_attention_heads: 16 + ffn_hidden_size: 4096 + hidden_dropout_prob: 0.1 + attention_probs_dropout_prob: 0.1 + max_position_embeddings: 1024 + type_vocab_size: 16 + initializer_range: 0.02 + use_recompute: False + fuse_attn_qkv: True + + +Distributed: + dp_degree: 1 + mp_degree: 1 + pp_degree: 1 + sharding: + sharding_degree: 1 + sharding_stage: 1 + + +Quantization: + enable: True + channel_wise_abs_max: False + weight_bits: 8 + activation_bits: 8 + onnx_format: True diff --git a/ppfleetx/core/engine/inference_engine.py b/ppfleetx/core/engine/inference_engine.py index 937c506f3..897138adc 100644 --- a/ppfleetx/core/engine/inference_engine.py +++ b/ppfleetx/core/engine/inference_engine.py @@ -190,6 +190,7 @@ def _init_predictor(self): config.enable_memory_optim() config.switch_ir_optim(True) + # config.switch_ir_debug(True) config.enable_use_gpu(100, device_id) # distributed config diff --git a/projects/gpt/auto_export_gpt_345M_single_card.sh b/projects/gpt/auto_export_gpt_345M_single_card.sh index ed2113948..dd0ed025a 100644 --- a/projects/gpt/auto_export_gpt_345M_single_card.sh +++ b/projects/gpt/auto_export_gpt_345M_single_card.sh @@ -20,4 +20,4 @@ rm -rf $log_dir python -m paddle.distributed.launch --log_dir $log_dir --devices "1" \ ./tools/auto_export.py \ -c ./ppfleetx/configs/nlp/gpt/auto/generation_gpt_345M_single_card.yaml \ - -o Engine.save_load.ckpt_dir=./ckpt/PaddleFleetX_GPT_345M_220826/ + -o Engine.save_load.ckpt_dir=./ckpt/PaddleFleetX_GPT_345M_220826/auto diff --git a/projects/gpt/auto_export_qat_gpt_345M_single_card.sh b/projects/gpt/auto_export_qat_gpt_345M_single_card.sh new file mode 100644 index 000000000..6a1649dad --- /dev/null +++ b/projects/gpt/auto_export_qat_gpt_345M_single_card.sh @@ -0,0 +1,23 @@ +#! /bin/bash + +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +log_dir=log_345m_mp1 +rm -rf $log_dir + +python -m paddle.distributed.launch --log_dir $log_dir --devices "1" \ + ./tools/auto_export.py \ + -c ./ppfleetx/configs/nlp/gpt/auto/qat_generation_gpt_345M_single_card.yaml \ + -o Engine.save_load.ckpt_dir=./GPT_345M_QAT_wo_analysis/auto diff --git a/projects/gpt/docs/inference.md b/projects/gpt/docs/inference.md index b4c99e5dd..7d9d18c24 100644 --- a/projects/gpt/docs/inference.md +++ b/projects/gpt/docs/inference.md @@ -33,22 +33,23 @@ sh projects/gpt/auto_export_gpt_175B_mp8.sh ### 1.2 量化模型导出 -导出单卡`GPT-3(345M)`量化模型: +导出单卡`GPT-3(345M)`量化模型(FP32): ```shell # 为了方便快速体验,这里给出345M量化训练的模型,若已有量化模型,则无需下载 wget https://paddlefleetx.bj.bcebos.com/model/nlp/gpt/GPT_345M_QAT_wo_analysis.tar tar xf GPT_345M_QAT_wo_analysis.tar -export CUDA_VISIBLE_DEVICES=0 -python ./tools/export.py \ - -c ./ppfleetx/configs/nlp/gpt/generation_qat_gpt_345M_single_card.yaml \ - -o Model.hidden_dropout_prob=0.0 \ - -o Model.attention_probs_dropout_prob=0.0 \ - -o Engine.save_load.ckpt_dir='./GPT_345M_QAT_wo_analysis/' +bash projects/gpt/export_qat_gpt_345M_single_card.sh +``` + +导出单卡`GPT-3(345M)`量化模型(FP16): + +```shell +bash projects/gpt/auto_export_qat_gpt_345M_single_card.sh ``` -导出单卡`GPT-3(6.7B)`量化模型: +导出单卡`GPT-3(6.7B)`量化模型(FP32): ```shell export CUDA_VISIBLE_DEVICES=0 diff --git a/projects/gpt/export_gpt_345M_single_card.sh b/projects/gpt/export_gpt_345M_single_card.sh index 8c5ff05e4..4bc5aed5f 100644 --- a/projects/gpt/export_gpt_345M_single_card.sh +++ b/projects/gpt/export_gpt_345M_single_card.sh @@ -16,4 +16,5 @@ export CUDA_VISIBLE_DEVICES=0 -python ./tools/export.py -c ./ppfleetx/configs/nlp/gpt/generation_gpt_345M_single_card.yaml +python ./tools/export.py -c ./ppfleetx/configs/nlp/gpt/inference_gpt_345M_single_card.yaml \ +-o Engine.save_load.ckpt_dir=./ckpt/PaddleFleetX_GPT_345M_220826/