diff --git a/ppfleetx/configs/nlp/gpt/auto/qat_generation_gpt_345M_single_card.yaml b/ppfleetx/configs/nlp/gpt/auto/qat_generation_gpt_345M_single_card.yaml
new file mode 100644
index 000000000..aa9c80c5e
--- /dev/null
+++ b/ppfleetx/configs/nlp/gpt/auto/qat_generation_gpt_345M_single_card.yaml
@@ -0,0 +1,53 @@
+_base_: ./pretrain_gpt_base.yaml  # presumably the keys below override this base config -- confirm against the config loader
+
+
+Engine:
+  mix_precision:  # mixed-precision settings used for this config
+    level: "o2"  # NOTE(review): presumably the AMP O2 level -- confirm the lowercase spelling is what the engine expects
+    scale_loss: 32768.0  # 2**15
+    custom_black_list: ["reduce_sum", "c_softmax_with_cross_entropy", "elementwise_div", "where"]  # presumably ops kept out of fp16 -- confirm
+    custom_white_list: ["lookup_table", "lookup_table_v2"]  # presumably ops forced to fp16 -- confirm
+    use_fp16_guard: False
+
+
+Generation:  # decoding hyperparameters
+  top_k: 50
+  top_p: 0.75
+  temperature: 1.0
+  min_dec_len: 1  # presumably bounds on the generated length (with max_dec_len) -- confirm units
+  max_dec_len: 200
+  num_return_sequences: 1
+  decode_strategy: "sampling"
+
+
+Model:
+  module: GPTGenerationModuleAuto
+  vocab_size: 50304
+  hidden_size: 1024
+  num_layers: 24
+  num_attention_heads: 16  # hidden_size / num_attention_heads = 64
+  ffn_hidden_size: 4096  # 4 x hidden_size
+  hidden_dropout_prob: 0.1  # NOTE(review): non-zero dropout in an export config -- confirm this is intended
+  attention_probs_dropout_prob: 0.1  # NOTE(review): same question as hidden_dropout_prob
+  max_position_embeddings: 1024
+  type_vocab_size: 16
+  initializer_range: 0.02
+  use_recompute: False
+  fuse_attn_qkv: True
+
+
+Distributed:  # every degree below is 1, matching the "single_card" file name
+  dp_degree: 1
+  mp_degree: 1
+  pp_degree: 1
+  sharding:
+    sharding_degree: 1
+    sharding_stage: 1
+
+
+Quantization:  # 8-bit weights and 8-bit activations (weight_bits / activation_bits)
+  enable: True
+  channel_wise_abs_max: False
+  weight_bits: 8
+  activation_bits: 8
+  onnx_format: True
diff --git a/ppfleetx/core/engine/inference_engine.py b/ppfleetx/core/engine/inference_engine.py
index 937c506f3..897138adc 100644
--- a/ppfleetx/core/engine/inference_engine.py
+++ b/ppfleetx/core/engine/inference_engine.py
@@ -190,6 +190,7 @@ def _init_predictor(self):
 
         config.enable_memory_optim()
         config.switch_ir_optim(True)
+        # Debugging aid, left disabled: config.switch_ir_debug(True)
         config.enable_use_gpu(100, device_id)
 
         # distributed config
diff --git a/projects/gpt/auto_export_gpt_345M_single_card.sh b/projects/gpt/auto_export_gpt_345M_single_card.sh
index ed2113948..dd0ed025a 100644
--- a/projects/gpt/auto_export_gpt_345M_single_card.sh
+++ b/projects/gpt/auto_export_gpt_345M_single_card.sh
@@ -20,4 +20,4 @@ rm -rf $log_dir
 python -m paddle.distributed.launch --log_dir $log_dir --devices "1" \
     ./tools/auto_export.py \
     -c ./ppfleetx/configs/nlp/gpt/auto/generation_gpt_345M_single_card.yaml \
-    -o Engine.save_load.ckpt_dir=./ckpt/PaddleFleetX_GPT_345M_220826/
+    -o Engine.save_load.ckpt_dir=./ckpt/PaddleFleetX_GPT_345M_220826/auto
diff --git a/projects/gpt/auto_export_qat_gpt_345M_single_card.sh b/projects/gpt/auto_export_qat_gpt_345M_single_card.sh
new file mode 100644
index 000000000..6a1649dad
--- /dev/null
+++ b/projects/gpt/auto_export_qat_gpt_345M_single_card.sh
@@ -0,0 +1,23 @@
+#! /bin/bash
+
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+log_dir=log_345m_mp1  # log directory handed to the launcher via --log_dir below
+rm -rf $log_dir  # remove any log directory left by a previous run
+
+python -m paddle.distributed.launch --log_dir $log_dir --devices "1" \
+    ./tools/auto_export.py \
+    -c ./ppfleetx/configs/nlp/gpt/auto/qat_generation_gpt_345M_single_card.yaml \
+    -o Engine.save_load.ckpt_dir=./GPT_345M_QAT_wo_analysis/auto
diff --git a/projects/gpt/docs/inference.md b/projects/gpt/docs/inference.md
index b4c99e5dd..7d9d18c24 100644
--- a/projects/gpt/docs/inference.md
+++ b/projects/gpt/docs/inference.md
@@ -33,22 +33,23 @@ sh projects/gpt/auto_export_gpt_175B_mp8.sh
 
 ### 1.2 量化模型导出
 
-导出单卡`GPT-3(345M)`量化模型：
+导出单卡`GPT-3(345M)`量化模型(FP32)：
 
 ```shell
 # 为了方便快速体验，这里给出345M量化训练的模型，若已有量化模型，则无需下载
 wget https://paddlefleetx.bj.bcebos.com/model/nlp/gpt/GPT_345M_QAT_wo_analysis.tar
 tar xf GPT_345M_QAT_wo_analysis.tar
 
-export CUDA_VISIBLE_DEVICES=0
-python ./tools/export.py \
-    -c ./ppfleetx/configs/nlp/gpt/generation_qat_gpt_345M_single_card.yaml \
-    -o Model.hidden_dropout_prob=0.0 \
-    -o Model.attention_probs_dropout_prob=0.0 \
-    -o Engine.save_load.ckpt_dir='./GPT_345M_QAT_wo_analysis/'
+bash projects/gpt/export_qat_gpt_345M_single_card.sh
+```
+
+导出单卡`GPT-3(345M)`量化模型(FP16)：
+
+```shell
+bash projects/gpt/auto_export_qat_gpt_345M_single_card.sh
 ```
 
-导出单卡`GPT-3(6.7B)`量化模型：
+导出单卡`GPT-3(6.7B)`量化模型(FP32)：
 
 ```shell
 export CUDA_VISIBLE_DEVICES=0
diff --git a/projects/gpt/export_gpt_345M_single_card.sh b/projects/gpt/export_gpt_345M_single_card.sh
index 8c5ff05e4..4bc5aed5f 100644
--- a/projects/gpt/export_gpt_345M_single_card.sh
+++ b/projects/gpt/export_gpt_345M_single_card.sh
@@ -16,4 +16,5 @@
 
 
 export CUDA_VISIBLE_DEVICES=0
-python ./tools/export.py -c ./ppfleetx/configs/nlp/gpt/generation_gpt_345M_single_card.yaml
+python ./tools/export.py -c ./ppfleetx/configs/nlp/gpt/inference_gpt_345M_single_card.yaml \
+-o Engine.save_load.ckpt_dir=./ckpt/PaddleFleetX_GPT_345M_220826/