diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_benchmark.sh b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_benchmark.sh
index 63e65c64f46..2fc7f46ca00 100644
--- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_benchmark.sh
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_benchmark.sh
@@ -111,14 +111,14 @@ function run_benchmark {
     elif [ "${topology}" = "opt_125m_woq_autotune_int4" ]; then
         model_name_or_path="facebook/opt-125m"
     elif [ "${topology}" = "llama3_1_8b_gptq_int4_hf" ]; then
-        if [[ ${int8} == "true" ]]; then
+        if [[ ${optimized} == "true" ]]; then
             model_name_or_path=${tuned_checkpoint}
         else
             model_name_or_path="/mnt/weka/data/pytorch/llama3.1/Meta-Llama-3.1-8B-Instruct/"
         fi
         extra_cmd=$extra_cmd" --woq_algo GPTQ --use_hf_format"
     elif [ "${topology}" = "llama2_70b_gptq_int4_hf" ]; then
-        if [[ ${int8} == "true" ]]; then
+        if [[ ${optimized} == "true" ]]; then
             model_name_or_path=${tuned_checkpoint}
         else
             model_name_or_path="/mnt/weka/data/pytorch/llama2/Llama-2-70b-hf"
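
Context for the rename: the diff swaps the ${int8} gate for ${optimized} when choosing between the tuned checkpoint and the original model path. Below is a minimal sketch of the flag parsing this change presumes, assuming run_benchmark.sh reads CLI flags through an init_params case loop as the sibling example scripts do; the exact flag names and helper structure here are an assumption, not copied from this PR.

# Hedged sketch (assumption): --optimized replaces the old --int8 flag and is
# parsed into the ${optimized} variable tested in run_benchmark above.
function init_params {
  for var in "$@"
  do
    case $var in
      --topology=*)
          topology=$(echo "$var" | cut -f2 -d=)
      ;;
      --tuned_checkpoint=*)
          tuned_checkpoint=$(echo "$var" | cut -f2 -d=)
      ;;
      --optimized=*)
          # was: --int8=*) int8=... before this rename
          optimized=$(echo "$var" | cut -f2 -d=)
      ;;
      *)
          # ignore unrecognized flags in this sketch
      ;;
    esac
  done
}

# Hypothetical invocation: benchmark the tuned (quantized) checkpoint.
# bash run_benchmark.sh --topology=llama3_1_8b_gptq_int4_hf \
#      --optimized=true --tuned_checkpoint=./saved_results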