Commit 9f797b9

feat: support GLM 4.5 family of models
1 parent e4c20b6 commit 9f797b9

File tree: 4 files changed, +5 −5 lines changed


convert_hf_to_gguf.py

Lines changed: 2 additions & 0 deletions
```diff
@@ -6599,6 +6599,8 @@ def set_vocab(self):
             "eos", tokenizer.get_added_vocab()["<|endoftext|>"]
         )
         special_vocab._set_special_token("eot", tokenizer.get_added_vocab()["<|user|>"])
+        special_vocab._set_special_token("eog", tokenizer.get_added_vocab()["<|user|>"])
+        special_vocab._set_special_token("eog", tokenizer.get_added_vocab()["<|observation|>"])
         special_vocab._set_special_token(
             "unk", tokenizer.get_added_vocab()["<|endoftext|>"]
         )
```
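The two added lines register <|user|> and <|observation|> as end-of-generation ("eog") special tokens alongside the existing "eot" mapping, using the IDs that the Hugging Face tokenizer reports for its added vocabulary. Below is a minimal sketch of how those IDs can be inspected before conversion; the checkpoint id is an assumption for illustration, not something this commit specifies.

```python
# Sketch: look up the added-vocab tokens the converter references.
# Assumption: "zai-org/GLM-4.5-Air" stands in for the checkpoint being
# converted; substitute the actual GLM 4.5 model directory or hub id.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("zai-org/GLM-4.5-Air", trust_remote_code=True)
added = tokenizer.get_added_vocab()  # dict: token string -> token id

# The converter indexes these keys directly, so a missing key would raise KeyError.
for tok in ("<|endoftext|>", "<|user|>", "<|observation|>"):
    print(f"{tok!r} -> {added.get(tok)}")
```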

gguf-py/gguf/constants.py

Lines changed: 0 additions & 1 deletion
```diff
@@ -2132,7 +2132,6 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_GATE_SHEXP,
         MODEL_TENSOR.FFN_DOWN_SHEXP,
         MODEL_TENSOR.FFN_UP_SHEXP,
-        MODEL_TENSOR.ATTN_POST_NORM,
     ],
     MODEL_ARCH.BITNET: [
         MODEL_TENSOR.ATTN_Q,
```

src/llama-arch.cpp

Lines changed: 0 additions & 1 deletion
```diff
@@ -1414,7 +1414,6 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
             { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
             { LLM_TENSOR_FFN_UP_SHEXP,   "blk.%d.ffn_up_shexp" },
-            { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
         },
     },
     {
```
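The Python and C++ registries mirror each other: gguf-py's per-architecture tensor list controls which tensors the converter may write, and llama-arch.cpp's name map controls which tensor names the loader recognizes, so the unused post_attention_norm entry is dropped from both. A small sketch of how the Python side expands into concrete tensor-name templates follows; the enum value MODEL_ARCH.GLM4_MOE is an assumption about how this commit registers the GLM 4.5 MoE architecture.

```python
# Sketch: dump the tensor-name templates gguf-py allows for one architecture.
# Assumption: GLM 4.5 MoE is registered as MODEL_ARCH.GLM4_MOE in gguf-py;
# adjust the enum name if the repo registers it differently.
from gguf.constants import MODEL_ARCH, MODEL_TENSORS, TENSOR_NAMES

arch = MODEL_ARCH.GLM4_MOE
for tensor in MODEL_TENSORS[arch]:
    # TENSOR_NAMES holds templates such as "blk.{bid}.ffn_up_shexp",
    # where {bid} is the block (layer) index.
    print(tensor.name, "->", TENSOR_NAMES[tensor])
```

The corresponding C++ map in src/llama-arch.cpp (LLM_TENSOR_NAMES) must list the same tensors with matching "blk.%d.*" format strings, which is why the deletion appears on both sides.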

src/llama-model.cpp

Lines changed: 3 additions & 3 deletions
```diff
@@ -4400,7 +4400,6 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                 layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), { n_embd_v_gqa }, TENSOR_NOT_REQUIRED);

                 layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), { n_embd_head_k * n_head, n_embd }, 0);
-                layer.attn_post_norm = create_tensor(tn(LLM_TENSOR_ATTN_POST_NORM, "weight", i), { n_embd }, 0);

                 // K/Q norm tensors (optional for GLM-4.5 355B variant)
                 layer.attn_q_norm = create_tensor(
@@ -4448,9 +4447,10 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                         create_tensor(tn(LLM_TENSOR_FFN_UP_SHEXP, "weight", i), { n_embd, n_ff_shexp }, 0);
                     }
                 } else {
-                    // Dense layers (first k layers)
+                    // Dense layers (first k layers) - GLM uses separate gate/up projections
+                    layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), { n_embd, n_ff }, 0);
                     layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd }, 0);
-                    layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), { n_embd, n_ff * 2 }, 0);
+                    layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), { n_embd, n_ff }, 0);
                 }
             }
         }
```
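The first hunk drops the per-layer attn_post_norm tensor, consistent with its removal from the tensor registries above. The second hunk affects only the dense (non-MoE) leading layers: rather than one fused ffn_up tensor of width n_ff * 2, GLM 4.5 ships separate ffn_gate and ffn_up matrices of width n_ff each, feeding a SwiGLU-style feed-forward block. A rough NumPy sketch of that computation with the separate projections, using toy sizes that are illustrative only:

```python
# Sketch of a SwiGLU-style FFN with separate gate/up projections, matching
# the shapes created above: gate/up are (n_embd, n_ff), down is (n_ff, n_embd).
# Sizes and random weights are toy values for illustration only.
import numpy as np

def silu(x: np.ndarray) -> np.ndarray:
    return x / (1.0 + np.exp(-x))

n_embd, n_ff = 8, 32                          # toy sizes, not GLM 4.5's real dims
rng = np.random.default_rng(0)
w_gate = rng.standard_normal((n_embd, n_ff))
w_up   = rng.standard_normal((n_embd, n_ff))
w_down = rng.standard_normal((n_ff, n_embd))

x = rng.standard_normal((1, n_embd))          # one token's hidden state
y = (silu(x @ w_gate) * (x @ w_up)) @ w_down  # gate, multiply, project back
print(y.shape)                                # (1, n_embd)
```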
