Skip to content

Commit 01a4150

Browse files
author
John
committed
Support for OpenBuddy with finetune
It's a bit of a hack but works for now. Batched mode fails; this will be fixed later.
1 parent 60f82ca commit 01a4150

File tree

4 files changed

+29
-3
lines changed

4 files changed

+29
-3
lines changed

examples/falcon/falcon_main.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,20 @@ int main(int argc, char ** argv) {
340340
inp_system_baseline = ::falcon_tokenize(ctx, ">>INTRODUCTION<<"+params.system_baseline_prompt+"\n", false);
341341
}
342342
break;
343+
case FINETUNE_OPENBUDDY:
344+
inp_pfx = ::falcon_tokenize(ctx, "User: ", false);
345+
inp_sfx = ::falcon_tokenize(ctx, "\nAssistant:", false);
346+
if (!params.system_prompt.size())
347+
{
348+
inp_system = ::falcon_tokenize(ctx, "Consider a conversation between User (a human) and Assistant (named Buddy).\nBuddy is an INTP-T, a friendly, intelligent and multilingual AI assistant, by OpenBuddy team, running on https://github.com/cmp-nct/ggllm.cpp\nBuddy cannot access the Internet.\nBuddy can fluently speak the user's language (e.g. English, Chinese).\nBuddy can generate poems, stories, code, essays, songs, and more.\nBuddy possesses knowledge about the world, history, and culture, but not everything. Knowledge cutoff: 2021-09.\nBuddy's responses are always positive, unharmful, safe, creative, high-quality, human-like, and interesting.\n\nUser: Hi.\nAssistant: Hi, I'm Buddy, your AI assistant. How can I help you today?\n", false);
349+
} else
350+
if (params.system_prompt.size() &&!params.sys_prompt_is_raw)
351+
{
352+
inp_system = ::falcon_tokenize(ctx, ">>INTRODUCTION<<"+params.system_prompt+"\n", false);
353+
if(!params.sys_prompt_simple)
354+
inp_system_baseline = ::falcon_tokenize(ctx, ">>INTRODUCTION<<"+params.system_baseline_prompt+"\n", false);
355+
}
356+
break;
343357
case FINETUNE_FALCONINSTRUCT:
344358
inp_pfx = ::falcon_tokenize(ctx, "User: ", false);
345359
inp_sfx = ::falcon_tokenize(ctx, "\nAssistant:", false); // must not include space

falcon_convert.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,15 @@ def bytes_to_unicode():
108108

109109
for i in range(hparams["vocab_size"]):
110110
if i in reverse_vocab:
111-
text = bytearray([byte_decoder[c] for c in reverse_vocab[i]])
111+
try:
112+
text = bytearray([byte_decoder[c] for c in reverse_vocab[i]])
113+
except KeyError:
114+
text = bytearray()
115+
for c in reverse_vocab[i]:
116+
if ord(c) < 256: # single byte character
117+
text.append(byte_decoder[ord(c)])
118+
else: # multibyte special token character
119+
text.extend(c.encode('utf-8'))
112120
else:
113121
print(f"Key {i} not in tokenizer vocabulary. Padding with an arbitrary token.")
114122
padding_token = f"[PAD{i}]".encode("utf8")

libfalcon.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1497,6 +1497,10 @@ t_finetune_type falcon_detect_finetune(falcon_context * ctx, std::string model_p
14971497
return FINETUNE_OPENASSISTANT;
14981498
}
14991499
}
1500+
if (ctx->vocab.id_to_token.size() == 70144)
1501+
{
1502+
return FINETUNE_OPENBUDDY;
1503+
}
15001504
if (model_lower.find("wizard") != std::string::npos) {
15011505
return FINETUNE_WIZARD;
15021506
}

libfalcon.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -268,8 +268,8 @@ extern "C" {
268268

269269
// Token Id -> String. Uses the vocabulary in the provided context
270270
LLAMA_API const char * falcon_token_to_str(const struct falcon_context * ctx, falcon_token token);
271-
typedef enum { FINETUNE_UNSPECIFIED, FINETUNE_NONE, FINETUNE_ALPACA, FINETUNE_OPENASSISTANT, FINETUNE_OPENASSIST_V1, FINETUNE_WIZARD, FINETUNE_FALCONINSTRUCT } t_finetune_type;
272-
static const char *FINETUNE_NAME[7] = { "UNSPECIFIED", "NONE", "ALPACA", "OPENASSISTANT", "OPENASSIST_V1", "WIZARD", "FALCONINSTRUCT" };
271+
// Identifies the finetune type. Enumerators take the implicit values 0..7 in
// declaration order; append new entries at the end so existing values stay stable.
typedef enum {
    FINETUNE_UNSPECIFIED,    // 0
    FINETUNE_NONE,           // 1
    FINETUNE_ALPACA,         // 2
    FINETUNE_OPENASSISTANT,  // 3
    FINETUNE_OPENASSIST_V1,  // 4
    FINETUNE_WIZARD,         // 5
    FINETUNE_FALCONINSTRUCT, // 6
    FINETUNE_OPENBUDDY       // 7
} t_finetune_type;
272+
// Display names, one per entry and 8 in total. The order mirrors the
// t_finetune_type enumerators, so keep both lists in sync when adding a type.
static const char *FINETUNE_NAME[8] = {
    "UNSPECIFIED",
    "NONE",
    "ALPACA",
    "OPENASSISTANT",
    "OPENASSIST_V1",
    "WIZARD",
    "FALCONINSTRUCT",
    "OPENBUDDY"
};
273273

274274

275275

0 commit comments

Comments
 (0)