|
38 | 38 | CompletionResponse, CompletionResponseChoice,
|
39 | 39 | CompletionResponseStreamChoice, CompletionStreamResponse, DeltaMessage,
|
40 | 40 | EmbeddingsRequest, EncodeRequest, EncodeResponse, ErrorResponse,
|
41 |
| - GenerateReqInput, GenerateReqOutput, GenerateRequest, LogProbs, ModelCard, |
42 |
| - ModelList, ModelPermission, PoolingRequest, PoolingResponse, TopLogprob, |
43 |
| - UpdateParamsRequest, UsageInfo) |
| 41 | + GenerateReqInput, GenerateReqMetaOutput, GenerateReqOutput, GenerateRequest, |
| 42 | + LogProbs, ModelCard, ModelList, ModelPermission, PoolingRequest, |
| 43 | + PoolingResponse, TopLogprob, UpdateParamsRequest, UsageInfo) |
44 | 44 | from lmdeploy.serve.openai.reasoning_parser.reasoning_parser import ReasoningParser, ReasoningParserManager
|
45 | 45 | from lmdeploy.serve.openai.tool_parser.tool_parser import ToolParser, ToolParserManager
|
46 | 46 | from lmdeploy.tokenizer import DetokenizeState, Tokenizer
|
@@ -937,47 +937,51 @@ async def generate(request: GenerateReqInput, raw_request: Request = None):
|
937 | 937 | do_preprocess=False,
|
938 | 938 | )
|
939 | 939 |
|
940 |
| - def create_generate_response_json(text, gen_tokens, logprobs, finish_reason): |
941 |
| - response = GenerateReqOutput( |
942 |
| - text=text, |
943 |
| - gen_tokens=gen_tokens, |
944 |
| - logprobs=logprobs or None, |
945 |
| - finish_reason=finish_reason, |
946 |
| - ) |
def create_finish_reason(finish_reason):
    """Map an engine finish-reason string to the response schema's dict form.

    Returns ``None`` for a falsy reason (generation still running), a
    ``{'type': ...}`` dict for 'length'/'stop', and ``{'type': 'abort'}``
    for anything else.
    """
    # TODO: add detail info
    if not finish_reason:
        return None
    # Known reasons map 1:1; every other non-empty reason is reported as abort.
    known = {'length': dict(type='length'), 'stop': dict(type='stop')}
    return known.get(finish_reason, dict(type='abort'))
| 949 | + |
def create_generate_response_json(text, output_ids, logprobs, finish_reason):
    """Serialize one generation step into the /generate response JSON string.

    ``logprobs`` is a list of ``(logprob, token_id)`` pairs; an empty list is
    normalized to ``None`` in the meta info.
    """
    meta_info = GenerateReqMetaOutput(
        finish_reason=create_finish_reason(finish_reason),
        output_token_logprobs=logprobs or None,
    )
    out = GenerateReqOutput(text=text, output_ids=output_ids, meta_info=meta_info)
    return out.model_dump_json()
|
948 | 955 |
|
async def generate_stream_generator():
    """Yield one SSE event per engine step, then the ``[DONE]`` sentinel.

    Each event carries only the step's incremental text/tokens (not the
    accumulated response), serialized via ``create_generate_response_json``.
    """
    async for step in result_generator:
        step_logprobs = []
        if step.logprobs:
            # Pair each sampled token with its own logprob as (logprob, token_id).
            step_logprobs = [(lp[tok], tok) for tok, lp in zip(step.token_ids, step.logprobs)]
        payload = create_generate_response_json(step.response or '', step.token_ids, step_logprobs,
                                                step.finish_reason)
        yield f'data: {payload}\n\n'
    yield 'data: [DONE]\n\n'
|
961 | 967 |
|
962 | 968 | if request.stream:
|
963 | 969 | return StreamingResponse(generate_stream_generator(), media_type='text/event-stream')
|
964 | 970 |
|
965 | 971 | text = ''
|
966 |
| - gen_tokens = [] |
| 972 | + output_ids = [] |
967 | 973 | logprobs = []
|
968 | 974 | async for res in result_generator:
|
969 | 975 | text += res.response or ''
|
970 |
| - gen_tokens.extend(res.token_ids or []) |
| 976 | + output_ids.extend(res.token_ids or []) |
971 | 977 | if res.logprobs:
|
972 | 978 | for tok, tok_logprobs in zip(res.token_ids, res.logprobs):
|
973 |
| - logprobs.append((tok, tok_logprobs[tok])) |
| 979 | + logprobs.append((tok_logprobs[tok], tok)) |
974 | 980 |
|
975 |
| - response = GenerateReqOutput( |
976 |
| - text=text, |
977 |
| - gen_tokens=gen_tokens, |
978 |
| - logprobs=logprobs or None, |
979 |
| - finish_reason=res.finish_reason, |
980 |
| - ) |
| 981 | + response = GenerateReqOutput(text=text, |
| 982 | + output_ids=output_ids, |
| 983 | + meta_info=GenerateReqMetaOutput(finish_reason=create_finish_reason(res.finish_reason), |
| 984 | + output_token_logprobs=logprobs or None)) |
981 | 985 | return response
|
982 | 986 |
|
983 | 987 |
|
|
0 commit comments