Conversation

irexyc (Collaborator) commented Sep 30, 2025

usage

lmdeploy serve api_server Qwen/Qwen2.5-1.5B-Instruct --backend pytorch --logprobs-mode raw_logprobs

input/output

from typing import Any, Dict, List, Optional, Union

from pydantic import BaseModel


# /generate input
class GenerateReqInput(BaseModel):
    session_id: Optional[int] = -1
    prompt: Optional[str] = None
    input_ids: Optional[List[int]] = None
    return_logprob: Optional[bool] = None
    max_tokens: int = 128
    stop: Optional[Union[str, List[str]]] = None
    stop_token_ids: Optional[List[int]] = None
    stream: Optional[bool] = False
    temperature: float = 1.0
    repetition_penalty: Optional[float] = 1.0
    ignore_eos: Optional[bool] = False
    top_p: float = 1.0
    top_k: int = 0
    min_p: float = 0.0
    skip_special_tokens: Optional[bool] = True
    spaces_between_special_tokens: Optional[bool] = True
    include_stop_str_in_output: Optional[bool] = False


class GenerateReqMetaOutput(BaseModel):
    finish_reason: Optional[Dict[str, Any]] = None
    output_token_logprobs: Optional[List[tuple[float, int]]] = None  # (logprob, token_id)


# /generate output
class GenerateReqOutput(BaseModel):
    text: str
    output_ids: List[int]
    meta_info: GenerateReqMetaOutput
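
For reference, the input_ids used in the curl examples below correspond to a Qwen2.5 chat prompt rendered by the model's chat template. A minimal sketch of producing such ids with the Hugging Face tokenizer (assuming the user prompt is "Tell me a joke"; the exact ids depend on the tokenizer version and template):

# Sketch: build prompt token ids with the model's chat template.
# Assumption: the user prompt is "Tell me a joke".
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen2.5-1.5B-Instruct')
messages = [{'role': 'user', 'content': 'Tell me a joke'}]
input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
print(input_ids)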

curl http://0.0.0.0:23333/generate \
  -H "Content-Type: application/json" \
  -d '{
    "input_ids": [151644, 8948, 198, 2610, 525, 1207, 16948, 11, 3465, 553, 54364, 14817, 13, 1446, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 872, 198, 72357, 752, 264, 21646, 151645, 198, 151644, 77091, 198],
    "max_tokens": 128,
    "stream": "true",
    "return_logprob": "true",
    "include_stop_str_in_output": "true"
  }'
data: {"text":"Why","output_ids":[10234],"meta_info":{"finish_reason":null,"output_token_logprobs":[[-1.4453125,10234]]}}

data: {"text":" don't scientists trust atoms? Because they make up everything.","output_ids":[1513,944,13923,6950,32199,30,9211,807,1281,705,4297,13],"meta_info":{"finish_reason":null,"output_token_logprobs":[[-1.1171875,1513],[-0.0125732421875,944],[-0.0164794921875,13923],[-0.000347137451171875,6950],[-0.0012969970703125,32199],[-0.5625,30],[-0.07421875,9211],[-0.00064849853515625,807],[-0.03173828125,1281],[-0.0034942626953125,705],[-0.00092315673828125,4297],[-0.0859375,13]]}}

data: {"text":"<|im_end|>","output_ids":[151645],"meta_info":{"finish_reason":{"type":"stop"},"output_token_logprobs":[[-0.01055908203125,151645]]}}

data: [DONE]
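
A minimal Python sketch for consuming the streaming response above and accumulating the per-token logprobs (field names follow the schema above, server address as in the curl examples; the input_ids placeholder must be replaced with real prompt token ids):

import json

import requests

payload = {
    'input_ids': [151644, 8948, 198],  # placeholder: replace with real prompt token ids
    'max_tokens': 128,
    'stream': True,
    'return_logprob': True,
}
total_logprob = 0.0
with requests.post('http://0.0.0.0:23333/generate', json=payload, stream=True) as resp:
    for line in resp.iter_lines():
        # Each SSE event is a "data: ..." line; the stream ends with "data: [DONE]".
        if not line or not line.startswith(b'data: '):
            continue
        chunk = line[len(b'data: '):]
        if chunk == b'[DONE]':
            break
        data = json.loads(chunk)
        print(data['text'], end='', flush=True)
        # output_token_logprobs is a list of (logprob, token_id) pairs, or null.
        for logprob, token_id in (data['meta_info']['output_token_logprobs'] or []):
            total_logprob += logprob
print(f'\ncumulative logprob: {total_logprob:.4f}')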


curl http://0.0.0.0:23333/generate \
  -H "Content-Type: application/json" \
  -d '{
    "input_ids": [151644, 8948, 198, 2610, 525, 1207, 16948, 11, 3465, 553, 54364, 14817, 13, 1446, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 872, 198, 72357, 752, 264, 21646, 151645, 198, 151644, 77091, 198],
    "max_tokens": 128,
    "stream": "true",
    "return_logprob": "true",
    "include_stop_str_in_output": "false"
  }'
data: {"text":"Sure","output_ids":[39814],"meta_info":{"finish_reason":null,"output_token_logprobs":[[-0.44140625,39814]]}}

data: {"text":", here's a joke for you:\n\nWhy don't scientists trust atoms?\n\nBecause","output_ids":[11,1588,594,264,21646,369,498,1447,10234,1513,944,13923,6950,32199,1939,17949],"meta_info":{"finish_reason":null,"output_token_logprobs":[[-0.3515625,11],[-0.005126953125,1588],[-0.03759765625,594],[-0.030029296875,264],[-0.2080078125,21646],[-0.10693359375,369],[-0.00008153915405273438,498],[-0.486328125,1447],[-0.01220703125,10234],[-0.80078125,1513],[-0.01611328125,944],[-0.0179443359375,13923],[-0.0003948211669921875,6950],[-0.0003643035888671875,32199],[-0.049072265625,1939],[-0.00179290771484375,17949]]}}

data: {"text":" they make up everything.","output_ids":[807,1281,705,4297,13],"meta_info":{"finish_reason":null,"output_token_logprobs":[[-0.000507354736328125,807],[-0.005645751953125,1281],[-0.005096435546875,705],[-0.0007171630859375,4297],[-0.65625,13]]}}

data: {"text":"","output_ids":[],"meta_info":{"finish_reason":{"type":"stop"},"output_token_logprobs":null}}

data: [DONE]


curl http://0.0.0.0:23333/generate \
  -H "Content-Type: application/json" \
  -d '{
    "input_ids": [151644, 8948, 198, 2610, 525, 1207, 16948, 11, 3465, 553, 54364, 14817, 13, 1446, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 872, 198, 72357, 752, 264, 21646, 151645, 198, 151644, 77091, 198],
    "max_tokens": 128,
    "stream": "false",
    "return_logprob": "true",
    "include_stop_str_in_output": "true"
  }'

{"text":"Sure! Here's a short and not-so-obnoxious joke:\n\nWhy was the math book sad?\nBecause it had too many problems!<|im_end|>","output_ids":[39814,0,5692,594,264,2805,323,537,33019,92449,94732,21646,1447,10234,572,279,6888,2311,12421,5267,17949,432,1030,2238,1657,5322,0,151645],"meta_info":{"finish_reason":{"type":"stop"},"output_token_logprobs":[[-0.44140625,39814],[-1.2265625,0],[-0.01092529296875,5692],[-0.049072265625,594],[-0.033935546875,264],[-3.1875,2805],[-2.625,323],[-4.9375,537],[-1.5234375,33019],[-4.28125,92449],[-2.46875,94732],[-0.02490234375,21646],[-2.46875,1447],[-0.01123046875,10234],[-2.3125,572],[-0.00299072265625,279],[-0.005645751953125,6888],[-0.00023174285888671875,2311],[-0.059814453125,12421],[-0.86328125,5267],[-0.01373291015625,17949],[-0.0002231597900390625,432],[-0.001495361328125,1030],[-0.00439453125,2238],[-0.0002918243408203125,1657],[-0.0003757476806640625,5322],[-0.921875,0],[-0.028564453125,151645]]}}
