@@ -129,17 +129,17 @@ def create_error_response(status: HTTPStatus, message: str, error_type='invalid_
129
129
async def check_request(request) -> Optional[JSONResponse]:
    """Validate the model name and sampling fields of an incoming request.

    Each check runs only when ``request`` actually has the corresponding
    attribute, so request types carrying different subsets of these fields
    can share this validator. Checks are evaluated in order and the first
    failure wins.

    Args:
        request: The request object. The attributes ``model``, ``n``,
            ``top_p``, ``top_k`` and ``temperature`` are inspected when
            present.

    Returns:
        The value produced by ``create_error_response`` for the first failing
        check (``NOT_FOUND`` for an unknown model, ``BAD_REQUEST`` for an
        out-of-range sampling field), or ``None`` when every check passes.
    """
    # NOTE(review): the numeric comparisons below raise TypeError if a field
    # is present but set to None -- confirm the request models never allow
    # None for these fields, or guard for it at the call sites.

    # Offending values are rendered with ``!r`` so that strings show up quoted
    # and an empty or whitespace-only value is still visible in the message.
    if hasattr(request, 'model') and request.model not in get_model_list():
        return create_error_response(HTTPStatus.NOT_FOUND, f'The model {request.model!r} does not exist.')

    if hasattr(request, 'n') and request.n <= 0:
        return create_error_response(HTTPStatus.BAD_REQUEST, f'The n {request.n!r} must be a positive int.')

    # top_p must lie in the half-open interval (0, 1].
    if hasattr(request, 'top_p') and not 0 < request.top_p <= 1:
        return create_error_response(HTTPStatus.BAD_REQUEST, f'The top_p {request.top_p!r} must be in (0, 1].')

    if hasattr(request, 'top_k') and request.top_k < 0:
        return create_error_response(HTTPStatus.BAD_REQUEST,
                                     f'The top_k {request.top_k!r} cannot be a negative integer.')

    # temperature must lie in the closed interval [0, 2].
    if hasattr(request, 'temperature') and not 0 <= request.temperature <= 2:
        return create_error_response(HTTPStatus.BAD_REQUEST,
                                     f'The temperature {request.temperature!r} must be in [0, 2]')

    return None
144
144
145
145
@@ -315,8 +315,8 @@ async def chat_completions_v1(request: ChatCompletionRequest, raw_request: Reque
315
315
1.0 means no penalty
316
316
- stop (str | List[str] | None): To stop generating further
317
317
tokens. Only accept stop words that are encoded to one token index.
318
- - response_format (Dict | None): Only pytorch backend support formatting
319
- response . Examples: `{"type": "json_schema", "json_schema": {"name":
318
+ - response_format (Dict | None): To generate response according to given
319
+ schema . Examples: `{"type": "json_schema", "json_schema": {"name":
320
320
"test","schema": {"properties": {"name": {"type": "string"}},
321
321
"required": ["name"], "type": "object"}}}`
322
322
or `{"type": "regex_schema", "regex_schema": "call me [A-Za-z]{1,10}"}`
@@ -365,7 +365,7 @@ async def chat_completions_v1(request: ChatCompletionRequest, raw_request: Reque
365
365
if error_check_ret is not None :
366
366
return error_check_ret
367
367
if VariableInterface .async_engine .id2step .get (request .session_id , 0 ) != 0 :
368
- return create_error_response (HTTPStatus .BAD_REQUEST , f'The session_id ` { request .session_id } ` is occupied.' )
368
+ return create_error_response (HTTPStatus .BAD_REQUEST , f'The session_id { request .session_id !r } is occupied.' )
369
369
370
370
model_name = request .model
371
371
adapter_name = None
@@ -385,8 +385,6 @@ async def chat_completions_v1(request: ChatCompletionRequest, raw_request: Reque
385
385
gen_logprobs = request .top_logprobs
386
386
response_format = None
387
387
if request .response_format and request .response_format .type != 'text' :
388
- if VariableInterface .async_engine .backend != 'pytorch' :
389
- return create_error_response (HTTPStatus .BAD_REQUEST , 'only pytorch backend can use response_format now' )
390
388
response_format = request .response_format .model_dump ()
391
389
392
390
if request .logit_bias is not None :
@@ -717,7 +715,7 @@ async def completions_v1(request: CompletionRequest, raw_request: Request = None
717
715
if error_check_ret is not None :
718
716
return error_check_ret
719
717
if VariableInterface .async_engine .id2step .get (request .session_id , 0 ) != 0 :
720
- return create_error_response (HTTPStatus .BAD_REQUEST , f'The session_id ` { request .session_id } ` is occupied.' )
718
+ return create_error_response (HTTPStatus .BAD_REQUEST , f'The session_id { request .session_id !r } is occupied.' )
721
719
722
720
model_name = request .model
723
721
adapter_name = None
@@ -1325,8 +1323,8 @@ def serve(model_path: str,
1325
1323
VariableInterface .proxy_url = proxy_url
1326
1324
VariableInterface .api_server_url = f'{ http_or_https } ://{ server_name } :{ server_port } ' # noqa
1327
1325
for i in range (3 ):
1328
- print (f'HINT: Please open \033 [93m\033 [1m{ http_or_https } ://'
1329
- f'{ server_name } :{ server_port } \033 [0m in a browser for detailed api'
1326
+ print (f'HINT: Please open \033 [93m\033 [1m{ http_or_https } ://' # noqa: E231
1327
+ f'{ server_name } :{ server_port } \033 [0m in a browser for detailed api' # noqa: E231
1330
1328
' usage!!!' )
1331
1329
uvicorn .run (app = app ,
1332
1330
host = server_name ,
0 commit comments