File tree Expand file tree Collapse file tree 1 file changed +21
-10
lines changed
examples/offline_inference Expand file tree Collapse file tree 1 file changed +21
-10
lines changed Original file line number Diff line number Diff line change 22
22
# If you want to load the official original version, the init parameters are
23
23
# as follows.
24
24
25
- model = LLM (
26
- model = model_name ,
27
- task = "score" ,
28
- hf_overrides = {
29
- "architectures" : ["Qwen3ForSequenceClassification" ],
30
- "classifier_from_token" : ["no" , "yes" ],
31
- "is_original_qwen3_reranker" : True ,
32
- },
33
- )
25
+
26
+ def get_model ():
27
+ return LLM (
28
+ model = model_name ,
29
+ task = "score" ,
30
+ hf_overrides = {
31
+ "architectures" : ["Qwen3ForSequenceClassification" ],
32
+ "classifier_from_token" : ["no" , "yes" ],
33
+ "is_original_qwen3_reranker" : True ,
34
+ },
35
+ )
36
+
34
37
35
38
# Why do we need hf_overrides for the official original version:
36
39
# vllm converts it to Qwen3ForSequenceClassification when loaded for
51
54
query_template = "{prefix}<Instruct>: {instruction}\n <Query>: {query}\n "
52
55
document_template = "<Document>: {doc}{suffix}"
53
56
54
- if __name__ == "__main__" :
57
+
58
+ def main ():
55
59
instruction = (
56
60
"Given a web search query, retrieve relevant passages that answer the query"
57
61
)
72
76
]
73
77
documents = [document_template .format (doc = doc , suffix = suffix ) for doc in documents ]
74
78
79
+ model = get_model ()
75
80
outputs = model .score (queries , documents )
76
81
82
+ print ("-" * 30 )
77
83
print ([output .outputs .score for output in outputs ])
84
+ print ("-" * 30 )
85
+
86
+
87
+ if __name__ == "__main__" :
88
+ main ()
You can’t perform that action at this time.
0 commit comments