[Doc] ruff format remaining Python examples (#26795)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@ -60,7 +60,7 @@ from vllm import LLM
|
||||
llm = LLM(
|
||||
"s3://my-bucket/vllm/facebook/opt-125m/v1",
|
||||
load_format="tensorizer",
|
||||
enable_lora=True
|
||||
enable_lora=True,
|
||||
)
|
||||
```
|
||||
|
||||
@ -97,6 +97,6 @@ llm = LLM(
|
||||
"s3://my-bucket/vllm/facebook/opt-125m/v1",
|
||||
load_format="tensorizer",
|
||||
enable_lora=True,
|
||||
model_loader_extra_config={"deserialization_kwargs": {"num_readers": 2}}
|
||||
model_loader_extra_config={"deserialization_kwargs": {"num_readers": 2}},
|
||||
)
|
||||
```
|
||||
|
||||
@ -98,15 +98,15 @@ and automatically applies the model's [chat template](https://huggingface.co/doc
|
||||
conversation = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful assistant"
|
||||
"content": "You are a helpful assistant",
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Hello"
|
||||
"content": "Hello",
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "Hello! How can I assist you today?"
|
||||
"content": "Hello! How can I assist you today?",
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
|
||||
@ -130,8 +130,10 @@ It is designed for embedding models and cross-encoder models. Embedding models u
|
||||
from vllm import LLM
|
||||
|
||||
llm = LLM(model="BAAI/bge-reranker-v2-m3", runner="pooling")
|
||||
(output,) = llm.score("What is the capital of France?",
|
||||
"The capital of Brazil is Brasilia.")
|
||||
(output,) = llm.score(
|
||||
"What is the capital of France?",
|
||||
"The capital of Brazil is Brasilia.",
|
||||
)
|
||||
|
||||
score = output.outputs.score
|
||||
print(f"Score: {score}")
|
||||
@ -209,7 +211,7 @@ For models that support Matryoshka Embeddings but not recognized by vLLM, please
|
||||
|
||||
Here is an example to serve a model with Matryoshka Embeddings enabled.
|
||||
|
||||
```text
|
||||
```bash
|
||||
vllm serve Snowflake/snowflake-arctic-embed-m-v1.5 --hf-overrides '{"matryoshka_dimensions":[256]}'
|
||||
```
|
||||
|
||||
@ -220,11 +222,15 @@ You can change the output dimensions of embedding models that support Matryoshka
|
||||
```python
|
||||
from vllm import LLM, PoolingParams
|
||||
|
||||
llm = LLM(model="jinaai/jina-embeddings-v3",
|
||||
runner="pooling",
|
||||
trust_remote_code=True)
|
||||
outputs = llm.embed(["Follow the white rabbit."],
|
||||
pooling_params=PoolingParams(dimensions=32))
|
||||
llm = LLM(
|
||||
model="jinaai/jina-embeddings-v3",
|
||||
runner="pooling",
|
||||
trust_remote_code=True,
|
||||
)
|
||||
outputs = llm.embed(
|
||||
["Follow the white rabbit."],
|
||||
pooling_params=PoolingParams(dimensions=32),
|
||||
)
|
||||
print(outputs[0].outputs)
|
||||
```
|
||||
|
||||
@ -234,13 +240,13 @@ A code example can be found here: <gh-file:examples/offline_inference/pooling/em
|
||||
|
||||
Use the following command to start vllm server.
|
||||
|
||||
```text
|
||||
```bash
|
||||
vllm serve jinaai/jina-embeddings-v3 --trust-remote-code
|
||||
```
|
||||
|
||||
You can change the output dimensions of embedding models that support Matryoshka Embeddings by using the dimensions parameter.
|
||||
|
||||
```text
|
||||
```bash
|
||||
curl http://127.0.0.1:8000/v1/embeddings \
|
||||
-H 'accept: application/json' \
|
||||
-H 'Content-Type: application/json' \
|
||||
|
||||
@ -278,8 +278,8 @@ https_proxy=http://your.proxy.server:port vllm serve <model_name>
|
||||
```python
|
||||
import os
|
||||
|
||||
os.environ['http_proxy'] = 'http://your.proxy.server:port'
|
||||
os.environ['https_proxy'] = 'http://your.proxy.server:port'
|
||||
os.environ["http_proxy"] = "http://your.proxy.server:port"
|
||||
os.environ["https_proxy"] = "http://your.proxy.server:port"
|
||||
```
|
||||
|
||||
### ModelScope
|
||||
|
||||
Reference in New Issue
Block a user