[Model] Adding Support for Qwen2VL as an Embedding Model. Using MrLight/dse-qwen2-2b-mrl-v1 (#9944)
Signed-off-by: FurtherAI <austin.veselka@lighton.ai> Co-authored-by: FurtherAI <austin.veselka@lighton.ai>
This commit is contained in:
@ -1,33 +1,120 @@
|
||||
import argparse
|
||||
import base64
|
||||
import io
|
||||
|
||||
import requests
|
||||
from PIL import Image
|
||||
|
||||
# Publicly hosted sample image (Wikimedia Commons) used as the image input by the examples below.
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
|
||||
|
||||
response = requests.post(
|
||||
"http://localhost:8000/v1/embeddings",
|
||||
json={
|
||||
"model":
|
||||
"TIGER-Lab/VLM2Vec-Full",
|
||||
"messages": [{
|
||||
|
||||
def vlm2vec():
    """Request an embedding from TIGER-Lab/VLM2Vec-Full and print it.

    Sends one user message (the module-level ``image_url`` plus a short text
    instruction) to the local ``/v1/embeddings`` endpoint, raises if the
    server answers with an HTTP error status, and prints the first embedding
    found in the JSON response.
    """
    image_part = {"type": "image_url", "image_url": {"url": image_url}}
    text_part = {"type": "text", "text": "Represent the given image."}
    payload = {
        "model": "TIGER-Lab/VLM2Vec-Full",
        "messages": [{"role": "user", "content": [image_part, text_part]}],
        "encoding_format": "float",
    }

    reply = requests.post("http://localhost:8000/v1/embeddings", json=payload)
    reply.raise_for_status()

    print("Embedding output:", reply.json()["data"][0]["embedding"])
|
||||
|
||||
|
||||
def dse_qwen2_vl(inp: dict):
    """Request an embedding from MrLight/dse-qwen2-2b-mrl-v1 and print it.

    Args:
        inp: Describes what to embed. ``inp["dtype"]`` selects the mode:
            ``"image"`` embeds the image at ``inp["image_url"]``; any other
            value embeds the text query held in ``inp["content"]``.

    Raises:
        KeyError: If ``inp`` lacks a key needed by the selected mode.
        requests.HTTPError: If the server answers with an error status.
    """
    if inp["dtype"] == "image":
        # Embedding an image
        messages = [{
            "role":
            "user",
            "content": [{
                "type": "image_url",
                "image_url": {
                    "url": inp["image_url"],
                }
            }, {
                "type": "text",
                "text": "What is shown in this image?"
            }]
        }]
    else:
        # Embedding a text query.
        # MrLight/dse-qwen2-2b-mrl-v1 requires a placeholder image
        # of the minimum input size.
        buffer = io.BytesIO()
        Image.new("RGB", (56, 56)).save(buffer, "png")
        placeholder_b64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
        messages = [{
            "role":
            "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {
                        # The bytes are PNG-encoded, so label them as PNG.
                        "url": f"data:image/png;base64,{placeholder_b64}",
                    }
                },
                {
                    "type": "text",
                    "text": f"Query: {inp['content']}"
                },
            ]
        }]

    response = requests.post(
        "http://localhost:8000/v1/embeddings",
        json={
            "model": "MrLight/dse-qwen2-2b-mrl-v1",
            "messages": messages,
            "encoding_format": "float",
        },
    )
    response.raise_for_status()
    response_json = response.json()

    print("Embedding output:", response_json["data"][0]["embedding"])
|
||||
|
||||
|
||||
def parse_args(argv=None):
    """Parse the command line and return the namespace.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        A namespace whose ``model`` attribute is ``"vlm2vec"`` or
        ``"dse_qwen2_vl"``.
    """
    # The text is the description; passed positionally it would become
    # ``prog`` and replace the program name in usage output.
    parser = argparse.ArgumentParser(
        description=(
            "Script to call a specified VLM through the API. Make sure to "
            "serve the model with --task embedding before running this."))
    # Positionals are always required; passing ``required=True`` for one
    # makes argparse raise TypeError.
    parser.add_argument("model",
                        type=str,
                        choices=["vlm2vec", "dse_qwen2_vl"],
                        help="Which model to call.")
    return parser.parse_args(argv)


if __name__ == '__main__':
    args = parse_args()

    if args.model == "vlm2vec":
        vlm2vec()
    elif args.model == "dse_qwen2_vl":
        # First embed an image, then a text query.
        dse_qwen2_vl({
            "dtype": "image",
            "image_url": image_url,
        })
        dse_qwen2_vl({
            "dtype": "text",
            "content": "What is the weather like today?",
        })
|
||||
|
||||
7
examples/template_dse_qwen2_vl.jinja
Normal file
7
examples/template_dse_qwen2_vl.jinja
Normal file
@ -0,0 +1,7 @@
|
||||
{# Chat template: wraps each message as <|im_start|>role ... <|im_end|>; inserts a default system message when the first message is not a system one; replaces image/video content parts with vision placeholder tokens (optionally numbered when add_vision_id is set); always ends the prompt with <|endoftext|>. #}{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}{% raw %}<|im_start|>system
|
||||
You are a helpful assistant.<|im_end|>
|
||||
{% endraw %}{% endif %}<|im_start|>{{ message['role'] }}{% raw %}
|
||||
{% endraw %}{% if message['content'] is string %}{{ message['content'] }}<|im_end|>{% raw %}
|
||||
{% endraw %}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>{% raw %}
|
||||
{% endraw %}{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant{% raw %}
|
||||
{% endraw %}{% endif %}<|endoftext|>
|
||||
Reference in New Issue
Block a user