[Misc] IO Processor plugins for pooling models (#22820)
Signed-off-by: Christian Pinto <christian.pinto@ibm.com> Signed-off-by: Max de Bayser <mbayser@br.ibm.com> Co-authored-by: Max de Bayser <mbayser@br.ibm.com>
This commit is contained in:
@ -6,7 +6,8 @@
|
||||
import json
|
||||
import time
|
||||
from http import HTTPStatus
|
||||
from typing import Annotated, Any, ClassVar, Literal, Optional, Union
|
||||
from typing import (Annotated, Any, ClassVar, Generic, Literal, Optional,
|
||||
TypeVar, Union)
|
||||
|
||||
import regex as re
|
||||
import torch
|
||||
@ -1405,7 +1406,46 @@ EmbeddingRequest = Union[EmbeddingCompletionRequest, EmbeddingChatRequest]
|
||||
|
||||
PoolingCompletionRequest = EmbeddingCompletionRequest
|
||||
PoolingChatRequest = EmbeddingChatRequest
|
||||
PoolingRequest = Union[PoolingCompletionRequest, PoolingChatRequest]
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
# Request model for pooling requests handled through an IOProcessor plugin.
# The payload type is left generic (``T``) because the plugin, not the
# server, is responsible for interpreting the request data.
class IOProcessorRequest(OpenAIBaseModel, Generic[T]):
    model: Optional[str] = None

    priority: int = Field(default=0)
    """
    The priority of the request (lower means earlier handling;
    default: 0). Any priority other than 0 will raise an error
    if the served model does not use priority scheduling.
    """
    data: T
    """
    When using IOProcessor plugins, the actual input is processed
    by the plugin itself. Hence, we use a generic type for the request data.
    """

    def to_pooling_params(self) -> "PoolingParams":
        """Return the pooling parameters for this request.

        The task is always ``"encode"``; no field of the request is
        consulted when building the parameters.
        """
        return PoolingParams(task="encode")
|
||||
|
||||
|
||||
# Response model for pooling requests handled through an IOProcessor plugin.
# The payload type is left generic (``T``) because the plugin, not the
# server, produces the response data.
class IOProcessorResponse(OpenAIBaseModel, Generic[T]):

    request_id: Optional[str] = None
    """
    The request_id associated with this response
    """
    # Defaults to the current time in whole seconds (int(time.time()))
    # at the moment the response object is created.
    created_at: int = Field(default_factory=lambda: int(time.time()))

    data: T
    """
    When using IOProcessor plugins, the actual output is generated
    by the plugin itself. Hence, we use a generic type for the response data.
    """
|
||||
|
||||
|
||||
# A pooling request is a completion-style request, a chat-style request,
# or a plugin-defined IOProcessor request.
PoolingRequest = Union[PoolingCompletionRequest, PoolingChatRequest,
                       IOProcessorRequest]
|
||||
|
||||
|
||||
class ScoreRequest(OpenAIBaseModel):
|
||||
|
||||
Reference in New Issue
Block a user