[Doc]: fix typos in Python comments (#24026)
Signed-off-by: Didier Durand <durand.didier@gmail.com>
@@ -23,7 +23,7 @@ def create_test_prompts(
     2 requests for base model, 4 requests for the LoRA. We define 2
     different LoRA adapters (using the same model for demo purposes).
     Since we also set `max_loras=1`, the expectation is that the requests
-    with the second LoRA adapter will be ran after all requests with the
+    with the second LoRA adapter will be run after all requests with the
     first adapter have finished.
     """
     return [

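For context on the scheduling behavior this docstring describes, here is a minimal, hypothetical sketch of a mixed base-plus-LoRA batch using vLLM's offline `LLMEngine` API. The model name and adapter paths are placeholders, and exact signatures may differ across vLLM versions; `max_loras=1` is what forces the second adapter's requests to wait.

```python
# Hedged sketch: two LoRA adapters with max_loras=1, so requests for the
# second adapter are scheduled only after the first adapter's requests finish.
from vllm import EngineArgs, LLMEngine, SamplingParams
from vllm.lora.request import LoRARequest

engine = LLMEngine.from_engine_args(
    EngineArgs(model="meta-llama/Llama-2-7b-hf",  # placeholder model
               enable_lora=True,
               max_loras=1,  # at most one adapter resident per batch
               max_lora_rank=8))

params = SamplingParams(temperature=0.0, max_tokens=64)
lora_a = LoRARequest("adapter-a", 1, "/path/to/adapter_a")  # placeholder path
lora_b = LoRARequest("adapter-b", 2, "/path/to/adapter_b")  # placeholder path

engine.add_request("0", "Base-model prompt", params)  # no LoRA
engine.add_request("1", "Prompt for adapter A", params, lora_request=lora_a)
engine.add_request("2", "Prompt for adapter B", params, lora_request=lora_b)

while engine.has_unfinished_requests():
    for out in engine.step():
        if out.finished:
            print(out.outputs[0].text)
```
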
@@ -31,7 +31,7 @@ class PyNcclCommunicator:
         group: the process group to work on. If None, it will use the
             default process group.
         device: the device to bind the PyNcclCommunicator to. If None,
-            it will be bind to f"cuda:{local_rank}".
+            it will be bound to f"cuda:{local_rank}".
         library_path: the path to the NCCL library. If None, it will
             use the default library path.
         It is the caller's responsibility to make sure each communicator

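The documented default ("bound to f"cuda:{local_rank}"") boils down to a one-liner; a sketch assuming a helper of this shape, not the actual constructor logic:

```python
import torch

# Sketch only: the documented default, not PyNcclCommunicator's real code.
def resolve_device(device: torch.device | None, local_rank: int) -> torch.device:
    if device is None:
        # No device given: bind to this process's GPU by local rank.
        return torch.device(f"cuda:{local_rank}")
    return device
```
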
@@ -939,8 +939,8 @@ def get_pipeline_model_parallel_group():
 def graph_capture(device: torch.device):
     """
     `graph_capture` is a context manager which should surround the code that
-    is capturing the CUDA graph. Its main purpose is to ensure that the
-    some operations will be run after the graph is captured, before the graph
+    is capturing the CUDA graph. Its main purpose is to ensure that some
+    operations will be run after the graph is captured, before the graph
     is replayed. It returns a `GraphCaptureContext` object which contains the
     necessary data for the graph capture. Currently, it only contains the
     stream that the graph capture is running on. This stream is set to the

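The docstring's contract — hand the caller the capture stream, then run some work after capture and before replay — follows the usual context-manager pattern. A simplified sketch, not vLLM's actual implementation:

```python
from contextlib import contextmanager
from dataclasses import dataclass

import torch

@dataclass
class GraphCaptureContext:
    stream: torch.cuda.Stream  # the stream graph capture runs on

@contextmanager
def graph_capture(device: torch.device):
    stream = torch.cuda.Stream(device=device)
    try:
        # Caller captures the CUDA graph on `ctx.stream` inside the with-block.
        yield GraphCaptureContext(stream)
    finally:
        # Runs after the graph is captured, before it is replayed: e.g. make
        # the default stream wait on the capture stream.
        torch.cuda.current_stream(device).wait_stream(stream)
```
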
@@ -165,7 +165,7 @@ class PythonicToolParser(ToolParser):
                         index] += delta.function.arguments

             # HACK: serving_chat.py inspects the internal state of tool parsers
-            # when determining it's final streaming delta, automatically
+            # when determining its final streaming delta, automatically
             # adding autocompleted JSON.
             # These two lines avoid that nonsense while ensuring finish_reason
             # is set to tool_calls when at least one tool is called.

@@ -7,7 +7,7 @@ import torch.nn.functional as F

 def _histogram(input: torch.Tensor, min: int, max: int) -> torch.Tensor:
     """
-    Compute the histogram of a int32 tensor. The bin edges are defined by the
+    Compute the histogram of an int32 tensor. The bin edges are defined by the
     min and max values, with step = 1.
     """
     assert input.dtype == torch.int32, "input must be of torch.int32 dtype."

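Since the bins have step = 1, the documented behavior matches a shifted `torch.bincount`; a sketch of that equivalence, assuming all input values lie in [min, max]:

```python
import torch

def _histogram_sketch(input: torch.Tensor, min: int, max: int) -> torch.Tensor:
    # One bin per integer in [min, max]: count occurrences of shifted values.
    assert input.dtype == torch.int32, "input must be of torch.int32 dtype."
    return torch.bincount(input - min, minlength=max - min + 1)

counts = _histogram_sketch(torch.tensor([1, 1, 3], dtype=torch.int32), 0, 4)
# counts == tensor([0, 2, 0, 1, 0])
```
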
@@ -544,7 +544,7 @@ class Ovis(nn.Module, SupportsMultiModal, SupportsPP):
                 vision_embeddings)
             input_ids = None

-            # up until here we have a inputs_embeds 100% numerical identity
+            # up until here we have an inputs_embeds 100% numerical identity
             # between the OG HF Transformers implementation and ours
         hidden_states = self.llm(
             input_ids=input_ids,

@@ -43,7 +43,7 @@ class ConformerEncoderLayer(nn.Module):
             if set different to 0, the number of
             depthwise_seperable_out_channel will be used as a
             channel_out of the second conv1d layer.
-            otherwise, it equal to 0, the second conv1d layer is skipped.
+            otherwise, it equals to 0, the second conv1d layer is skipped.
         depthwise_multiplier: int
             number of input_dim channels duplication. this value
             will be used to compute the hidden channels of the Conv1D.

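The parameters above describe a depthwise conv1d (channels multiplied by `depthwise_multiplier`) optionally followed by a pointwise conv1d when `depthwise_seperable_out_channel` != 0. A sketch of that structure, not the vLLM class itself:

```python
import torch.nn as nn

# Illustrative sketch of the parameterization described in the docstring.
def make_depthwise_separable(input_dim: int,
                             depthwise_seperable_out_channel: int,
                             kernel_size: int,
                             depthwise_multiplier: int) -> nn.Sequential:
    layers = [
        # Depthwise: each input channel is duplicated depthwise_multiplier
        # times (groups=input_dim keeps the channels independent).
        nn.Conv1d(input_dim,
                  input_dim * depthwise_multiplier,
                  kernel_size,
                  padding=(kernel_size - 1) // 2,
                  groups=input_dim),
    ]
    if depthwise_seperable_out_channel != 0:
        # Pointwise: project to the requested channel_out; skipped when 0.
        layers.append(nn.Conv1d(input_dim * depthwise_multiplier,
                                depthwise_seperable_out_channel, 1))
    return nn.Sequential(*layers)
```
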
@@ -115,7 +115,7 @@ class ConformerEncoderLayer(nn.Module):
             we recalculate activation in backward.
             default "".
         export: bool, optional
-            if set to True, it remove the padding from convolutional layers
+            if set to True, it removes the padding from convolutional layers
             and allow the onnx conversion for inference.
             default False.
         use_pt_scaled_dot_product_attention: bool, optional

@@ -686,7 +686,7 @@ class ConformerEncoder(TransformerEncoderBase):
             only work for glu_in_attention !=0
             default "swish".
         export: bool, optional
-            if set to True, it remove the padding from convolutional layers
+            if set to True, it removes the padding from convolutional layers
             and allow the onnx conversion for inference.
             default False.
         activation_checkpointing: str, optional

@@ -258,7 +258,7 @@ class DepthWiseSeperableConv1d(nn.Module):
             if set different to 0, the number of
             depthwise_seperable_out_channel will be used as a channel_out
             of the second conv1d layer.
-            otherwise, it equal to 0, the second conv1d layer is skipped.
+            otherwise, it equals to 0, the second conv1d layer is skipped.
         kernel_size: int
             kernel_size
         depthwise_multiplier: int

vllm/third_party/pynvml.py (vendored)

@@ -1022,7 +1022,7 @@ def _extractNVMLErrorsAsClasses():
     Each NVML Error gets a new NVMLError subclass. This way try,except blocks can filter appropriate
     exceptions more easily.

-    NVMLError is a parent class. Each NVML_ERROR_* gets it's own subclass.
+    NVMLError is a parent class. Each NVML_ERROR_* gets its own subclass.
     e.g. NVML_ERROR_ALREADY_INITIALIZED will be turned into NVMLError_AlreadyInitialized
     '''
     this_module = sys.modules[__name__]

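The scheme the docstring describes — minting one subclass per NVML_ERROR_* code so `except` clauses can be specific — can be sketched roughly as follows; the registry dict and helper names are illustrative, not pynvml's actual internals:

```python
import sys

# Simplified sketch of per-error-code exception subclassing.
class NVMLError(Exception):
    _value_to_class: dict = {}

    def __new__(cls, value):
        # Construct the specific subclass registered for this code, if any.
        subclass = cls._value_to_class.get(value, cls)
        return super().__new__(subclass)

def _make_error_subclass(name: str, value: int) -> None:
    # e.g. NVML_ERROR_ALREADY_INITIALIZED -> NVMLError_AlreadyInitialized
    camel = "".join(part.capitalize()
                    for part in name[len("NVML_ERROR_"):].split("_"))
    new_cls = type(f"NVMLError_{camel}", (NVMLError,), {})
    NVMLError._value_to_class[value] = new_cls
    setattr(sys.modules[__name__], new_cls.__name__, new_cls)

_make_error_subclass("NVML_ERROR_ALREADY_INITIALIZED", 5)
assert type(NVMLError(5)).__name__ == "NVMLError_AlreadyInitialized"
```
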
@@ -26,7 +26,7 @@ logger = logging.get_logger(__name__)
 class NemotronConfig(PretrainedConfig):
     r"""
     This is the configuration class to store the configuration of a
-    [`NemotronModel`]. It is used to instantiate an Nemotron model
+    [`NemotronModel`]. It is used to instantiate a Nemotron model
     according to the specified arguments, defining the model architecture.
     Instantiating a configuration with the defaults will yield a similar
     configuration to that of the Nemotron-8B.

@@ -38,7 +38,7 @@ class NemotronHConfig(PretrainedConfig):
             passed when calling [`NemotronHModel`]
         tie_word_embeddings (`bool`, *optional*, defaults to `False`):
             Whether the model's input and output word embeddings should be
-            tied. Note that this is only relevant if the model has a output
+            tied. Note that this is only relevant if the model has an output
             word embedding layer.
         hidden_size (`int`, *optional*, defaults to 4096):
             Dimension of the hidden representations.

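Concretely, tying means the output projection reuses the input embedding weight rather than learning its own; a minimal sketch of what `tie_word_embeddings=True` implies:

```python
import torch.nn as nn

vocab_size, hidden_size = 32000, 4096  # illustrative dimensions
embed_tokens = nn.Embedding(vocab_size, hidden_size)   # input embeddings
lm_head = nn.Linear(hidden_size, vocab_size, bias=False)  # output embeddings

# tie_word_embeddings=True: both layers share one weight matrix (only
# relevant when the model has an output word embedding layer at all).
lm_head.weight = embed_tokens.weight
```
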
@@ -55,7 +55,7 @@ class OvisProcessorKwargs(ProcessingKwargs, total=False):  # type: ignore[call-

 class OvisProcessor(ProcessorMixin):
     r"""
-    Constructs a Ovis processor which wraps a Ovis image processor and a Qwen2 tokenizer into a single processor.
+    Constructs an Ovis processor which wraps an Ovis image processor and a Qwen2 tokenizer into a single processor.
     [`OvisProcessor`] offers all the functionalities of [`Qwen2VLImageProcessor`] and [`Qwen2TokenizerFast`]. See the
     [`~OvisProcessor.__call__`] and [`~OvisProcessor.decode`] for more information.
     Args:

@@ -41,7 +41,7 @@ class Ovis2_5ProcessorKwargs(ProcessingKwargs,

 class Ovis2_5Processor(ProcessorMixin):
     r"""
-    Constructs a Ovis processor which wraps a Ovis image processor
+    Constructs an Ovis processor which wraps an Ovis image processor
     and a Qwen2 tokenizer into a single processor.
     [`OvisProcessor`] offers all the functionalities of
     [`Qwen2VLImageProcessor`] and [`Qwen2TokenizerFast`].

@@ -107,7 +107,7 @@ def _find_longest_matched_ngram_and_propose_tokens(
     longest_ngram = 0
     position = 0

-    # lps[0] always equal to 0, we starts with index 1
+    # lps[0] always equal to 0, we start with index 1
     prev_lps = 0
     i = 1
     while i < total_token:

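The comment refers to the classic KMP longest-proper-prefix-suffix (lps) table: `lps[0]` is 0 by definition, so the fill loop starts at index 1. A standalone sketch of that construction, with variable names mirroring the snippet above:

```python
# Standard KMP lps (longest proper prefix that is also a suffix) table.
def build_lps(tokens: list[int]) -> list[int]:
    total_token = len(tokens)
    lps = [0] * total_token
    prev_lps = 0  # length of the previous longest prefix-suffix
    i = 1         # lps[0] always equals 0, so we start with index 1
    while i < total_token:
        if tokens[i] == tokens[prev_lps]:
            prev_lps += 1
            lps[i] = prev_lps
            i += 1
        elif prev_lps != 0:
            prev_lps = lps[prev_lps - 1]  # fall back without advancing i
        else:
            lps[i] = 0
            i += 1
    return lps

# build_lps([1, 2, 1, 2, 3]) == [0, 0, 1, 2, 0]
```
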