diff --git a/examples/offline_inference/multilora_inference.py b/examples/offline_inference/multilora_inference.py
index f0c00bcaae..6040683c68 100644
--- a/examples/offline_inference/multilora_inference.py
+++ b/examples/offline_inference/multilora_inference.py
@@ -23,7 +23,7 @@ def create_test_prompts(
     2 requests for base model, 4 requests for the LoRA. We define 2
     different LoRA adapters (using the same model for demo purposes).
     Since we also set `max_loras=1`, the expectation is that the requests
-    with the second LoRA adapter will be ran after all requests with the
+    with the second LoRA adapter will be run after all requests with the
     first adapter have finished.
     """
     return [
diff --git a/vllm/distributed/device_communicators/pynccl.py b/vllm/distributed/device_communicators/pynccl.py
index 502bfd3900..3e4d0d250a 100644
--- a/vllm/distributed/device_communicators/pynccl.py
+++ b/vllm/distributed/device_communicators/pynccl.py
@@ -31,7 +31,7 @@ class PyNcclCommunicator:
             group: the process group to work on. If None, it will use the
                 default process group.
             device: the device to bind the PyNcclCommunicator to. If None,
-                it will be bind to f"cuda:{local_rank}".
+                it will be bound to f"cuda:{local_rank}".
             library_path: the path to the NCCL library. If None, it will
                 use the default library path.
         It is the caller's responsibility to make sure each communicator
diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py
index b89aee99c8..fc96c2ac92 100644
--- a/vllm/distributed/parallel_state.py
+++ b/vllm/distributed/parallel_state.py
@@ -939,8 +939,8 @@ def get_pipeline_model_parallel_group():
 def graph_capture(device: torch.device):
     """
     `graph_capture` is a context manager which should surround the code that
-    is capturing the CUDA graph. Its main purpose is to ensure that the
-    some operations will be run after the graph is captured, before the graph
+    is capturing the CUDA graph. Its main purpose is to ensure that some
+    operations will be run after the graph is captured, before the graph
     is replayed. It returns a `GraphCaptureContext` object which contains
     the necessary data for the graph capture. Currently, it only contains
     the stream that the graph capture is running on. This stream is set to the
diff --git a/vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py
index 73329cdf70..992f141bef 100644
--- a/vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py
@@ -165,7 +165,7 @@ class PythonicToolParser(ToolParser):
                     index] += delta.function.arguments
 
             # HACK: serving_chat.py inspects the internal state of tool parsers
-            # when determining it's final streaming delta, automatically
+            # when determining its final streaming delta, automatically
             # adding autocompleted JSON.
             # These two lines avoid that nonsense while ensuring finish_reason
             # is set to tool_calls when at least one tool is called.
diff --git a/vllm/model_executor/layers/fused_moe/moe_pallas.py b/vllm/model_executor/layers/fused_moe/moe_pallas.py
index 582ae3e12c..23f618b1a5 100644
--- a/vllm/model_executor/layers/fused_moe/moe_pallas.py
+++ b/vllm/model_executor/layers/fused_moe/moe_pallas.py
@@ -7,7 +7,7 @@ import torch.nn.functional as F
 
 def _histogram(input: torch.Tensor, min: int, max: int) -> torch.Tensor:
     """
-    Compute the histogram of a int32 tensor. The bin edges are defined by the
+    Compute the histogram of an int32 tensor. The bin edges are defined by the
     min and max values, with step = 1.
     """
     assert input.dtype == torch.int32, "input must be of torch.int32 dtype."
diff --git a/vllm/model_executor/models/ovis.py b/vllm/model_executor/models/ovis.py
index 04a06e5f9d..41fd272397 100644
--- a/vllm/model_executor/models/ovis.py
+++ b/vllm/model_executor/models/ovis.py
@@ -544,7 +544,7 @@ class Ovis(nn.Module, SupportsMultiModal, SupportsPP):
                                                       vision_embeddings)
             input_ids = None
 
-        # up until here we have a inputs_embeds 100% numerical identity
+        # up until here we have an inputs_embeds 100% numerical identity
         # between the OG HF Transformers implementation and ours
         hidden_states = self.llm(
             input_ids=input_ids,
diff --git a/vllm/model_executor/models/phi4mm_audio.py b/vllm/model_executor/models/phi4mm_audio.py
index 0b0d66ae77..b5e4d727bf 100644
--- a/vllm/model_executor/models/phi4mm_audio.py
+++ b/vllm/model_executor/models/phi4mm_audio.py
@@ -43,7 +43,7 @@ class ConformerEncoderLayer(nn.Module):
             if set different to 0, the number of
             depthwise_seperable_out_channel will be used as a channel_out
             of the second conv1d layer.
-            otherwise, it equal to 0, the second conv1d layer is skipped.
+            otherwise, it equals to 0, the second conv1d layer is skipped.
         depthwise_multiplier: int
             number of input_dim channels duplication. this value
             will be used to compute the hidden channels of the Conv1D.
@@ -115,7 +115,7 @@ class ConformerEncoderLayer(nn.Module):
             we recalculate activation in backward.
             default "".
         export: bool, optional
-            if set to True, it remove the padding from convolutional layers
+            if set to True, it removes the padding from convolutional layers
             and allow the onnx conversion for inference.
             default False.
         use_pt_scaled_dot_product_attention: bool, optional
@@ -686,7 +686,7 @@ class ConformerEncoder(TransformerEncoderBase):
             only work for glu_in_attention !=0
             default "swish".
         export: bool, optional
-            if set to True, it remove the padding from convolutional layers
+            if set to True, it removes the padding from convolutional layers
             and allow the onnx conversion for inference.
             default False.
         activation_checkpointing: str, optional
diff --git a/vllm/model_executor/models/phi4mm_utils.py b/vllm/model_executor/models/phi4mm_utils.py
index c4890d8427..5953550382 100644
--- a/vllm/model_executor/models/phi4mm_utils.py
+++ b/vllm/model_executor/models/phi4mm_utils.py
@@ -258,7 +258,7 @@ class DepthWiseSeperableConv1d(nn.Module):
             if set different to 0, the number of
             depthwise_seperable_out_channel will be used as a channel_out
             of the second conv1d layer.
-            otherwise, it equal to 0, the second conv1d layer is skipped.
+            otherwise, it equals to 0, the second conv1d layer is skipped.
         kernel_size: int
             kernel_size
         depthwise_multiplier: int
diff --git a/vllm/third_party/pynvml.py b/vllm/third_party/pynvml.py
index d215e5d8bf..c06aa56744 100644
--- a/vllm/third_party/pynvml.py
+++ b/vllm/third_party/pynvml.py
@@ -1022,7 +1022,7 @@ def _extractNVMLErrorsAsClasses():
     Each NVML Error gets a new NVMLError subclass. This way try,except blocks
     can filter appropriate exceptions more easily.
 
-    NVMLError is a parent class. Each NVML_ERROR_* gets it's own subclass.
+    NVMLError is a parent class. Each NVML_ERROR_* gets its own subclass.
     e.g. NVML_ERROR_ALREADY_INITIALIZED will be turned into NVMLError_AlreadyInitialized
     '''
     this_module = sys.modules[__name__]
diff --git a/vllm/transformers_utils/configs/nemotron.py b/vllm/transformers_utils/configs/nemotron.py
index 9a7243b126..090fefa142 100644
--- a/vllm/transformers_utils/configs/nemotron.py
+++ b/vllm/transformers_utils/configs/nemotron.py
@@ -26,7 +26,7 @@ logger = logging.get_logger(__name__)
 class NemotronConfig(PretrainedConfig):
     r"""
     This is the configuration class to store the configuration of a
-    [`NemotronModel`]. It is used to instantiate an Nemotron model
+    [`NemotronModel`]. It is used to instantiate a Nemotron model
     according to the specified arguments, defining the model architecture.
     Instantiating a configuration with the defaults will yield a similar
     configuration to that of the Nemotron-8B.
diff --git a/vllm/transformers_utils/configs/nemotron_h.py b/vllm/transformers_utils/configs/nemotron_h.py
index 027f291154..581bed5716 100644
--- a/vllm/transformers_utils/configs/nemotron_h.py
+++ b/vllm/transformers_utils/configs/nemotron_h.py
@@ -38,7 +38,7 @@ class NemotronHConfig(PretrainedConfig):
             passed when calling [`NemotronHModel`]
         tie_word_embeddings (`bool`, *optional*, defaults to `False`):
             Whether the model's input and output word embeddings should be
-            tied. Note that this is only relevant if the model has a output
+            tied. Note that this is only relevant if the model has an output
             word embedding layer.
         hidden_size (`int`, *optional*, defaults to 4096):
             Dimension of the hidden representations.
diff --git a/vllm/transformers_utils/processors/ovis.py b/vllm/transformers_utils/processors/ovis.py
index 557d251c45..0077a7a8ce 100644
--- a/vllm/transformers_utils/processors/ovis.py
+++ b/vllm/transformers_utils/processors/ovis.py
@@ -55,7 +55,7 @@ class OvisProcessorKwargs(ProcessingKwargs, total=False): # type: ignore[call-
 
 class OvisProcessor(ProcessorMixin):
     r"""
-    Constructs a Ovis processor which wraps a Ovis image processor and a Qwen2 tokenizer into a single processor.
+    Constructs an Ovis processor which wraps an Ovis image processor and a Qwen2 tokenizer into a single processor.
     [`OvisProcessor`] offers all the functionalities of [`Qwen2VLImageProcessor`] and [`Qwen2TokenizerFast`]. See the
     [`~OvisProcessor.__call__`] and [`~OvisProcessor.decode`] for more information.
     Args:
diff --git a/vllm/transformers_utils/processors/ovis2_5.py b/vllm/transformers_utils/processors/ovis2_5.py
index d3273257ff..282e9cb211 100644
--- a/vllm/transformers_utils/processors/ovis2_5.py
+++ b/vllm/transformers_utils/processors/ovis2_5.py
@@ -41,7 +41,7 @@ class Ovis2_5ProcessorKwargs(ProcessingKwargs,
 
 class Ovis2_5Processor(ProcessorMixin):
     r"""
-    Constructs a Ovis processor which wraps a Ovis image processor
+    Constructs an Ovis processor which wraps an Ovis image processor
     and a Qwen2 tokenizer into a single processor.
     [`OvisProcessor`] offers all the functionalities of [`Qwen2VLImageProcessor`] and
     [`Qwen2TokenizerFast`].
diff --git a/vllm/v1/spec_decode/ngram_proposer.py b/vllm/v1/spec_decode/ngram_proposer.py
index fbcf2cb50d..b92e396d45 100644
--- a/vllm/v1/spec_decode/ngram_proposer.py
+++ b/vllm/v1/spec_decode/ngram_proposer.py
@@ -107,7 +107,7 @@ def _find_longest_matched_ngram_and_propose_tokens(
     longest_ngram = 0
     position = 0
 
-    # lps[0] always equal to 0, we starts with index 1
+    # lps[0] always equal to 0, we start with index 1
     prev_lps = 0
     i = 1
     while i < total_token: