diff --git a/.gitignore b/.gitignore index e49d1d6ba6..88a42a5c0f 100644 --- a/.gitignore +++ b/.gitignore @@ -200,5 +200,5 @@ benchmarks/**/*.json actionlint shellcheck*/ -# Ingore moe/marlin_moe gen code +# Ignore moe/marlin_moe gen code csrc/moe/marlin_moe_wna16/kernel_* diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a105b0e14c..e13738d671 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,12 +20,10 @@ repos: args: [--output-format, github, --fix] - id: ruff-format files: ^(.buildkite|benchmarks|examples)/.* -- repo: https://github.com/codespell-project/codespell - rev: v2.4.1 +- repo: https://github.com/crate-ci/typos + rev: v1.32.0 hooks: - - id: codespell - additional_dependencies: ['tomli'] - args: ['--toml', 'pyproject.toml'] + - id: typos - repo: https://github.com/PyCQA/isort rev: 6.0.1 hooks: diff --git a/csrc/cpu/attention.cpp b/csrc/cpu/attention.cpp index 0257d8ff16..82862fea7f 100644 --- a/csrc/cpu/attention.cpp +++ b/csrc/cpu/attention.cpp @@ -137,8 +137,8 @@ FORCE_INLINE std::pair reduceSoftmaxAlibi(T* data, const int size, } template -FORCE_INLINE void reducePartitonSoftmax(const T* max_data, T* sum_data, - const int size) { +FORCE_INLINE void reducePartitionSoftmax(const T* max_data, T* sum_data, + const int size) { T max = max_data[0]; for (int i = 1; i < size; ++i) { max = max >= max_data[i] ? max : max_data[i]; @@ -634,7 +634,7 @@ struct paged_attention_v2_impl { if (partition_num == 1) continue; - reducePartitonSoftmax( + reducePartitionSoftmax( max_logits + seq_idx * num_heads * max_num_partitions + head_idx * max_num_partitions, exp_sums + seq_idx * num_heads * max_num_partitions + diff --git a/csrc/cpu/cpu_types_x86.hpp b/csrc/cpu/cpu_types_x86.hpp index 9a613ba588..3952c43cbc 100644 --- a/csrc/cpu/cpu_types_x86.hpp +++ b/csrc/cpu/cpu_types_x86.hpp @@ -83,7 +83,7 @@ struct FP16Vec16 : public Vec { explicit FP16Vec16(const void* ptr) : reg((__m256i)_mm256_loadu_si256((__m256i*)ptr)) {} - // non-temproal load + // non-temporal load explicit FP16Vec16(bool, void* ptr) : reg(_mm256_stream_load_si256((__m256i*)ptr)) {} @@ -120,7 +120,7 @@ struct BF16Vec16 : public Vec { explicit BF16Vec16(const void* ptr) : reg((__m256i)_mm256_loadu_si256((__m256i*)ptr)) {} - // non-temproal load + // non-temporal load explicit BF16Vec16(bool, void* ptr) : reg(_mm256_stream_load_si256((__m256i*)ptr)) {} @@ -327,7 +327,7 @@ struct FP32Vec16 : public Vec { // normal load explicit FP32Vec16(const float* ptr) : reg(_mm512_loadu_ps(ptr)) {} - // non-temproal load + // non-temporal load explicit FP32Vec16(bool, void* ptr) : reg((__m512)_mm512_stream_load_si512(ptr)) {} @@ -576,7 +576,7 @@ struct INT8Vec64 : public Vec { // normal load explicit INT8Vec64(void* ptr) : reg(_mm512_loadu_epi8(ptr)) {} - // non-temproal load + // non-temporal load explicit INT8Vec64(bool, void* ptr) : reg(_mm512_stream_load_si512(ptr)) {} void save(void* ptr) const { _mm512_storeu_epi8(ptr, reg); } @@ -587,7 +587,7 @@ struct INT8Vec64 : public Vec { _mm512_mask_storeu_epi8(ptr, mask, reg); } - // non-temproal save + // non-temporal save void nt_save(int8_t* ptr) { _mm512_stream_si512((__m512i*)ptr, reg); } }; #endif diff --git a/csrc/moe/moe_permute_unpermute_op.cu b/csrc/moe/moe_permute_unpermute_op.cu index 68f429fac1..a77471a7f2 100644 --- a/csrc/moe/moe_permute_unpermute_op.cu +++ b/csrc/moe/moe_permute_unpermute_op.cu @@ -12,7 +12,7 @@ void moe_permute( const torch::Tensor& input, // [n_token, hidden] const torch::Tensor& topk_weights, //[n_token, topk] 
torch::Tensor& topk_ids, // [n_token, topk] - const torch::Tensor& token_expert_indicies, // [n_token, topk] + const torch::Tensor& token_expert_indices, // [n_token, topk] const std::optional& expert_map, // [n_expert] int64_t n_expert, int64_t n_local_expert, int64_t topk, const std::optional& align_block_size, @@ -27,15 +27,15 @@ void moe_permute( "expert_first_token_offset must be int64"); TORCH_CHECK(topk_ids.scalar_type() == at::ScalarType::Int, "topk_ids must be int32"); - TORCH_CHECK(token_expert_indicies.scalar_type() == at::ScalarType::Int, - "token_expert_indicies must be int32"); + TORCH_CHECK(token_expert_indices.scalar_type() == at::ScalarType::Int, + "token_expert_indices must be int32"); TORCH_CHECK(src_row_id2dst_row_id_map.scalar_type() == at::ScalarType::Int, "src_row_id2dst_row_id_map must be int32"); TORCH_CHECK(expert_first_token_offset.size(0) == n_local_expert + 1, "expert_first_token_offset shape != n_local_expert+1") TORCH_CHECK( - src_row_id2dst_row_id_map.sizes() == token_expert_indicies.sizes(), - "token_expert_indicies shape must be same as src_row_id2dst_row_id_map"); + src_row_id2dst_row_id_map.sizes() == token_expert_indices.sizes(), + "token_expert_indices shape must be same as src_row_id2dst_row_id_map"); auto n_token = input.sizes()[0]; auto n_hidden = input.sizes()[1]; auto align_block_size_value = @@ -71,7 +71,7 @@ void moe_permute( expert_map_ptr, n_expert, stream); } // expert sort topk expert id and scan expert id get expert_first_token_offset - sortAndScanExpert(get_ptr(topk_ids), get_ptr(token_expert_indicies), + sortAndScanExpert(get_ptr(topk_ids), get_ptr(token_expert_indices), get_ptr(permuted_experts_id), get_ptr(dst_row_id2src_row_id_map), get_ptr(expert_first_token_offset), n_token, @@ -190,7 +190,7 @@ void shuffle_rows(const torch::Tensor& input_tensor, void moe_permute(const torch::Tensor& input, const torch::Tensor& topk_weights, torch::Tensor& topk_ids, - const torch::Tensor& token_expert_indicies, + const torch::Tensor& token_expert_indices, const std::optional& expert_map, int64_t n_expert, int64_t n_local_expert, int64_t topk, const std::optional& align_block_size, @@ -203,7 +203,7 @@ void moe_permute(const torch::Tensor& input, const torch::Tensor& topk_weights, void moe_unpermute(const torch::Tensor& input, const torch::Tensor& topk_weights, torch::Tensor& topk_ids, - const torch::Tensor& token_expert_indicies, + const torch::Tensor& token_expert_indices, const std::optional& expert_map, int64_t n_expert, int64_t n_local_expert, int64_t topk, const std::optional& align_block_size, diff --git a/csrc/moe/topk_softmax_kernels.cu b/csrc/moe/topk_softmax_kernels.cu index 10be47966f..dea5b1f21e 100644 --- a/csrc/moe/topk_softmax_kernels.cu +++ b/csrc/moe/topk_softmax_kernels.cu @@ -425,7 +425,7 @@ void topkGatingSoftmaxLauncherHelper(const float* input, const bool* finished, f #define LAUNCH_SOFTMAX(NUM_EXPERTS, WARPS_PER_TB) \ topkGatingSoftmaxLauncherHelper( \ - gating_output, nullptr, topk_weights, topk_indicies, \ + gating_output, nullptr, topk_weights, topk_indices, \ token_expert_indices, num_tokens, topk, 0, num_experts, \ stream); @@ -433,7 +433,7 @@ template void topkGatingSoftmaxKernelLauncher( const float* gating_output, float* topk_weights, - IndType* topk_indicies, + IndType* topk_indices, int* token_expert_indices, float* softmax_workspace, const int num_tokens, @@ -476,7 +476,7 @@ void topkGatingSoftmaxKernelLauncher( moeSoftmax<<>>( gating_output, nullptr, softmax_workspace, num_experts); moeTopK<<>>( - softmax_workspace, 
nullptr, topk_weights, topk_indicies, token_expert_indices, + softmax_workspace, nullptr, topk_weights, topk_indices, token_expert_indices, num_experts, topk, 0, num_experts); } } diff --git a/csrc/moe/torch_bindings.cpp b/csrc/moe/torch_bindings.cpp index a74eb3720c..d6ef4940b6 100644 --- a/csrc/moe/torch_bindings.cpp +++ b/csrc/moe/torch_bindings.cpp @@ -66,7 +66,7 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, m) { m.def( "moe_permute(Tensor input, Tensor topk_weight, Tensor! topk_ids," - "Tensor token_expert_indicies, Tensor? expert_map, int n_expert," + "Tensor token_expert_indices, Tensor? expert_map, int n_expert," "int n_local_expert," "int topk, int? align_block_size,Tensor! permuted_input, Tensor! " "expert_first_token_offset, Tensor! src_row_id2dst_row_id_map, Tensor! " diff --git a/csrc/quantization/machete/machete_mainloop.cuh b/csrc/quantization/machete/machete_mainloop.cuh index 572894064d..eca5d328b0 100644 --- a/csrc/quantization/machete/machete_mainloop.cuh +++ b/csrc/quantization/machete/machete_mainloop.cuh @@ -1003,7 +1003,7 @@ struct MacheteCollectiveMma { static constexpr int A_CPY_VEC = decltype(max_common_vector(tCsA, tCrA_load)){}; - static constexpr int COVERSION_WIDTH = + static constexpr int CONVERSION_WIDTH = std::min(A_CPY_VEC, int(size<0>(tCrA_mma))); auto load_A_to_registers = [&](int read_stage) { @@ -1026,8 +1026,8 @@ struct MacheteCollectiveMma { // PIPELINED MAIN LOOP // - auto convert_A = [&, a_vec = Int{}](int k_block, - int read_stage) { + auto convert_A = [&, a_vec = Int{}](int k_block, + int read_stage) { load_extra_info_to_registers(partitioned_extra_info, copy_partitions_extra_info, k_block, read_stage); diff --git a/csrc/rocm/skinny_gemms.cu b/csrc/rocm/skinny_gemms.cu index e31aa01626..6212570c79 100644 --- a/csrc/rocm/skinny_gemms.cu +++ b/csrc/rocm/skinny_gemms.cu @@ -320,7 +320,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS) // Goal is to bring the activation matrix A to the LDS // and use it across the lifetime of the work group // TODO: When activation matrix is larger than 64 KB - // then this is not goint to work! + // then this is not going to work! //---------------------------------------------------- __shared__ scalar_t s[max_lds_len]; @@ -581,7 +581,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS) // Goal is to bring the activation matrix A to the LDS // and use it across the lifetime of the work group // TODO: When activation matrix is larger than 64 KB - // then this is not goint to work! + // then this is not going to work! //---------------------------------------------------- __shared__ scalar_t s[max_lds_len]; @@ -601,7 +601,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS) // int _WvPrGrp = mindiv(N, CuCount * YTILE, WvPrGrp); uint32_t m = (blockIdx.x * _WvPrGrp + threadIdx.y) * YTILE; - // Check whether there will be fragmenation! + // Check whether there will be fragmentation! // This will happen only for the last wave! if (m < M && (m + YTILE) >= M) { uint32_t startColumn = M - YTILE; @@ -827,7 +827,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS) m += CuCount * _WvPrGrp * YTILE; - // Check whether there will be fragmenation! + // Check whether there will be fragmentation! // This will happen only for the last wave! 
if (m < M && (m + YTILE) >= M) { uint32_t startColumn = M - YTILE; @@ -882,7 +882,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS) // Goal is to bring the activation matrix A to the LDS // and use it across the lifetime of the work group // TODO: When activation matrix is larger than 64 KB - // then this is not goint to work! + // then this is not going to work! //---------------------------------------------------- __shared__ scalar_t s[max_lds_len]; @@ -904,7 +904,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS) //---------------------------------------------------- uint32_t m = (blockIdx.x * _WvPrGrp + threadIdx.y) * YTILE; - // Check whether there will be fragmenation! + // Check whether there will be fragmentation! // This will happen only for the last wave! if (m < M && (m + YTILE) >= M) { uint32_t startColumn = M - YTILE; @@ -1176,7 +1176,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS) m += CuCount * _WvPrGrp * YTILE; kBase = 0; - // Check whether there will be fragmenation! + // Check whether there will be fragmentation! // This will happen only for the last wave! if (m < M && (m + YTILE) >= M) { uint32_t startColumn = M - YTILE; diff --git a/csrc/sparse/cutlass/sparse_scaled_mm_c3x.cu b/csrc/sparse/cutlass/sparse_scaled_mm_c3x.cu index 3dcaa6373f..d053ecc8dd 100644 --- a/csrc/sparse/cutlass/sparse_scaled_mm_c3x.cu +++ b/csrc/sparse/cutlass/sparse_scaled_mm_c3x.cu @@ -277,7 +277,7 @@ CompressorResult cutlass_sparse_compress_sm90(torch::Tensor const& a) { uint32_t const m = 1; // Set M to 1 for compression uint32_t const n = a.size(1); - // Note: For correctess, the compressed format must be invariant in: + // Note: For correctness, the compressed format must be invariant in: // - M, the flattened number of tokens // - Whether output dtype is fp16 or bf16 // - CUTLASS epilogues diff --git a/pyproject.toml b/pyproject.toml index 307878f7e3..e8c2403af0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -137,10 +137,6 @@ exclude = [ 'vllm/attention/ops/.*\.py$' ] -[tool.codespell] -ignore-words-list = "dout, te, indicies, subtile, ElementE" -skip = "tests/models/fixtures/*,tests/prompts/*,benchmarks/sonnet.txt,tests/lora/data/*,build/*,vllm/third_party/*" - [tool.isort] skip_glob = [ ".buildkite/*", diff --git a/tests/compile/test_async_tp.py b/tests/compile/test_async_tp.py index 1e4ee571f1..508056ea19 100644 --- a/tests/compile/test_async_tp.py +++ b/tests/compile/test_async_tp.py @@ -223,7 +223,7 @@ def test_async_tp_pass_correctness( "VLLM_USE_V1": "1", } - aysnc_tp_args = [ + async_tp_args = [ *common_args, "--tensor-parallel-size", str(tp_size), @@ -242,7 +242,7 @@ def test_async_tp_pass_correctness( ] compare_two_settings(model_id, - aysnc_tp_args, + async_tp_args, tp_args, async_tp_env, tp_env, diff --git a/tests/core/block/e2e/test_correctness.py b/tests/core/block/e2e/test_correctness.py index f296c81e17..93222b564e 100644 --- a/tests/core/block/e2e/test_correctness.py +++ b/tests/core/block/e2e/test_correctness.py @@ -437,8 +437,8 @@ def test_auto_prefix_caching_with_preemption(baseline_llm_generator, "enable_prefix_caching": True, }]) @pytest.mark.parametrize("seed", [1]) -def test_auto_prefix_caching_after_evition_start(baseline_llm_generator, - test_llm_generator): +def test_auto_prefix_caching_after_eviction_start(baseline_llm_generator, + test_llm_generator): """Verify block manager v2 with auto prefix caching could works normal even when eviction started. With APC enabled, all blocks are held by native block at the beginning. 
diff --git a/tests/core/block/e2e/test_correctness_sliding_window.py b/tests/core/block/e2e/test_correctness_sliding_window.py index 3429a858dd..4d67eea226 100644 --- a/tests/core/block/e2e/test_correctness_sliding_window.py +++ b/tests/core/block/e2e/test_correctness_sliding_window.py @@ -33,8 +33,8 @@ BLOCK_SIZE = 16 @pytest.mark.parametrize("batch_size", [5]) @pytest.mark.parametrize("seed", [1]) @pytest.mark.parametrize("backend", ["FLASH_ATTN", "FLASHINFER", "XFORMERS"]) -def test_sliding_window_retrival(baseline_llm_generator, test_llm_generator, - batch_size, seed, backend, monkeypatch): +def test_sliding_window_retrieval(baseline_llm_generator, test_llm_generator, + batch_size, seed, backend, monkeypatch): """ The test does a bunch of assignments "x1 = 10\nx2 = 33\n..." and then asks for value of one of them (which is outside the sliding window). @@ -100,7 +100,7 @@ def test_sliding_window_retrival(baseline_llm_generator, test_llm_generator, def test_sliding_window_chunked_prefill(test_llm_generator, batch_size, seed, backend, monkeypatch): """ - This is similar to test_sliding_window_retrival, however, it doesn't + This is similar to test_sliding_window_retrieval, however, it doesn't compare against the v1 block manager since v1 doesn't support chunked prefill with sliding window. diff --git a/tests/core/test_scheduler.py b/tests/core/test_scheduler.py index db78a9d556..5e8e5f9767 100644 --- a/tests/core/test_scheduler.py +++ b/tests/core/test_scheduler.py @@ -594,8 +594,8 @@ def test_decode_schedule_preempted(): # should be preempted. 1 will also be preempted. budget = create_token_budget() output = scheduler._schedule_running(budget, curr_loras) - remainig_running = scheduler.running - assert len(remainig_running) == 0 + remaining_running = scheduler.running + assert len(remaining_running) == 0 assert len(output.decode_seq_groups) == 1 assert len(output.prefill_seq_groups) == 0 assert output.decode_seq_groups[0].seq_group.request_id == "0" diff --git a/tests/entrypoints/openai/test_chat_template.py b/tests/entrypoints/openai/test_chat_template.py index daa4a78c93..6e32887f5e 100644 --- a/tests/entrypoints/openai/test_chat_template.py +++ b/tests/entrypoints/openai/test_chat_template.py @@ -16,7 +16,7 @@ chatml_jinja_path = VLLM_PATH / "examples/template_chatml.jinja" assert chatml_jinja_path.exists() # Define models, templates, and their corresponding expected outputs -MODEL_TEMPLATE_GENERATON_OUTPUT = [ +MODEL_TEMPLATE_GENERATION_OUTPUT = [ ("facebook/opt-125m", chatml_jinja_path, True, False, """<|im_start|>user Hello<|im_end|> <|im_start|>assistant @@ -91,7 +91,7 @@ def test_no_load_chat_template_literallike(): @pytest.mark.parametrize( "model,template,add_generation_prompt,continue_final_message,expected_output", - MODEL_TEMPLATE_GENERATON_OUTPUT) + MODEL_TEMPLATE_GENERATION_OUTPUT) def test_get_gen_prompt(model, template, add_generation_prompt, continue_final_message, expected_output): model_info = HF_EXAMPLE_MODELS.find_hf_info(model) diff --git a/tests/kernels/attention/test_cache.py b/tests/kernels/attention/test_cache.py index e508505c2b..7895076155 100644 --- a/tests/kernels/attention/test_cache.py +++ b/tests/kernels/attention/test_cache.py @@ -72,8 +72,8 @@ def test_copy_blocks( # destination blocks. 
assert 2 * num_mappings <= num_blocks src_blocks = random.sample(range(num_blocks), num_mappings) - remainig_blocks = list(set(range(num_blocks)) - set(src_blocks)) - dst_blocks = random.sample(remainig_blocks, 2 * num_mappings) + remaining_blocks = list(set(range(num_blocks)) - set(src_blocks)) + dst_blocks = random.sample(remaining_blocks, 2 * num_mappings) block_mapping: list[tuple[int, int]] = [] for i in range(num_mappings): src = src_blocks[i] @@ -189,12 +189,12 @@ def test_reshape_and_cache( # Run the reference implementation. reshaped_key = key.reshape(num_tokens, *key_cache[0, :, :, 0, :].shape) - block_indicies = torch.div(slot_mapping, block_size, rounding_mode="floor") - block_indicies_lst = block_indicies.cpu().tolist() + block_indices = torch.div(slot_mapping, block_size, rounding_mode="floor") + block_indices_lst = block_indices.cpu().tolist() block_offsets = slot_mapping % block_size block_offsets_lst = block_offsets.cpu().tolist() for i in range(num_tokens): - block_idx = block_indicies_lst[i] + block_idx = block_indices_lst[i] block_offset = block_offsets_lst[i] cloned_key_cache[block_idx, :, :, block_offset, :] = reshaped_key[i] cloned_value_cache[block_idx, :, :, block_offset] = value[i] @@ -322,12 +322,12 @@ def test_reshape_and_cache_flash( kv_dtype=kv_cache_dtype) # Run the reference implementation. - block_indicies = torch.div(slot_mapping, block_size, rounding_mode="floor") - block_indicies_lst = block_indicies.cpu().tolist() + block_indices = torch.div(slot_mapping, block_size, rounding_mode="floor") + block_indices_lst = block_indices.cpu().tolist() block_offsets = slot_mapping % block_size block_offsets_lst = block_offsets.cpu().tolist() for i in range(num_tokens): - block_idx = block_indicies_lst[i] + block_idx = block_indices_lst[i] block_offset = block_offsets_lst[i] if kv_cache_layout == "NHD": cloned_key_cache[block_idx, block_offset, :, :] = key[i] diff --git a/tests/kernels/attention/test_encoder_decoder_attn.py b/tests/kernels/attention/test_encoder_decoder_attn.py index 8efc701f3a..a2e6986460 100644 --- a/tests/kernels/attention/test_encoder_decoder_attn.py +++ b/tests/kernels/attention/test_encoder_decoder_attn.py @@ -46,7 +46,7 @@ CUDA_DEVICE = "cuda:0" MAX_DEC_SEQ_LENS = [128] MAX_ENC_SEQ_LENS = [128] -# Narrow teest-cases for unsupported-scenario +# Narrow test-cases for unsupported-scenario # tests HEAD_SIZES_FOR_UNSUPP = [HEAD_SIZES[0]] diff --git a/tests/kernels/core/test_rotary_embedding.py b/tests/kernels/core/test_rotary_embedding.py index db0fdcbf5e..d1fd960bf1 100644 --- a/tests/kernels/core/test_rotary_embedding.py +++ b/tests/kernels/core/test_rotary_embedding.py @@ -39,10 +39,10 @@ def rotary_embedding_opcheck(rot, @pytest.mark.parametrize("head_size", [32, 108]) @pytest.mark.parametrize("seq_len", [11, 1024]) @pytest.mark.parametrize("use_key", [True, False]) -@pytest.mark.parametrize("head_stride_is_contingous", [True, False]) +@pytest.mark.parametrize("head_stride_is_contiguous", [True, False]) def test_rotary_embedding_opcheck(dist_init, device, max_position, is_neox_style, rotary_dim, head_size, - seq_len, use_key, head_stride_is_contingous): + seq_len, use_key, head_stride_is_contiguous): batch_size = 1 base = 10000 num_heads = 7 @@ -52,7 +52,7 @@ def test_rotary_embedding_opcheck(dist_init, device, max_position, positions = torch.randint(0, max_position, (batch_size, seq_len), device=device) - head_stride = head_size + (64 if head_stride_is_contingous else 0) + head_stride = head_size + (64 if head_stride_is_contiguous else 0) 
query = torch.randn(batch_size, seq_len, @@ -72,7 +72,7 @@ def test_rotary_embedding_opcheck(dist_init, device, max_position, # if we have a contiguous head stride, test the alternate # [..., num_heads * head_dim] shape/layout - if head_stride_is_contingous: + if head_stride_is_contiguous: rotary_embedding_opcheck( rot, positions, query.flatten(start_dim=-2), key.flatten(start_dim=-2) if use_key else None) diff --git a/tests/kernels/mamba/test_mamba_ssm_ssd.py b/tests/kernels/mamba/test_mamba_ssm_ssd.py index abed1252a3..ccf0ff6abd 100644 --- a/tests/kernels/mamba/test_mamba_ssm_ssd.py +++ b/tests/kernels/mamba/test_mamba_ssm_ssd.py @@ -107,15 +107,15 @@ def generate_random_inputs(batch_size, return A, dt, X, B, C -def generate_continous_batched_examples(example_lens_by_batch, - num_examples, - full_length, - last_taken, - exhausted, - n_heads, - d_head, - itype, - device='cuda'): +def generate_continuous_batched_examples(example_lens_by_batch, + num_examples, + full_length, + last_taken, + exhausted, + n_heads, + d_head, + itype, + device='cuda'): # this function generates a random examples of certain length # and then cut according to "example_lens_by_batch" and feed @@ -269,11 +269,10 @@ def test_mamba_chunk_scan_cont_batch(d_head, n_heads, seq_len_chunk_size_cases, exhausted: dict = {} # map: eg -> boolean indicating example is exhausted states = None - for Y_min, cu_seqlens, seq_idx, (A, dt, X, B, - C) in generate_continous_batched_examples( - cases, num_examples, seqlen, - last_taken, exhausted, n_heads, - d_head, itype): + for Y_min, cu_seqlens, seq_idx, ( + A, dt, X, B, C) in generate_continuous_batched_examples( + cases, num_examples, seqlen, last_taken, exhausted, n_heads, + d_head, itype): chunk_indices, chunk_offsets = \ _query_start_loc_to_chunk_indices_offsets( diff --git a/tests/lora/test_transfomers_model.py b/tests/lora/test_transformers_model.py similarity index 100% rename from tests/lora/test_transfomers_model.py rename to tests/lora/test_transformers_model.py diff --git a/tests/models/language/generation/test_bart.py b/tests/models/language/generation/test_bart.py index 7d8acab5e8..b4c7718401 100644 --- a/tests/models/language/generation/test_bart.py +++ b/tests/models/language/generation/test_bart.py @@ -118,7 +118,7 @@ def run_test( # default to enforce_eager=True if enforce_eager # is left unspecified. However, the # VllmRunner test fixture (which wraps around the LLM class) defaults to - # enforce_eager=False (a behavior which a number of already-exisitng + # enforce_eager=False (a behavior which a number of already-existing # decoder-only unit tests expect), so when testing an encoder/decoder # model we must explicitly specify enforce_eager=True in the VllmRunner # constructor. diff --git a/tests/samplers/test_typical_acceptance_sampler.py b/tests/samplers/test_typical_acceptance_sampler.py index 418471b8e5..119841470b 100644 --- a/tests/samplers/test_typical_acceptance_sampler.py +++ b/tests/samplers/test_typical_acceptance_sampler.py @@ -248,7 +248,7 @@ def test_temperature_zero_target_distribution(seed: int, device: str): size=(batch_size, 1), dtype=torch.int64) # The target probaility distribution is a temperature zero distribution - # with zero entroy. Since our draft token ids don't match the probability + # with zero entropy. Since our draft token ids don't match the probability # 1.0 tokens in the target distribution we will reject all of them and # fallback to the greedy sampling for selecting 1 token for each sequence. # Verify the same. 
diff --git a/tests/spec_decode/e2e/test_eagle_correctness.py b/tests/spec_decode/e2e/test_eagle_correctness.py index 9893946142..fd838285ab 100644 --- a/tests/spec_decode/e2e/test_eagle_correctness.py +++ b/tests/spec_decode/e2e/test_eagle_correctness.py @@ -18,7 +18,7 @@ However, we still need to verify below scenario could be passed: * Test greedy equality under various number of speculative tokens. With those tests, we can say at least, EAGLE would not break the -correctess for the target model outputs. +correctness for the target model outputs. """ import pytest diff --git a/tests/spec_decode/e2e/test_medusa_correctness.py b/tests/spec_decode/e2e/test_medusa_correctness.py index 064a6e10ae..bc9501bd57 100644 --- a/tests/spec_decode/e2e/test_medusa_correctness.py +++ b/tests/spec_decode/e2e/test_medusa_correctness.py @@ -18,7 +18,7 @@ However, we still need to verify below scenario could be passed: * Test greedy equality under various number of speculative tokens. With those tests, we can say at least, Medusa would not break the -correctess for the target model outputs. +correctness for the target model outputs. """ import pytest diff --git a/tests/spec_decode/e2e/test_mtp_correctness.py b/tests/spec_decode/e2e/test_mtp_correctness.py index d4d4d519b7..d9c7be8ffe 100644 --- a/tests/spec_decode/e2e/test_mtp_correctness.py +++ b/tests/spec_decode/e2e/test_mtp_correctness.py @@ -18,7 +18,7 @@ However, we still need to verify below scenario could be passed: * Test greedy equality under various number of speculative tokens. With those tests, we can say at least, mtp would not break the -correctess for the target model outputs. +correctness for the target model outputs. """ import pytest diff --git a/tests/spec_decode/e2e/test_ngram_correctness.py b/tests/spec_decode/e2e/test_ngram_correctness.py index c10329a9ba..5aefc1df84 100644 --- a/tests/spec_decode/e2e/test_ngram_correctness.py +++ b/tests/spec_decode/e2e/test_ngram_correctness.py @@ -22,8 +22,8 @@ However, we still need to verify below scenario could be passed: * Test greedy equality under preemption * Test greedy equality under various ngram sizes / speculative sizes -With those tests, we can say at least, ngram spec would not break the correctess -for the target model outputs. +With those tests, we can say at least, ngram spec would not break the +correctness for the target model outputs. """ import pytest diff --git a/tests/v1/e2e/test_correctness_sliding_window.py b/tests/v1/e2e/test_correctness_sliding_window.py index d8882b1d94..277ea3c838 100644 --- a/tests/v1/e2e/test_correctness_sliding_window.py +++ b/tests/v1/e2e/test_correctness_sliding_window.py @@ -30,7 +30,7 @@ model_config = { ]) @pytest.mark.parametrize("batch_size", [5]) @pytest.mark.parametrize("seed", [1]) -def test_sliding_window_retrival(monkeypatch, model, batch_size, seed): +def test_sliding_window_retrieval(monkeypatch, model, batch_size, seed): """ The test does a bunch of assignments "x1 = 10\nx2 = 33\n..." and then asks for value of one of them (which is outside the sliding window). 
diff --git a/tests/v1/kv_connector/unit/test_nixl_connector.py b/tests/v1/kv_connector/unit/test_nixl_connector.py index 622ab6f35d..a0bcb8f602 100644 --- a/tests/v1/kv_connector/unit/test_nixl_connector.py +++ b/tests/v1/kv_connector/unit/test_nixl_connector.py @@ -7,7 +7,7 @@ from vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector import ( from .utils import create_request, create_scheduler, create_vllm_config -def test_basic_inferface(): +def test_basic_interface(): """Unit test for basic NixlConnector interface functionality.""" vllm_config = create_vllm_config() @@ -25,7 +25,7 @@ def test_basic_inferface(): scheduler.add_request(request) - # Remote Prefill, triggers NixlConnectorMetdata. + # Remote Prefill, triggers NixlConnectorMetadata. scheduler_output = scheduler.schedule() kv_connector_metadata = scheduler_output.kv_connector_metadata assert kv_connector_metadata is not None diff --git a/tests/v1/sample/test_logprobs_e2e.py b/tests/v1/sample/test_logprobs_e2e.py index 085b2ee097..0b135613ff 100644 --- a/tests/v1/sample/test_logprobs_e2e.py +++ b/tests/v1/sample/test_logprobs_e2e.py @@ -32,7 +32,7 @@ def test_prompt_logprobs_e2e(): ), f"Expected: {EXPECTED_VALUE} | Measured: {measured_value}" -def test_promt_logprobs_e2e_server(): +def test_prompt_logprobs_e2e_server(): with RemoteOpenAIServer(MODEL, SERVER_ARGS) as remote_server: url = f"{remote_server.url_for('v1')}/completions" diff --git a/tests/worker/test_model_input.py b/tests/worker/test_model_input.py index a5e61128d1..ec33d334ab 100644 --- a/tests/worker/test_model_input.py +++ b/tests/worker/test_model_input.py @@ -209,32 +209,32 @@ def test_multi_step_model_runner_input(): received_model_input = (StatefulModelInput.from_broadcasted_tensor_dict( tensor_dict, attn_backend=attn_backend)) - receieved_frozen_input = received_model_input.frozen_model_input + received_frozen_input = received_model_input.frozen_model_input # Check that received copy has correct values. assert isinstance(received_model_input, StatefulModelInput) - assert receieved_frozen_input.input_tokens is not None - assert (receieved_frozen_input.input_tokens == + assert received_frozen_input.input_tokens is not None + assert (received_frozen_input.input_tokens == frozen_model_input.input_tokens).all() - assert receieved_frozen_input.input_positions is not None - assert (receieved_frozen_input.input_positions == + assert received_frozen_input.input_positions is not None + assert (received_frozen_input.input_positions == frozen_model_input.input_positions).all() - assert receieved_frozen_input.multi_modal_kwargs is None + assert received_frozen_input.multi_modal_kwargs is None assert (frozen_model_input.multi_modal_kwargs == frozen_model_input.multi_modal_kwargs) - assert receieved_frozen_input.lora_requests is None - assert (receieved_frozen_input.lora_requests == + assert received_frozen_input.lora_requests is None + assert (received_frozen_input.lora_requests == frozen_model_input.lora_requests) - assert receieved_frozen_input.lora_mapping is None + assert received_frozen_input.lora_mapping is None assert ( - receieved_frozen_input.lora_mapping == frozen_model_input.lora_mapping) + received_frozen_input.lora_mapping == frozen_model_input.lora_mapping) for field in dataclasses.fields(AttentionMetadata): - assert getattr(receieved_frozen_input.attn_metadata, field.name, + assert getattr(received_frozen_input.attn_metadata, field.name, None) == getattr(attn_metadata, field.name, None) # For sampling metadata, only selected_token_indices is copied. 
- assert (receieved_frozen_input.sampling_metadata.selected_token_indices == + assert (received_frozen_input.sampling_metadata.selected_token_indices == sampling_metadata.selected_token_indices) - assert receieved_frozen_input.sampling_metadata.seq_groups is None + assert received_frozen_input.sampling_metadata.seq_groups is None # check non frozen fields assert received_model_input.is_last_step == model_input.is_last_step diff --git a/tools/report_build_time_ninja.py b/tools/report_build_time_ninja.py index 7368ae9531..7386cdd9f7 100644 --- a/tools/report_build_time_ninja.py +++ b/tools/report_build_time_ninja.py @@ -116,7 +116,7 @@ def ReadTargets(log, show_all): # If ninja.exe is rudely halted then the .ninja_log file may be # corrupt. Silently continue. continue - start, end, _, name, cmdhash = parts # Ignore restat. + start, end, _, name, cmdhash = parts # Ignore restart. # Convert from integral milliseconds to float seconds. start = int(start) / 1000.0 end = int(end) / 1000.0 diff --git a/typos.toml b/typos.toml new file mode 100644 index 0000000000..f51ce2f362 --- /dev/null +++ b/typos.toml @@ -0,0 +1,179 @@ +[files] +# these files may be written in non english words +extend-exclude = ["tests/models/fixtures/*", "tests/prompts/*", + "benchmarks/sonnet.txt", "tests/lora/data/*", "build/*", + "vllm/third_party/*"] +ignore-hidden = true +ignore-files = true +ignore-dot = true +ignore-vcs = true +ignore-global = true +ignore-parent = true + +[default] +binary = false +check-filename = false +check-file = true +unicode = true +ignore-hex = true +identifier-leading-digits = false +locale = "en" +extend-ignore-identifiers-re = ["NVML_*", ".*Unc.*", ".*_thw", + ".*UE8M0.*", ".*[UE4M3|ue4m3].*", ".*eles.*", ".*fo.*", ".*ba.*", + ".*ot.*", ".*[Tt]h[rR].*"] +extend-ignore-words-re = [] +extend-ignore-re = [] + +[default.extend-identifiers] +bbc5b7ede = "bbc5b7ede" +womens_doubles = "womens_doubles" +v_2nd = "v_2nd" +splitted_input = "splitted_input" +NOOPs = "NOOPs" +typ = "typ" +nin_shortcut = "nin_shortcut" +UperNetDecoder = "UperNetDecoder" +subtile = "subtile" +cudaDevAttrMaxSharedMemoryPerBlockOptin = "cudaDevAttrMaxSharedMemoryPerBlockOptin" +SFOuput = "SFOuput" +# huggingface transformers repo uses these words +depthwise_seperable_out_channel = "depthwise_seperable_out_channel" +DepthWiseSeperableConv1d = "DepthWiseSeperableConv1d" +depthwise_seperable_CNN = "depthwise_seperable_CNN" + +[default.extend-words] +iy = "iy" +tendencias = "tendencias" +# intel cpu features +tme = "tme" +dout = "dout" +Pn = "Pn" +arange = "arange" + +[type.py] +extend-glob = [] +extend-ignore-identifiers-re = [] +extend-ignore-words-re = [] +extend-ignore-re = [] + +[type.py.extend-identifiers] +arange = "arange" +NDArray = "NDArray" +EOFError = "EOFError" + +[type.py.extend-words] + +[type.cpp] +extend-glob = [] +extend-ignore-identifiers-re = [] +extend-ignore-words-re = [] +extend-ignore-re = [] + +[type.cpp.extend-identifiers] +countr_one = "countr_one" + +[type.cpp.extend-words] + +[type.rust] +extend-glob = [] +extend-ignore-identifiers-re = [] +extend-ignore-words-re = [] +extend-ignore-re = [] + +[type.rust.extend-identifiers] +flate2 = "flate2" + +[type.rust.extend-words] +ser = "ser" + +[type.lock] +extend-glob = [] +check-file = false +extend-ignore-identifiers-re = [] +extend-ignore-words-re = [] +extend-ignore-re = [] + +[type.lock.extend-identifiers] + +[type.lock.extend-words] + +[type.jl] +extend-glob = [] +extend-ignore-identifiers-re = [] +extend-ignore-words-re = [] +extend-ignore-re = [] + 
+[type.jl.extend-identifiers] + +[type.jl.extend-words] +modul = "modul" +egals = "egals" +usig = "usig" +egal = "egal" + +[type.go] +extend-glob = [] +extend-ignore-identifiers-re = [] +extend-ignore-words-re = [] +extend-ignore-re = [] + +[type.go.extend-identifiers] +flate = "flate" + +[type.go.extend-words] + +[type.css] +extend-glob = [] +extend-ignore-identifiers-re = [] +extend-ignore-words-re = [] +extend-ignore-re = [] + +[type.css.extend-identifiers] +nd = "nd" + +[type.css.extend-words] + +[type.man] +extend-glob = [] +extend-ignore-identifiers-re = [] +extend-ignore-words-re = [] +extend-ignore-re = [] + +[type.man.extend-identifiers] +Nd = "Nd" + +[type.man.extend-words] + +[type.cert] +extend-glob = [] +check-file = false +extend-ignore-identifiers-re = [] +extend-ignore-words-re = [] +extend-ignore-re = [] + +[type.cert.extend-identifiers] + +[type.cert.extend-words] + +[type.sh] +extend-glob = [] +extend-ignore-identifiers-re = [] +extend-ignore-words-re = [] +extend-ignore-re = [] + +[type.sh.extend-identifiers] +stap = "stap" +ot = "ot" + +[type.sh.extend-words] + +[type.vimscript] +extend-glob = [] +extend-ignore-identifiers-re = [] +extend-ignore-words-re = [] +extend-ignore-re = [] + +[type.vimscript.extend-identifiers] +windo = "windo" + +[type.vimscript.extend-words] diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py index 92de1f5efa..d6bbfbc328 100644 --- a/vllm/_custom_ops.py +++ b/vllm/_custom_ops.py @@ -1550,10 +1550,10 @@ def moe_wna16_gemm(input: torch.Tensor, output: torch.Tensor, def topk_softmax(topk_weights: torch.Tensor, topk_ids: torch.Tensor, - token_expert_indicies: torch.Tensor, + token_expert_indices: torch.Tensor, gating_output: torch.Tensor) -> None: - torch.ops._moe_C.topk_softmax(topk_weights, topk_ids, - token_expert_indicies, gating_output) + torch.ops._moe_C.topk_softmax(topk_weights, topk_ids, token_expert_indices, + gating_output) def moe_wna16_marlin_gemm(input: torch.Tensor, output: Optional[torch.Tensor], diff --git a/vllm/attention/backends/utils.py b/vllm/attention/backends/utils.py index e3f02a1936..34e059067d 100644 --- a/vllm/attention/backends/utils.py +++ b/vllm/attention/backends/utils.py @@ -373,7 +373,7 @@ class CommonAttentionState(AttentionState): f"Expected attn_backend name to be either 'XFORMERS'," \ f"'ROCM_FLASH', or 'FLASH_ATTN', but " \ f"got '{self.runner.attn_backend.get_name()}'" - self._add_additonal_input_buffers_for_enc_dec_model( + self._add_additional_input_buffers_for_enc_dec_model( attn_metadata=attn_metadata, input_buffers=input_buffers) return input_buffers @@ -427,7 +427,7 @@ class CommonAttentionState(AttentionState): attn_metadata.max_encoder_seq_len = self.runner.max_seq_len_to_capture attn_metadata.num_encoder_tokens = 0 - def _add_additonal_input_buffers_for_enc_dec_model( + def _add_additional_input_buffers_for_enc_dec_model( self, attn_metadata, input_buffers: Dict[str, Any]): """ Saves additional input buffers specific to the encoder-decoder model diff --git a/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py index e5dcdf9a07..92004de030 100644 --- a/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py @@ -40,7 +40,7 @@ class Internlm2ToolParser(ToolParser): request.skip_special_tokens = False return request - def get_argments(self, obj): + def get_arguments(self, obj): if "parameters" in obj: return obj.get("parameters") elif "arguments" in obj: 
@@ -119,9 +119,9 @@ class Internlm2ToolParser(ToolParser): # now we know we're on the same tool call and we're streaming # arguments else: - prev_arguments = self.get_argments( + prev_arguments = self.get_arguments( self.prev_tool_call_arr[self.current_tool_id]) - cur_arguments = self.get_argments(tool_call_arr) + cur_arguments = self.get_arguments(tool_call_arr) # not arguments generated if not cur_arguments and not prev_arguments: @@ -170,7 +170,7 @@ class Internlm2ToolParser(ToolParser): # check to see if the name is defined and has been sent. if so, # stream the name - otherwise keep waiting # finish by setting old and returning None as base case - tool_call_arr["arguments"] = self.get_argments(tool_call_arr) + tool_call_arr["arguments"] = self.get_arguments(tool_call_arr) self.prev_tool_call_arr = [tool_call_arr] return delta except Exception: diff --git a/vllm/lora/layers.py b/vllm/lora/layers.py index 66e037a97d..3d0c583175 100644 --- a/vllm/lora/layers.py +++ b/vllm/lora/layers.py @@ -1202,7 +1202,7 @@ class LinearScalingRotaryEmbeddingWithLoRA(BaseLayerWithLoRA): multiple LoRA adapters with a specialized kernel. Replace LinearScalingRotaryEmbedding with MultiLinearScalingRotaryEmbedding - which can handle multi lora adapters in a specialied kernel. + which can handle multi lora adapters in a specialized kernel. """ def __init__(self, base_layer: RotaryEmbedding) -> None: diff --git a/vllm/lora/punica_wrapper/utils.py b/vllm/lora/punica_wrapper/utils.py index 0b0a7989f3..8430cb9186 100644 --- a/vllm/lora/punica_wrapper/utils.py +++ b/vllm/lora/punica_wrapper/utils.py @@ -68,11 +68,11 @@ def convert_mapping( LoRA indices. sampler_indices: Tensor of shape [batch_size] mapping requests to LoRA indices for sampler. For generation, this will be the - same as base_indicies. For prefill, this will map requests + same as base_indices. For prefill, this will map requests to LoRA indices. sampler_indices_padded: Tensor of shape [batch_size] mapping requests to LoRA indices for sampler with padding. - Same as sampler_indicies, but -1 is replaced with + Same as sampler_indices, but -1 is replaced with max_loras. embeddings_indices: Tensor of shape [2, batch_size] mapping requests to embedding indices. 
First row is for embeddings diff --git a/vllm/model_executor/layers/mamba/mamba_mixer2.py b/vllm/model_executor/layers/mamba/mamba_mixer2.py index 6d9ea53878..cd3b0b3907 100644 --- a/vllm/model_executor/layers/mamba/mamba_mixer2.py +++ b/vllm/model_executor/layers/mamba/mamba_mixer2.py @@ -319,7 +319,7 @@ class MambaMixer2(CustomOp): n_groups == 1, # if there was only one group ) intermediate_settings = (intermediate_size, 0, False) - head_setings = (self.num_heads, 0, False) + head_settings = (self.num_heads, 0, False) # - the weight already has a "weight_loader" attribute # which set_weight_attrs will raise if we do not @@ -372,7 +372,7 @@ class MambaMixer2(CustomOp): intermediate_settings, group_shard_settings, group_shard_settings, - head_setings, # for dt + head_settings, # for dt ], self.tp_size, tp_rank, diff --git a/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py b/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py index 58bfb661d3..ad58a9918f 100644 --- a/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py +++ b/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py @@ -516,7 +516,7 @@ def _chunk_state_varlen_kernel( offs_n[None, :] * stride_chunk_states_dstate) else: - # - this seems repetitve, buts its to help the compiler + # - this seems repetitive, buts its to help the compiler if start_idx < pid_c * chunk_size: past_states_ptrs = chunk_states_ptr + ( offs_m[:, None] * stride_chunk_states_hdim + diff --git a/vllm/model_executor/layers/quantization/utils/int8_utils.py b/vllm/model_executor/layers/quantization/utils/int8_utils.py index a694a19174..1fdf7d174e 100644 --- a/vllm/model_executor/layers/quantization/utils/int8_utils.py +++ b/vllm/model_executor/layers/quantization/utils/int8_utils.py @@ -219,7 +219,7 @@ def per_token_group_quant_int8( quantized tensor along with the scaling factor used for quantization. Args: - x: The input tenosr with ndim >= 2. + x: The input tensor with ndim >= 2. group_size: The group size used for quantization. eps: The minimum to avoid dividing zero. dtype: The dype of output tensor. Note that only `torch.int8` diff --git a/vllm/model_executor/model_loader/bitsandbytes_loader.py b/vllm/model_executor/model_loader/bitsandbytes_loader.py index 068a4e355f..3146c35a4e 100644 --- a/vllm/model_executor/model_loader/bitsandbytes_loader.py +++ b/vllm/model_executor/model_loader/bitsandbytes_loader.py @@ -401,7 +401,7 @@ class BitsAndBytesModelLoader(BaseModelLoader): self.target_modules.append( name.replace(rep_name, sub_name)) # Add original module name even if the module has stacked map, - # in case model has a mixture of disk-merged and disk-splitted + # in case model has a mixture of disk-merged and disk-split # weights with same last name. self.target_modules.append(name) diff --git a/vllm/model_executor/models/baichuan.py b/vllm/model_executor/models/baichuan.py index 0de5de5e83..804a2f1785 100644 --- a/vllm/model_executor/models/baichuan.py +++ b/vllm/model_executor/models/baichuan.py @@ -131,7 +131,7 @@ class BaiChuanAttention(nn.Module): self.num_heads = (self.total_num_heads // tensor_model_parallel_world_size) self.head_dim = hidden_size // self.total_num_heads - self.postion_embedding = position_embedding + self.position_embedding = position_embedding self.rope_theta = rope_theta self.max_position_embeddings = max_position_embeddings @@ -151,7 +151,7 @@ class BaiChuanAttention(nn.Module): quant_config=quant_config, ) # Create the alibi slopes and slice them. 
- if self.postion_embedding == "ALIBI": + if self.position_embedding == "ALIBI": tp_rank = get_tensor_model_parallel_rank() head_start = tp_rank * self.num_heads head_end = (tp_rank + 1) * self.num_heads @@ -187,7 +187,7 @@ class BaiChuanAttention(nn.Module): ) -> torch.Tensor: qkv, _ = self.W_pack(hidden_states) q, k, v = qkv.chunk(chunks=3, dim=-1) - if self.postion_embedding != "ALIBI": + if self.position_embedding != "ALIBI": q, k = self.rotary_emb(positions, q, k) attn_output = self.attn(q, k, v) output, _ = self.o_proj(attn_output) diff --git a/vllm/model_executor/models/deepseek_vl2.py b/vllm/model_executor/models/deepseek_vl2.py index 765718e575..d8c01f83ed 100644 --- a/vllm/model_executor/models/deepseek_vl2.py +++ b/vllm/model_executor/models/deepseek_vl2.py @@ -344,7 +344,7 @@ class DeepseekVLV2ForCausalLM(nn.Module, SupportsMultiModal, SupportsPP): self.image_newline = nn.Parameter( torch.randn(self.projector_config.n_embed) * embed_std) # This is a typo in original implementation - self.view_seperator = nn.Parameter( + self.view_separator = nn.Parameter( torch.randn(self.projector_config.n_embed) * embed_std) else: raise ValueError( @@ -549,13 +549,13 @@ class DeepseekVLV2ForCausalLM(nn.Module, SupportsMultiModal, SupportsPP): if self.global_view_pos == "head": global_local_features = torch.cat([ global_features, - self.view_seperator[None, :], + self.view_separator[None, :], local_features, ]) else: global_local_features = torch.cat([ local_features, - self.view_seperator[None, :], + self.view_separator[None, :], global_features, ]) diff --git a/vllm/model_executor/models/eagle.py b/vllm/model_executor/models/eagle.py index 2219321457..d219b5228a 100644 --- a/vllm/model_executor/models/eagle.py +++ b/vllm/model_executor/models/eagle.py @@ -197,7 +197,7 @@ class EAGLE(nn.Module): return logits def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): - # This implementation is incompitable with https://huggingface.co/yuhuili/EAGLE-LLaMA3-Instruct-8B + # This implementation is incompatible with https://huggingface.co/yuhuili/EAGLE-LLaMA3-Instruct-8B # due to missing lm_head weights and its config being that of a # Llama model. Here's a compatible version with the same weights: # https://huggingface.co/abhigoyal/EAGLE-LLaMA3-Instruct-8B-vllm diff --git a/vllm/model_executor/models/gemma3_mm.py b/vllm/model_executor/models/gemma3_mm.py index 23e2517079..18cb6ea68d 100644 --- a/vllm/model_executor/models/gemma3_mm.py +++ b/vllm/model_executor/models/gemma3_mm.py @@ -634,13 +634,13 @@ class Gemma3ForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP, kwargs["has_images"] = True # NOTE(woosuk): Here, we distinguish the sequences by the position id 0. # This is a HACK. Fix this. 
- start_idices = (positions == 0).cpu().nonzero() - num_seqs = len(start_idices) + start_indices = (positions == 0).cpu().nonzero() + num_seqs = len(start_indices) seq_lens = [] for i in range(num_seqs): - start_idx = start_idices[i].item() + start_idx = start_indices[i].item() if i < num_seqs - 1: - end_idx = start_idices[i + 1].item() + end_idx = start_indices[i + 1].item() else: end_idx = len(input_ids) seq_lens.append(end_idx - start_idx) diff --git a/vllm/model_executor/models/llama4.py b/vllm/model_executor/models/llama4.py index a852be66bd..9fb73261cd 100644 --- a/vllm/model_executor/models/llama4.py +++ b/vllm/model_executor/models/llama4.py @@ -52,7 +52,7 @@ class Llama4MoE(nn.Module): renormalize: bool, ) -> tuple[torch.Tensor, torch.Tensor]: router_scores, router_indices = fast_topk(gating_output, topk, dim=-1) - # psuedo-standard is that the router scores are floats + # pseudo-standard is that the router scores are floats router_scores = torch.sigmoid(router_scores.float()) return (router_scores, router_indices.to(torch.int32)) diff --git a/vllm/model_executor/models/mixtral_quant.py b/vllm/model_executor/models/mixtral_quant.py index 3183c762d2..c8ad358c62 100644 --- a/vllm/model_executor/models/mixtral_quant.py +++ b/vllm/model_executor/models/mixtral_quant.py @@ -114,9 +114,9 @@ class MixtralMoE(nn.Module): f"Tensor parallel size {self.tp_size} is greater than " f"the number of experts {self.num_total_experts}.") # Split experts equally between ranks - self.expert_indicies = np.array_split(range( - self.num_total_experts), self.tp_size)[self.rank].tolist() - if not self.expert_indicies: + self.expert_indices = np.array_split(range(self.num_total_experts), + self.tp_size)[self.rank].tolist() + if not self.expert_indices: raise ValueError( f"Rank {self.rank} has no experts assigned to it.") @@ -125,7 +125,7 @@ class MixtralMoE(nn.Module): config.hidden_size, config.intermediate_size, quant_config=quant_config) - if idx in self.expert_indicies else None + if idx in self.expert_indices else None for idx in range(self.num_total_experts) ]) self.gate = ReplicatedLinear(config.hidden_size, @@ -146,7 +146,7 @@ class MixtralMoE(nn.Module): routing_weights /= routing_weights.sum(dim=-1, keepdim=True) final_hidden_states = None - for expert_idx in self.expert_indicies: + for expert_idx in self.expert_indices: expert_layer = self.experts[expert_idx] expert_mask = (selected_experts == expert_idx) expert_weights = (routing_weights * expert_mask).sum(dim=-1, diff --git a/vllm/model_executor/models/ovis.py b/vllm/model_executor/models/ovis.py index 5c11d54c61..770e08aa2a 100644 --- a/vllm/model_executor/models/ovis.py +++ b/vllm/model_executor/models/ovis.py @@ -283,7 +283,7 @@ class OvisProcessingInfo(BaseProcessingInfo): def get_image_size_with_most_features(self) -> ImageSize: height, width = self.get_hf_processor().get_image_size() hs = self.get_hf_config().visual_tokenizer_config.hidden_stride - # NOTE(Isotr0py): 9 is `max_partion` hardcoded in original code + # NOTE(Isotr0py): 9 is `max_partition` hardcoded in original code # https://huggingface.co/AIDC-AI/Ovis2-1B/blob/main/modeling_ovis.py#L96 return ImageSize(width=width * hs * 9, height=height * hs * 9) diff --git a/vllm/model_executor/models/phi3_small.py b/vllm/model_executor/models/phi3_small.py index 533655fd52..754ddda233 100644 --- a/vllm/model_executor/models/phi3_small.py +++ b/vllm/model_executor/models/phi3_small.py @@ -145,7 +145,7 @@ class Phi3SmallSelfAttention(nn.Module): self.num_q_per_kv = self.num_heads // 
self.num_key_value_heads if self.tp_size > 1: assert self.num_key_value_heads % self.tp_size == 0 - self.num_kv_heads_per_partion = max( + self.num_kv_heads_per_partition = max( 1, self.num_key_value_heads // self.tp_size) self.num_heads_per_partition = self.num_heads // self.tp_size @@ -212,7 +212,7 @@ class Phi3SmallSelfAttention(nn.Module): bs_params = { 'max_seqlen': self.max_position_embeddings, 'num_heads': self.num_heads_per_partition, - "num_kv_heads": self.num_kv_heads_per_partion, + "num_kv_heads": self.num_kv_heads_per_partition, "block_size": self.sparse_block_size, "local_blocks": self.local_blocks, "vert_stride": self.vert_stride, @@ -222,7 +222,7 @@ class Phi3SmallSelfAttention(nn.Module): self.attn = Attention(self.num_heads_per_partition, self.head_dim, self.scale, - num_kv_heads=self.num_kv_heads_per_partion, + num_kv_heads=self.num_kv_heads_per_partition, cache_config=cache_config, quant_config=quant_config, blocksparse_params=bs_params, @@ -243,8 +243,8 @@ class Phi3SmallSelfAttention(nn.Module): # NOTE: this is required by RotaryEmbed, which indeed does not have to # TODO: allow 3D QK for rotary forward q = q.reshape(-1, self.head_dim * self.num_heads_per_partition) - k = k.reshape(-1, self.head_dim * self.num_kv_heads_per_partion) - v = v.reshape(-1, self.head_dim * self.num_kv_heads_per_partion) + k = k.reshape(-1, self.head_dim * self.num_kv_heads_per_partition) + v = v.reshape(-1, self.head_dim * self.num_kv_heads_per_partition) q, k = self.rotary_emb(positions, q, k) attn_output = self.attn(q, k, v) diff --git a/vllm/model_executor/models/phi4mm_audio.py b/vllm/model_executor/models/phi4mm_audio.py index ae7a8a732c..0b0d66ae77 100644 --- a/vllm/model_executor/models/phi4mm_audio.py +++ b/vllm/model_executor/models/phi4mm_audio.py @@ -41,7 +41,7 @@ class ConformerEncoderLayer(nn.Module): for the last pointwise conv after swish activation. depthwise_seperable_out_channel: int if set different to 0, the number of - depthwise_seperable_out_channel will be used as a + depthwise_seperable_out_channel will be used as a channel_out of the second conv1d layer. otherwise, it equal to 0, the second conv1d layer is skipped. 
depthwise_multiplier: int @@ -126,7 +126,7 @@ class ConformerEncoderLayer(nn.Module): (Multi-Head Attention), 1 = typical Multi-Head Attention, 1 < attn_group_sizes < attention_heads = Grouped-Query Attention - attn_group_sizes = attenion_heads = Multi-Query Attention + attn_group_sizes = attention_heads = Multi-Query Attention """ def __init__( @@ -318,7 +318,7 @@ class TransformerEncoderBase(abc.ABC, nn.Module): 1 = typical Multi-Head Attention, 1 < attention_group_size < attention_heads = Grouped-Query Attention - attention_group_size = attenion_heads = Multi-Query Attention + attention_group_size = attention_heads = Multi-Query Attention """ def __init__( @@ -744,7 +744,7 @@ class ConformerEncoder(TransformerEncoderBase): 1 = typical Multi-Head Attention, 1 < attention_group_size < attention_heads = Grouped-Query Attention - attention_group_size = attenion_heads = Multi-Query Attention + attention_group_size = attention_heads = Multi-Query Attention """ extra_multi_layer_output_idxs: list[int] diff --git a/vllm/model_executor/models/phimoe.py b/vllm/model_executor/models/phimoe.py index dddd19c746..cdb7e0d18d 100644 --- a/vllm/model_executor/models/phimoe.py +++ b/vllm/model_executor/models/phimoe.py @@ -147,15 +147,15 @@ class mp(torch.autograd.Function): grad_at_output = grad_at_output * multiplier - grad_at_scores_expaned = masked_gates * grad_at_output.mul(-1) - grad_at_scores_expaned.scatter_add_( + grad_at_scores_expanded = masked_gates * grad_at_output.mul(-1) + grad_at_scores_expanded.scatter_add_( dim=-1, index=selected_experts, src=grad_at_output, ) return ( - grad_at_scores_expaned, + grad_at_scores_expanded, None, None, None, diff --git a/vllm/multimodal/utils.py b/vllm/multimodal/utils.py index 11a25f8515..5e61d460fa 100644 --- a/vllm/multimodal/utils.py +++ b/vllm/multimodal/utils.py @@ -324,7 +324,7 @@ def merge_and_sort_multimodal_metadata( Returns: list[str]: List of item modalities in order of their positions in the input sequence. - list[PlaceholderRange]: Sorted list of all PlaceholdeRanges from + list[PlaceholderRange]: Sorted list of all PlaceholderRanges from mm_positions. Optional[list[str]]: Sorted list of all hashes from mm_hashes if given, None otherwise. 
diff --git a/vllm/transformers_utils/processors/ovis.py b/vllm/transformers_utils/processors/ovis.py index 4fe76d0df6..557d251c45 100644 --- a/vllm/transformers_utils/processors/ovis.py +++ b/vllm/transformers_utils/processors/ovis.py @@ -68,7 +68,7 @@ class OvisProcessor(ProcessorMixin): """ attributes = ["image_processor", "tokenizer"] - valid_kwargs = ["chat_template", "image_pad_token", "image_segement_len"] + valid_kwargs = ["chat_template", "image_pad_token", "image_segment_len"] image_processor_class = "AutoImageProcessor" tokenizer_class = "AutoTokenizer" diff --git a/vllm/worker/hpu_model_runner.py b/vllm/worker/hpu_model_runner.py index 17123d2b48..5860368298 100644 --- a/vllm/worker/hpu_model_runner.py +++ b/vllm/worker/hpu_model_runner.py @@ -886,7 +886,7 @@ class HPUModelRunnerBase(ModelRunnerBase[TModelInputForHPU]): num_decode_tokens=0, slot_mapping=slot_mapping, multi_modal_placeholder_index_maps= - None, # FIXME(kzawora): mutli-modality will not work here + None, # FIXME(kzawora): multi-modality will not work here enable_kv_scales_calculation=False, ) multi_modal_kwargs = MultiModalKwargs.batch(multi_modal_kwargs_list) diff --git a/vllm/worker/multi_step_model_runner.py b/vllm/worker/multi_step_model_runner.py index cc0cc855e7..0680e60b52 100644 --- a/vllm/worker/multi_step_model_runner.py +++ b/vllm/worker/multi_step_model_runner.py @@ -277,7 +277,7 @@ class StatefulModelInput(BroadcastableModelInput): assert fmi.input_tokens.shape[0] >= self.num_seqs fmi_new_input_tokens: torch.Tensor = fmi.input_tokens[:self.num_seqs] - # Update frozen_model_input::input_positons. + # Update frozen_model_input::input_positions. assert fmi.input_positions is not None assert fmi.input_positions.shape[0] >= self.num_seqs fmi_new_input_positions: torch.Tensor = fmi.input_positions[:self. diff --git a/vllm/worker/tpu_model_runner.py b/vllm/worker/tpu_model_runner.py index 5f1535271b..336bc0bcec 100644 --- a/vllm/worker/tpu_model_runner.py +++ b/vllm/worker/tpu_model_runner.py @@ -798,9 +798,9 @@ class ModelWrapper(nn.Module): """ batch_size, seq_len = token_ids.shape # Calculate the positions to sample from. - start_indicies = torch.arange( + start_indices = torch.arange( batch_size, dtype=torch.int32, device=input_lens.device) * seq_len - logits_indices = start_indicies + input_lens - 1 + logits_indices = start_indices + input_lens - 1 attn_metadata = get_forward_context().attn_metadata # FIXME(woosuk): This is a temporary hack to avoid using the existing @@ -822,14 +822,14 @@ class ModelWrapper(nn.Module): num_kv_heads, num_blocks, block_size, _ = kv_caches[0][0].shape slot_mapping = attn_metadata.slot_mapping slot_mapping = slot_mapping.flatten() - head_indicies = torch.arange(0, - num_kv_heads, - device=slot_mapping.device, - dtype=slot_mapping.dtype) - head_indicies *= block_size * num_blocks + head_indices = torch.arange(0, + num_kv_heads, + device=slot_mapping.device, + dtype=slot_mapping.dtype) + head_indices *= block_size * num_blocks slot_mapping = slot_mapping.repeat_interleave(num_kv_heads).view( -1, num_kv_heads) - slot_mapping = slot_mapping + head_indicies.view(1, -1) + slot_mapping = slot_mapping + head_indices.view(1, -1) slot_mapping = slot_mapping.flatten() attn_metadata.slot_mapping = slot_mapping
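For reference on the most substantive rename in this patch (token_expert_indicies -> token_expert_indices in the MoE top-k gating path), a minimal usage sketch follows. This is an illustrative sketch, not part of the patch: it assumes a CUDA build of vLLM with the compiled _moe_C extension available through vllm._custom_ops, and the shapes, dtypes, and sizes are arbitrary choices that mirror the [n_token, topk] comments in csrc/moe/moe_permute_unpermute_op.cu above.

# Hypothetical sketch: exercising the renamed token_expert_indices
# argument of vllm._custom_ops.topk_softmax (CUDA build assumed).
import torch

from vllm import _custom_ops as ops

n_token, n_expert, topk = 4, 8, 2

# Router logits produced by the gating layer (float32, on GPU).
gating_output = torch.randn(n_token, n_expert,
                            dtype=torch.float32, device="cuda")

# Output buffers that the kernel fills in place.
topk_weights = torch.empty(n_token, topk,
                           dtype=torch.float32, device="cuda")
topk_ids = torch.empty(n_token, topk, dtype=torch.int32, device="cuda")
token_expert_indices = torch.empty(n_token, topk,
                                   dtype=torch.int32, device="cuda")

ops.topk_softmax(topk_weights, topk_ids, token_expert_indices,
                 gating_output)

# topk_weights now holds the softmax-normalized routing weights and
# topk_ids the selected expert id for each token.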