diff --git a/.buildkite/check-wheel-size.py b/.buildkite/check-wheel-size.py
index a378bc6baa..e29881fcba 100644
--- a/.buildkite/check-wheel-size.py
+++ b/.buildkite/check-wheel-size.py
@@ -8,12 +8,12 @@ import zipfile
 # Note that we have 400 MiB quota, please use it wisely.
 # See https://github.com/pypi/support/issues/3792 .
 # Please also sync the value with the one in Dockerfile.
-VLLM_MAX_SIZE_MB = int(os.environ.get('VLLM_MAX_SIZE_MB', 400))
+VLLM_MAX_SIZE_MB = int(os.environ.get("VLLM_MAX_SIZE_MB", 400))
 
 
 def print_top_10_largest_files(zip_file):
     """Print the top 10 largest files in the given zip file."""
-    with zipfile.ZipFile(zip_file, 'r') as z:
+    with zipfile.ZipFile(zip_file, "r") as z:
         file_sizes = [(f, z.getinfo(f).file_size) for f in z.namelist()]
         file_sizes.sort(key=lambda x: x[1], reverse=True)
         for f, size in file_sizes[:10]:
@@ -28,14 +28,18 @@ def check_wheel_size(directory):
                 wheel_path = os.path.join(root, file_name)
                 wheel_size_mb = os.path.getsize(wheel_path) / (1024 * 1024)
                 if wheel_size_mb > VLLM_MAX_SIZE_MB:
-                    print(f"Not allowed: Wheel {wheel_path} is larger "
-                          f"({wheel_size_mb:.2f} MB) than the limit "
-                          f"({VLLM_MAX_SIZE_MB} MB).")
+                    print(
+                        f"Not allowed: Wheel {wheel_path} is larger "
+                        f"({wheel_size_mb:.2f} MB) than the limit "
+                        f"({VLLM_MAX_SIZE_MB} MB)."
+                    )
                     print_top_10_largest_files(wheel_path)
                     return 1
                 else:
-                    print(f"Wheel {wheel_path} is within the allowed size "
-                          f"({wheel_size_mb:.2f} MB).")
+                    print(
+                        f"Wheel {wheel_path} is within the allowed size "
+                        f"({wheel_size_mb:.2f} MB)."
+                    )
     return 0
 
 
@@ -45,4 +49,4 @@ if __name__ == "__main__":
         sys.exit(1)
 
     directory = sys.argv[1]
-    sys.exit(check_wheel_size(directory))
\ No newline at end of file
+    sys.exit(check_wheel_size(directory))
diff --git a/.buildkite/generate_index.py b/.buildkite/generate_index.py
index 36e1b6c013..270663c415 100644
--- a/.buildkite/generate_index.py
+++ b/.buildkite/generate_index.py
@@ -22,5 +22,5 @@ with open("index.html", "w") as f:
     print(f"Generated index.html for {args.wheel}")
     # cloudfront requires escaping the '+' character
     f.write(
-        template.format(wheel=filename,
-                        wheel_html_escaped=filename.replace("+", "%2B")))
+        template.format(wheel=filename, wheel_html_escaped=filename.replace("+", "%2B"))
+    )
diff --git a/.buildkite/lm-eval-harness/conftest.py b/.buildkite/lm-eval-harness/conftest.py
index a0bcc993ed..769d2efda4 100644
--- a/.buildkite/lm-eval-harness/conftest.py
+++ b/.buildkite/lm-eval-harness/conftest.py
@@ -8,11 +8,14 @@ def pytest_addoption(parser):
     parser.addoption(
         "--config-list-file",
         action="store",
-        help="Path to the file listing model config YAMLs (one per line)")
-    parser.addoption("--tp-size",
-                     action="store",
-                     default="1",
-                     help="Tensor parallel size to use for evaluation")
+        help="Path to the file listing model config YAMLs (one per line)",
+    )
+    parser.addoption(
+        "--tp-size",
+        action="store",
+        default="1",
+        help="Tensor parallel size to use for evaluation",
+    )
 
 
 @pytest.fixture(scope="session")
@@ -33,7 +36,8 @@ def pytest_generate_tests(metafunc):
         config_dir = config_list_file.parent
         with open(config_list_file, encoding="utf-8") as f:
             configs = [
-                config_dir / line.strip() for line in f
+                config_dir / line.strip()
+                for line in f
                 if line.strip() and not line.startswith("#")
             ]
         metafunc.parametrize("config_filename", configs)
diff --git a/.buildkite/lm-eval-harness/test_lm_eval_correctness.py b/.buildkite/lm-eval-harness/test_lm_eval_correctness.py
index c5411daf0d..409a6ca820 100644
--- a/.buildkite/lm-eval-harness/test_lm_eval_correctness.py
+++ b/.buildkite/lm-eval-harness/test_lm_eval_correctness.py
@@ -16,19 +16,22 @@
 RTOL = 0.08
 
 
 def launch_lm_eval(eval_config, tp_size):
-    trust_remote_code = eval_config.get('trust_remote_code', False)
-    model_args = f"pretrained={eval_config['model_name']}," \
-                 f"tensor_parallel_size={tp_size}," \
-                 f"enforce_eager=true," \
-                 f"add_bos_token=true," \
-                 f"trust_remote_code={trust_remote_code}"
+    trust_remote_code = eval_config.get("trust_remote_code", False)
+    model_args = (
+        f"pretrained={eval_config['model_name']},"
+        f"tensor_parallel_size={tp_size},"
+        f"enforce_eager=true,"
+        f"add_bos_token=true,"
+        f"trust_remote_code={trust_remote_code}"
+    )
     results = lm_eval.simple_evaluate(
         model="vllm",
         model_args=model_args,
         tasks=[task["name"] for task in eval_config["tasks"]],
         num_fewshot=eval_config["num_fewshot"],
         limit=eval_config["limit"],
-        batch_size="auto")
+        batch_size="auto",
+    )
 
     return results
@@ -42,9 +45,10 @@ def test_lm_eval_correctness_param(config_filename, tp_size):
         for metric in task["metrics"]:
             ground_truth = metric["value"]
             measured_value = results["results"][task["name"]][metric["name"]]
-            print(f'{task["name"]} | {metric["name"]}: '
-                  f'ground_truth={ground_truth} | measured={measured_value}')
-            success = success and np.isclose(
-                ground_truth, measured_value, rtol=RTOL)
+            print(
+                f"{task['name']} | {metric['name']}: "
+                f"ground_truth={ground_truth} | measured={measured_value}"
+            )
+            success = success and np.isclose(ground_truth, measured_value, rtol=RTOL)
 
     assert success
diff --git a/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py b/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
index 1030ec24e8..7f2a2d8dc2 100644
--- a/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
+++ b/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
@@ -65,18 +65,18 @@ def read_markdown(file):
 
 
 def results_to_json(latency, throughput, serving):
-    return json.dumps({
-        'latency': latency.to_dict(),
-        'throughput': throughput.to_dict(),
-        'serving': serving.to_dict()
-    })
+    return json.dumps(
+        {
+            "latency": latency.to_dict(),
+            "throughput": throughput.to_dict(),
+            "serving": serving.to_dict(),
+        }
+    )
 
 
 if __name__ == "__main__":
-
     # collect results
     for test_file in results_folder.glob("*.json"):
-
         with open(test_file) as f:
             raw_result = json.loads(f.read())
 
@@ -120,7 +120,8 @@ if __name__ == "__main__":
             for perc in [10, 25, 50, 75, 90, 99]:
                 # Multiply 1000 to convert the time unit from s to ms
                 raw_result.update(
-                    {f"P{perc}": 1000 * raw_result["percentiles"][str(perc)]})
+                    {f"P{perc}": 1000 * raw_result["percentiles"][str(perc)]}
+                )
             raw_result["avg_latency"] = raw_result["avg_latency"] * 1000
 
             # add the result to raw_result
@@ -153,26 +154,27 @@ if __name__ == "__main__":
     serving_results = pd.DataFrame.from_dict(serving_results)
     throughput_results = pd.DataFrame.from_dict(throughput_results)
 
-    raw_results_json = results_to_json(latency_results, throughput_results,
-                                       serving_results)
+    raw_results_json = results_to_json(
+        latency_results, throughput_results, serving_results
+    )
 
     # remapping the key, for visualization purpose
     if not latency_results.empty:
-        latency_results = latency_results[list(
-            latency_column_mapping.keys())].rename(
-                columns=latency_column_mapping)
+        latency_results = latency_results[list(latency_column_mapping.keys())].rename(
+            columns=latency_column_mapping
+        )
     if not serving_results.empty:
-        serving_results = serving_results[list(
-            serving_column_mapping.keys())].rename(
-                columns=serving_column_mapping)
+        serving_results = serving_results[list(serving_column_mapping.keys())].rename(
+            columns=serving_column_mapping
+        )
     if not throughput_results.empty:
-        throughput_results = throughput_results[list(
-            throughput_results_column_mapping.keys())].rename(
-                columns=throughput_results_column_mapping)
+        throughput_results = throughput_results[
+            list(throughput_results_column_mapping.keys())
+        ].rename(columns=throughput_results_column_mapping)
 
-    processed_results_json = results_to_json(latency_results,
-                                             throughput_results,
-                                             serving_results)
+    processed_results_json = results_to_json(
+        latency_results, throughput_results, serving_results
+    )
 
     for df in [latency_results, serving_results, throughput_results]:
         if df.empty:
@@ -184,38 +186,39 @@ if __name__ == "__main__":
         # The GPUs sometimes come in format of "GPUTYPE\nGPUTYPE\n...",
         # we want to turn it into "8xGPUTYPE"
         df["GPU"] = df["GPU"].apply(
-            lambda x: f"{len(x.split('\n'))}x{x.split('\n')[0]}")
+            lambda x: f"{len(x.split('\n'))}x{x.split('\n')[0]}"
+        )
 
     # get markdown tables
-    latency_md_table = tabulate(latency_results,
-                                headers='keys',
-                                tablefmt='pipe',
-                                showindex=False)
-    serving_md_table = tabulate(serving_results,
-                                headers='keys',
-                                tablefmt='pipe',
-                                showindex=False)
-    throughput_md_table = tabulate(throughput_results,
-                                   headers='keys',
-                                   tablefmt='pipe',
-                                   showindex=False)
+    latency_md_table = tabulate(
+        latency_results, headers="keys", tablefmt="pipe", showindex=False
+    )
+    serving_md_table = tabulate(
+        serving_results, headers="keys", tablefmt="pipe", showindex=False
+    )
+    throughput_md_table = tabulate(
+        throughput_results, headers="keys", tablefmt="pipe", showindex=False
+    )
 
     # document the result
     with open(results_folder / "benchmark_results.md", "w") as f:
-
-        results = read_markdown("../.buildkite/nightly-benchmarks/" +
-                                "performance-benchmarks-descriptions.md")
+        results = read_markdown(
+            "../.buildkite/nightly-benchmarks/"
+            + "performance-benchmarks-descriptions.md"
+        )
         results = results.format(
             latency_tests_markdown_table=latency_md_table,
             throughput_tests_markdown_table=throughput_md_table,
             serving_tests_markdown_table=serving_md_table,
-            benchmarking_results_in_json_string=processed_results_json)
+            benchmarking_results_in_json_string=processed_results_json,
+        )
         f.write(results)
 
     # document benchmarking results in json
     with open(results_folder / "benchmark_results.json", "w") as f:
-
-        results = latency_results.to_dict(
-            orient='records') + throughput_results.to_dict(
-                orient='records') + serving_results.to_dict(orient='records')
+        results = (
+            latency_results.to_dict(orient="records")
+            + throughput_results.to_dict(orient="records")
+            + serving_results.to_dict(orient="records")
+        )
         f.write(json.dumps(results))
diff --git a/.buildkite/nightly-benchmarks/scripts/download-tokenizer.py b/.buildkite/nightly-benchmarks/scripts/download-tokenizer.py
index 5e17b79d26..778a3a8d87 100644
--- a/.buildkite/nightly-benchmarks/scripts/download-tokenizer.py
+++ b/.buildkite/nightly-benchmarks/scripts/download-tokenizer.py
@@ -14,15 +14,12 @@ def main(model, cachedir):
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
-        description="Download and save Hugging Face tokenizer")
-    parser.add_argument("--model",
-                        type=str,
-                        required=True,
-                        help="Name of the model")
-    parser.add_argument("--cachedir",
-                        type=str,
-                        required=True,
-                        help="Directory to save the tokenizer")
+        description="Download and save Hugging Face tokenizer"
+    )
+    parser.add_argument("--model", type=str, required=True, help="Name of the model")
+    parser.add_argument(
+        "--cachedir", type=str, required=True, help="Directory to save the tokenizer"
+    )
 
     args = parser.parse_args()
     main(args.model, args.cachedir)
diff --git a/.buildkite/nightly-benchmarks/scripts/generate-nightly-markdown.py b/.buildkite/nightly-benchmarks/scripts/generate-nightly-markdown.py
index 0ff95a0911..10a7a2f5a4 100644
--- a/.buildkite/nightly-benchmarks/scripts/generate-nightly-markdown.py
+++ b/.buildkite/nightly-benchmarks/scripts/generate-nightly-markdown.py
@@ -11,33 +11,33 @@ from tabulate import tabulate
 
 
 def parse_arguments():
     parser = argparse.ArgumentParser(
-        description=
-        'Parse command line arguments for summary-nightly-results script.')
-    parser.add_argument('--results-folder',
-                        type=str,
-                        required=True,
-                        help='The folder where the results are stored.')
-    parser.add_argument('--description',
-                        type=str,
-                        required=True,
-                        help='Description of the results.')
+        description="Parse command line arguments for summary-nightly-results script."
+    )
+    parser.add_argument(
+        "--results-folder",
+        type=str,
+        required=True,
+        help="The folder where the results are stored.",
+    )
+    parser.add_argument(
+        "--description", type=str, required=True, help="Description of the results."
+    )
     args = parser.parse_args()
     return args
 
 
 def get_perf(df, method, model, metric):
-
     means = []
 
     for qps in [2, 4, 8, 16, "inf"]:
-        target = df['Test name'].str.contains(model)
-        target = target & df['Engine'].str.contains(method)
-        target = target & df['Test name'].str.contains("qps_" + str(qps))
+        target = df["Test name"].str.contains(model)
+        target = target & df["Engine"].str.contains(method)
+        target = target & df["Test name"].str.contains("qps_" + str(qps))
         filtered_df = df[target]
 
         if filtered_df.empty:
-            means.append(0.)
+            means.append(0.0)
         else:
             means.append(filtered_df[metric].values[0])
 
@@ -45,7 +45,6 @@ def get_perf(df, method, model, metric):
 
 
 def get_perf_w_std(df, method, model, metric):
-
     if metric in ["TTFT", "ITL"]:
         mean = get_perf(df, method, model, "Mean " + metric + " (ms)")
         mean = mean.tolist()
@@ -60,7 +59,8 @@ def get_perf_w_std(df, method, model, metric):
     else:
         assert metric == "Tput"
         mean = get_perf(df, method, model, "Input Tput (tok/s)") + get_perf(
-            df, method, model, "Output Tput (tok/s)")
+            df, method, model, "Output Tput (tok/s)"
+        )
         mean = mean.tolist()
         std = None
 
@@ -80,18 +80,17 @@ def main(args):
     # generate markdown table
     df = pd.DataFrame.from_dict(results)
 
-    md_table = tabulate(df, headers='keys', tablefmt='pipe', showindex=False)
+    md_table = tabulate(df, headers="keys", tablefmt="pipe", showindex=False)
 
     with open(args.description) as f:
         description = f.read()
 
-    description = description.format(
-        nightly_results_benchmarking_table=md_table)
+    description = description.format(nightly_results_benchmarking_table=md_table)
 
     with open("nightly_results.md", "w") as f:
         f.write(description)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     args = parse_arguments()
     main(args)
diff --git a/.buildkite/nightly-benchmarks/scripts/summary-nightly-results.py b/.buildkite/nightly-benchmarks/scripts/summary-nightly-results.py
index 62ee5e10b5..2a7b37991f 100644
--- a/.buildkite/nightly-benchmarks/scripts/summary-nightly-results.py
+++ b/.buildkite/nightly-benchmarks/scripts/summary-nightly-results.py
@@ -34,10 +34,8 @@ serving_column_mapping = {
 }
 
 if __name__ == "__main__":
-
     # collect results
     for test_file in results_folder.glob("*.json"):
-
         with open(test_file) as f:
             raw_result = json.loads(f.read())
 
@@ -56,17 +54,16 @@ if __name__ == "__main__":
     serving_results = pd.DataFrame.from_dict(serving_results)
 
     if not serving_results.empty:
-        serving_results = serving_results[list(
-            serving_column_mapping.keys())].rename(
-                columns=serving_column_mapping)
+        serving_results = serving_results[list(serving_column_mapping.keys())].rename(
+            columns=serving_column_mapping
+        )
 
-    serving_md_table_with_headers = tabulate(serving_results,
-                                             headers='keys',
-                                             tablefmt='pipe',
-                                             showindex=False)
+    serving_md_table_with_headers = tabulate(
+        serving_results, headers="keys", tablefmt="pipe", showindex=False
+    )
     # remove the first line of header
-    serving_md_table_lines = serving_md_table_with_headers.split('\n')
-    serving_md_table_without_header = '\n'.join(serving_md_table_lines[2:])
+    serving_md_table_lines = serving_md_table_with_headers.split("\n")
+    serving_md_table_without_header = "\n".join(serving_md_table_lines[2:])
 
     prefix = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
     prefix = prefix + "_" + os.environ.get("CURRENT_LLM_SERVING_ENGINE")
@@ -76,10 +73,9 @@ if __name__ == "__main__":
         # document results with header.
         # for those who wants to reproduce our benchmark.
         f.write(serving_md_table_with_headers)
-        f.write('\n')
+        f.write("\n")
 
     # document benchmarking results in json
     with open(results_folder / f"{prefix}_nightly_results.json", "w") as f:
-
-        results = serving_results.to_dict(orient='records')
+        results = serving_results.to_dict(orient="records")
         f.write(json.dumps(results))
diff --git a/.buildkite/pyproject.toml b/.buildkite/pyproject.toml
new file mode 100644
index 0000000000..6ae0c2a399
--- /dev/null
+++ b/.buildkite/pyproject.toml
@@ -0,0 +1,55 @@
+# This local pyproject file is part of the migration from yapf to ruff format.
+# It uses the same core rules as the main pyproject.toml file, but with the
+# following differences:
+# - isort profile is set to black
+# - ruff line length is overridden to 88
+# - deprecated typing ignores (UP006, UP035) have been removed
+
+[tool.isort]
+profile = "black"
+
+[tool.ruff]
+line-length = 88
+exclude = [
+    # External file, leaving license intact
+    "examples/other/fp8/quantizer/quantize.py",
+    "vllm/vllm_flash_attn/flash_attn_interface.pyi"
+]
+
+[tool.ruff.lint.per-file-ignores]
+"vllm/third_party/**" = ["ALL"]
+"vllm/version.py" = ["F401"]
+"vllm/_version.py" = ["ALL"]
+
+[tool.ruff.lint]
+select = [
+    # pycodestyle
+    "E",
+    # Pyflakes
+    "F",
+    # pyupgrade
+    "UP",
+    # flake8-bugbear
+    "B",
+    # flake8-simplify
+    "SIM",
+    # isort
+    "I",
+    # flake8-logging-format
+    "G",
+]
+ignore = [
+    # star imports
+    "F405", "F403",
+    # lambda expression assignment
+    "E731",
+    # Loop control variable not used within loop body
+    "B007",
+    # f-string format
+    "UP032",
+    # Can remove once 3.10+ is the minimum Python version
+    "UP007",
+]
+
+[tool.ruff.format]
+docstring-code-format = true
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 3dc06952c0..23f83db010 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -16,6 +16,8 @@ repos:
   hooks:
   - id: ruff
     args: [--output-format, github, --fix]
+  - id: ruff-format
+    files: ^(.buildkite).*
 - repo: https://github.com/codespell-project/codespell
   rev: v2.4.1
   hooks:
@@ -26,6 +28,8 @@ repos:
   rev: 6.0.1
   hooks:
   - id: isort
+    # necessary during the transition from yapf to ruff format
+    args: [--resolve-all-configs, --config-root, .]
 - repo: https://github.com/pre-commit/mirrors-clang-format
   rev: v20.1.3
   hooks:
diff --git a/pyproject.toml b/pyproject.toml
index 4147b6bdee..0393bb1ed2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,6 +53,7 @@ include = ["vllm*"]
 
 [tool.yapfignore]
 ignore_patterns = [
+    ".buildkite/**",
     "build/**",
 ]
 
@@ -107,6 +108,7 @@ select = [
     "SIM",
     # isort
     # "I",
+    # flake8-logging-format
     "G",
 ]
 ignore = [