diff --git a/docs/cli/.nav.yml b/docs/cli/.nav.yml
index 6c2c09d566..d2d2905703 100644
--- a/docs/cli/.nav.yml
+++ b/docs/cli/.nav.yml
@@ -5,4 +5,4 @@ nav:
   - complete.md
   - run-batch.md
   - vllm bench:
-    - bench/*.md
+    - bench/**/*.md
diff --git a/docs/cli/bench/sweep/plot.md b/docs/cli/bench/sweep/plot.md
new file mode 100644
index 0000000000..f29bffb646
--- /dev/null
+++ b/docs/cli/bench/sweep/plot.md
@@ -0,0 +1,9 @@
+# vllm bench sweep plot
+
+## JSON CLI Arguments
+
+--8<-- "docs/cli/json_tip.inc.md"
+
+## Options
+
+--8<-- "docs/argparse/bench_sweep_plot.md"
diff --git a/docs/cli/bench/sweep/serve.md b/docs/cli/bench/sweep/serve.md
new file mode 100644
index 0000000000..5b5f91a951
--- /dev/null
+++ b/docs/cli/bench/sweep/serve.md
@@ -0,0 +1,9 @@
+# vllm bench sweep serve
+
+## JSON CLI Arguments
+
+--8<-- "docs/cli/json_tip.inc.md"
+
+## Options
+
+--8<-- "docs/argparse/bench_sweep_serve.md"
diff --git a/docs/cli/bench/sweep/serve_sla.md b/docs/cli/bench/sweep/serve_sla.md
new file mode 100644
index 0000000000..5f8ab6005e
--- /dev/null
+++ b/docs/cli/bench/sweep/serve_sla.md
@@ -0,0 +1,9 @@
+# vllm bench sweep serve_sla
+
+## JSON CLI Arguments
+
+--8<-- "docs/cli/json_tip.inc.md"
+
+## Options
+
+--8<-- "docs/argparse/bench_sweep_serve_sla.md"
diff --git a/docs/contributing/benchmarks.md b/docs/contributing/benchmarks.md
index e8b58dbbc9..be3e32a73a 100644
--- a/docs/contributing/benchmarks.md
+++ b/docs/contributing/benchmarks.md
@@ -1061,7 +1061,7 @@ Follow these steps to run the script:
 Example command:
 
 ```bash
-python -m vllm.benchmarks.sweep.serve \
+vllm bench sweep serve \
     --serve-cmd 'vllm serve meta-llama/Llama-2-7b-chat-hf' \
     --bench-cmd 'vllm bench serve --model meta-llama/Llama-2-7b-chat-hf --backend vllm --endpoint /v1/completions --dataset-name sharegpt --dataset-path benchmarks/ShareGPT_V3_unfiltered_cleaned_split.json' \
     --serve-params benchmarks/serve_hparams.json \
@@ -1109,7 +1109,7 @@ For example, to ensure E2E latency within different target values for 99% of req
 Example command:
 
 ```bash
-python -m vllm.benchmarks.sweep.serve_sla \
+vllm bench sweep serve_sla \
     --serve-cmd 'vllm serve meta-llama/Llama-2-7b-chat-hf' \
     --bench-cmd 'vllm bench serve --model meta-llama/Llama-2-7b-chat-hf --backend vllm --endpoint /v1/completions --dataset-name sharegpt --dataset-path benchmarks/ShareGPT_V3_unfiltered_cleaned_split.json' \
     --serve-params benchmarks/serve_hparams.json \
@@ -1138,7 +1138,7 @@ The algorithm for adjusting the SLA variable is as follows:
 Example command:
 
 ```bash
-python -m vllm.benchmarks.sweep.plot benchmarks/results/<timestamp> \
+vllm bench sweep plot benchmarks/results/<timestamp> \
     --var-x max_concurrency \
     --row-by random_input_len \
     --col-by random_output_len \
diff --git a/docs/mkdocs/hooks/generate_argparse.py b/docs/mkdocs/hooks/generate_argparse.py
index 99d9a7bec3..ea89108f01 100644
--- a/docs/mkdocs/hooks/generate_argparse.py
+++ b/docs/mkdocs/hooks/generate_argparse.py
@@ -56,15 +56,20 @@ def auto_mock(module, attr, max_mocks=50):
     )
 
 
-latency = auto_mock("vllm.benchmarks", "latency")
-serve = auto_mock("vllm.benchmarks", "serve")
-throughput = auto_mock("vllm.benchmarks", "throughput")
+bench_latency = auto_mock("vllm.benchmarks", "latency")
+bench_serve = auto_mock("vllm.benchmarks", "serve")
+bench_sweep_plot = auto_mock("vllm.benchmarks.sweep.plot", "SweepPlotArgs")
+bench_sweep_serve = auto_mock("vllm.benchmarks.sweep.serve", "SweepServeArgs")
+bench_sweep_serve_sla = auto_mock(
+    "vllm.benchmarks.sweep.serve_sla", "SweepServeSLAArgs"
+)
+bench_throughput = auto_mock("vllm.benchmarks", "throughput")
 AsyncEngineArgs = auto_mock("vllm.engine.arg_utils", "AsyncEngineArgs")
 EngineArgs = auto_mock("vllm.engine.arg_utils", "EngineArgs")
 ChatCommand = auto_mock("vllm.entrypoints.cli.openai", "ChatCommand")
 CompleteCommand = auto_mock("vllm.entrypoints.cli.openai", "CompleteCommand")
-cli_args = auto_mock("vllm.entrypoints.openai", "cli_args")
-run_batch = auto_mock("vllm.entrypoints.openai", "run_batch")
+openai_cli_args = auto_mock("vllm.entrypoints.openai", "cli_args")
+openai_run_batch = auto_mock("vllm.entrypoints.openai", "run_batch")
 FlexibleArgumentParser = auto_mock(
     "vllm.utils.argparse_utils", "FlexibleArgumentParser"
 )
@@ -114,6 +119,9 @@ class MarkdownFormatter(HelpFormatter):
                 self._markdown_output.append(f"{action.help}\n\n")
 
             if (default := action.default) != SUPPRESS:
+                # Make empty string defaults visible
+                if default == "":
+                    default = '""'
                 self._markdown_output.append(f"Default: `{default}`\n\n")
 
     def format_help(self):
@@ -150,17 +158,23 @@ def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):
 
     # Create parsers to document
     parsers = {
+        # Engine args
         "engine_args": create_parser(EngineArgs.add_cli_args),
         "async_engine_args": create_parser(
             AsyncEngineArgs.add_cli_args, async_args_only=True
         ),
-        "serve": create_parser(cli_args.make_arg_parser),
+        # CLI
+        "serve": create_parser(openai_cli_args.make_arg_parser),
         "chat": create_parser(ChatCommand.add_cli_args),
         "complete": create_parser(CompleteCommand.add_cli_args),
-        "bench_latency": create_parser(latency.add_cli_args),
-        "bench_throughput": create_parser(throughput.add_cli_args),
-        "bench_serve": create_parser(serve.add_cli_args),
-        "run-batch": create_parser(run_batch.make_arg_parser),
+        "run-batch": create_parser(openai_run_batch.make_arg_parser),
+        # Benchmark CLI
+        "bench_latency": create_parser(bench_latency.add_cli_args),
+        "bench_serve": create_parser(bench_serve.add_cli_args),
+        "bench_sweep_plot": create_parser(bench_sweep_plot.add_cli_args),
+        "bench_sweep_serve": create_parser(bench_sweep_serve.add_cli_args),
+        "bench_sweep_serve_sla": create_parser(bench_sweep_serve_sla.add_cli_args),
+        "bench_throughput": create_parser(bench_throughput.add_cli_args),
     }
 
     # Generate documentation for each parser
diff --git a/setup.py b/setup.py
index 990fe4cde3..83a4e3eea5 100644
--- a/setup.py
+++ b/setup.py
@@ -709,7 +709,7 @@ setup(
     ext_modules=ext_modules,
     install_requires=get_requirements(),
     extras_require={
-        "bench": ["pandas", "datasets"],
+        "bench": ["pandas", "matplotlib", "seaborn", "datasets"],
         "tensorizer": ["tensorizer==2.10.1"],
         "fastsafetensors": ["fastsafetensors >= 0.1.10"],
         "runai": ["runai-model-streamer[s3,gcs] >= 0.14.0"],
diff --git a/tools/profiler/visualize_layerwise_profile.py b/tools/profiler/visualize_layerwise_profile.py
index a049dc0425..ed4bf0beb7 100644
--- a/tools/profiler/visualize_layerwise_profile.py
+++ b/tools/profiler/visualize_layerwise_profile.py
@@ -141,7 +141,7 @@ def attempt_to_make_names_unique(entries_and_traces):
 """
 
 
-def group_trace_by_operations(trace_df: pd.DataFrame) -> pd.DataFrame:
+def group_trace_by_operations(trace_df: "pd.DataFrame") -> "pd.DataFrame":
     def is_rms_norm(op_name: str):
         if "rms_norm_kernel" in op_name:
             return True
@@ -370,12 +370,12 @@ def group_trace_by_operations(trace_df: pd.DataFrame) -> pd.DataFrame:
 
 
 def plot_trace_df(
-    traces_df: pd.DataFrame,
+    traces_df: "pd.DataFrame",
     plot_metric: str,
     plot_title: str,
     output: Path | None = None,
 ):
-    def get_phase_description(traces_df: pd.DataFrame, phase: str) -> str:
+    def get_phase_description(traces_df: "pd.DataFrame", phase: str) -> str:
         phase_df = traces_df.query(f'phase == "{phase}"')
         descs = phase_df["phase_desc"].to_list()
         assert all([desc == descs[0] for desc in descs])
@@ -438,7 +438,7 @@ def main(
     top_k: int,
     json_nodes_to_fold: list[str],
 ):
-    def prepare_data(profile_json: dict, step_keys: list[str]) -> pd.DataFrame:
+    def prepare_data(profile_json: dict, step_keys: list[str]) -> "pd.DataFrame":
         def get_entries_and_traces(key: str):
             entries_and_traces: list[tuple[Any, Any]] = []
             for root in profile_json[key]["summary_stats"]:
@@ -449,8 +449,8 @@ def main(
             return entries_and_traces
 
         def keep_only_top_entries(
-            df: pd.DataFrame, metric: str, top_k: int = 9
-        ) -> pd.DataFrame:
+            df: "pd.DataFrame", metric: str, top_k: int = 9
+        ) -> "pd.DataFrame":
             df.loc[df.nsmallest(len(df) - top_k + 1, metric).index, ["name"]] = "others"
             return df
 
diff --git a/vllm/benchmarks/sweep/cli.py b/vllm/benchmarks/sweep/cli.py
new file mode 100644
index 0000000000..108cd75690
--- /dev/null
+++ b/vllm/benchmarks/sweep/cli.py
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import argparse
+
+from vllm.entrypoints.utils import VLLM_SUBCMD_PARSER_EPILOG
+
+from .plot import SweepPlotArgs
+from .plot import main as plot_main
+from .serve import SweepServeArgs
+from .serve import main as serve_main
+from .serve_sla import SweepServeSLAArgs
+from .serve_sla import main as serve_sla_main
+
+# Each entry pairs an args class (providing `parser_name`, `parser_help` and
+# `add_cli_args`) with the entrypoint that receives the parsed namespace.
+SUBCOMMANDS = (
+    (SweepServeArgs, serve_main),
+    (SweepServeSLAArgs, serve_sla_main),
+    (SweepPlotArgs, plot_main),
+)
+
+
+def add_cli_args(parser: argparse.ArgumentParser):
+    """Register one subparser per entry in `SUBCOMMANDS` on `parser`."""
+    subparsers = parser.add_subparsers(required=True, dest="sweep_type")
+
+    for cmd, entrypoint in SUBCOMMANDS:
+        cmd_subparser = subparsers.add_parser(
+            cmd.parser_name,
+            # `help` is what the parent parser lists next to the subcommand
+            # name; `description` only appears in the subcommand's own help.
+            help=cmd.parser_help,
+            description=cmd.parser_help,
+            usage=f"vllm bench sweep {cmd.parser_name} [options]",
+        )
+        # Remember which entrypoint to run once this subcommand is selected.
+        cmd_subparser.set_defaults(dispatch_function=entrypoint)
+        cmd.add_cli_args(cmd_subparser)
+        cmd_subparser.epilog = VLLM_SUBCMD_PARSER_EPILOG.format(
+            subcmd=f"sweep {cmd.parser_name}"
+        )
+
+
+def main(args: argparse.Namespace):
+    """Run the entrypoint stored by the selected subparser."""
+    args.dispatch_function(args)
diff --git a/vllm/benchmarks/sweep/plot.py b/vllm/benchmarks/sweep/plot.py
index 92485c09b4..9947d6170d 100644
--- a/vllm/benchmarks/sweep/plot.py
+++ b/vllm/benchmarks/sweep/plot.py
@@ -8,16 +8,24 @@ from dataclasses import dataclass
 from functools import partial
 from pathlib import Path
 from types import TracebackType
+from typing import ClassVar
 
-import matplotlib.pyplot as plt
-import pandas as pd
-import seaborn as sns
 from typing_extensions import Self, override
 
 from vllm.utils.collection_utils import full_groupby
+from vllm.utils.import_utils import PlaceholderModule
 
 from .utils import sanitize_filename
 
+try:
+    import matplotlib.pyplot as plt
+    import pandas as pd
+    import seaborn as sns
+except ImportError:  # fall back to placeholders bound to the same aliases
+    plt = PlaceholderModule("matplotlib").placeholder_attr("pyplot")
+    pd = PlaceholderModule("pandas")
+    sns = PlaceholderModule("seaborn")
+
 
 @dataclass
 class PlotFilterBase(ABC):
@@ -40,7 +48,7 @@ class PlotFilterBase(ABC):
             )
 
     @abstractmethod
-    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
+    def apply(self, df: "pd.DataFrame") -> "pd.DataFrame":
         """Applies this filter to a DataFrame."""
         raise NotImplementedError
 
@@ -48,7 +56,7 @@ class PlotFilterBase(ABC):
 @dataclass
 class PlotEqualTo(PlotFilterBase):
     @override
-    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
+    def apply(self, df: "pd.DataFrame") -> "pd.DataFrame":
         try:
             target = float(self.target)
         except ValueError:
@@ -60,28 +68,28 @@ class PlotEqualTo(PlotFilterBase):
 @dataclass
 class PlotLessThan(PlotFilterBase):
     @override
-    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
+    def apply(self, df: "pd.DataFrame") -> "pd.DataFrame":
         return df[df[self.var] < float(self.target)]
 
 
 @dataclass
 class PlotLessThanOrEqualTo(PlotFilterBase):
     @override
-    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
+    def apply(self, df: "pd.DataFrame") -> "pd.DataFrame":
         return df[df[self.var] <= float(self.target)]
 
 
 @dataclass
 class PlotGreaterThan(PlotFilterBase):
     @override
-    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
+    def apply(self, df: "pd.DataFrame") -> "pd.DataFrame":
         return df[df[self.var] > float(self.target)]
 
 
 @dataclass
 class PlotGreaterThanOrEqualTo(PlotFilterBase):
     @override
-    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
+    def apply(self, df: "pd.DataFrame") -> "pd.DataFrame":
         return df[df[self.var] >= float(self.target)]
 
 
@@ -103,7 +111,7 @@ class PlotFilters(list[PlotFilterBase]):
 
         return cls(PlotFilterBase.parse_str(e) for e in s.split(","))
 
-    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
+    def apply(self, df: "pd.DataFrame") -> "pd.DataFrame":
         for item in self:
             df = item.apply(df)
 
@@ -127,7 +135,7 @@ class PlotBinner:
                 f"Valid operators are: {sorted(PLOT_BINNERS)}",
             )
 
-    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
+    def apply(self, df: "pd.DataFrame") -> "pd.DataFrame":
         """Applies this binner to a DataFrame."""
         df = df.copy()
         df[self.var] = df[self.var] // self.bin_size * self.bin_size
@@ -147,7 +155,7 @@ class PlotBinners(list[PlotBinner]):
 
         return cls(PlotBinner.parse_str(e) for e in s.split(","))
 
-    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
+    def apply(self, df: "pd.DataFrame") -> "pd.DataFrame":
         for item in self:
             df = item.apply(df)
 
@@ -396,135 +404,177 @@ def plot(
         )
 
 
-def add_cli_args(parser: argparse.ArgumentParser):
-    parser.add_argument(
-        "OUTPUT_DIR",
-        type=str,
-        default="results",
-        help="The directory containing the results to plot, "
-        "i.e., the `--output-dir` argument to the parameter sweep script.",
-    )
-    parser.add_argument(
-        "--fig-dir",
-        type=str,
-        default="",
-        help="The directory to save the figures, relative to `OUTPUT_DIR`. "
-        "By default, the same directory is used.",
-    )
-    parser.add_argument(
-        "--fig-by",
-        type=str,
-        default="",
-        help="A comma-separated list of variables, such that a separate figure "
-        "is created for each combination of these variables.",
-    )
-    parser.add_argument(
-        "--row-by",
-        type=str,
-        default="",
-        help="A comma-separated list of variables, such that a separate row "
-        "is created for each combination of these variables.",
-    )
-    parser.add_argument(
-        "--col-by",
-        type=str,
-        default="",
-        help="A comma-separated list of variables, such that a separate column "
-        "is created for each combination of these variables.",
-    )
-    parser.add_argument(
-        "--curve-by",
-        type=str,
-        default=None,
-        help="A comma-separated list of variables, such that a separate curve "
-        "is created for each combination of these variables.",
-    )
-    parser.add_argument(
-        "--var-x",
-        type=str,
-        default="request_throughput",
-        help="The variable for the x-axis.",
-    )
-    parser.add_argument(
-        "--var-y",
-        type=str,
-        default="p99_e2el_ms",
-        help="The variable for the y-axis",
-    )
-    parser.add_argument(
-        "--filter-by",
-        type=str,
-        default="",
-        help="A comma-separated list of statements indicating values to filter by. "
-        "This is useful to remove outliers. "
-        "Example: `max_concurrency<1000,max_num_batched_tokens<=4096` means "
-        "plot only the points where `max_concurrency` is less than 1000 and "
-        "`max_num_batched_tokens` is no greater than 4096.",
-    )
-    parser.add_argument(
-        "--bin-by",
-        type=str,
-        default="",
-        help="A comma-separated list of statements indicating values to bin by. "
-        "This is useful to avoid plotting points that are too close together. "
-        "Example: `request_throughput%1` means "
-        "use a bin size of 1 for the `request_throughput` variable.",
-    )
-    parser.add_argument(
-        "--scale-x",
-        type=str,
-        default=None,
-        help="The scale to use for the x-axis. "
-        "Currently only accepts string values such as 'log' and 'sqrt'. "
-        "See also: https://seaborn.pydata.org/generated/seaborn.objects.Plot.scale.html",
-    )
-    parser.add_argument(
-        "--scale-y",
-        type=str,
-        default=None,
-        help="The scale to use for the y-axis. "
-        "Currently only accepts string values such as 'log' and 'sqrt'. "
-        "See also: https://seaborn.pydata.org/generated/seaborn.objects.Plot.scale.html",
-    )
-    parser.add_argument(
-        "--dry-run",
-        action="store_true",
-        help="If set, prints the information about each figure to plot, "
-        "then exits without drawing them.",
-    )
+@dataclass
+class SweepPlotArgs:
+    output_dir: Path
+    fig_dir: Path
+    fig_by: list[str]
+    row_by: list[str]
+    col_by: list[str]
+    curve_by: list[str]
+    var_x: str
+    var_y: str
+    filter_by: PlotFilters
+    bin_by: PlotBinners
+    scale_x: str | None
+    scale_y: str | None
+    dry_run: bool
+
+    parser_name: ClassVar[str] = "plot"
+    parser_help: ClassVar[str] = "Plot performance curves from parameter sweep results."
+
+    @classmethod
+    def from_cli_args(cls, args: argparse.Namespace):
+        output_dir = Path(args.OUTPUT_DIR)
+        if not output_dir.exists():
+            raise ValueError(f"No parameter sweep results under {output_dir}")
+
+        curve_by = [] if not args.curve_by else args.curve_by.split(",")
+        row_by = [] if not args.row_by else args.row_by.split(",")
+        col_by = [] if not args.col_by else args.col_by.split(",")
+        fig_by = [] if not args.fig_by else args.fig_by.split(",")
+
+        return cls(
+            output_dir=output_dir,
+            fig_dir=output_dir / args.fig_dir,
+            fig_by=fig_by,
+            row_by=row_by,
+            col_by=col_by,
+            curve_by=curve_by,
+            var_x=args.var_x,
+            var_y=args.var_y,
+            filter_by=PlotFilters.parse_str(args.filter_by),
+            bin_by=PlotBinners.parse_str(args.bin_by),
+            scale_x=args.scale_x,
+            scale_y=args.scale_y,
+            dry_run=args.dry_run,
+        )
+
+    @classmethod
+    def add_cli_args(cls, parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
+        parser.add_argument(
+            "OUTPUT_DIR",
+            type=str,
+            default="results",
+            help="The directory containing the results to plot, "
+            "i.e., the `--output-dir` argument to the parameter sweep script.",
+        )
+        parser.add_argument(
+            "--fig-dir",
+            type=str,
+            default="",
+            help="The directory to save the figures, relative to `OUTPUT_DIR`. "
+            "By default, the same directory is used.",
+        )
+        parser.add_argument(
+            "--fig-by",
+            type=str,
+            default="",
+            help="A comma-separated list of variables, such that a separate figure "
+            "is created for each combination of these variables.",
+        )
+        parser.add_argument(
+            "--row-by",
+            type=str,
+            default="",
+            help="A comma-separated list of variables, such that a separate row "
+            "is created for each combination of these variables.",
+        )
+        parser.add_argument(
+            "--col-by",
+            type=str,
+            default="",
+            help="A comma-separated list of variables, such that a separate column "
+            "is created for each combination of these variables.",
+        )
+        parser.add_argument(
+            "--curve-by",
+            type=str,
+            default=None,
+            help="A comma-separated list of variables, such that a separate curve "
+            "is created for each combination of these variables.",
+        )
+        parser.add_argument(
+            "--var-x",
+            type=str,
+            default="request_throughput",
+            help="The variable for the x-axis.",
+        )
+        parser.add_argument(
+            "--var-y",
+            type=str,
+            default="p99_e2el_ms",
+            help="The variable for the y-axis",
+        )
+        parser.add_argument(
+            "--filter-by",
+            type=str,
+            default="",
+            help="A comma-separated list of statements indicating values to filter by. "
+            "This is useful to remove outliers. "
+            "Example: `max_concurrency<1000,max_num_batched_tokens<=4096` means "
+            "plot only the points where `max_concurrency` is less than 1000 and "
+            "`max_num_batched_tokens` is no greater than 4096.",
+        )
+        parser.add_argument(
+            "--bin-by",
+            type=str,
+            default="",
+            help="A comma-separated list of statements indicating values to bin by. "
+            "This is useful to avoid plotting points that are too close together. "
+            "Example: `request_throughput%%1` means "
+            "use a bin size of 1 for the `request_throughput` variable.",
+        )
+        parser.add_argument(
+            "--scale-x",
+            type=str,
+            default=None,
+            help="The scale to use for the x-axis. "
+            "Currently only accepts string values such as 'log' and 'sqrt'. "
+            "See also: https://seaborn.pydata.org/generated/seaborn.objects.Plot.scale.html",
+        )
+        parser.add_argument(
+            "--scale-y",
+            type=str,
+            default=None,
+            help="The scale to use for the y-axis. "
+            "Currently only accepts string values such as 'log' and 'sqrt'. "
+            "See also: https://seaborn.pydata.org/generated/seaborn.objects.Plot.scale.html",
+        )
+        parser.add_argument(
+            "--dry-run",
+            action="store_true",
+            help="If set, prints the information about each figure to plot, "
+            "then exits without drawing them.",
+        )
+
+        return parser
 
 
-def main(args: argparse.Namespace):
-    output_dir = Path(args.OUTPUT_DIR)
-    if not output_dir.exists():
-        raise ValueError(f"No parameter sweep results under {output_dir}")
-
-    curve_by = [] if not args.curve_by else args.curve_by.split(",")
-    row_by = [] if not args.row_by else args.row_by.split(",")
-    col_by = [] if not args.col_by else args.col_by.split(",")
-    fig_by = [] if not args.fig_by else args.fig_by.split(",")
-
-    plot(
-        output_dir=output_dir,
-        fig_dir=output_dir / args.fig_dir,
-        fig_by=fig_by,
-        row_by=row_by,
-        col_by=col_by,
-        curve_by=curve_by,
+def run_main(args: SweepPlotArgs):
+    return plot(
+        output_dir=args.output_dir,
+        fig_dir=args.fig_dir,
+        fig_by=args.fig_by,
+        row_by=args.row_by,
+        col_by=args.col_by,
+        curve_by=args.curve_by,
         var_x=args.var_x,
         var_y=args.var_y,
-        filter_by=PlotFilters.parse_str(args.filter_by),
-        bin_by=PlotBinners.parse_str(args.bin_by),
+        filter_by=args.filter_by,
+        bin_by=args.bin_by,
         scale_x=args.scale_x,
         scale_y=args.scale_y,
         dry_run=args.dry_run,
     )
 
 
+def main(args: argparse.Namespace):
+    run_main(SweepPlotArgs.from_cli_args(args))
+
+
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Plot performance curves from parameter sweep results."
-    )
-    add_cli_args(parser)
+    parser = argparse.ArgumentParser(description=SweepPlotArgs.parser_help)
+    SweepPlotArgs.add_cli_args(parser)
 
     main(parser.parse_args())
diff --git a/vllm/benchmarks/sweep/serve.py b/vllm/benchmarks/sweep/serve.py
index a06d4d6d60..45ac446a7a 100644
--- a/vllm/benchmarks/sweep/serve.py
+++ b/vllm/benchmarks/sweep/serve.py
@@ -7,13 +7,19 @@ import shlex
 from dataclasses import dataclass
 from datetime import datetime
 from pathlib import Path
+from typing import ClassVar
 
-import pandas as pd
+from vllm.utils.import_utils import PlaceholderModule
 
 from .param_sweep import ParameterSweep, ParameterSweepItem
 from .server import ServerProcess
 from .utils import sanitize_filename
 
+try:
+    import pandas as pd
+except ImportError:
+    pd = PlaceholderModule("pandas")
+
 
 @contextlib.contextmanager
 def run_server(
@@ -257,6 +263,9 @@ class SweepServeArgs:
     dry_run: bool
     resume: str | None
 
+    parser_name: ClassVar[str] = "serve"
+    parser_help: ClassVar[str] = "Run vLLM server benchmark under multiple settings."
+
     @classmethod
     def from_cli_args(cls, args: argparse.Namespace):
         serve_cmd = shlex.split(args.serve_cmd)
@@ -401,9 +410,7 @@ def main(args: argparse.Namespace):
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Run vLLM server benchmark under multiple settings."
-    )
+    parser = argparse.ArgumentParser(description=SweepServeArgs.parser_help)
     SweepServeArgs.add_cli_args(parser)
 
     main(parser.parse_args())
diff --git a/vllm/benchmarks/sweep/serve_sla.py b/vllm/benchmarks/sweep/serve_sla.py
index 6159aba4bb..0403d1ddfd 100644
--- a/vllm/benchmarks/sweep/serve_sla.py
+++ b/vllm/benchmarks/sweep/serve_sla.py
@@ -7,17 +7,23 @@ import math
 from dataclasses import asdict, dataclass
 from datetime import datetime
 from pathlib import Path
-from typing import Literal, get_args
+from typing import ClassVar, Literal, get_args
 
-import pandas as pd
 from typing_extensions import assert_never
 
+from vllm.utils.import_utils import PlaceholderModule
+
 from .param_sweep import ParameterSweep, ParameterSweepItem
 from .serve import SweepServeArgs, run_benchmark, run_server
 from .server import ServerProcess
 from .sla_sweep import SLASweep, SLASweepItem
 from .utils import sanitize_filename
 
+try:
+    import pandas as pd
+except ImportError:
+    pd = PlaceholderModule("pandas")
+
 
 def _get_sla_base_path(
     output_dir: Path,
@@ -399,6 +405,9 @@ class SweepServeSLAArgs(SweepServeArgs):
     sla_params: SLASweep
     sla_variable: SLAVariable
 
+    parser_name: ClassVar[str] = "serve_sla"
+    parser_help: ClassVar[str] = "Tune a variable to meet SLAs under multiple settings."
+
     @classmethod
     def from_cli_args(cls, args: argparse.Namespace):
         # NOTE: Don't use super() as `from_cli_args` calls `cls()`
@@ -419,7 +428,8 @@ class SweepServeSLAArgs(SweepServeArgs):
     def add_cli_args(cls, parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
         parser = super().add_cli_args(parser)
 
-        parser.add_argument(
+        sla_group = parser.add_argument_group("sla options")
+        sla_group.add_argument(
             "--sla-params",
             type=str,
             required=True,
@@ -431,7 +441,7 @@ class SweepServeSLAArgs(SweepServeArgs):
             "the maximum `sla_variable` that satisfies the constraints for "
             "each combination of `serve_params`, `bench_params`, and `sla_params`.",
         )
-        parser.add_argument(
+        sla_group.add_argument(
             "--sla-variable",
             type=str,
             choices=get_args(SLAVariable),
@@ -476,9 +486,7 @@ def main(args: argparse.Namespace):
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Tune a variable to meet SLAs under multiple settings."
-    )
+    parser = argparse.ArgumentParser(description=SweepServeSLAArgs.parser_help)
     SweepServeSLAArgs.add_cli_args(parser)
 
     main(parser.parse_args())
diff --git a/vllm/entrypoints/cli/__init__.py b/vllm/entrypoints/cli/__init__.py
index 211e157fc7..9dff68236f 100644
--- a/vllm/entrypoints/cli/__init__.py
+++ b/vllm/entrypoints/cli/__init__.py
@@ -2,10 +2,12 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from vllm.entrypoints.cli.benchmark.latency import BenchmarkLatencySubcommand
 from vllm.entrypoints.cli.benchmark.serve import BenchmarkServingSubcommand
+from vllm.entrypoints.cli.benchmark.sweep import BenchmarkSweepSubcommand
 from vllm.entrypoints.cli.benchmark.throughput import BenchmarkThroughputSubcommand
 
 __all__: list[str] = [
     "BenchmarkLatencySubcommand",
     "BenchmarkServingSubcommand",
+    "BenchmarkSweepSubcommand",
     "BenchmarkThroughputSubcommand",
 ]
diff --git a/vllm/entrypoints/cli/benchmark/base.py b/vllm/entrypoints/cli/benchmark/base.py
index 3263459fd6..d8543822cf 100644
--- a/vllm/entrypoints/cli/benchmark/base.py
+++ b/vllm/entrypoints/cli/benchmark/base.py
@@ -6,7 +6,7 @@ from vllm.entrypoints.cli.types import CLISubcommand
 
 
 class BenchmarkSubcommandBase(CLISubcommand):
-    """The base class of subcommands for vllm bench."""
+    """The base class of subcommands for `vllm bench`."""
 
     help: str
 
diff --git a/vllm/entrypoints/cli/benchmark/latency.py b/vllm/entrypoints/cli/benchmark/latency.py
index 548ddf4d60..60f2b03341 100644
--- a/vllm/entrypoints/cli/benchmark/latency.py
+++ b/vllm/entrypoints/cli/benchmark/latency.py
@@ -7,7 +7,7 @@ from vllm.entrypoints.cli.benchmark.base import BenchmarkSubcommandBase
 
 
 class BenchmarkLatencySubcommand(BenchmarkSubcommandBase):
-    """The `latency` subcommand for vllm bench."""
+    """The `latency` subcommand for `vllm bench`."""
 
     name = "latency"
     help = "Benchmark the latency of a single batch of requests."
diff --git a/vllm/entrypoints/cli/benchmark/serve.py b/vllm/entrypoints/cli/benchmark/serve.py
index b085f52afb..6616305c74 100644
--- a/vllm/entrypoints/cli/benchmark/serve.py
+++ b/vllm/entrypoints/cli/benchmark/serve.py
@@ -7,7 +7,7 @@ from vllm.entrypoints.cli.benchmark.base import BenchmarkSubcommandBase
 
 
 class BenchmarkServingSubcommand(BenchmarkSubcommandBase):
-    """The `serve` subcommand for vllm bench."""
+    """The `serve` subcommand for `vllm bench`."""
 
     name = "serve"
     help = "Benchmark the online serving throughput."
diff --git a/vllm/entrypoints/cli/benchmark/sweep.py b/vllm/entrypoints/cli/benchmark/sweep.py
new file mode 100644
index 0000000000..c385207690
--- /dev/null
+++ b/vllm/entrypoints/cli/benchmark/sweep.py
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import argparse
+
+from vllm.benchmarks.sweep.cli import add_cli_args, main
+from vllm.entrypoints.cli.benchmark.base import BenchmarkSubcommandBase
+
+
+class BenchmarkSweepSubcommand(BenchmarkSubcommandBase):
+    """The `sweep` subcommand for `vllm bench`."""
+
+    name = "sweep"
+    help = "Benchmark for a parameter sweep."
+
+    @classmethod
+    def add_cli_args(cls, parser: argparse.ArgumentParser) -> None:
+        add_cli_args(parser)  # not recursion: imported from sweep.cli
+
+    @staticmethod
+    def cmd(args: argparse.Namespace) -> None:
+        main(args)  # `main` imported from vllm.benchmarks.sweep.cli
diff --git a/vllm/entrypoints/cli/benchmark/throughput.py b/vllm/entrypoints/cli/benchmark/throughput.py
index c25be75ec1..2097f9ea07 100644
--- a/vllm/entrypoints/cli/benchmark/throughput.py
+++ b/vllm/entrypoints/cli/benchmark/throughput.py
@@ -7,7 +7,7 @@ from vllm.entrypoints.cli.benchmark.base import BenchmarkSubcommandBase
 
 
 class BenchmarkThroughputSubcommand(BenchmarkSubcommandBase):
-    """The `throughput` subcommand for vllm bench."""
+    """The `throughput` subcommand for `vllm bench`."""
 
     name = "throughput"
     help = "Benchmark offline inference throughput."
diff --git a/vllm/profiler/layerwise_profile.py b/vllm/profiler/layerwise_profile.py
index 1c0fce702b..829b63d8a7 100644
--- a/vllm/profiler/layerwise_profile.py
+++ b/vllm/profiler/layerwise_profile.py
@@ -7,7 +7,6 @@ from collections.abc import Callable
 from dataclasses import asdict, dataclass, field
 from typing import Any, Optional, TypeAlias
 
-import pandas as pd
 from torch._C._autograd import DeviceType, _KinetoEvent, _ProfilerResult
 from torch._C._profiler import _EventType, _ExperimentalConfig, _ProfilerEvent
 from torch.autograd.profiler import FunctionEvent
@@ -21,6 +20,12 @@ from vllm.profiler.utils import (
     event_torch_op_stack_trace,
     indent_string,
 )
+from vllm.utils.import_utils import PlaceholderModule
+
+try:
+    import pandas as pd
+except ImportError:
+    pd = PlaceholderModule("pandas")
 
 
 @dataclass