Run v1 benchmark and integrate with PyTorch OSS benchmark database (#13068)
Signed-off-by: Huy Do <huydhn@gmail.com>
This commit is contained in:
@ -56,6 +56,8 @@ try:
|
||||
except ImportError:
|
||||
from argparse import ArgumentParser as FlexibleArgumentParser
|
||||
|
||||
from benchmark_utils import convert_to_pytorch_benchmark_format
|
||||
|
||||
MILLISECONDS_TO_SECONDS_CONVERSION = 1000
|
||||
|
||||
|
||||
@ -402,21 +404,21 @@ async def get_request(
|
||||
burstiness: float = 1.0,
|
||||
) -> AsyncGenerator[Tuple[str, int, int], None]:
|
||||
"""
|
||||
Asynchronously generates requests at a specified rate
|
||||
Asynchronously generates requests at a specified rate
|
||||
with OPTIONAL burstiness.
|
||||
|
||||
|
||||
Args:
|
||||
input_requests:
|
||||
input_requests:
|
||||
A list of input requests, each represented as a tuple.
|
||||
request_rate:
|
||||
request_rate:
|
||||
The rate at which requests are generated (requests/s).
|
||||
burstiness (optional):
|
||||
The burstiness factor of the request generation.
|
||||
burstiness (optional):
|
||||
The burstiness factor of the request generation.
|
||||
Only takes effect when request_rate is not inf.
|
||||
Default value is 1, which follows a Poisson process.
|
||||
Otherwise, the request intervals follow a gamma distribution.
|
||||
A lower burstiness value (0 < burstiness < 1) results
|
||||
in more bursty requests, while a higher burstiness value
|
||||
A lower burstiness value (0 < burstiness < 1) results
|
||||
in more bursty requests, while a higher burstiness value
|
||||
(burstiness > 1) results in a more uniform arrival of requests.
|
||||
"""
|
||||
input_requests = iter(input_requests)
|
||||
@ -817,6 +819,32 @@ def parse_goodput(slo_pairs):
|
||||
return goodput_config_dict
|
||||
|
||||
|
||||
def save_to_pytorch_benchmark_format(args: argparse.Namespace,
                                     results: Dict[str, Any],
                                     file_name: str) -> None:
    """Write the benchmark results as PyTorch benchmark records.

    The summary latency statistics found in ``results`` are handed to
    ``convert_to_pytorch_benchmark_format`` as metrics, and every other
    entry (minus the bulky raw data) as extra info. If the converter
    returns a non-empty value, it is dumped as JSON next to ``file_name``,
    with the original extension replaced by ``.pytorch.json``.

    Args:
        args: Parsed command-line arguments, forwarded to the converter.
        results: Benchmark results keyed by metric/field name.
        file_name: Path of the regular result file; only used to derive
            the path of the PyTorch-format file.
    """
    metrics = [
        "median_ttft_ms", "mean_ttft_ms", "std_ttft_ms", "p99_ttft_ms",
        "mean_tpot_ms", "median_tpot_ms", "std_tpot_ms", "p99_tpot_ms",
        "median_itl_ms", "mean_itl_ms", "std_itl_ms", "p99_itl_ms"
    ]
    # These raw data might be useful, but they are rather big. They can be
    # added later if needed
    ignored_metrics = ["ttfts", "itls", "generated_texts", "errors"]
    excluded_from_extra_info = set(metrics) | set(ignored_metrics)

    pt_records = convert_to_pytorch_benchmark_format(
        args=args,
        # Skip statistics that are absent from `results` instead of failing
        # with a KeyError; the converter only sees what was actually
        # reported.
        metrics={k: [results[k]]
                 for k in metrics if k in results},
        extra_info={
            k: v
            for k, v in results.items()
            if k not in excluded_from_extra_info
        })

    if pt_records:
        # Written to a sibling file with a ".pytorch.json" extension so it
        # stays separate from the regular result file.
        pt_file = f"{os.path.splitext(file_name)[0]}.pytorch.json"
        with open(pt_file, "w", encoding='utf-8') as f:
            json.dump(pt_records, f)
|
||||
|
||||
|
||||
def main(args: argparse.Namespace):
|
||||
print(args)
|
||||
random.seed(args.seed)
|
||||
@ -997,6 +1025,7 @@ def main(args: argparse.Namespace):
|
||||
file_name = os.path.join(args.result_dir, file_name)
|
||||
with open(file_name, "w", encoding='utf-8') as outfile:
|
||||
json.dump(result_json, outfile)
|
||||
save_to_pytorch_benchmark_format(args, result_json, file_name)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
@ -1014,7 +1043,8 @@ if __name__ == "__main__":
|
||||
default=None,
|
||||
help="Server or API base url if not using http host and port.",
|
||||
)
|
||||
parser.add_argument("--host", type=str, default="localhost")
|
||||
# Use 127.0.0.1 here instead of localhost to force the use of IPv4
|
||||
parser.add_argument("--host", type=str, default="127.0.0.1")
|
||||
parser.add_argument("--port", type=int, default=8000)
|
||||
parser.add_argument(
|
||||
"--endpoint",
|
||||
|
||||
Reference in New Issue
Block a user