Run v1 benchmark and integrate with PyTorch OSS benchmark database (#13068)

Signed-off-by: Huy Do <huydhn@gmail.com>
2025-02-17 00:16:32 -08:00
parent f857311d13
commit 45186834a0
7 changed files with 167 additions and 45 deletions
--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@ -3,13 +3,15 @@
 import argparse
 import dataclasses
 import json
+import os
 import random
 import time
 from functools import cache
-from typing import Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple

 import torch
 import uvloop
+from benchmark_utils import convert_to_pytorch_benchmark_format
 from PIL import Image
 from tqdm import tqdm
 from transformers import (AutoModelForCausalLM, AutoTokenizer,
@ -338,6 +340,25 @@ def run_mii(
    return end - start


+def save_to_pytorch_benchmark_format(args: argparse.Namespace,
+                                     results: Dict[str, Any]) -> None:
+    """Dump the throughput results as PyTorch benchmark records.
+
+    The records come from ``convert_to_pytorch_benchmark_format``, which is
+    given ``args``, the two throughput figures from ``results`` (each wrapped
+    in a one-element list) and three further ``results`` entries as extra
+    info. When the returned value is truthy it is serialized as JSON to
+    ``<stem of args.output_json>.pytorch.json``; otherwise no file is
+    written.
+
+    Args:
+        args: Parsed command-line arguments; ``args.output_json`` supplies
+            the base path for the output file.
+        results: Benchmark results; must contain ``requests_per_second``,
+            ``tokens_per_second``, ``elapsed_time``, ``num_requests`` and
+            ``total_num_tokens``.
+    """
+    throughput = {}
+    for metric_name in ("requests_per_second", "tokens_per_second"):
+        throughput[metric_name] = [results[metric_name]]
+
+    run_info = {}
+    for info_name in ("elapsed_time", "num_requests", "total_num_tokens"):
+        run_info[info_name] = results[info_name]
+
+    records = convert_to_pytorch_benchmark_format(args=args,
+                                                  metrics=throughput,
+                                                  extra_info=run_info)
+    if not records:
+        return
+
+    # Stored next to args.output_json under a distinct name.
+    # NOTE(review): the earlier comment here said to avoid a json suffix so
+    # that CI does not pick the file up, yet the name still ends in ".json";
+    # confirm what pattern CI actually matches.
+    stem, _ext = os.path.splitext(args.output_json)
+    pt_file = f"{stem}.pytorch.json"
+    with open(pt_file, "w") as out_file:
+        json.dump(records, out_file)
+
+
 def main(args: argparse.Namespace):
    print(args)
    random.seed(args.seed)
@ -435,6 +456,7 @@ def main(args: argparse.Namespace):
        }
        with open(args.output_json, "w") as f:
            json.dump(results, f, indent=4)
+        save_to_pytorch_benchmark_format(args, results)


 if __name__ == "__main__":