# ragflow/test/benchmark/report.py

from typing import Dict, List, Optional


def _fmt_seconds(value: Optional[float]) -> str:
    if value is None:
        return "n/a"
    return f"{value:.4f}s"


def _fmt_ms(value: Optional[float]) -> str:
    if value is None:
        return "n/a"
    return f"{value * 1000.0:.2f}ms"


def _fmt_qps(qps: Optional[float]) -> str:
    if qps is None or qps <= 0:
        return "n/a"
    return f"{qps:.2f}"


def _calc_qps(total_duration_s: Optional[float], total_requests: int) -> Optional[float]:
    if total_duration_s is None or total_duration_s <= 0:
        return None
    return total_requests / total_duration_s


def render_report(lines: List[str]) -> str:
    """Join *lines* with newlines into a single report string.

    Leading and trailing whitespace of the joined text is stripped, and
    exactly one trailing newline is appended.
    """
    joined = "\n".join(lines)
    trimmed = joined.strip()
    return f"{trimmed}\n"


def chat_report(
    *,
    interface: str,
    concurrency: int,
    total_duration_s: Optional[float],
    iterations: int,
    success: int,
    failure: int,
    model: str,
    total_stats: Dict[str, Optional[float]],
    first_token_stats: Dict[str, Optional[float]],
    errors: List[str],
    created: Dict[str, str],
) -> str:
    """Build the plain-text summary of a chat benchmark run.

    The report lists the run parameters, one ``key: value`` line per entry
    of *created*, the total and first-token latency summaries, the total
    duration, and the QPS figure. When *errors* is non-empty, at most the
    first five entries are appended, joined by ``"; "``.

    Both stats mappings must contain the keys ``avg``, ``min``, ``p50``,
    ``p90`` and ``p95``; a missing key raises ``KeyError``. Their values
    are rendered through ``_fmt_ms`` (presumably seconds -- confirm against
    the caller). QPS is computed from *iterations*, not *success*.
    """
    stat_keys = ("avg", "min", "p50", "p90", "p95")

    report = [
        f"Interface: {interface}",
        f"Concurrency: {concurrency}",
        f"Iterations: {iterations}",
        f"Success: {success}",
        f"Failure: {failure}",
        f"Model: {model}",
    ]
    report += [f"{name}: {ident}" for name, ident in created.items()]

    total_summary = ", ".join(f"{k}={_fmt_ms(total_stats[k])}" for k in stat_keys)
    first_summary = ", ".join(f"{k}={_fmt_ms(first_token_stats[k])}" for k in stat_keys)
    report.append("Latency (total): " + total_summary)
    report.append("Latency (first token): " + first_summary)

    report.append(f"Total Duration: {_fmt_seconds(total_duration_s)}")
    qps = _calc_qps(total_duration_s, iterations)
    report.append(f"QPS (requests / total duration): {_fmt_qps(qps)}")

    if errors:
        # Cap the error list so the report stays readable.
        report.append("Errors: " + "; ".join(errors[:5]))
    return render_report(report)


def retrieval_report(
    *,
    interface: str,
    concurrency: int,
    total_duration_s: Optional[float],
    iterations: int,
    success: int,
    failure: int,
    stats: Dict[str, Optional[float]],
    errors: List[str],
    created: Dict[str, str],
) -> str:
    """Build the plain-text summary of a retrieval benchmark run.

    The report lists the run parameters, one ``key: value`` line per entry
    of *created*, a single latency summary, the total duration, and the QPS
    figure. When *errors* is non-empty, at most the first five entries are
    appended, joined by ``"; "``.

    *stats* must contain the keys ``avg``, ``min``, ``p50``, ``p90`` and
    ``p95``; a missing key raises ``KeyError``. Its values are rendered
    through ``_fmt_ms`` (presumably seconds -- confirm against the caller).
    QPS is computed from *iterations*, not *success*.
    """
    stat_keys = ("avg", "min", "p50", "p90", "p95")

    report = [
        f"Interface: {interface}",
        f"Concurrency: {concurrency}",
        f"Iterations: {iterations}",
        f"Success: {success}",
        f"Failure: {failure}",
    ]
    report += [f"{name}: {ident}" for name, ident in created.items()]

    latency_summary = ", ".join(f"{k}={_fmt_ms(stats[k])}" for k in stat_keys)
    report.append("Latency: " + latency_summary)

    report.append(f"Total Duration: {_fmt_seconds(total_duration_s)}")
    qps = _calc_qps(total_duration_s, iterations)
    report.append(f"QPS (requests / total duration): {_fmt_qps(qps)}")

    if errors:
        # Cap the error list so the report stays readable.
        report.append("Errors: " + "; ".join(errors[:5]))
    return render_report(report)