[Frontend] correctly record prefill and decode time metrics (#10853)

Signed-off-by: Tomer Asida <tomera@ai21.com>
This commit is contained in:
tomeras91
2024-12-03 21:13:31 +02:00
committed by GitHub
parent 7090c27bb2
commit 7c32b6861e

View File

@@ -599,9 +599,9 @@ class PrometheusStatLogger(StatLoggerBase):
stats.time_queue_requests)
self._log_histogram(self.metrics.histogram_inference_time_request,
stats.time_inference_requests)
self._log_histogram(self.metrics.histogram_decode_time_request,
stats.time_prefill_requests)
self._log_histogram(self.metrics.histogram_prefill_time_request,
stats.time_prefill_requests)
self._log_histogram(self.metrics.histogram_decode_time_request,
stats.time_decode_requests)
self._log_histogram(self.metrics.histogram_time_in_queue_request,
stats.time_in_queue_requests)