[Frontend] correctly record prefill and decode time metrics (#10853)
Signed-off-by: Tomer Asida <tomera@ai21.com>
This commit is contained in:
@@ -599,9 +599,9 @@ class PrometheusStatLogger(StatLoggerBase):
|
||||
stats.time_queue_requests)
|
||||
self._log_histogram(self.metrics.histogram_inference_time_request,
|
||||
stats.time_inference_requests)
|
||||
self._log_histogram(self.metrics.histogram_decode_time_request,
|
||||
stats.time_prefill_requests)
|
||||
self._log_histogram(self.metrics.histogram_prefill_time_request,
|
||||
stats.time_prefill_requests)
|
||||
self._log_histogram(self.metrics.histogram_decode_time_request,
|
||||
stats.time_decode_requests)
|
||||
self._log_histogram(self.metrics.histogram_time_in_queue_request,
|
||||
stats.time_in_queue_requests)
|
||||
|
||||
Reference in New Issue
Block a user