Reduce GPU memory utilization to make sure OOM doesn't happen (#153)
This commit is contained in:
@@ -21,7 +21,7 @@ class EngineArgs:
     tensor_parallel_size: int = 1
     block_size: int = 16
     swap_space: int = 4  # GiB
-    gpu_memory_utilization: float = 0.95
+    gpu_memory_utilization: float = 0.90
     max_num_batched_tokens: int = 2560
     max_num_seqs: int = 256
     disable_log_stats: bool = False
Reference in New Issue
Block a user