[Bugfix][fast] Fix the get_num_blocks_touched logic (#6849)

This commit is contained in:
Zach Zheng
2024-08-08 10:43:30 -07:00
committed by GitHub
parent 21b9c49aa3
commit 782e53ab59
6 changed files with 172 additions and 10 deletions

View File

@ -15,13 +15,15 @@ def create_dummy_prompt(
lora_request: Optional[LoRARequest] = None,
use_beam_search: bool = False,
best_of: int = 1,
prompt_tokens: Optional[List[int]] = None,
) -> Tuple[Sequence, SequenceGroup]:
if not block_size:
block_size = prompt_length
# Create dummy prompt sequence with tokens 0...block_size-1
# and prompt "0 ... block_size".
prompt_tokens = list(range(prompt_length))
if prompt_tokens is None:
# Create dummy prompt sequence with tokens 0...block_size-1
# and prompt "0 ... block_size".
prompt_tokens = list(range(prompt_length))
prompt_str = " ".join([str(t) for t in prompt_tokens])
prompt = Sequence(int(request_id),
inputs={