Implement stop strings and best_of (#114)

This commit is contained in:
Woosuk Kwon
2023-05-21 11:18:00 -07:00
committed by GitHub
parent c3442c1f6f
commit f746ced08d
9 changed files with 162 additions and 116 deletions

View File

@@ -80,7 +80,7 @@ class BlockSpaceManager:
def can_allocate(self, seq_group: SequenceGroup) -> bool:
# FIXME(woosuk): Here we assume that all sequences in the group share
# the same prompt. This may not be true for preempted sequences.
seq = seq_group.seqs[0]
seq = seq_group.get_seqs()[0]
num_required_blocks = len(seq.logical_token_blocks)
num_free_gpu_blocks = self.gpu_allocator.get_num_free_blocks()
# Use watermark to avoid frequent cache eviction.
@@ -88,7 +88,7 @@ class BlockSpaceManager:
def allocate(self, seq_group: SequenceGroup) -> None:
# NOTE: Here we assume that all sequences in the group have the same prompt.
seq = seq_group.seqs[0]
seq = seq_group.get_seqs()[0]
# Allocate new physical token blocks that will store the prompt tokens.
block_table: BlockTable = []
@@ -99,7 +99,7 @@ class BlockSpaceManager:
block_table.append(block)
# Assign the block table for each sequence.
for seq in seq_group.seqs:
for seq in seq_group.get_seqs():
self.block_tables[seq.seq_id] = block_table.copy()
def can_append_slot(self, seq_group: SequenceGroup) -> bool:
@@ -147,7 +147,7 @@ class BlockSpaceManager:
# NOTE: Here, we assume that the physical blocks are only shared by
# the sequences in the same group.
blocks: Set[PhysicalTokenBlock] = set()
for seq in seq_group.seqs:
for seq in seq_group.get_seqs():
if seq.status == SequenceStatus.FINISHED:
continue
block_table = self.block_tables[seq.seq_id]
@@ -168,7 +168,7 @@ class BlockSpaceManager:
def swap_in(self, seq_group: SequenceGroup) -> Dict[int, int]:
# CPU block -> GPU block.
mapping: Dict[PhysicalTokenBlock, PhysicalTokenBlock] = {}
for seq in seq_group.seqs:
for seq in seq_group.get_seqs():
if seq.status == SequenceStatus.FINISHED:
continue
new_block_table: BlockTable = []
@@ -199,7 +199,7 @@ class BlockSpaceManager:
def swap_out(self, seq_group: SequenceGroup) -> Dict[int, int]:
# GPU block -> CPU block.
mapping: Dict[PhysicalTokenBlock, PhysicalTokenBlock] = {}
for seq in seq_group.seqs:
for seq in seq_group.get_seqs():
if seq.status == SequenceStatus.FINISHED:
continue
new_block_table: BlockTable = []