OpenAI Compatible Frontend (#116)

This commit is contained in:
Zhuohan Li
2023-05-23 21:39:50 -07:00
committed by GitHub
parent e86717833d
commit 057daef778
20 changed files with 644 additions and 169 deletions

View File

@ -148,7 +148,7 @@ class BlockSpaceManager:
# the sequences in the same group.
blocks: Set[PhysicalTokenBlock] = set()
for seq in seq_group.get_seqs():
if seq.status == SequenceStatus.FINISHED:
if SequenceStatus.is_finished(seq.status):
continue
block_table = self.block_tables[seq.seq_id]
for block in block_table:
@ -169,7 +169,7 @@ class BlockSpaceManager:
# CPU block -> GPU block.
mapping: Dict[PhysicalTokenBlock, PhysicalTokenBlock] = {}
for seq in seq_group.get_seqs():
if seq.status == SequenceStatus.FINISHED:
if SequenceStatus.is_finished(seq.status):
continue
new_block_table: BlockTable = []
block_table = self.block_tables[seq.seq_id]
@ -200,7 +200,7 @@ class BlockSpaceManager:
# GPU block -> CPU block.
mapping: Dict[PhysicalTokenBlock, PhysicalTokenBlock] = {}
for seq in seq_group.get_seqs():
if seq.status == SequenceStatus.FINISHED:
if SequenceStatus.is_finished(seq.status):
continue
new_block_table: BlockTable = []
block_table = self.block_tables[seq.seq_id]

View File

@ -292,10 +292,12 @@ class Scheduler:
# Append a new token to the sequence.
output = seq_outputs[seq.seq_id]
seq.append_token_id(output.output_token, output.logprobs)
# Return a shallow copy of the running queue to prevent the queue
# from being modified by the caller.
return self.running.copy()
# Set `seq.status` to SequenceStatus.FINISHED.
#
# NOTE(review): a two-argument `free_seq` that takes an explicit
# `finish_status` follows immediately below in this listing; this
# one-argument form looks like the pre-change side of the diff hunk —
# confirm before relying on it.
def free_seq(self, seq: Sequence) -> None:
seq.status = SequenceStatus.FINISHED
# Store the caller-supplied status on `seq`, then pass `seq` to
# `self.block_manager.free`.
#
# Args:
#     seq: the sequence whose `status` attribute is overwritten.
#     finish_status: the SequenceStatus value assigned to `seq.status`.
#
# NOTE(review): callers are presumably expected to pass one of the
# "finished" statuses (cf. the `SequenceStatus.is_finished(...)` checks
# elsewhere in this change) — nothing here validates that; confirm.
def free_seq(self, seq: Sequence, finish_status: SequenceStatus) -> None:
seq.status = finish_status
# Presumably releases the blocks held for this sequence — the body of
# `block_manager.free` is not visible here; confirm.
self.block_manager.free(seq)
def free_finished_seq_groups(self) -> None: