[V1][Spec Decode] Share input embedding of target model with EAGLE draft model to free ~1GB for llama 3 model (#17326)
Co-authored-by: root <root@ekagra-8xh100.us-east5-a.c.serving-efficiency-poc.internal> Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
@ -105,6 +105,13 @@ def main():
|
||||
outputs = llm.generate(prompt_token_ids=prompt_ids,
|
||||
sampling_params=sampling_params)
|
||||
|
||||
# print the generated text
|
||||
for output in outputs:
|
||||
print("-" * 50)
|
||||
print(f"prompt: {output.prompt}")
|
||||
print(f"generated text: {output.outputs[0].text}")
|
||||
print("-" * 50)
|
||||
|
||||
if not hasattr(outputs, "metrics") or outputs.metrics is None:
|
||||
return
|
||||
|
||||
|
||||
Reference in New Issue
Block a user