Convert examples to ruff-format (#18400)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Harry Mellor
2025-05-26 17:57:54 +01:00
committed by GitHub
parent e7523c2e03
commit 27bebcd897
83 changed files with 2529 additions and 2405 deletions
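As a rough sketch of how a conversion like this can be reproduced locally (this wrapper is not part of the commit; it assumes ruff is installed and that the example scripts live under examples/):

import subprocess

# Run ruff's formatter over the examples directory; ruff format rewrites
# single-quoted strings to double quotes and reflows long call sites,
# which is the kind of change shown in the diff below.
subprocess.run(["ruff", "format", "examples/"], check=True)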


@@ -5,12 +5,12 @@ import os
 from vllm import LLM, SamplingParams
 # creates XLA hlo graphs for all the context length buckets.
-os.environ['NEURON_CONTEXT_LENGTH_BUCKETS'] = "128,512,1024,2048"
+os.environ["NEURON_CONTEXT_LENGTH_BUCKETS"] = "128,512,1024,2048"
 # creates XLA hlo graphs for all the token gen buckets.
-os.environ['NEURON_TOKEN_GEN_BUCKETS'] = "128,512,1024,2048"
+os.environ["NEURON_TOKEN_GEN_BUCKETS"] = "128,512,1024,2048"
 # Quantizes neuron model weight to int8 ,
 # The default config for quantization is int8 dtype.
-os.environ['NEURON_QUANT_DTYPE'] = "s8"
+os.environ["NEURON_QUANT_DTYPE"] = "s8"
 # Sample prompts.
 prompts = [
@@ -44,7 +44,8 @@ def main():
         override_neuron_config={
             "cast_logits_dtype": "bfloat16",
         },
-        tensor_parallel_size=2)
+        tensor_parallel_size=2,
+    )
     # Generate texts from the prompts. The output is a list of RequestOutput objects
     # that contain the prompt, generated text, and other information.
     outputs = llm.generate(prompts, sampling_params)