[Misc] refactor examples (#16563)

Signed-off-by: reidliu41 <reid201711@gmail.com>
Co-authored-by: reidliu41 <reid201711@gmail.com>
This commit is contained in:
Reid
2025-04-14 17:59:15 +08:00
committed by GitHub
parent ce4ddd2d1a
commit 7cbfc10943
5 changed files with 102 additions and 63 deletions

View File

@@ -95,7 +95,7 @@ def run_decode(prefill_done):
print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
if __name__ == "__main__":
def main():
prefill_done = Event()
prefill_process = Process(target=run_prefill, args=(prefill_done, ))
decode_process = Process(target=run_decode, args=(prefill_done, ))
@@ -109,3 +109,7 @@ if __name__ == "__main__":
# Terminate the prefill node when decode is finished
decode_process.join()
prefill_process.terminate()
if __name__ == "__main__":
main()

View File

@@ -38,6 +38,10 @@ os.environ["LMCACHE_REMOTE_URL"] = f"lm://localhost:{port}"
# `naive` indicates using raw bytes of the tensor without any compression
os.environ["LMCACHE_REMOTE_SERDE"] = "naive"
prompts = [
"Hello, how are you?" * 1000,
]
def run_prefill(prefill_done, prompts):
# We use GPU 0 for prefill node.
@@ -106,12 +110,7 @@ def run_lmcache_server(port):
return server_proc
if __name__ == "__main__":
prompts = [
"Hello, how are you?" * 1000,
]
def main():
prefill_done = Event()
prefill_process = Process(target=run_prefill, args=(prefill_done, prompts))
decode_process = Process(target=run_decode, args=(prefill_done, prompts))
@@ -128,3 +127,7 @@ if __name__ == "__main__":
prefill_process.terminate()
lmcache_server_process.terminate()
lmcache_server_process.wait()
if __name__ == "__main__":
main()