[Misc] refactor examples (#16563)

Signed-off-by: reidliu41 <reid201711@gmail.com>
Co-authored-by: reidliu41 <reid201711@gmail.com>
2025-04-14 17:59:15 +08:00
parent ce4ddd2d1a
commit 7cbfc10943
5 changed files with 102 additions and 63 deletions
--- a/examples/offline_inference/disaggregated_prefill.py
+++ b/examples/offline_inference/disaggregated_prefill.py
@@ -95,7 +95,7 @@ def run_decode(prefill_done):
        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")


-if __name__ == "__main__":
+def main():
    prefill_done = Event()
    prefill_process = Process(target=run_prefill, args=(prefill_done, ))
    decode_process = Process(target=run_decode, args=(prefill_done, ))
@@ -109,3 +109,7 @@ if __name__ == "__main__":
    # Terminate the prefill node when decode is finished
    decode_process.join()
    prefill_process.terminate()
+
+
+if __name__ == "__main__":
+    main()
--- a/examples/offline_inference/disaggregated_prefill_lmcache.py
+++ b/examples/offline_inference/disaggregated_prefill_lmcache.py
@@ -38,6 +38,10 @@ os.environ["LMCACHE_REMOTE_URL"] = f"lm://localhost:{port}"
 # `naive` indicates using raw bytes of the tensor without any compression
 os.environ["LMCACHE_REMOTE_SERDE"] = "naive"

+prompts = [
+    "Hello, how are you?" * 1000,
+]
+

 def run_prefill(prefill_done, prompts):
    # We use GPU 0 for prefill node.
@@ -106,12 +110,7 @@ def run_lmcache_server(port):
    return server_proc


-if __name__ == "__main__":
-
-    prompts = [
-        "Hello, how are you?" * 1000,
-    ]
-
+def main():
    prefill_done = Event()
    prefill_process = Process(target=run_prefill, args=(prefill_done, prompts))
    decode_process = Process(target=run_decode, args=(prefill_done, prompts))
@ -128,3 +127,7 @@ if __name__ == "__main__":
    prefill_process.terminate()
    lmcache_server_process.terminate()
    lmcache_server_process.wait()
+
+
+if __name__ == "__main__":
+    main()