Misc fixes: lm_eval still returns a wrong answer, but it no longer hangs.

Signed-off-by: Sage Moore <sage@neuralmagic.com>
This commit is contained in:
Sage Moore
2025-06-04 22:46:18 +00:00
parent 2e3484c237
commit f8848bb201
6 changed files with 84 additions and 82 deletions

View File

@ -84,6 +84,10 @@ def main(args, dp_size, local_dp_rank, global_dp_rank, dp_master_ip,
"The capital of France is",
"The future of AI is",
] * 100
# import random
# import string
# prompts = [''.join(random.choices(string.ascii_letters, k=128)) for _ in range(2048)]
# with DP, each rank should process different prompts.
# usually all the DP ranks process a full dataset,
@ -177,7 +181,7 @@ if __name__ == "__main__":
procs.append(proc)
exit_code = 0
for proc in procs:
proc.join(timeout=300)
proc.join(timeout=1200)
if proc.exitcode is None:
print(f"Killing process {proc.pid} that didn't stop within 5 minutes.")
proc.kill()