Compare commits
2 Commits
v0.11.0rc2
...
v0.11.0rc3
| Author | SHA1 | Date | |
|---|---|---|---|
| | 8ce5d3198d | | |
| | 09c2cbc04a | | |
@@ -474,8 +474,11 @@ class NixlConnectorWorker:
|
||||
"backends", ["UCX"])
|
||||
# Agent.
|
||||
non_ucx_backends = [b for b in self.nixl_backends if b != "UCX"]
|
||||
config = nixl_agent_config(backends=self.nixl_backends) if len(
|
||||
non_ucx_backends) > 0 and nixl_agent_config is not None else None
|
||||
if nixl_agent_config is None:
|
||||
config = None
|
||||
else:
|
||||
config = nixl_agent_config(backends=self.nixl_backends) if len(
|
||||
non_ucx_backends) > 0 else nixl_agent_config(num_threads=8)
|
||||
|
||||
self.nixl_wrapper = NixlWrapper(str(uuid.uuid4()), config)
|
||||
# Map of engine_id -> {rank0: agent_name0, rank1: agent_name1..}.
|
||||
|
||||
@@ -212,6 +212,8 @@ class Qwen3MoeLLMModel(Qwen3MoeModel):
|
||||
# attempted to load as other weights later
|
||||
is_expert_weight = True
|
||||
name_mapped = name.replace(weight_name, param_name)
|
||||
if is_pp_missing_parameter(name_mapped, self):
|
||||
continue
|
||||
if is_fused_expert:
|
||||
loaded_weight = loaded_weight.transpose(-1,
|
||||
-2) # no bias
|
||||
@@ -230,8 +232,6 @@ class Qwen3MoeLLMModel(Qwen3MoeModel):
|
||||
name_mapped, params_dict, loaded_weight,
|
||||
shard_id, num_experts)
|
||||
else:
|
||||
if is_pp_missing_parameter(name_mapped, self):
|
||||
continue
|
||||
# Skip loading extra parameters for GPTQ/modelopt models
|
||||
if name_mapped.endswith(
|
||||
ignore_suffixes
|
||||
|
||||
@@ -1288,4 +1288,9 @@ class Scheduler(SchedulerInterface):
|
||||
self.finished_recving_kv_req_ids.add(req_id)
|
||||
for req_id in (kv_connector_output.finished_sending or ()):
|
||||
logger.debug("Finished sending KV transfer for request %s", req_id)
|
||||
self._free_blocks(self.requests[req_id])
|
||||
if req_id not in self.requests:
|
||||
logger.warning(
|
||||
"Got finished sending KV transfer for request %s,"
|
||||
"but the request is already freed.", req_id)
|
||||
else:
|
||||
self._free_blocks(self.requests[req_id])
|
||||
|
||||
Reference in New Issue
Block a user