Optimize tensor parallel execution speed (#17)

This commit is contained in:
Zhuohan Li
2023-04-01 00:51:08 +08:00
committed by GitHub
parent 7a7929abe8
commit c45f3c3ab6
3 changed files with 103 additions and 287 deletions

View File

@@ -6,8 +6,6 @@ from .layers import (
set_defaults_if_not_set_tensor_model_parallel_attributes,
copy_tensor_model_parallel_attributes,
param_is_not_tensor_parallel_duplicate,
linear_with_grad_accumulation_and_async_allreduce
)
from .mappings import (
@@ -39,7 +37,6 @@ __all__ = [
"set_defaults_if_not_set_tensor_model_parallel_attributes",
"copy_tensor_model_parallel_attributes",
"param_is_not_tensor_parallel_duplicate",
"linear_with_grad_accumulation_and_async_allreduce",
# mappings.py
"copy_to_tensor_model_parallel_region",
"gather_from_tensor_model_parallel_region",