simplify - get rid of tokenshape
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
This commit is contained in:
@ -470,18 +470,6 @@ TORCH_LIBRARY_EXPAND(CONCAT(TORCH_EXTENSION_NAME, _cache_ops), cache_ops) {
|
||||
cache_ops.impl("reshape_and_cache_flash", torch::kCUDA,
|
||||
&reshape_and_cache_flash);
|
||||
|
||||
// Reshape the key and value tensors and cache them.
|
||||
cache_ops.def(
|
||||
"reshape_and_cache_flash_full_cuda(Tensor tensorshape,"
|
||||
" Tensor key, Tensor value,"
|
||||
" Tensor! key_cache,"
|
||||
" Tensor! value_cache,"
|
||||
" Tensor slot_mapping,"
|
||||
" str kv_cache_dtype,"
|
||||
" Tensor k_scale, Tensor v_scale) -> ()");
|
||||
cache_ops.impl("reshape_and_cache_flash_full_cuda", torch::kCUDA,
|
||||
&reshape_and_cache_flash_full_cuda);
|
||||
|
||||
// Concat kv_c and k_pe and cache them.
|
||||
cache_ops.def(
|
||||
"concat_and_cache_mla(Tensor kv_c, Tensor k_pe,"
|
||||
|
||||
Reference in New Issue
Block a user