[Kernel] GGUF MoeVec kernel (#16780)

Signed-off-by: SzymonOzog <szymon.ozog@aleph-alpha.com> Signed-off-by: SzymonOzog <szymon.ozog@gmail.com> Signed-off-by: Isotr0py <2037008807@qq.com> Co-authored-by: Isotr0py <2037008807@qq.com>
2025-05-07 14:07:23 +08:00
parent c3e9d5060e
commit 1a45a61387
8 changed files with 544 additions and 16 deletions
--- a/csrc/torch_bindings.cpp
+++ b/csrc/torch_bindings.cpp
@ -337,6 +337,12 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
      "int type, SymInt row, SymInt top_k, SymInt tokens) -> Tensor");
  ops.impl("ggml_moe_a8", torch::kCUDA, &ggml_moe_a8);

+  ops.def(
+      "ggml_moe_a8_vec(Tensor X, Tensor W, "
+      "Tensor topk_ids, int top_k, "
+      "int type, SymInt row, SymInt tokens) -> Tensor");
+  ops.impl("ggml_moe_a8_vec", torch::kCUDA, &ggml_moe_a8_vec);
+
  ops.def("ggml_moe_get_block_size", &ggml_moe_get_block_size);

 #ifndef USE_ROCM