[Kernel] Zero point support in fused MarlinMoE kernel + AWQ Fused MoE (#8973)
Co-authored-by: Dipika <dipikasikka1@gmail.com>
Co-authored-by: Dipika Sikka <ds3822@columbia.edu>
This commit is contained in:
@ -3,3 +3,4 @@ compressed-tensors, nm-testing/Mixtral-8x7B-Instruct-v0.1-W4A16-channel-quantize
|
||||
compressed-tensors, nm-testing/Mixtral-8x7B-Instruct-v0.1-W8A16-quantized, main
|
||||
compressed-tensors, mgoin/DeepSeek-Coder-V2-Lite-Instruct-FP8, main
|
||||
gptq_marlin, TheBloke/Mixtral-8x7B-v0.1-GPTQ, main
|
||||
awq_marlin, casperhansen/deepseek-coder-v2-instruct-awq, main
|
||||
@ -1,7 +1,20 @@
|
||||
#!/bin/bash
|
||||
SUCCESS=0
|
||||
|
||||
IFS=$'\n' read -d '' -r -a MODEL_CONFIGS < "weight_loading/models.txt"
|
||||
# Parse command-line options.
#   -c FILE   path stored in CONFIG (read further down as the model-config list)
# For an unrecognised option, or -c given without its argument, getopts prints
# its own diagnostic and sets OPT to "?"; the script then shows the usage text
# and exits with status 1.
while getopts "c:" OPT; do
  case "${OPT}" in
    c)
      CONFIG="$OPTARG"
      ;;
    \?)
      # NOTE(review): usage is not defined in the visible part of this script —
      # confirm it exists before this point.
      usage
      exit 1
      ;;
  esac
done
|
||||
|
||||
|
||||
# Load the model configs, one array element per line, from the file passed
# with -c. The redirect target is quoted so a path containing whitespace or
# glob characters is not word-split (bash rejects that as an "ambiguous
# redirect" and skips the read).
IFS=$'\n' read -d '' -r -a MODEL_CONFIGS < "$CONFIG"
|
||||
|
||||
for MODEL_CONFIG in "${MODEL_CONFIGS[@]}"
|
||||
do
|
||||
|
||||
Reference in New Issue
Block a user