[Bugfix] Get available quantization methods from quantization registry (#4098)

This commit is contained in:
Michael Goin
2024-04-18 03:21:55 -04:00
committed by GitHub
parent 66ded03067
commit 53b018edcb
6 changed files with 18 additions and 13 deletions

View File

@@ -10,6 +10,8 @@ from tqdm import tqdm
from transformers import (AutoModelForCausalLM, AutoTokenizer,
PreTrainedTokenizerBase)
+from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
def sample_requests(
dataset_path: str,
@@ -267,7 +269,7 @@ if __name__ == "__main__":
parser.add_argument("--tokenizer", type=str, default=None)
parser.add_argument('--quantization',
'-q',
-choices=['awq', 'gptq', 'squeezellm', None],
+choices=[*QUANTIZATION_METHODS, None],
default=None)
parser.add_argument("--tensor-parallel-size", "-tp", type=int, default=1)
parser.add_argument("--n",