diff --git a/requirements-common.txt b/requirements-common.txt index 777b2bb124..16253cbbb2 100644 --- a/requirements-common.txt +++ b/requirements-common.txt @@ -5,7 +5,7 @@ requests >= 2.26.0 tqdm blake3 py-cpuinfo -transformers >= 4.45.2 # Required for Llama 3.2 and Qwen2-VL. +transformers >= 4.48.2 # Required for Bamba model and Transformers backend. tokenizers >= 0.19.1 # Required for Llama 3. protobuf # Required by LlamaTokenizer. fastapi >= 0.107.0, < 0.113.0; python_version < '3.9' @@ -34,6 +34,6 @@ pyyaml six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12 setuptools>=74.1.1; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12 einops # Required for Qwen2-VL. -compressed-tensors == 0.8.1 # required for compressed-tensors +compressed-tensors == 0.9.1 # required for compressed-tensors depyf==0.18.0 # required for profiling and debugging with compilation config -cloudpickle # allows pickling lambda functions in model_executor/models/registry.py +cloudpickle # allows pickling lambda functions in model_executor/models/registry.py \ No newline at end of file diff --git a/requirements-test.txt b/requirements-test.txt index 09e009c2e2..8f35343de3 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.12 # by the following command: # -# python3.12 -m piptools compile requirements-test.in -o requirements-test.txt +# python3.12 -m piptools compile requirements-test.in -o requirements-test.txt # absl-py==2.1.0 # via rouge-score @@ -106,9 +106,17 @@ dnspython==2.7.0 docutils==0.16 # via awscli einops==0.8.0 - # via -r requirements-test.in + # via + # -r requirements-test.in + # encodec + # vector-quantize-pytorch + # vocos +einx==0.3.0 + # via vector-quantize-pytorch email-validator==2.2.0 # via pydantic +encodec==0.1.1 + # via vocos evaluate==0.4.3 # via lm-eval fastparquet==2024.11.0 @@ -125,6 +133,8 @@ filelock==3.16.1 # triton fonttools==4.54.1 # via matplotlib +frozendict==2.4.6 + # via einx frozenlist==1.5.0 # via # aiohttp @@ -159,6 +169,7 @@ huggingface-hub==0.26.2 # timm # tokenizers # transformers + # vocos idna==3.10 # via # anyio @@ -261,6 +272,8 @@ numpy==1.26.4 # cupy-cuda12x # datasets # decord + # einx + # encodec # evaluate # fastparquet # genai-perf @@ -283,6 +296,7 @@ numpy==1.26.4 # torchvision # transformers # tritonclient + # vocos nvidia-cublas-cu12==12.4.5.8 # via # nvidia-cudnn-cu12 @@ -455,6 +469,7 @@ pyyaml==6.0.2 # responses # timm # transformers + # vocos ray[adag]==2.40.0 # via -r requirements-test.in redis==5.2.0 @@ -517,6 +532,7 @@ scipy==1.13.1 # scikit-learn # sentence-transformers # statsmodels + # vocos sentence-transformers==3.2.1 # via -r requirements-test.in sentencepiece==0.2.0 @@ -540,7 +556,9 @@ sqlitedict==2.1.0 statsmodels==0.14.4 # via genai-perf sympy==1.13.1 - # via torch + # via + # einx + # torch tabledata==1.3.3 # via pytablewriter tabulate==0.9.0 @@ -568,12 +586,21 @@ torch==2.5.1 # -r requirements-test.in # accelerate # bitsandbytes + # encodec # lm-eval # peft # sentence-transformers # tensorizer # timm + # torchaudio # torchvision + # vector-quantize-pytorch + # vocos +torchaudio==2.5.1 + # via + # -r requirements-test.in + # encodec + # vocos torchvision==0.20.1 # via timm tqdm==4.66.6 @@ -584,13 +611,15 @@ tqdm==4.66.6 # lm-eval # nltk # peft + # pqdm # sentence-transformers # tqdm-multiprocess # transformers tqdm-multiprocess==0.0.11 # via lm-eval -transformers==4.47.0 +transformers==4.48.2 # via + # -r requirements-test.in # genai-perf # lm-eval # peft @@ -615,6 +644,7 @@ typing-extensions==4.12.2 # huggingface-hub # librosa # mistral-common + # pqdm # pydantic # pydantic-core # torch @@ -626,6 +656,10 @@ urllib3==2.2.3 # requests # responses # tritonclient +vector-quantize-pytorch==1.21.2 + # via -r requirements-test.in +vocos==0.1.0 + # via -r requirements-test.in word2number==1.1 # via lm-eval xxhash==3.5.0 @@ -638,4 +672,4 @@ zstandard==0.23.0 # via lm-eval # The following packages are considered to be unsafe in a requirements file: -# setuptools +# setuptools \ No newline at end of file diff --git a/vllm/v1/worker/tpu_model_runner.py b/vllm/v1/worker/tpu_model_runner.py index 568e2f0cef..de3dba02cb 100644 --- a/vllm/v1/worker/tpu_model_runner.py +++ b/vllm/v1/worker/tpu_model_runner.py @@ -766,11 +766,6 @@ class TPUModelRunner(ModelRunnerBase): logger.info(" -- Compilation for decode done in %.2f [secs].", end - start) - def _initialize_kv_cache(self): - kv_cache_spec = self.get_kv_cache_spec() - - kv_cache_config = get_kv_cache_config(vllm_config, kv_cache_spec, - availble_gpu_memory) def initialize_kv_cache(self, kv_cache_config: KVCacheConfig) -> None: """ Initialize KV cache based on `kv_cache_config`.