[CI/Build] drop support for Python 3.8 EOL (#8464)

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
2024-11-06 02:11:55 -05:00
parent 4be3a45158
commit 21063c11c7
115 changed files with 239 additions and 321 deletions
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -79,7 +79,7 @@ async def async_request_tgi(
                        # any data, we should skip it.
                        if chunk_bytes.startswith(":"):
                            continue
-                        chunk = remove_prefix(chunk_bytes, "data:")
+                        chunk = chunk_bytes.removeprefix("data:")

                        data = json.loads(chunk)
                        timestamp = time.perf_counter()
@@ -144,8 +144,8 @@ async def async_request_trt_llm(
                        if not chunk_bytes:
                            continue

-                        chunk = remove_prefix(chunk_bytes.decode("utf-8"),
-                                              "data:")
+                        chunk = chunk_bytes.decode("utf-8").removeprefix(
+                            "data:")

                        data = json.loads(chunk)
                        output.generated_text += data["text_output"]
@@ -261,8 +261,8 @@ async def async_request_openai_completions(
                        if not chunk_bytes:
                            continue

-                        chunk = remove_prefix(chunk_bytes.decode("utf-8"),
-                                              "data: ")
+                        chunk = chunk_bytes.decode("utf-8").removeprefix(
+                            "data: ")
                        if chunk == "[DONE]":
                            latency = time.perf_counter() - st
                        else:
@@ -349,8 +349,8 @@ async def async_request_openai_chat_completions(
                        if not chunk_bytes:
                            continue

-                        chunk = remove_prefix(chunk_bytes.decode("utf-8"),
-                                              "data: ")
+                        chunk = chunk_bytes.decode("utf-8").removeprefix(
+                            "data: ")
                        if chunk == "[DONE]":
                            latency = time.perf_counter() - st
                        else:
@@ -389,14 +389,6 @@ async def async_request_openai_chat_completions(
    return output


-# Since vllm must support Python 3.8, we can't use str.removeprefix(prefix)
-# introduced in Python 3.9
-def remove_prefix(text: str, prefix: str) -> str:
-    if text.startswith(prefix):
-        return text[len(prefix):]
-    return text
-
-
 def get_model(pretrained_model_name_or_path: str) -> str:
    if os.getenv('VLLM_USE_MODELSCOPE', 'False').lower() == 'true':
        from modelscope import snapshot_download
--- a/benchmarks/kernels/benchmark_machete.py
+++ b/benchmarks/kernels/benchmark_machete.py
@@ -269,10 +269,10 @@ def run_square_bench(args):


 def run_range_bench(args):
-    m_start, k_start, n_start = [int(x) for x in args.dim_start.split(",")]
-    m_end, k_end, n_end = [int(x) for x in args.dim_end.split(",")]
+    m_start, k_start, n_start = (int(x) for x in args.dim_start.split(","))
+    m_end, k_end, n_end = (int(x) for x in args.dim_end.split(","))
    m_increment, k_increment, n_increment = \
-        [int(x) for x in args.dim_increment.split(",")]
+        (int(x) for x in args.dim_increment.split(","))
    Ms = list(range(m_start, m_end + 1, m_increment))
    Ks = list(range(k_start, k_end + 1, k_increment))
    Ns = list(range(n_start, n_end + 1, n_increment))