[Misc] fix line length for entire codebase (#3444)

2024-03-16 00:36:29 -07:00
parent ad50bf4b25
commit 8e67598aa6
9 changed files with 174 additions and 128 deletions
--- a/examples/multilora_inference.py
+++ b/examples/multilora_inference.py
@@ -1,5 +1,6 @@
 """
-This example shows how to use the multi-LoRA functionality for offline inference.
+This example shows how to use the multi-LoRA functionality
+for offline inference.

 Requires HuggingFace credentials for access to Llama2.
 """
@@ -16,7 +17,7 @@ def create_test_prompts(
        lora_path: str
 ) -> List[Tuple[str, SamplingParams, Optional[LoRARequest]]]:
    """Create a list of test prompts with their sampling parameters.
-    
+
    2 requests for base model, 4 requests for the LoRA. We define 2
    different LoRA adapters (using the same model for demo purposes).
    Since we also set `max_loras=1`, the expectation is that the requests
@@ -34,36 +35,40 @@ def create_test_prompts(
                        top_k=5,
                        presence_penalty=0.2,
                        max_tokens=128), None),
-        ("[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_74 (icao VARCHAR, airport VARCHAR)\n\n question: Name the ICAO for lilongwe international airport [/user] [assistant]",
-         SamplingParams(temperature=0.0,
-                        logprobs=1,
-                        prompt_logprobs=1,
-                        max_tokens=128,
-                        stop_token_ids=[32003]),
-         LoRARequest("sql-lora", 1, lora_path)),
-        ("[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_11 (nationality VARCHAR, elector VARCHAR)\n\n question: When Anchero Pantaleone was the elector what is under nationality? [/user] [assistant]",
-         SamplingParams(n=3,
-                        best_of=3,
-                        use_beam_search=True,
-                        temperature=0,
-                        max_tokens=128,
-                        stop_token_ids=[32003]),
-         LoRARequest("sql-lora", 1, lora_path)),
-        ("[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_74 (icao VARCHAR, airport VARCHAR)\n\n question: Name the ICAO for lilongwe international airport [/user] [assistant]",
-         SamplingParams(temperature=0.0,
-                        logprobs=1,
-                        prompt_logprobs=1,
-                        max_tokens=128,
-                        stop_token_ids=[32003]),
-         LoRARequest("sql-lora2", 2, lora_path)),
-        ("[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_11 (nationality VARCHAR, elector VARCHAR)\n\n question: When Anchero Pantaleone was the elector what is under nationality? [/user] [assistant]",
-         SamplingParams(n=3,
-                        best_of=3,
-                        use_beam_search=True,
-                        temperature=0,
-                        max_tokens=128,
-                        stop_token_ids=[32003]),
-         LoRARequest("sql-lora", 1, lora_path)),
+        (
+            "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_74 (icao VARCHAR, airport VARCHAR)\n\n question: Name the ICAO for lilongwe international airport [/user] [assistant]",  # noqa: E501
+            SamplingParams(temperature=0.0,
+                           logprobs=1,
+                           prompt_logprobs=1,
+                           max_tokens=128,
+                           stop_token_ids=[32003]),
+            LoRARequest("sql-lora", 1, lora_path)),
+        (
+            "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_11 (nationality VARCHAR, elector VARCHAR)\n\n question: When Anchero Pantaleone was the elector what is under nationality? [/user] [assistant]",  # noqa: E501
+            SamplingParams(n=3,
+                           best_of=3,
+                           use_beam_search=True,
+                           temperature=0,
+                           max_tokens=128,
+                           stop_token_ids=[32003]),
+            LoRARequest("sql-lora", 1, lora_path)),
+        (
+            "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_74 (icao VARCHAR, airport VARCHAR)\n\n question: Name the ICAO for lilongwe international airport [/user] [assistant]",  # noqa: E501
+            SamplingParams(temperature=0.0,
+                           logprobs=1,
+                           prompt_logprobs=1,
+                           max_tokens=128,
+                           stop_token_ids=[32003]),
+            LoRARequest("sql-lora2", 2, lora_path)),
+        (
+            "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_11 (nationality VARCHAR, elector VARCHAR)\n\n question: When Anchero Pantaleone was the elector what is under nationality? [/user] [assistant]",  # noqa: E501
+            SamplingParams(n=3,
+                           best_of=3,
+                           use_beam_search=True,
+                           temperature=0,
+                           max_tokens=128,
+                           stop_token_ids=[32003]),
+            LoRARequest("sql-lora", 1, lora_path)),
    ]


--- a/examples/offline_inference_with_prefix.py
+++ b/examples/offline_inference_with_prefix.py
@@ -37,9 +37,10 @@ for output in outputs:

 print("-" * 80)

-# The llm.generate call will batch all prompts and send the batch at once if resources allow.
-# The prefix will only be cached after the first batch is processed, so we need to call generate once
-# to calculate the prefix and cache it.
+# The llm.generate call will batch all prompts and send the batch at once
+# if resources allow. The prefix will only be cached after the first batch
+# is processed, so we need to call generate once to calculate the prefix
+# and cache it.
 outputs = llm.generate(generating_prompts[0], sampling_params)

 # Subsequent batches can leverage the cached prefix