
from transformers import AutoTokenizer, AutoModelForCausalLM

+
def generate(
    prompt: str,
    model: Union[str, AutoModelForCausalLM],
    hf_access_token: str = None,
-    tokenizer: Union[str, AutoTokenizer] = 'meta-llama/Llama-2-7b-hf',
+    tokenizer: Union[str, AutoTokenizer] = "meta-llama/Llama-2-7b-hf",
    device: Optional[str] = None,
    max_length: int = 1024,
    assistant_model: Optional[Union[str, AutoModelForCausalLM]] = None,
    generate_kwargs: Optional[dict] = None,
) -> str:
-    """ Generates output given a prompt.
+    """Generates output given a prompt.

    Args:
        prompt: The string prompt.
@@ -53,43 +54,40 @@ def generate(
    if torch.cuda.is_available() and torch.cuda.device_count():
        device = "cuda:0"
        logging.warning(
-            'inference device is not set, using cuda:0, %s',
-            torch.cuda.get_device_name(0)
+            "inference device is not set, using cuda:0, %s",
+            torch.cuda.get_device_name(0),
        )
    else:
-        device = 'cpu'
+        device = "cpu"
        logging.warning(
-            (
-                'No CUDA device detected, using cpu, '
-                'expect slower speeds.'
-            )
+            ("No CUDA device detected, using cpu, " "expect slower speeds.")
        )

-    if 'cuda' in device and not torch.cuda.is_available():
-        raise ValueError('CUDA device requested but no CUDA device detected.')
+    if "cuda" in device and not torch.cuda.is_available():
+        raise ValueError("CUDA device requested but no CUDA device detected.")

    if not tokenizer:
-        raise ValueError('Tokenizer is not set in the generate function.')
+        raise ValueError("Tokenizer is not set in the generate function.")

    if not hf_access_token:
-        raise ValueError((
-            'Hugging face access token needs to be specified. '
-            'Please refer to https://huggingface.co/docs/hub/security-tokens'
-            ' to obtain one.'
+        raise ValueError(
+            (
+                "Hugging face access token needs to be specified. "
+                "Please refer to https://huggingface.co/docs/hub/security-tokens"
+                " to obtain one."
            )
        )

    if isinstance(model, str):
        checkpoint_path = model
        model = AutoModelForCausalLM.from_pretrained(
-            checkpoint_path,
-            trust_remote_code=True
+            checkpoint_path, trust_remote_code=True
        )
        model.to(device).eval()
    if isinstance(tokenizer, str):
        tokenizer = AutoTokenizer.from_pretrained(
-            tokenizer,
-            token=hf_access_token,
+            tokenizer,
+            token=hf_access_token,
        )

    # Speculative mode
@@ -98,17 +96,13 @@ def generate(
        draft_model = assistant_model
        if isinstance(assistant_model, str):
            draft_model = AutoModelForCausalLM.from_pretrained(
-                assistant_model,
-                trust_remote_code=True
+                assistant_model, trust_remote_code=True
            )
        draft_model.to(device).eval()

    # Prepare the prompt
    tokenized_prompt = tokenizer(prompt)
-    tokenized_prompt = torch.tensor(
-        tokenized_prompt['input_ids'],
-        device=device
-    )
+    tokenized_prompt = torch.tensor(tokenized_prompt["input_ids"], device=device)

    tokenized_prompt = tokenized_prompt.unsqueeze(0)

@@ -123,10 +117,7 @@ def generate(
    )
    generation_time = time.time() - stime

-    output_text = tokenizer.decode(
-        output_ids[0].tolist(),
-        skip_special_tokens=True
-    )
+    output_text = tokenizer.decode(output_ids[0].tolist(), skip_special_tokens=True)

    return output_text, generation_time

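Note that `generate` returns a `(output_text, generation_time)` tuple, so callers unpack two values. A minimal usage sketch, assuming the file is importable as `generate_openelm` and using an illustrative checkpoint id (neither name is established by this diff):

    # Sketch only: the module name and checkpoint id are assumptions.
    from generate_openelm import generate

    output_text, generation_time = generate(
        prompt="Once upon a time there was",
        model="apple/OpenELM-270M",   # local path or hub id of an HF causal LM
        hf_access_token="hf_...",     # needed to fetch the gated Llama-2 tokenizer
        generate_kwargs={"repetition_penalty": 1.2},
    )
    print(output_text, generation_time)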
@@ -136,83 +127,84 @@ def openelm_generate_parser():

    class KwargsParser(argparse.Action):
        """Parser action class to parse kwargs of form key=value"""
+
        def __call__(self, parser, namespace, values, option_string=None):
            setattr(namespace, self.dest, dict())
            for val in values:
-                if '=' not in val:
+                if "=" not in val:
                    raise ValueError(
                        (
-                            'Argument parsing error, kwargs are expected in'
-                            ' the form of key=value.'
+                            "Argument parsing error, kwargs are expected in"
+                            " the form of key=value."
                        )
                    )
-                kwarg_k, kwarg_v = val.split('=')
+                kwarg_k, kwarg_v = val.split("=")
                try:
                    converted_v = int(kwarg_v)
                except ValueError:
                    try:
                        converted_v = float(kwarg_v)
                    except ValueError:
-                        converted_v = kwarg_v
+                        converted_v = kwarg_v
                getattr(namespace, self.dest)[kwarg_k] = converted_v

-    parser = argparse.ArgumentParser('OpenELM Generate Module')
+    parser = argparse.ArgumentParser("OpenELM Generate Module")
    parser.add_argument(
-        '--model',
-        dest='model',
-        help='Path to the hf converted model.',
+        "--model",
+        dest="model",
+        help="Path to the hf converted model.",
        required=True,
        type=str,
    )
    parser.add_argument(
-        '--hf_access_token',
-        dest='hf_access_token',
+        "--hf_access_token",
+        dest="hf_access_token",
        help='Hugging face access token, starting with "hf_".',
        type=str,
    )
    parser.add_argument(
-        '--prompt',
-        dest='prompt',
-        help='Prompt for LLM call.',
-        default='',
-        type=str,
+        "--prompt",
+        dest="prompt",
+        help="Prompt for LLM call.",
+        default="",
+        type=str,
    )
    parser.add_argument(
-        '--device',
-        dest='device',
-        help='Device used for inference.',
+        "--device",
+        dest="device",
+        help="Device used for inference.",
        type=str,
    )
    parser.add_argument(
-        '--max_length',
-        dest='max_length',
-        help='Maximum length of tokens.',
+        "--max_length",
+        dest="max_length",
+        help="Maximum length of tokens.",
        default=256,
        type=int,
    )
    parser.add_argument(
-        '--assistant_model',
-        dest='assistant_model',
+        "--assistant_model",
+        dest="assistant_model",
        help=(
            (
-                'If set, this is used as a draft model '
-                'for assisted speculative generation.'
+                "If set, this is used as a draft model "
+                "for assisted speculative generation."
            )
        ),
        type=str,
    )
    parser.add_argument(
-        '--generate_kwargs',
-        dest='generate_kwargs',
-        help='Additional kwargs passed to the HF generate function.',
+        "--generate_kwargs",
+        dest="generate_kwargs",
+        help="Additional kwargs passed to the HF generate function.",
        type=str,
-        nargs='*',
+        nargs="*",
        action=KwargsParser,
    )
    return parser.parse_args()


-if __name__ == '__main__':
+if __name__ == "__main__":
    args = openelm_generate_parser()
    prompt = args.prompt

@@ -228,12 +220,12 @@ def __call__(self, parser, namespace, values, option_string=None):

    print_txt = (
        f'\r\n{"=" * os.get_terminal_size().columns}\r\n'
-        '\033[1m Prompt + Generated Output\033[0m\r\n'
+        "\033[1m Prompt + Generated Output\033[0m\r\n"
        f'{"-" * os.get_terminal_size().columns}\r\n'
-        f'{output_text}\r\n'
+        f"{output_text}\r\n"
        f'{"-" * os.get_terminal_size().columns}\r\n'
-        '\r\nGeneration took'
-        f'\033[1m\033[92m {round(genertaion_time, 2)} \033[0m'
-        'seconds.\r\n'
+        "\r\nGeneration took"
+        f"\033[1m\033[92m {round(genertaion_time, 2)} \033[0m"
+        "seconds.\r\n"
    )
    print(print_txt)
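The same flow is available from the command line through `openelm_generate_parser`. A possible invocation, assuming the file is saved as `generate_openelm.py` (the filename is not fixed by this diff; `repetition_penalty=1.2` is one example of a key=value pair that `KwargsParser` converts to a float and forwards to the HF `generate` call):

    # Filename is an assumption; the flags come from openelm_generate_parser().
    python generate_openelm.py \
        --model path/to/hf_converted_model \
        --hf_access_token hf_XXXX \
        --prompt "Once upon a time there was" \
        --max_length 256 \
        --generate_kwargs repetition_penalty=1.2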