Skip to content

Commit 0182fa5

Browse files
authored
Cache HF API results (#233)
* cache HF API results * add option for modifying cache expiration
1 parent 0acf569 commit 0182fa5

File tree

3 files changed

+60
-16
lines changed

3 files changed

+60
-16
lines changed

mii/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from .terminate import terminate
1010
from .constants import DeploymentType, Tasks
1111
from .aml_related.utils import aml_output_path
12-
12+
from .utils import get_supported_models
1313
from .config import MIIConfig, LoadBalancerConfig
1414
from .grpc_related.proto import modelresponse_pb2_grpc
1515

mii/constants.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,9 @@ class ModelProvider(enum.Enum):
104104
MII_CACHE_PATH = "MII_CACHE_PATH"
105105
MII_CACHE_PATH_DEFAULT = "/tmp/mii_cache"
106106

107+
MII_HF_CACHE_EXPIRATION = "MII_HF_CACHE_EXPIRATION"
108+
MII_HF_CACHE_EXPIRATION_DEFAULT = 60 * 60 # 1 hour
109+
107110
MII_DEBUG_MODE = "MII_DEBUG_MODE"
108111
MII_DEBUG_MODE_DEFAULT = "0"
109112

mii/utils.py

Lines changed: 56 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,12 @@
33

44
# DeepSpeed Team
55
import os
6+
import pickle
7+
import time
68
import importlib
79
import torch
810
import mii
11+
from types import SimpleNamespace
912
from huggingface_hub import HfApi
1013

1114
from mii.models.score.generate import generated_score_path
@@ -20,7 +23,9 @@
2023
SUPPORTED_MODEL_TYPES,
2124
ModelProvider,
2225
REQUIRED_KEYS_PER_TASK,
23-
TEXT2IMG_NAME)
26+
TEXT2IMG_NAME,
27+
MII_HF_CACHE_EXPIRATION,
28+
MII_HF_CACHE_EXPIRATION_DEFAULT)
2429

2530
from mii.constants import Tasks
2631

@@ -75,21 +80,55 @@ def get_task(task_name):
7580
assert False, f"Unknown Task {task_name}"
7681

7782

78-
def _get_hf_models_by_type(model_type, task=None):
79-
api = HfApi()
80-
models = api.list_models(filter=model_type)
81-
models = ([m.modelId for m in models]
82-
if task is None else [m.modelId for m in models if m.pipeline_tag == task])
83+
def _get_hf_models_by_type(model_type=None, task=None):
    """Return Hugging Face model IDs filtered by ``model_type`` and/or ``task``.

    The result of the (slow) ``HfApi.list_models()`` call is cached on disk as
    a pickle file in the MII cache directory and only refreshed once the cache
    is older than ``MII_HF_CACHE_EXPIRATION`` seconds.

    Args:
        model_type: HF tag a model must carry to be included, or ``None`` to
            disable this filter.
        task: HF pipeline tag a model must match, or ``None`` to disable this
            filter.

    Returns:
        list[str]: the matching model IDs. For text-generation, a few known
        models missing the proper HF tags are appended manually (see TODO).
    """
    cache_file_path = os.path.join(mii_cache_path(), "HF_model_cache.pkl")
    # os.getenv returns a *string* whenever the env var is set; coerce to int
    # so the expiry arithmetic below cannot raise TypeError.
    cache_expiration_seconds = int(
        os.getenv(MII_HF_CACHE_EXPIRATION,
                  MII_HF_CACHE_EXPIRATION_DEFAULT))

    # Load or initialize the cache. A corrupt, truncated, or otherwise
    # unreadable cache file is treated the same as an expired one rather than
    # crashing the caller.
    model_data = {"cache_time": 0, "model_list": []}
    if os.path.isfile(cache_file_path):
        try:
            with open(cache_file_path, 'rb') as f:
                model_data = pickle.load(f)
        except (pickle.UnpicklingError, EOFError, AttributeError, OSError):
            model_data = {"cache_time": 0, "model_list": []}

    current_time = time.time()

    # Update the cache if it has expired
    if (model_data["cache_time"] + cache_expiration_seconds) < current_time:
        api = HfApi()
        # Store plain SimpleNamespace copies so the pickled cache does not
        # depend on huggingface_hub's internal ModelInfo class layout.
        model_data["model_list"] = [
            SimpleNamespace(modelId=m.modelId,
                            pipeline_tag=m.pipeline_tag,
                            tags=m.tags) for m in api.list_models()
        ]
        model_data["cache_time"] = current_time

        # Save the updated cache
        with open(cache_file_path, 'wb') as f:
            pickle.dump(model_data, f)

    # Filter the model list
    models = model_data["model_list"]
    if model_type is not None:
        models = [m for m in models if model_type in m.tags]
    if task is not None:
        models = [m for m in models if m.pipeline_tag == task]

    # Extract model IDs
    model_ids = [m.modelId for m in models]

    if task == TEXT_GENERATION_NAME:
        # TODO: this is a temp solution to get around some HF models not
        # having the correct tags
        model_ids.extend([
            "microsoft/bloom-deepspeed-inference-fp16",
            "microsoft/bloom-deepspeed-inference-int8",
            "EleutherAI/gpt-neox-20b"
        ])

    return model_ids

91-
# TODO read this from a file containing list of files supported for each task
92-
def _get_supported_models_name(task):
130+
131+
def get_supported_models(task):
93132
supported_models = []
94133
task_name = get_task_name(task)
95134

@@ -109,16 +148,18 @@ def _get_supported_models_name(task):
109148

110149

111150
def check_if_task_and_model_is_supported(task, model_name):
    """Assert that ``model_name`` appears in the supported-model list for ``task``.

    Raises:
        AssertionError: when the model is not among the supported models,
            with a message pointing at ``mii.get_supported_models``.
    """
    models_for_task = get_supported_models(task)
    failure_msg = f"{task} is not supported by {model_name}. This task is supported by {len(models_for_task)} other models. See which models with `mii.get_supported_models(mii.{task})`."
    assert model_name in models_for_task, failure_msg
114155

115156

116157
def check_if_task_and_model_is_valid(task, model_name):
    """Assert that ``model_name`` is tagged on the HF Hub for the given ``task``.

    Raises:
        AssertionError: when the Hub does not list the model under the task's
            pipeline tag, with a message pointing at ``mii.get_supported_models``.
    """
    task_name = get_task_name(task)
    hub_models_for_task = _get_hf_models_by_type(None, task_name)
    failure_msg = f"{task_name} is not supported by {model_name}. This task is supported by {len(hub_models_for_task)} other models. See which models with `mii.get_supported_models(mii.{task})`."
    assert model_name in hub_models_for_task, failure_msg
122163

123164

124165
def full_model_path(model_path):

0 commit comments

Comments
 (0)