
Commit 6fe18d3

get latest inference framework tag from configmap (#505)
* get latest inference framework tag from configmap
* comments
* fix for test
* make namespace a config
* fix s3 prefix bug
* fix checkpoint path fn + tests
* values change
* quotes
1 parent d0d6b8b commit 6fe18d3

File tree

9 files changed: +122 −51 lines changed

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: model-engine-inference-framework-latest-config
+  labels:
+    product: common
+    team: infra
+  annotations:
+    "helm.sh/hook": pre-install
+    "helm.sh/hook-weight": "-2"
+data:
+  deepspeed: "latest"
+  text_generation_inference: "latest"
+  vllm: "latest"
+  lightllm: "latest"
+  tensorrt_llm: "latest"
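Each data key matches an LLMInferenceFramework value, and its value is the image tag treated as "latest" for that framework. A minimal sketch of the lookup these keys enable (the dict literal stands in for fetched ConfigMap data; the real lookup is _get_latest_tag in the use-cases diff below):

    # Illustrative stand-in for the ConfigMap's data section.
    latest_tags = {
        "deepspeed": "latest",
        "text_generation_inference": "latest",
        "vllm": "latest",
        "lightllm": "latest",
        "tensorrt_llm": "latest",
    }

    def latest_tag_for(framework: str) -> str:
        # A missing key becomes an explicit error, mirroring
        # LatestImageTagNotFoundException in the domain layer.
        if framework not in latest_tags:
            raise KeyError(f"No latest tag pinned for {framework}")
        return latest_tags[framework]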

charts/model-engine/templates/service_config_map.yaml

Lines changed: 2 additions & 0 deletions
@@ -11,6 +11,7 @@ metadata:
 data:
   launch_service_config: |-
     dd_trace_enabled: {{ .Values.dd_trace_enabled | default false | quote }}
+    gateway_namespace: {{ .Release.Namespace | quote }}
     {{- with .Values.config.values.launch }}
     {{- range $key, $value := . }}
     {{ $key }}: {{ $value | quote }}
@@ -39,6 +40,7 @@ metadata:
 data:
   launch_service_config: |-
     dd_trace_enabled: {{ .Values.dd_trace_enabled | default false | quote }}
+    gateway_namespace: {{ .Release.Namespace | quote }}
     {{- with .Values.config.values.launch }}
     {{- range $key, $value := . }}
     {{ $key }}: {{ $value | quote }}
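Helm substitutes the release namespace at render time, so each deployed gateway's service config now records where it runs. A rough sketch of how the rendered block parses (PyYAML here is an assumption; the rendered string shows a release installed into namespace "prod"):

    import yaml

    # Hypothetical render of the two lines above for namespace "prod".
    rendered = 'dd_trace_enabled: "false"\ngateway_namespace: "prod"\n'

    config = yaml.safe_load(rendered)
    assert config["gateway_namespace"] == "prod"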

model-engine/model_engine_server/common/config.py

Lines changed: 1 addition & 0 deletions
@@ -47,6 +47,7 @@ def get_model_cache_directory_name(model_name: str):

 @dataclass
 class HostedModelInferenceServiceConfig:
+    gateway_namespace: str
     endpoint_namespace: str
     billing_queue_arn: str
     sqs_profile: str
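Since the new field has no default, every service config YAML must now supply gateway_namespace or construction of the dataclass fails, which is why service_config_circleci.yaml below gains the key. A minimal illustration (field subset and values are hypothetical):

    from dataclasses import dataclass

    @dataclass
    class HostedModelInferenceServiceConfig:
        gateway_namespace: str
        endpoint_namespace: str

    # Omitting gateway_namespace would raise TypeError at construction.
    hmi_config = HostedModelInferenceServiceConfig(
        gateway_namespace="default",
        endpoint_namespace="model-engine",
    )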
model-engine/model_engine_server/core/configmap.py

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
+"""Read configmap from k8s."""
+
+from typing import Dict
+
+from kubernetes_asyncio import client
+from kubernetes_asyncio import config as kube_config
+from kubernetes_asyncio.client.rest import ApiException
+from kubernetes_asyncio.config.config_exception import ConfigException
+from model_engine_server.common.config import hmi_config
+from model_engine_server.core.loggers import logger_name, make_logger
+
+DEFAULT_NAMESPACE = "default"
+
+logger = make_logger(logger_name())
+
+
+async def read_config_map(
+    config_map_name: str, namespace: str = hmi_config.gateway_namespace
+) -> Dict[str, str]:
+    try:
+        kube_config.load_incluster_config()
+    except ConfigException:
+        logger.info("No incluster kubernetes config, falling back to local")
+        await kube_config.load_kube_config()
+
+    core_api = client.CoreV1Api()
+
+    try:
+        config_map = await core_api.read_namespaced_config_map(
+            name=config_map_name, namespace=namespace
+        )
+        return config_map.data
+    except ApiException as e:
+        logger.exception(f"Error reading configmap {config_map_name}")
+        raise e
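A minimal usage sketch, assuming the ConfigMap from the chart above is installed and the process can reach the cluster (in-cluster or via local kubeconfig):

    import asyncio

    from model_engine_server.core.configmap import read_config_map

    async def main() -> None:
        tags = await read_config_map("model-engine-inference-framework-latest-config")
        print(tags.get("vllm"))  # e.g. "latest"

    asyncio.run(main())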

model-engine/model_engine_server/domain/exceptions.py

Lines changed: 6 additions & 0 deletions
@@ -182,3 +182,9 @@ class PostInferenceHooksException(DomainException):
     """
     Thrown if the post inference hooks are invalid.
     """
+
+
+class LatestImageTagNotFoundException(DomainException):
+    """
+    Thrown if the latest image tag cannot be found.
+    """

model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py

Lines changed: 25 additions & 11 deletions
@@ -39,6 +39,7 @@
 from model_engine_server.common.dtos.tasks import SyncEndpointPredictV1Request, TaskStatus
 from model_engine_server.common.resource_limits import validate_resource_requests
 from model_engine_server.core.auth.authentication_repository import User
+from model_engine_server.core.configmap import read_config_map
 from model_engine_server.core.loggers import (
     LoggerTagKey,
     LoggerTagManager,
@@ -67,6 +68,7 @@
     EndpointLabelsException,
     EndpointUnsupportedInferenceTypeException,
     InvalidRequestException,
+    LatestImageTagNotFoundException,
     ObjectHasInvalidValueException,
     ObjectNotAuthorizedException,
     ObjectNotFoundException,
@@ -82,7 +84,10 @@
 )
 from model_engine_server.domain.services import LLMModelEndpointService, ModelEndpointService
 from model_engine_server.infra.gateways.filesystem_gateway import FilesystemGateway
-from model_engine_server.infra.repositories.live_tokenizer_repository import SUPPORTED_MODELS_INFO
+from model_engine_server.infra.repositories.live_tokenizer_repository import (
+    SUPPORTED_MODELS_INFO,
+    get_models_s3_uri,
+)

 from ...common.datadog_utils import add_trace_request_id
 from ..authorization.live_authorization_module import LiveAuthorizationModule
@@ -246,6 +251,8 @@
 NUM_DOWNSTREAM_REQUEST_RETRIES = 80  # has to be high enough so that the retries take the 5 minutes
 DOWNSTREAM_REQUEST_TIMEOUT_SECONDS = 5 * 60  # 5 minutes

+LATEST_INFERENCE_FRAMEWORK_CONFIG_MAP_NAME = "model-engine-inference-framework-latest-config"
+

 def count_tokens(input: str, model_name: str, tokenizer_repository: TokenizerRepository) -> int:
     """
@@ -255,6 +262,15 @@ def count_tokens(input: str, model_name: str, tokenizer_repository: TokenizerRep
     return len(tokenizer.encode(input))


+async def _get_latest_tag(inference_framework: LLMInferenceFramework) -> str:
+    config_map = await read_config_map(LATEST_INFERENCE_FRAMEWORK_CONFIG_MAP_NAME)
+    if inference_framework not in config_map:
+        raise LatestImageTagNotFoundException(
+            f"Could not find latest tag for inference framework {inference_framework}."
+        )
+    return config_map[inference_framework]
+
+
 def _include_safetensors_bin_or_pt(model_files: List[str]) -> Optional[str]:
     """
     This function is used to determine whether to include "*.safetensors", "*.bin", or "*.pt" files
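Callers that cannot tolerate a missing entry can catch the new domain exception explicitly; a minimal sketch (assumes an async context and the names from the hunk above):

    try:
        tag = await _get_latest_tag(request.inference_framework)
    except LatestImageTagNotFoundException:
        # No configmap entry for this framework; re-raise rather than
        # guessing an image tag.
        raise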
@@ -337,11 +353,11 @@ def validate_checkpoint_path_uri(checkpoint_path: str) -> None:


 def get_checkpoint_path(model_name: str, checkpoint_path_override: Optional[str]) -> str:
-    checkpoint_path = (
-        SUPPORTED_MODELS_INFO[model_name].s3_repo
-        if not checkpoint_path_override
-        else checkpoint_path_override
-    )
+    checkpoint_path = None
+    if SUPPORTED_MODELS_INFO[model_name].s3_repo:
+        checkpoint_path = get_models_s3_uri(SUPPORTED_MODELS_INFO[model_name].s3_repo, "")
+    if checkpoint_path_override:
+        checkpoint_path = checkpoint_path_override

     if not checkpoint_path:
         raise InvalidRequestException(f"No checkpoint path found for model {model_name}")
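The rework routes the registered s3_repo through get_models_s3_uri to build a full URI (presumably the "s3 prefix bug" named in the commit message) and lets an explicit override win. A condensed restatement of the resolution order (function name is hypothetical; behavior mirrors the hunk above):

    from typing import Optional

    def resolve_checkpoint_path(s3_repo: Optional[str], override: Optional[str]) -> str:
        # The registered repo is formatted into a full S3 URI first...
        path = get_models_s3_uri(s3_repo, "") if s3_repo else None
        # ...but an explicit override always takes precedence.
        if override:
            path = override
        if not path:
            raise InvalidRequestException("No checkpoint path found")
        return path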
@@ -931,8 +947,8 @@ async def execute(
         )

         if request.inference_framework_image_tag == "latest":
-            request.inference_framework_image_tag = self.docker_repository.get_latest_image_tag(
-                INFERENCE_FRAMEWORK_REPOSITORY[request.inference_framework]
+            request.inference_framework_image_tag = await _get_latest_tag(
+                request.inference_framework
             )

         bundle = await self.create_llm_model_bundle_use_case.execute(
@@ -1149,9 +1165,7 @@ async def execute(
         inference_framework = llm_metadata["inference_framework"]

         if request.inference_framework_image_tag == "latest":
-            inference_framework_image_tag = self.docker_repository.get_latest_image_tag(
-                INFERENCE_FRAMEWORK_REPOSITORY[inference_framework]
-            )
+            inference_framework_image_tag = await _get_latest_tag(inference_framework)
         else:
             inference_framework_image_tag = (
                 request.inference_framework_image_tag

model-engine/service_configs/service_config_circleci.yaml

Lines changed: 3 additions & 0 deletions
@@ -1,3 +1,6 @@
+# Config to know where model-engine is running
+gateway_namespace: default
+
 # Config for Model Engine running in CircleCI
 model_primitive_host: "none"

model-engine/tests/unit/domain/conftest.py

Lines changed: 2 additions & 1 deletion
@@ -222,7 +222,7 @@ def create_llm_model_endpoint_request_async() -> CreateLLMModelEndpointV1Request
         labels={"team": "infra", "product": "my_product"},
         aws_role="test_aws_role",
         results_s3_bucket="test_s3_bucket",
-        checkpoint_path="s3://test_checkpoint_path",
+        checkpoint_path="s3://test-s3.tar",
     )


@@ -286,6 +286,7 @@ def create_llm_model_endpoint_request_llama_2() -> CreateLLMModelEndpointV1Reque
         labels={"team": "infra", "product": "my_product"},
         aws_role="test_aws_role",
         results_s3_bucket="test_s3_bucket",
+        checkpoint_path="s3://test-s3.tar",
     )

model-engine/tests/unit/domain/test_llm_use_cases.py

Lines changed: 32 additions & 39 deletions
@@ -1,5 +1,5 @@
 import json
-from typing import Any, Dict, List, Tuple
+from typing import Any, List, Tuple
 from unittest import mock

 import pytest
@@ -54,21 +54,19 @@
     validate_and_update_completion_params,
 )
 from model_engine_server.domain.use_cases.model_bundle_use_cases import CreateModelBundleV2UseCase
-from model_engine_server.infra.repositories import live_tokenizer_repository
-from model_engine_server.infra.repositories.live_tokenizer_repository import ModelInfo


-def good_models_info() -> Dict[str, ModelInfo]:
-    return {
-        k: ModelInfo(v.hf_repo, "s3://test-s3.tar")
-        for k, v in live_tokenizer_repository.SUPPORTED_MODELS_INFO.items()
-    }
+def mocked__get_latest_tag():
+    async def async_mock(*args, **kwargs):  # noqa
+        return "fake_docker_repository_latest_image_tag"
+
+    return mock.AsyncMock(side_effect=async_mock)


 @pytest.mark.asyncio
 @mock.patch(
-    "model_engine_server.domain.use_cases.llm_model_endpoint_use_cases.SUPPORTED_MODELS_INFO",
-    good_models_info(),
+    "model_engine_server.domain.use_cases.llm_model_endpoint_use_cases._get_latest_tag",
+    mocked__get_latest_tag(),
 )
 async def test_create_model_endpoint_use_case_success(
     test_api_key: str,
@@ -183,40 +181,33 @@ async def test_create_model_endpoint_use_case_success(
     assert "--max-total-tokens" in bundle.flavor.command[-1] and "4096" in bundle.flavor.command[-1]


-def bad_models_info() -> Dict[str, ModelInfo]:
-    info = {
-        k: ModelInfo(v.hf_repo, v.s3_repo)
-        for k, v in live_tokenizer_repository.SUPPORTED_MODELS_INFO.items()
-    }
-    info.update(
-        {
-            "mpt-7b": ModelInfo("mosaicml/mpt-7b", None),
-            "mpt-7b-instruct": ModelInfo("mosaicml/mpt-7b-instruct", "gibberish"),
-        }
-    )
-    return info
-
-
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
-    "inference_framework, model_name, expected_error",
+    "inference_framework, model_name, checkpoint_path, expected_error",
     [
-        (LLMInferenceFramework.TEXT_GENERATION_INFERENCE, "mpt-7b", InvalidRequestException),
+        (LLMInferenceFramework.TEXT_GENERATION_INFERENCE, "mpt-7b", None, InvalidRequestException),
         (
            LLMInferenceFramework.TEXT_GENERATION_INFERENCE,
            "mpt-7b-instruct",
+           "gibberish",
+           ObjectHasInvalidValueException,
+        ),
+        (LLMInferenceFramework.LIGHTLLM, "mpt-7b", None, InvalidRequestException),
+        (
+           LLMInferenceFramework.LIGHTLLM,
+           "mpt-7b-instruct",
+           "gibberish",
+           ObjectHasInvalidValueException,
+        ),
+        (LLMInferenceFramework.VLLM, "mpt-7b", None, InvalidRequestException),
+        (
+           LLMInferenceFramework.VLLM,
+           "mpt-7b-instruct",
+           "gibberish",
            ObjectHasInvalidValueException,
         ),
-        (LLMInferenceFramework.LIGHTLLM, "mpt-7b", InvalidRequestException),
-        (LLMInferenceFramework.LIGHTLLM, "mpt-7b-instruct", ObjectHasInvalidValueException),
-        (LLMInferenceFramework.VLLM, "mpt-7b", InvalidRequestException),
-        (LLMInferenceFramework.VLLM, "mpt-7b-instruct", ObjectHasInvalidValueException),
     ],
 )
-@mock.patch(
-    "model_engine_server.domain.use_cases.llm_model_endpoint_use_cases.SUPPORTED_MODELS_INFO",
-    bad_models_info(),
-)
 async def test_create_model_bundle_fails_if_no_checkpoint(
     test_api_key: str,
     fake_model_bundle_repository,
@@ -227,6 +218,7 @@ async def test_create_model_bundle_fails_if_no_checkpoint(
     create_llm_model_endpoint_text_generation_inference_request_streaming: CreateLLMModelEndpointV1Request,
     inference_framework,
     model_name,
+    checkpoint_path,
     expected_error,
 ):
     fake_model_endpoint_service.model_bundle_repository = fake_model_bundle_repository
@@ -255,7 +247,7 @@ async def test_create_model_bundle_fails_if_no_checkpoint(
         endpoint_type=request.endpoint_type,
         num_shards=request.num_shards,
         quantize=request.quantize,
-        checkpoint_path=None,
+        checkpoint_path=checkpoint_path,
     )


@@ -269,10 +261,6 @@ async def test_create_model_bundle_fails_if_no_checkpoint(
         (True, LLMInferenceFramework.VLLM, "0.1.3.6"),
     ],
 )
-@mock.patch(
-    "model_engine_server.domain.use_cases.llm_model_endpoint_use_cases.SUPPORTED_MODELS_INFO",
-    good_models_info(),
-)
 async def test_create_model_bundle_inference_framework_image_tag_validation(
     test_api_key: str,
     fake_model_bundle_repository,
@@ -307,6 +295,7 @@ async def test_create_model_bundle_inference_framework_image_tag_validation(
     request = create_llm_model_endpoint_text_generation_inference_request_streaming.copy()
     request.inference_framework = inference_framework
     request.inference_framework_image_tag = inference_framework_image_tag
+    request.checkpoint_path = "s3://test-s3.tar"
     user = User(user_id=test_api_key, team_id=test_api_key, is_privileged_user=True)
     if valid:
         await use_case.execute(user=user, request=request)
@@ -592,6 +581,10 @@ async def test_get_llm_model_endpoint_use_case_raises_not_authorized(


 @pytest.mark.asyncio
+@mock.patch(
+    "model_engine_server.domain.use_cases.llm_model_endpoint_use_cases._get_latest_tag",
+    mocked__get_latest_tag(),
+)
 async def test_update_model_endpoint_use_case_success(
     test_api_key: str,
     fake_model_bundle_repository,
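mocked__get_latest_tag wraps an async function in mock.AsyncMock so the patched _get_latest_tag can still be awaited by the use case. A self-contained sketch of the same pattern:

    import asyncio
    from unittest import mock

    async def async_mock(*args, **kwargs):
        return "fake_docker_repository_latest_image_tag"

    patched = mock.AsyncMock(side_effect=async_mock)
    assert asyncio.run(patched()) == "fake_docker_repository_latest_image_tag"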
