@@ -7573,6 +7573,7 @@ def create_inference_endpoint(
75737573 revision : Optional [str ] = None ,
75747574 task : Optional [str ] = None ,
75757575 custom_image : Optional [Dict ] = None ,
7576+ env : Optional [Dict [str , str ]] = None ,
75767577 secrets : Optional [Dict [str , str ]] = None ,
75777578 type : InferenceEndpointType = InferenceEndpointType .PROTECTED ,
75787579 domain : Optional [str ] = None ,
@@ -7616,6 +7617,8 @@ def create_inference_endpoint(
76167617 custom_image (`Dict`, *optional*):
76177618 A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
76187619 Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples).
7620+ env (`Dict[str, str]`, *optional*):
7621+ Non-secret environment variables to inject in the container environment.
76197622 secrets (`Dict[str, str]`, *optional*):
76207623 Secret values to inject in the container environment.
76217624 type ([`InferenceEndpointType`], *optional*):
@@ -7678,14 +7681,14 @@ def create_inference_endpoint(
76787681 ... type="protected",
76797682 ... instance_size="x1",
76807683 ... instance_type="nvidia-a10g",
7684+ ... env={
7685+ ... "MAX_BATCH_PREFILL_TOKENS": "2048",
7686+ ... "MAX_INPUT_LENGTH": "1024",
7687+ ... "MAX_TOTAL_TOKENS": "1512",
7688+ ... "MODEL_ID": "/repository"
7689+ ... },
76817690 ... custom_image={
76827691 ... "health_route": "/health",
7683- ... "env": {
7684- ... "MAX_BATCH_PREFILL_TOKENS": "2048",
7685- ... "MAX_INPUT_LENGTH": "1024",
7686- ... "MAX_TOTAL_TOKENS": "1512",
7687- ... "MODEL_ID": "/repository"
7688- ... },
76897692 ... "url": "ghcr.io/huggingface/text-generation-inference:1.1.0",
76907693 ... },
76917694 ... secrets={"MY_SECRET_KEY": "secret_value"},
@@ -7723,6 +7726,8 @@ def create_inference_endpoint(
77237726 },
77247727 "type" : type ,
77257728 }
7729+ if env :
7730+ payload ["model" ]["env" ] = env
77267731 if secrets :
77277732 payload ["model" ]["secrets" ] = secrets
77287733 if domain is not None or path is not None :
@@ -7897,6 +7902,7 @@ def update_inference_endpoint(
78977902 revision : Optional [str ] = None ,
78987903 task : Optional [str ] = None ,
78997904 custom_image : Optional [Dict ] = None ,
7905+ env : Optional [Dict [str , str ]] = None ,
79007906 secrets : Optional [Dict [str , str ]] = None ,
79017907 # Route update
79027908 domain : Optional [str ] = None ,
@@ -7942,6 +7948,8 @@ def update_inference_endpoint(
79427948 custom_image (`Dict`, *optional*):
79437949 A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
79447950 Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples).
7951+ env (`Dict[str, str]`, *optional*):
7952+ Non-secret environment variables to inject in the container environment.
79457953 secrets (`Dict[str, str]`, *optional*):
79467954 Secret values to inject in the container environment.
79477955
@@ -7992,6 +8000,8 @@ def update_inference_endpoint(
79928000 payload ["model" ]["task" ] = task
79938001 if custom_image is not None :
79948002 payload ["model" ]["image" ] = {"custom" : custom_image }
8003+ if env is not None :
8004+ payload ["model" ]["env" ] = env
79958005 if secrets is not None :
79968006 payload ["model" ]["secrets" ] = secrets
79978007 if domain is not None :
0 commit comments