File tree Expand file tree Collapse file tree 2 files changed +7
-0
lines changed
plugins/flytekit-inference/flytekitplugins/inference Expand file tree Collapse file tree 2 files changed +7
-0
lines changed Original file line number Diff line number Diff line change @@ -33,6 +33,7 @@ def __init__(
3333 cpu : int = 1 ,
3434 gpu : int = 1 ,
3535 mem : str = "20Gi" ,
36+ ephemeral_storage : str = "20Gi" ,
3637 shm_size : str = "16Gi" ,
3738 env : Optional [
3839 dict [str , str ]
@@ -49,6 +50,7 @@ def __init__(
4950 :param cpu: The number of CPU cores requested for the model server container. Default is 1.
5051 :param gpu: The number of GPU cores requested for the model server container. Default is 1.
5152 :param mem: The amount of memory requested for the model server container. Default is "20Gi".
53+ :param ephemeral_storage: The amount of ephemeral storage requested for the model server container. Default is "20Gi".
5254 :param shm_size: The size of the shared memory volume. Default is "16Gi".
5355 :param env: A dictionary of environment variables to be set in the model server container.
5456 :param hf_repo_ids: A list of Hugging Face repository IDs for LoRA adapters to be downloaded.
@@ -72,6 +74,7 @@ def __init__(
7274 cpu = cpu ,
7375 gpu = gpu ,
7476 mem = mem ,
77+ ephemeral_storage = ephemeral_storage ,
7578 env = env ,
7679 )
7780
Original file line number Diff line number Diff line change @@ -13,6 +13,7 @@ def __init__(
1313 cpu : int = 1 ,
1414 gpu : int = 1 ,
1515 mem : str = "1Gi" ,
16+ ephemeral_storage : str = "1Gi" ,
1617 env : Optional [dict [str , str ]] = None ,
1718 download_inputs : bool = False ,
1819 download_inputs_mem : str = "500Mi" ,
@@ -36,6 +37,7 @@ def __init__(
3637 self ._cpu = cpu
3738 self ._gpu = gpu
3839 self ._mem = mem
40+ self ._ephemeral_storage = ephemeral_storage
3941 self ._download_inputs_mem = download_inputs_mem
4042 self ._download_inputs_cpu = download_inputs_cpu
4143 self ._env = env
@@ -58,11 +60,13 @@ def __init__(
5860 "cpu" : self ._cpu ,
5961 "nvidia.com/gpu" : self ._gpu ,
6062 "memory" : self ._mem ,
63+ "ephemeral-storage" : self ._ephemeral_storage ,
6164 },
6265 limits = {
6366 "cpu" : self ._cpu ,
6467 "nvidia.com/gpu" : self ._gpu ,
6568 "memory" : self ._mem ,
69+ "ephemeral-storage" : self ._ephemeral_storage ,
6670 },
6771 ),
6872 restart_policy = "Always" , # treat this container as a sidecar
You can't perform that action at this time.
0 commit comments