1515 MODELS_CACHE_AUTH_CACHE_MAX_SIZE ,
1616 MODELS_CACHE_AUTH_CACHE_TTL ,
1717 MODELS_CACHE_AUTH_ENABLED ,
18+ PRELOAD_HF_IDS ,
1819 PROJECT ,
1920 ROBOFLOW_INTERNAL_SERVICE_SECRET ,
2021 WEBRTC_MODAL_APP_NAME ,
8283 "enable_memory_snapshot" : WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT ,
8384 "max_inputs" : WEBRTC_MODAL_FUNCTION_MAX_INPUTS ,
8485 "env" : {
85- "ROBOFLOW_INTERNAL_SERVICE_SECRET" : ROBOFLOW_INTERNAL_SERVICE_SECRET ,
86- "ROBOFLOW_INTERNAL_SERVICE_NAME" : WEBRTC_MODAL_ROBOFLOW_INTERNAL_SERVICE_NAME ,
87- "PROJECT" : PROJECT ,
88- "LOG_LEVEL" : LOG_LEVEL ,
89- "INTERNAL_WEIGHTS_URL_SUFFIX" : INTERNAL_WEIGHTS_URL_SUFFIX ,
90- "MODELS_CACHE_AUTH_ENABLED" : str (MODELS_CACHE_AUTH_ENABLED ),
91- "MODELS_CACHE_AUTH_CACHE_TTL" : str (MODELS_CACHE_AUTH_CACHE_TTL ),
92- "MODELS_CACHE_AUTH_CACHE_MAX_SIZE" : str (MODELS_CACHE_AUTH_CACHE_MAX_SIZE ),
93- "METRICS_ENABLED" : "False" ,
9486 "ALLOW_CUSTOM_PYTHON_EXECUTION_IN_WORKFLOWS" : str (
9587 ALLOW_CUSTOM_PYTHON_EXECUTION_IN_WORKFLOWS
9688 ),
97- "WORKFLOWS_CUSTOM_PYTHON_EXECUTION_MODE" : WORKFLOWS_CUSTOM_PYTHON_EXECUTION_MODE ,
89+ "ALLOW_WORKFLOW_BLOCKS_ACCESSING_ENVIRONMENTAL_VARIABLES" : "False" ,
90+ "DISABLE_INFERENCE_CACHE" : "True" ,
91+ "DISABLE_VERSION_CHECK" : "True" ,
92+ "HF_HOME" : Path (MODEL_CACHE_DIR ).joinpath ("hf_home" ).as_posix (),
93+ "INTERNAL_WEIGHTS_URL_SUFFIX" : INTERNAL_WEIGHTS_URL_SUFFIX ,
94+ "METRICS_ENABLED" : "False" ,
9895 "MODAL_TOKEN_ID" : MODAL_TOKEN_ID ,
9996 "MODAL_TOKEN_SECRET" : MODAL_TOKEN_SECRET ,
10097 "MODAL_WORKSPACE_NAME" : MODAL_WORKSPACE_NAME ,
101- "ALLOW_WORKFLOW_BLOCKS_ACCESSING_ENVIRONMENTAL_VARIABLES" : "False" ,
102- "DISABLE_VERSION_CHECK" : "True" ,
10398 "MODEL_CACHE_DIR" : MODEL_CACHE_DIR ,
104- "HF_HOME" : Path (MODEL_CACHE_DIR ).joinpath ("hf_home" ).as_posix (),
99+ "MODELS_CACHE_AUTH_CACHE_MAX_SIZE" : str (MODELS_CACHE_AUTH_CACHE_MAX_SIZE ),
100+ "MODELS_CACHE_AUTH_CACHE_TTL" : str (MODELS_CACHE_AUTH_CACHE_TTL ),
101+ "MODELS_CACHE_AUTH_ENABLED" : str (MODELS_CACHE_AUTH_ENABLED ),
102+ "LOG_LEVEL" : LOG_LEVEL ,
103+ "ONNXRUNTIME_EXECUTION_PROVIDERS" : "[CUDAExecutionProvider,CPUExecutionProvider]" ,
104+ "PRELOAD_HF_IDS" : PRELOAD_HF_IDS ,
105+ "PROJECT" : PROJECT ,
106+ "ROBOFLOW_INTERNAL_SERVICE_NAME" : WEBRTC_MODAL_ROBOFLOW_INTERNAL_SERVICE_NAME ,
107+ "ROBOFLOW_INTERNAL_SERVICE_SECRET" : ROBOFLOW_INTERNAL_SERVICE_SECRET ,
108+ "WORKFLOWS_CUSTOM_PYTHON_EXECUTION_MODE" : WORKFLOWS_CUSTOM_PYTHON_EXECUTION_MODE ,
105109 "TELEMETRY_USE_PERSISTENT_QUEUE" : "False" ,
106- "DISABLE_INFERENCE_CACHE" : "True" ,
107- "WEBRTC_MODAL_FUNCTION_GPU" : WEBRTC_MODAL_FUNCTION_GPU ,
108- "WEBRTC_MODAL_FUNCTION_SCALEDOWN_WINDOW" : str (
109- WEBRTC_MODAL_FUNCTION_SCALEDOWN_WINDOW
110- ),
111110 "WEBRTC_MODAL_FUNCTION_BUFFER_CONTAINERS" : str (
112111 WEBRTC_MODAL_FUNCTION_BUFFER_CONTAINERS
113112 ),
113+ "WEBRTC_MODAL_FUNCTION_GPU" : WEBRTC_MODAL_FUNCTION_GPU ,
114114 "WEBRTC_MODAL_FUNCTION_MIN_CONTAINERS" : str (
115115 WEBRTC_MODAL_FUNCTION_MIN_CONTAINERS
116116 ),
117+ "WEBRTC_MODAL_FUNCTION_SCALEDOWN_WINDOW" : str (
118+ WEBRTC_MODAL_FUNCTION_SCALEDOWN_WINDOW
119+ ),
117120 "WEBRTC_MODAL_FUNCTION_TIME_LIMIT" : str (WEBRTC_MODAL_FUNCTION_TIME_LIMIT ),
118121 "WEBRTC_MODAL_IMAGE_NAME" : WEBRTC_MODAL_IMAGE_NAME ,
119122 "WEBRTC_MODAL_IMAGE_TAG" : WEBRTC_MODAL_IMAGE_TAG ,
120123 "WEBRTC_MODAL_RTSP_PLACEHOLDER" : WEBRTC_MODAL_RTSP_PLACEHOLDER ,
121124 "WEBRTC_MODAL_RTSP_PLACEHOLDER_URL" : WEBRTC_MODAL_RTSP_PLACEHOLDER_URL ,
122- "ONNXRUNTIME_EXECUTION_PROVIDERS" : "[CUDAExecutionProvider,CPUExecutionProvider]" ,
123125 },
124126 "volumes" : {MODEL_CACHE_DIR : rfcache_volume },
125127 }
@@ -134,6 +136,7 @@ def rtc_peer_connection_modal(
134136 q : modal .Queue ,
135137 ):
136138 logger .info ("*** Spawning %s:" , self .__class__ .__name__ )
139+ logger .info ("Inference tag: %s" , docker_tag )
137140 _exec_session_started = datetime .datetime .now ()
138141 webrtc_request .processing_session_started = _exec_session_started
139142 logger .info (
@@ -211,9 +214,10 @@ def send_answer(obj: WebRTCWorkerResult):
211214 logger .info ("Function completed" )
212215
213216 # https://modal.com/docs/reference/modal.enter
214- # Modal usage calculation is relying on no concurrency and no hot instances
215- @modal .enter ()
217+ # https://modal.com/docs/guide/memory-snapshot#gpu-memory-snapshot
218+ @modal .enter (snap = True )
216219 def start (self ):
220+ # TODO: pre-load models
217221 logger .info ("Starting container" )
218222
219223 @modal .exit ()
@@ -231,7 +235,6 @@ class RTCPeerConnectionModalCPU(RTCPeerConnectionModal):
231235 @app .cls (
232236 ** {
233237 ** decorator_kwargs ,
234- "enable_memory_snapshot" : False ,
235238 "gpu" : WEBRTC_MODAL_FUNCTION_GPU , # https://modal.com/docs/guide/gpu#specifying-gpu-type
236239 "experimental_options" : {
237240 "enable_gpu_snapshot" : WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT
@@ -280,18 +283,22 @@ def spawn_rtc_peer_connection_modal(
280283 )
281284 except modal .exception .NotFoundError :
282285 logger .info ("Deploying webrtc modal app %s" , WEBRTC_MODAL_APP_NAME )
283- app .deploy (name = WEBRTC_MODAL_APP_NAME , client = client )
286+ app .deploy (name = WEBRTC_MODAL_APP_NAME , client = client , tag = docker_tag )
284287
285- workspace_id = None
286- try :
287- workspace_id = get_roboflow_workspace (api_key = webrtc_request .api_key )
288- except Exception as e :
289- pass
288+ workspace_id = webrtc_request .workflow_configuration .workspace_name
289+ if not workspace_id :
290+ try :
291+ workspace_id = get_roboflow_workspace (api_key = webrtc_request .api_key )
292+ webrtc_request .workflow_configuration .workspace_name = workspace_id
293+ except Exception :
294+ pass
290295
291296 tags = {"tag" : docker_tag }
292297 if workspace_id :
293298 tags ["workspace_id" ] = workspace_id
294299
300+ # TODO: tag function run
301+
295302 if webrtc_request .requested_gpu :
296303 RTCPeerConnectionModal = RTCPeerConnectionModalGPU
297304 else :
0 commit comments