Skip to content

Commit ec05ef8

Browse files
Enable snapshot
1 parent c724341 commit ec05ef8

File tree

2 files changed

+36
-30
lines changed

2 files changed

+36
-30
lines changed

inference/core/interfaces/webrtc_worker/modal.py

Lines changed: 35 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
MODELS_CACHE_AUTH_CACHE_MAX_SIZE,
1616
MODELS_CACHE_AUTH_CACHE_TTL,
1717
MODELS_CACHE_AUTH_ENABLED,
18+
PRELOAD_HF_IDS,
1819
PROJECT,
1920
ROBOFLOW_INTERNAL_SERVICE_SECRET,
2021
WEBRTC_MODAL_APP_NAME,
@@ -82,44 +83,45 @@
8283
"enable_memory_snapshot": WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT,
8384
"max_inputs": WEBRTC_MODAL_FUNCTION_MAX_INPUTS,
8485
"env": {
85-
"ROBOFLOW_INTERNAL_SERVICE_SECRET": ROBOFLOW_INTERNAL_SERVICE_SECRET,
86-
"ROBOFLOW_INTERNAL_SERVICE_NAME": WEBRTC_MODAL_ROBOFLOW_INTERNAL_SERVICE_NAME,
87-
"PROJECT": PROJECT,
88-
"LOG_LEVEL": LOG_LEVEL,
89-
"INTERNAL_WEIGHTS_URL_SUFFIX": INTERNAL_WEIGHTS_URL_SUFFIX,
90-
"MODELS_CACHE_AUTH_ENABLED": str(MODELS_CACHE_AUTH_ENABLED),
91-
"MODELS_CACHE_AUTH_CACHE_TTL": str(MODELS_CACHE_AUTH_CACHE_TTL),
92-
"MODELS_CACHE_AUTH_CACHE_MAX_SIZE": str(MODELS_CACHE_AUTH_CACHE_MAX_SIZE),
93-
"METRICS_ENABLED": "False",
9486
"ALLOW_CUSTOM_PYTHON_EXECUTION_IN_WORKFLOWS": str(
9587
ALLOW_CUSTOM_PYTHON_EXECUTION_IN_WORKFLOWS
9688
),
97-
"WORKFLOWS_CUSTOM_PYTHON_EXECUTION_MODE": WORKFLOWS_CUSTOM_PYTHON_EXECUTION_MODE,
89+
"ALLOW_WORKFLOW_BLOCKS_ACCESSING_ENVIRONMENTAL_VARIABLES": "False",
90+
"DISABLE_INFERENCE_CACHE": "True",
91+
"DISABLE_VERSION_CHECK": "True",
92+
"HF_HOME": Path(MODEL_CACHE_DIR).joinpath("hf_home").as_posix(),
93+
"INTERNAL_WEIGHTS_URL_SUFFIX": INTERNAL_WEIGHTS_URL_SUFFIX,
94+
"METRICS_ENABLED": "False",
9895
"MODAL_TOKEN_ID": MODAL_TOKEN_ID,
9996
"MODAL_TOKEN_SECRET": MODAL_TOKEN_SECRET,
10097
"MODAL_WORKSPACE_NAME": MODAL_WORKSPACE_NAME,
101-
"ALLOW_WORKFLOW_BLOCKS_ACCESSING_ENVIRONMENTAL_VARIABLES": "False",
102-
"DISABLE_VERSION_CHECK": "True",
10398
"MODEL_CACHE_DIR": MODEL_CACHE_DIR,
104-
"HF_HOME": Path(MODEL_CACHE_DIR).joinpath("hf_home").as_posix(),
99+
"MODELS_CACHE_AUTH_CACHE_MAX_SIZE": str(MODELS_CACHE_AUTH_CACHE_MAX_SIZE),
100+
"MODELS_CACHE_AUTH_CACHE_TTL": str(MODELS_CACHE_AUTH_CACHE_TTL),
101+
"MODELS_CACHE_AUTH_ENABLED": str(MODELS_CACHE_AUTH_ENABLED),
102+
"LOG_LEVEL": LOG_LEVEL,
103+
"ONNXRUNTIME_EXECUTION_PROVIDERS": "[CUDAExecutionProvider,CPUExecutionProvider]",
104+
"PRELOAD_HF_IDS": PRELOAD_HF_IDS,
105+
"PROJECT": PROJECT,
106+
"ROBOFLOW_INTERNAL_SERVICE_NAME": WEBRTC_MODAL_ROBOFLOW_INTERNAL_SERVICE_NAME,
107+
"ROBOFLOW_INTERNAL_SERVICE_SECRET": ROBOFLOW_INTERNAL_SERVICE_SECRET,
108+
"WORKFLOWS_CUSTOM_PYTHON_EXECUTION_MODE": WORKFLOWS_CUSTOM_PYTHON_EXECUTION_MODE,
105109
"TELEMETRY_USE_PERSISTENT_QUEUE": "False",
106-
"DISABLE_INFERENCE_CACHE": "True",
107-
"WEBRTC_MODAL_FUNCTION_GPU": WEBRTC_MODAL_FUNCTION_GPU,
108-
"WEBRTC_MODAL_FUNCTION_SCALEDOWN_WINDOW": str(
109-
WEBRTC_MODAL_FUNCTION_SCALEDOWN_WINDOW
110-
),
111110
"WEBRTC_MODAL_FUNCTION_BUFFER_CONTAINERS": str(
112111
WEBRTC_MODAL_FUNCTION_BUFFER_CONTAINERS
113112
),
113+
"WEBRTC_MODAL_FUNCTION_GPU": WEBRTC_MODAL_FUNCTION_GPU,
114114
"WEBRTC_MODAL_FUNCTION_MIN_CONTAINERS": str(
115115
WEBRTC_MODAL_FUNCTION_MIN_CONTAINERS
116116
),
117+
"WEBRTC_MODAL_FUNCTION_SCALEDOWN_WINDOW": str(
118+
WEBRTC_MODAL_FUNCTION_SCALEDOWN_WINDOW
119+
),
117120
"WEBRTC_MODAL_FUNCTION_TIME_LIMIT": str(WEBRTC_MODAL_FUNCTION_TIME_LIMIT),
118121
"WEBRTC_MODAL_IMAGE_NAME": WEBRTC_MODAL_IMAGE_NAME,
119122
"WEBRTC_MODAL_IMAGE_TAG": WEBRTC_MODAL_IMAGE_TAG,
120123
"WEBRTC_MODAL_RTSP_PLACEHOLDER": WEBRTC_MODAL_RTSP_PLACEHOLDER,
121124
"WEBRTC_MODAL_RTSP_PLACEHOLDER_URL": WEBRTC_MODAL_RTSP_PLACEHOLDER_URL,
122-
"ONNXRUNTIME_EXECUTION_PROVIDERS": "[CUDAExecutionProvider,CPUExecutionProvider]",
123125
},
124126
"volumes": {MODEL_CACHE_DIR: rfcache_volume},
125127
}
@@ -134,6 +136,7 @@ def rtc_peer_connection_modal(
134136
q: modal.Queue,
135137
):
136138
logger.info("*** Spawning %s:", self.__class__.__name__)
139+
logger.info("Inference tag: %s", docker_tag)
137140
_exec_session_started = datetime.datetime.now()
138141
webrtc_request.processing_session_started = _exec_session_started
139142
logger.info(
@@ -211,9 +214,10 @@ def send_answer(obj: WebRTCWorkerResult):
211214
logger.info("Function completed")
212215

213216
# https://modal.com/docs/reference/modal.enter
214-
# Modal usage calculation is relying on no concurrency and no hot instances
215-
@modal.enter()
217+
# https://modal.com/docs/guide/memory-snapshot#gpu-memory-snapshot
218+
@modal.enter(snap=True)
216219
def start(self):
220+
# TODO: pre-load models
217221
logger.info("Starting container")
218222

219223
@modal.exit()
@@ -231,7 +235,6 @@ class RTCPeerConnectionModalCPU(RTCPeerConnectionModal):
231235
@app.cls(
232236
**{
233237
**decorator_kwargs,
234-
"enable_memory_snapshot": False,
235238
"gpu": WEBRTC_MODAL_FUNCTION_GPU, # https://modal.com/docs/guide/gpu#specifying-gpu-type
236239
"experimental_options": {
237240
"enable_gpu_snapshot": WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT
@@ -280,18 +283,22 @@ def spawn_rtc_peer_connection_modal(
280283
)
281284
except modal.exception.NotFoundError:
282285
logger.info("Deploying webrtc modal app %s", WEBRTC_MODAL_APP_NAME)
283-
app.deploy(name=WEBRTC_MODAL_APP_NAME, client=client)
286+
app.deploy(name=WEBRTC_MODAL_APP_NAME, client=client, tag=docker_tag)
284287

285-
workspace_id = None
286-
try:
287-
workspace_id = get_roboflow_workspace(api_key=webrtc_request.api_key)
288-
except Exception as e:
289-
pass
288+
workspace_id = webrtc_request.workflow_configuration.workspace_name
289+
if not workspace_id:
290+
try:
291+
workspace_id = get_roboflow_workspace(api_key=webrtc_request.api_key)
292+
webrtc_request.workflow_configuration.workspace_name = workspace_id
293+
except Exception:
294+
pass
290295

291296
tags = {"tag": docker_tag}
292297
if workspace_id:
293298
tags["workspace_id"] = workspace_id
294299

300+
# TODO: tag function run
301+
295302
if webrtc_request.requested_gpu:
296303
RTCPeerConnectionModal = RTCPeerConnectionModalGPU
297304
else:

inference/core/interfaces/webrtc_worker/webrtc.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -657,7 +657,7 @@ def on_track(track: RemoteStreamTrack):
657657

658658
@peer_connection.on("connectionstatechange")
659659
async def on_connectionstatechange():
660-
logger.info("Connection state is %s", peer_connection.connectionState)
660+
logger.info("on_connectionstatechange: %s", peer_connection.connectionState)
661661
if peer_connection.connectionState in {"failed", "closed"}:
662662
if video_processor.track:
663663
logger.info("Stopping video processor track")
@@ -666,7 +666,6 @@ async def on_connectionstatechange():
666666
logger.info("Stopping WebRTC peer")
667667
await peer_connection.close()
668668
terminate_event.set()
669-
logger.info("'connectionstatechange' event handler finished")
670669

671670
@peer_connection.on("datachannel")
672671
def on_datachannel(channel: RTCDataChannel):

0 commit comments

Comments
 (0)