code-kern-ai · andhreljaKern · Oct 21, 2025 · Sep 25, 2025 · Sep 25, 2025 · Oct 15, 2025
diff --git a/app.py b/app.py
@@ -1,17 +1,35 @@
-# -*- coding: utf-8 -*-
 from fastapi import FastAPI, responses, status, Request
 from typing import Union
 
 import torch
+import logging
+import os
 
 from src.util import request_util
 from src.data import data_type
 import controller
 
 from submodules.model.business_objects import general
-from submodules.model import session
+from submodules.model import session, telemetry
+
+
+OTLP_GRPC_ENDPOINT = os.getenv("OTLP_GRPC_ENDPOINT", "tempo:4317")
+
+app_name = "refinery-embedder"
+app = FastAPI(title=app_name)
+
+if telemetry.ENABLE_TELEMETRY:
+    print("WARNING:  Running telemetry.", flush=True)
+    telemetry.setting_app_name(app_name)
+    telemetry.setting_otlp(app, app_name=app_name, endpoint=OTLP_GRPC_ENDPOINT)
+    app.add_middleware(telemetry.PrometheusMiddleware, app_name=app_name)
+    app.add_route("/metrics", telemetry.metrics)
+
+    # Drop "GET /metrics" requests from the uvicorn access log
+    logging.getLogger("uvicorn.access").addFilter(
+        lambda record: "GET /metrics" not in record.getMessage()
+    )
 
-app = FastAPI()
 
 if torch.cuda.is_available():
     print(

diff --git a/controller.py b/controller.py
@@ -25,13 +25,13 @@
     PrivatemodeAISentenceEmbedder,
 )
 from src.embedders.classification.reduce import PCASentenceReducer  # noqa: F401
-from src.util import daemon, request_util
+from src.util import request_util
 from src.util.decorator import param_throttle
 from src.util.embedders import get_embedder
 from src.util.notification import send_project_update, embedding_warning_templates
 
 from submodules.s3 import controller as s3
-from submodules.model import enums
+from submodules.model import enums, daemon
 from submodules.model.business_objects import (
     attribute,
     embedding,
@@ -120,7 +120,7 @@ def get_docbins(
 
 
 def manage_encoding_thread(project_id: str, embedding_id: str) -> int:
-    daemon.run(prepare_run, project_id, embedding_id)
+    daemon.run_without_db_token(prepare_run, project_id, embedding_id)
     return status.HTTP_200_OK
 
 

diff --git a/src/util/config_handler.py b/src/util/config_handler.py
@@ -1,7 +1,7 @@
 from typing import Dict, Any, Optional, Union
 import requests
 import time
-from src.util import daemon
+from submodules.model import daemon
 
 __config = None
 
@@ -25,7 +25,7 @@ def refresh_config():
         )
     global __config
     __config = response.json()
-    daemon.run(invalidate_after, 3600)  # one hour
+    daemon.run_without_db_token(invalidate_after, 3600)  # one hour
 
 
 def get_config_value(

diff --git a/src/util/daemon.py b/src/util/daemon.py
diff --git a/start b/start
@@ -2,11 +2,13 @@
 
 DEBUG_MODE=false
 DEBUG_PORT=15675
+ENABLE_TELEMETRY=false
 
-while getopts d flag
+while getopts dg flag
 do
     case "${flag}" in
         d) DEBUG_MODE=true;;
+        g) ENABLE_TELEMETRY=true;;
     esac
 done
 
@@ -68,6 +70,7 @@ docker run -d --rm \
 -e MODEL_PROVIDER=http://refinery-model-provider:80 \
 -e WS_NOTIFY_ENDPOINT="http://refinery-websocket:8080" \
 -e NEURAL_SEARCH=http://refinery-neural-search:80 \
+-e ENABLE_TELEMETRY=$ENABLE_TELEMETRY \
 --mount type=bind,source="$(pwd)"/,target=/app \
 -v /var/run/docker.sock:/var/run/docker.sock \
 -v "$MODEL_DIR":/models \

diff --git a/submodules/model b/submodules/model
+4 −4		business_objects/monitor.py
+51 −4		daemon.py
+188 −0		telemetry.py