diff --git a/.github/workflows/client-linters.yml b/.github/workflows/client-linters.yml
index 7fab009f..203a7934 100644
--- a/.github/workflows/client-linters.yml
+++ b/.github/workflows/client-linters.yml
@@ -34,6 +34,3 @@ jobs:
       - name: Run Type Check
         run: npx --package=typescript@latest -- tsc --build .
         working-directory: client
-
-
-
diff --git a/.github/workflows/deploy-to-k8s.yml b/.github/workflows/deploy-to-k8s.yml
index 05bc1571..3d40c0de 100644
--- a/.github/workflows/deploy-to-k8s.yml
+++ b/.github/workflows/deploy-to-k8s.yml
@@ -217,7 +217,6 @@ jobs:
           build-args: API_URL=${{ needs.setup.outputs.api_url }}
           platforms: linux/amd64
 
-
   deploy:
     needs:
       - build-client
@@ -248,6 +247,7 @@ jobs:
             AUTH_URL=auth.whiteboard.student.k8s.aet.cit.tum.de
             GENAI_URL=genai.whiteboard.student.k8s.aet.cit.tum.de
             REALTIME_URL=realtime.whiteboard.student.k8s.aet.cit.tum.de
+            METRICS_URL=metrics.whiteboard.student.k8s.aet.cit.tum.de
             echo "NAMESPACE=production" >> $GITHUB_ENV
             echo "IMAGE_TAG=latest" >> $GITHUB_ENV
             echo "VALUES_FILE=./infrastructure/whiteboard-app/production.values.yaml" >> $GITHUB_ENV
@@ -261,6 +261,7 @@ jobs:
             AUTH_URL=staging.auth.whiteboard.student.k8s.aet.cit.tum.de
             GENAI_URL=staging.genai.whiteboard.student.k8s.aet.cit.tum.de
             REALTIME_URL=staging.realtime.whiteboard.student.k8s.aet.cit.tum.de
+            METRICS_URL=staging.metrics.whiteboard.student.k8s.aet.cit.tum.de
             echo "NAMESPACE=staging" >> $GITHUB_ENV
             echo "IMAGE_TAG=develop" >> $GITHUB_ENV
             echo "VALUES_FILE=./infrastructure/whiteboard-app/staging.values.yaml" >> $GITHUB_ENV
@@ -275,6 +276,7 @@ jobs:
             AUTH_URL=$BRANCH_SAFE.auth.whiteboard.student.k8s.aet.cit.tum.de
             GENAI_URL=$BRANCH_SAFE.genai.whiteboard.student.k8s.aet.cit.tum.de
             REALTIME_URL=$BRANCH_SAFE.realtime.whiteboard.student.k8s.aet.cit.tum.de
+            METRICS_URL=$BRANCH_SAFE.metrics.whiteboard.student.k8s.aet.cit.tum.de
             echo "NAMESPACE=$BRANCH_SAFE" >> $GITHUB_ENV
             echo "IMAGE_TAG=$BRANCH_SAFE" >> $GITHUB_ENV
             echo "VALUES_FILE=./infrastructure/whiteboard-app/pullrequest.values.yaml" >> $GITHUB_ENV
@@ -290,6 +292,7 @@ jobs:
           echo "GENAI_URL=$GENAI_URL" >> $GITHUB_ENV
           echo "OPEN_WEB_UI_API_KEY=${{ secrets.OPEN_WEB_UI_API_KEY }}" >> $GITHUB_ENV
           echo "REALTIME_URL=$REALTIME_URL" >> $GITHUB_ENV
+          echo "METRICS_URL=$METRICS_URL" >> $GITHUB_ENV
           echo "KEYCLOAK_CLIENT_SECRET=$KEYCLOAK_CLIENT_SECRET" >> $GITHUB_ENV
           echo "NEXTAUTH_SECRET=$NEXTAUTH_SECRET" >> $GITHUB_ENV
           echo "POSTGRESQL_SECRET=$POSTGRESQL_SECRET" >> $GITHUB_ENV
@@ -313,15 +316,17 @@ jobs:
       
           if [[ "$BRANCH" == "main" ]]; then
             RELEASE_NAME="whiteboard-production"
+            OBSERVABILITY_RELEASE_NAME="whiteboard-observability-production"
           elif [[ "$BRANCH" == "develop" ]]; then
             RELEASE_NAME="whiteboard-staging"
+            OBSERVABILITY_RELEASE_NAME="whiteboard-observability-staging"
           else
             PR_NUMBER=${{ github.event.pull_request.number }}
             RELEASE_NAME="whiteboard-pr-${PR_NUMBER}"
           fi
       
           echo "RELEASE_NAME=${RELEASE_NAME}" >> $GITHUB_ENV
-          echo "release-name=${RELEASE_NAME}" >> $GITHUB_OUTPUT
+          echo "OBSERVABILITY_RELEASE_NAME=${OBSERVABILITY_RELEASE_NAME}" >> $GITHUB_ENV
 
       - name: Install Helm
         uses: azure/setup-helm@v3
@@ -352,6 +357,23 @@ jobs:
             --set keycloak.externalDatabase.password="${{ env.POSTGRESQL_SECRET }}" \
             --set keycloak.auth.adminPassword="${{ env.KEYCLOAK_SECRET }}" \
 
+      - name: Deploy Observability Stack with Helm  # main/develop only; the PR branch sets no OBSERVABILITY_RELEASE_NAME
+        if: github.ref == 'refs/heads/develop' || github.ref == 'refs/heads/main'
+        run: |  # --install creates the release if it is missing; --atomic rolls back a failed upgrade
+          helm upgrade ${{ env.OBSERVABILITY_RELEASE_NAME }} ./infrastructure/whiteboard-observability/ \
+            -f ${{ env.VALUES_FILE }} \
+            -n tsd-${{ env.NAMESPACE }} \
+            --create-namespace \
+            --install \
+            --atomic \
+            --kubeconfig ${{ env.KUBECONFIG }} \
+            --set namespace="${{ env.NAMESPACE }}" \
+            --set client.url="${{ env.CLIENT_URL }}" \
+            --set server.url="${{ env.SERVER_URL }}" \
+            --set genai.url="${{ env.GENAI_URL }}" \
+            --set realtime.url="${{ env.REALTIME_URL }}" \
+            --set metrics.url="${{ env.METRICS_URL }}"
+
   comment-pr:
     needs: deploy
     runs-on: ubuntu-latest
diff --git a/.github/workflows/genai-linters.yml b/.github/workflows/genai-linters.yml
index 209e78e7..0f871a48 100644
--- a/.github/workflows/genai-linters.yml
+++ b/.github/workflows/genai-linters.yml
@@ -29,7 +29,7 @@ jobs:
         run: |
           cd genai
           ruff check .
-      
+
       - name: GenAI format (auto-fix)
         run: |
           cd genai
diff --git a/.github/workflows/genai-tests.yml b/.github/workflows/genai-tests.yml
index 0578a198..2ea3caa7 100644
--- a/.github/workflows/genai-tests.yml
+++ b/.github/workflows/genai-tests.yml
@@ -26,7 +26,7 @@ jobs:
           uv pip install -r ./genai/requirements.txt --system
 
       - name: GenAI tests
-        env: 
+        env:
          OPEN_WEB_UI_API_KEY: ${{ secrets.OPEN_WEB_UI_API_KEY }}
          API_URL: ${{ vars.API_URL }}
         run: |
diff --git a/client/src/api/genai/generated/api.ts b/client/src/api/genai/generated/api.ts
index fb44ba26..74d24aba 100644
--- a/client/src/api/genai/generated/api.ts
+++ b/client/src/api/genai/generated/api.ts
@@ -60,10 +60,10 @@ export interface HTTPValidationError {
 export interface TextRequest {
   /**
    *
-   * @type {Array<string>}
+   * @type {string}
    * @memberof TextRequest
    */
-  user_text: Array<string>;
+  user_text: string;
 }
 /**
  *
@@ -211,6 +211,45 @@ export const DefaultApiAxiosParamCreator = function (
         options: localVarRequestOptions,
       };
     },
+    /**
+     * Endpoint that serves Prometheus metrics.
+     * @summary Metrics
+     * @param {*} [options] Override http request option.
+     * @throws {RequiredError}
+     */
+    metricsMetricsGet: async (
+      options: RawAxiosRequestConfig = {},
+    ): Promise<RequestArgs> => {
+      const localVarPath = `/metrics`;
+      // use dummy base URL string because the URL constructor only accepts absolute URLs.
+      const localVarUrlObj = new URL(localVarPath, DUMMY_BASE_URL);
+      let baseOptions;
+      if (configuration) {
+        baseOptions = configuration.baseOptions;
+      }
+
+      const localVarRequestOptions = {
+        method: "GET",
+        ...baseOptions,
+        ...options,
+      };
+      const localVarHeaderParameter = {} as any;
+      const localVarQueryParameter = {} as any;
+
+      setSearchParams(localVarUrlObj, localVarQueryParameter);
+      let headersFromBaseOptions =
+        baseOptions && baseOptions.headers ? baseOptions.headers : {};
+      localVarRequestOptions.headers = {
+        ...localVarHeaderParameter,
+        ...headersFromBaseOptions,
+        ...options.headers,
+      };
+
+      return {
+        url: toPathString(localVarUrlObj),
+        options: localVarRequestOptions,
+      };
+    },
     /**
      *
      * @summary Rephrase Text
@@ -382,6 +421,32 @@ export const DefaultApiFp = function (configuration?: Configuration) {
           configuration,
         )(axios, localVarOperationServerBasePath || basePath);
     },
+    /**
+     * Endpoint that serves Prometheus metrics.
+     * @summary Metrics
+     * @param {*} [options] Override http request option.
+     * @throws {RequiredError}
+     */
+    async metricsMetricsGet(
+      options?: RawAxiosRequestConfig,
+    ): Promise<
+      (axios?: AxiosInstance, basePath?: string) => AxiosPromise<any>
+    > {
+      const localVarAxiosArgs =
+        await localVarAxiosParamCreator.metricsMetricsGet(options);
+      const localVarOperationServerIndex = configuration?.serverIndex ?? 0;
+      const localVarOperationServerBasePath =
+        operationServerMap["DefaultApi.metricsMetricsGet"]?.[
+          localVarOperationServerIndex
+        ]?.url;
+      return (axios, basePath) =>
+        createRequestFunction(
+          localVarAxiosArgs,
+          globalAxios,
+          BASE_PATH,
+          configuration,
+        )(axios, localVarOperationServerBasePath || basePath);
+    },
     /**
      *
      * @summary Rephrase Text
@@ -484,6 +549,17 @@ export const DefaultApiFactory = function (
         .healthCheckHealthGet(options)
         .then((request) => request(axios, basePath));
     },
+    /**
+     * Endpoint that serves Prometheus metrics.
+     * @summary Metrics
+     * @param {*} [options] Override http request option.
+     * @throws {RequiredError}
+     */
+    metricsMetricsGet(options?: RawAxiosRequestConfig): AxiosPromise<any> {
+      return localVarFp
+        .metricsMetricsGet(options)
+        .then((request) => request(axios, basePath));
+    },
     /**
      *
      * @summary Rephrase Text
@@ -554,6 +630,19 @@ export class DefaultApi extends BaseAPI {
       .then((request) => request(this.axios, this.basePath));
   }
 
+  /**
+   * Endpoint that serves Prometheus metrics.
+   * @summary Metrics
+   * @param {*} [options] Override http request option.
+   * @throws {RequiredError}
+   * @memberof DefaultApi
+   */
+  public metricsMetricsGet(options?: RawAxiosRequestConfig) {
+    return DefaultApiFp(this.configuration)
+      .metricsMetricsGet(options)
+      .then((request) => request(this.axios, this.basePath));
+  }
+
   /**
    *
    * @summary Rephrase Text
diff --git a/client/src/api/genai/generated/docs/DefaultApi.md b/client/src/api/genai/generated/docs/DefaultApi.md
index 6024228d..f5b18353 100644
--- a/client/src/api/genai/generated/docs/DefaultApi.md
+++ b/client/src/api/genai/generated/docs/DefaultApi.md
@@ -6,6 +6,7 @@ All URIs are relative to *http://localhost:8000*
 |------------- | ------------- | -------------|
 |[**completeTextCompletionPost**](#completetextcompletionpost) | **POST** /completion | Complete Text|
 |[**healthCheckHealthGet**](#healthcheckhealthget) | **GET** /health | Health Check|
+|[**metricsMetricsGet**](#metricsmetricsget) | **GET** /metrics | Metrics|
 |[**rephraseTextRephrasePost**](#rephrasetextrephrasepost) | **POST** /rephrase | Rephrase Text|
 |[**summarizeTextSummarizationPost**](#summarizetextsummarizationpost) | **POST** /summarization | Summarize Text|
 
@@ -83,6 +84,50 @@ const { status, data } = await apiInstance.healthCheckHealthGet();
 This endpoint does not have any parameters.
 
 
+### Return type
+
+**any**
+
+### Authorization
+
+No authorization required
+
+### HTTP request headers
+
+ - **Content-Type**: Not defined
+ - **Accept**: application/json
+
+
+### HTTP response details
+| Status code | Description | Response headers |
+|-------------|-------------|------------------|
+|**200** | Successful Response |  -  |
+
+[[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md)
+
+# **metricsMetricsGet**
+> any metricsMetricsGet()
+
+Endpoint that serves Prometheus metrics.
+
+### Example
+
+```typescript
+import {
+    DefaultApi,
+    Configuration
+} from './api';
+
+const configuration = new Configuration();
+const apiInstance = new DefaultApi(configuration);
+
+const { status, data } = await apiInstance.metricsMetricsGet();
+```
+
+### Parameters
+This endpoint does not have any parameters.
+
+
 ### Return type
 
 **any**
diff --git a/client/src/api/genai/generated/docs/TextRequest.md b/client/src/api/genai/generated/docs/TextRequest.md
index 4501c09f..98c5e74a 100644
--- a/client/src/api/genai/generated/docs/TextRequest.md
+++ b/client/src/api/genai/generated/docs/TextRequest.md
@@ -5,7 +5,7 @@
 
 Name | Type | Description | Notes
 ------------ | ------------- | ------------- | -------------
-**user_text** | **Array&lt;string&gt;** |  | [default to undefined]
+**user_text** | **string** |  | [default to undefined]
 
 ## Example
 
diff --git a/client/src/components/style-bar/StyleBar.tsx b/client/src/components/style-bar/StyleBar.tsx
index 68a6832a..955dea3d 100644
--- a/client/src/components/style-bar/StyleBar.tsx
+++ b/client/src/components/style-bar/StyleBar.tsx
@@ -100,11 +100,11 @@ const StyleBar = ({
       let data;
 
       if (action === "rephrase") {
-        data = await rephraseText({ user_text: [selectedNodeLabel] });
+        data = await rephraseText({ user_text: selectedNodeLabel });
       } else if (action === "complete") {
-        data = await completeText({ user_text: [selectedNodeLabel] });
+        data = await completeText({ user_text: selectedNodeLabel });
       } else {
-        data = await summarizedText({ user_text: [selectedNodeLabel] });
+        data = await summarizedText({ user_text: selectedNodeLabel });
       }
 
       const llmResponse = data.llm_response;
diff --git a/client/src/components/text-node/TextNode.tsx b/client/src/components/text-node/TextNode.tsx
index 0b7d7aca..c275efb9 100644
--- a/client/src/components/text-node/TextNode.tsx
+++ b/client/src/components/text-node/TextNode.tsx
@@ -7,8 +7,6 @@ import {
   Position,
   useReactFlow,
 } from "@xyflow/react";
-import { Eye, EyeOff } from "lucide-react";
-import { Button } from "@/components/ui/button";
 import {
   getFontStyle,
   handleStyle,
@@ -16,6 +14,9 @@ import {
 } from "@/types/NodeProperties";
 import StyleBar from "@/components/style-bar/StyleBar";
 import { updateNode } from "@/util/updateNode";
+import { useAmIOwner } from "@/hooks/api/whiteboard.api";
+import { useGetMe } from "@/hooks/api/account.api";
+import { useParams } from "next/navigation";
 
 interface TextNodeProps extends NodeProps {
   id: string;
@@ -40,9 +41,14 @@ function hexToRgb(hex: string) {
 export default function TextNode({ id, data, selected }: TextNodeProps) {
   const [isEditing, setIsEditing] = useState(false);
   const [text, setText] = useState<string>(data.label as string);
-  const [showStyleBar, setShowStyleBar] = useState(true);
   const { setNodes } = useReactFlow();
 
+  const params = useParams();
+  const whiteboardId = Number(params.id);
+
+  const { data: user } = useGetMe();
+  const { data: isOwner } = useAmIOwner(whiteboardId, user?.id);
+
   const { nodeProperties, label } = data;
   const bgRgb = hexToRgb(nodeProperties.color);
   const borderRgb = hexToRgb(nodeProperties.borderColor);
@@ -68,7 +74,7 @@ export default function TextNode({ id, data, selected }: TextNodeProps) {
 
   return (
     <>
-      {showStyleBar && (
+      {isOwner && (
         <NodeToolbar isVisible={selected} position={Position.Top}>
           <StyleBar
             nodeProperties={nodeProperties}
@@ -83,29 +89,13 @@ export default function TextNode({ id, data, selected }: TextNodeProps) {
         </NodeToolbar>
       )}
 
-      <NodeToolbar position={Position.Right}>
-        <div className="mt-2">
-          <Button
-            variant="ghost"
-            size="sm"
-            onClick={() => setShowStyleBar(!showStyleBar)}
-            className="h-7 w-7 p-1"
-          >
-            {showStyleBar ? (
-              <Eye className="h-4 w-4" />
-            ) : (
-              <EyeOff className="h-4 w-4" />
-            )}
-          </Button>
-        </div>
-      </NodeToolbar>
-
       <NodeResizer
         color="#3859ff"
         isVisible={selected}
         minWidth={100}
         minHeight={50}
       />
+
       <div className="h-full w-full">
         <div
           className="flex h-full w-full items-center justify-center p-4"
diff --git a/compose.yml b/compose.yml
index f6795cfe..24e3ed4c 100644
--- a/compose.yml
+++ b/compose.yml
@@ -1,6 +1,17 @@
 name: team-server-down
 
 services:
+  mailhog:  # mail catcher that receives Alertmanager's notification e-mails
+    restart: always
+    build:
+      context: ./docker/mailhog
+      dockerfile: Dockerfile
+    ports:
+      - "1025:1025"  # SMTP; Alertmanager's smarthost is mailhog:1025
+      - "8025:8025"  # presumably the MailHog web UI / HTTP API - TODO confirm
+    networks:
+      - server
+
   keycloak:
     restart: always
     build:
@@ -170,6 +181,63 @@ services:
     networks:
       - server
 
+  grafana:  # Grafana UI, published on host port 3001 (container port 3000)
+    restart: always
+    build:
+      context: ./docker/grafana
+      dockerfile: Dockerfile
+    ports:
+      - "3001:3000"
+    volumes:
+      - grafana-data:/var/lib/grafana
+      - ./docker/grafana/provisioning:/etc/grafana/provisioning:ro
+    environment:  # admin credentials default to admin/admin; override via shell env or .env
+      - GF_SECURITY_ADMIN_USER=${GRAFANA_ADMIN_USER:-admin}
+      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
+      - GF_USERS_ALLOW_SIGN_UP=false
+      - GF_FEATURE_TOGGLES_ENABLE=logsInExplore
+      - GF_LOG_CONSOLECOLORS=true
+    depends_on:
+      - prometheus
+    networks:
+      - server
+
+  prometheus:  # Prometheus server, published on host port 9092 (container port 9090)
+    restart: always
+    build:
+      context: ./docker/prometheus
+      dockerfile: Dockerfile
+    ports:
+      - "9092:9090"
+    volumes:
+      - ./docker/prometheus/config:/etc/prometheus:ro
+      - prometheus-data:/prometheus
+    command:  # NOTE(review): the bind mount replaces /etc/prometheus, so the console paths must exist in ./docker/prometheus/config
+      - '--config.file=/etc/prometheus/prometheus.yml'
+      - '--storage.tsdb.path=/prometheus'
+      - '--web.console.libraries=/etc/prometheus/console_libraries'
+      - '--web.console.templates=/etc/prometheus/consoles'
+      - '--web.enable-lifecycle'
+    networks:
+      - server
+
+  alertmanager:  # Alertmanager, published on host port 9093
+    restart: always
+    build:
+      context: ./docker/alertmanager
+      dockerfile: Dockerfile
+    ports:
+      - "9093:9093"
+    volumes:
+      - ./docker/alertmanager/config.yml:/etc/alertmanager/config.yml:ro
+      - alertmanager-data:/alertmanager
+    command:
+      - '--config.file=/etc/alertmanager/config.yml'
+      - '--storage.path=/alertmanager'
+      - '--log.level=debug'  # NOTE(review): verbose; consider info outside local debugging
+    networks:
+      - server
+
 networks:
   server:
 
@@ -177,4 +245,7 @@ volumes:
   db-data:
   node-modules-client:
   redis-data:
-  redis-insight-data:
\ No newline at end of file
+  redis-insight-data:
+  grafana-data:
+  prometheus-data:
+  alertmanager-data:
diff --git a/docker/alertmanager/Dockerfile b/docker/alertmanager/Dockerfile
new file mode 100644
index 00000000..55f59f7a
--- /dev/null
+++ b/docker/alertmanager/Dockerfile
@@ -0,0 +1 @@
+FROM prom/alertmanager:v0.26.0
diff --git a/docker/alertmanager/config.yml b/docker/alertmanager/config.yml
new file mode 100644
index 00000000..32b599f1
--- /dev/null
+++ b/docker/alertmanager/config.yml
@@ -0,0 +1,22 @@
+# Alertmanager configuration for the Docker Compose stack.
+# Every alert is delivered by e-mail through the "mailhog" SMTP host.
+global:
+  smtp_smarthost: 'mailhog:1025'
+  smtp_from: 'alertmanager@whiteboard.student.k8s.aet.cit.tum.de'
+  # Do not require STARTTLS when talking to the smarthost above.
+  smtp_require_tls: false
+route:
+  # Single catch-all route: all alerts go to the one receiver below.
+  receiver: 'mailhog-alerts'
+  group_by: ['alertname']
+  group_wait: 10s
+  group_interval: 1m
+  repeat_interval: 30m
+receivers:
+  - name: 'mailhog-alerts'
+    email_configs:
+      - to: 'teamserverdown@whiteboard.student.k8s.aet.cit.tum.de'
+        from: 'alertmanager@whiteboard.student.k8s.aet.cit.tum.de'
+        smarthost: 'mailhog:1025'
+        # Also send a notification when an alert resolves.
+        send_resolved: true
diff --git a/docker/grafana/Dockerfile b/docker/grafana/Dockerfile
new file mode 100644
index 00000000..e48a79f8
--- /dev/null
+++ b/docker/grafana/Dockerfile
@@ -0,0 +1 @@
+FROM grafana/grafana-oss:12.0.2-ubuntu
diff --git a/docker/grafana/provisioning/dashboards/dashboards.yml b/docker/grafana/provisioning/dashboards/dashboards.yml
new file mode 100644
index 00000000..0e8f7220
--- /dev/null
+++ b/docker/grafana/provisioning/dashboards/dashboards.yml
@@ -0,0 +1,25 @@
+# Grafana dashboard provisioning: one file provider per dashboard JSON.
+# updateIntervalSeconds sets how often Grafana re-scans the path for changes.
+apiVersion: 1
+
+providers:
+  - name: "Server System Metrics Dashboard"
+    type: file
+    editable: true
+    updateIntervalSeconds: 10
+    options:
+      path: /etc/grafana/provisioning/dashboards/server-dashboard.json
+
+  - name: "GenAi System Metrics Dashboard"
+    type: file
+    editable: true
+    updateIntervalSeconds: 10
+    options:
+      path: /etc/grafana/provisioning/dashboards/genai-dashboard.json
+
+  - name: "Realtime System Metrics Dashboard"
+    type: file
+    editable: true
+    updateIntervalSeconds: 10
+    options:
+      path: /etc/grafana/provisioning/dashboards/realtime-dashboard.json
diff --git a/docker/grafana/provisioning/dashboards/genai-dashboard.json b/docker/grafana/provisioning/dashboards/genai-dashboard.json
new file mode 100644
index 00000000..bc3a1fc6
--- /dev/null
+++ b/docker/grafana/provisioning/dashboards/genai-dashboard.json
@@ -0,0 +1,522 @@
+{
+  "annotations":{
+    "list":[
+      {
+        "builtIn":1,
+        "datasource":{
+          "type":"grafana",
+          "uid":"-- Grafana --"
+        },
+        "enable":true,
+        "hide":true,
+        "iconColor":"rgba(0, 211, 255, 1)",
+        "name":"Annotations & Alerts",
+        "type":"dashboard"
+      },
+      {
+        "datasource":{
+          "type":"prometheus",
+          "uid":"PBFA97CFB590B2093"
+        },
+        "enable":true,
+        "hide":false,
+        "iconColor":"orange",
+        "name":"Client Error Spike",
+        "target":{
+          "expr":"sum(rate(http_requests_total{status=~\"4xx\"}[5m])) > 0.1",
+          "interval":"",
+          "refId":"Anno"
+        },
+        "textFormat":"High rate of client errors detected, exceeding 0.1 requests per second",
+        "titleFormat":"Client Error Spike"
+      },
+      {
+        "datasource":{
+          "type":"prometheus",
+          "uid":"PBFA97CFB590B2093"
+        },
+        "enable":true,
+        "hide":false,
+        "iconColor":"red",
+        "name":"Server Error Spike",
+        "tagKeys":"server, error",
+        "target":{
+          "expr":"sum(rate(http_requests_total{status=~\"5xx\"}[5m])) > 0.1",
+          "interval":"",
+          "refId":"Anno"
+        },
+        "textFormat":"Server error rate exceeded threshold (>0.1 req/s)",
+        "titleFormat":"Server Error Spike"
+      }
+    ]
+  },
+  "description":"Dashboard showing system metrics including request count, latency, and error rate.",
+  "editable":true,
+  "fiscalYearStartMonth":0,
+  "graphTooltip":0,
+  "id":3,
+  "links":[
+
+  ],
+  "panels":[
+    {
+      "datasource":{
+        "type":"prometheus",
+        "uid":"PBFA97CFB590B2093"
+      },
+      "fieldConfig":{
+        "defaults":{
+          "color":{
+            "mode":"palette-classic"
+          },
+          "custom":{
+            "axisBorderShow":false,
+            "axisCenteredZero":false,
+            "axisColorMode":"text",
+            "axisLabel":"",
+            "axisPlacement":"auto",
+            "barAlignment":0,
+            "barWidthFactor":0.6,
+            "drawStyle":"line",
+            "fillOpacity":20,
+            "gradientMode":"none",
+            "hideFrom":{
+              "legend":false,
+              "tooltip":false,
+              "viz":false
+            },
+            "insertNulls":false,
+            "lineInterpolation":"smooth",
+            "lineWidth":2,
+            "pointSize":5,
+            "scaleDistribution":{
+              "type":"linear"
+            },
+            "showPoints":"auto",
+            "spanNulls":false,
+            "stacking":{
+              "group":"A",
+              "mode":"none"
+            },
+            "thresholdsStyle":{
+              "mode":"area"
+            }
+          },
+          "mappings":[
+
+          ],
+          "thresholds":{
+            "mode":"absolute",
+            "steps":[
+              {
+                "color":"green"
+              },
+              {
+                "color":"orange",
+                "value":70
+              },
+              {
+                "color":"red",
+                "value":80
+              }
+            ]
+          }
+        },
+        "overrides":[
+
+        ]
+      },
+      "gridPos":{
+        "h":17,
+        "w":12,
+        "x":0,
+        "y":0
+      },
+      "id":1,
+      "options":{
+        "legend":{
+          "calcs":[
+            "mean",
+            "max"
+          ],
+          "displayMode":"table",
+          "placement":"bottom",
+          "showLegend":true
+        },
+        "tooltip":{
+          "hideZeros":false,
+          "mode":"multi",
+          "sort":"none"
+        }
+      },
+      "pluginVersion":"12.0.2",
+      "targets":[
+        {
+          "datasource":{
+            "type":"prometheus",
+            "uid":"PBFA97CFB590B2093"
+          },
+          "editorMode":"code",
+          "expr":"sum by (handler, method, status) (increase(http_requests_total{job=\"genai_job\"}[5m]))",
+          "legendFormat":"{{method}} {{status}} {{handler}}",
+          "range":true,
+          "refId":"A"
+        }
+      ],
+      "title":"Request Count",
+      "type":"timeseries"
+    },
+    {
+      "datasource":{
+        "type":"prometheus",
+        "uid":"PBFA97CFB590B2093"
+      },
+      "fieldConfig":{
+        "defaults":{
+          "color":{
+            "mode":"palette-classic"
+          },
+          "custom":{
+            "axisBorderShow":false,
+            "axisCenteredZero":false,
+            "axisColorMode":"text",
+            "axisLabel":"Average Latency (seconds)",
+            "axisPlacement":"auto",
+            "barAlignment":0,
+            "barWidthFactor":0.6,
+            "drawStyle":"line",
+            "fillOpacity":20,
+            "gradientMode":"none",
+            "hideFrom":{
+              "legend":false,
+              "tooltip":false,
+              "viz":false
+            },
+            "insertNulls":false,
+            "lineInterpolation":"smooth",
+            "lineWidth":2,
+            "pointSize":5,
+            "scaleDistribution":{
+              "type":"linear"
+            },
+            "showPoints":"auto",
+            "spanNulls":false,
+            "stacking":{
+              "group":"A",
+              "mode":"none"
+            },
+            "thresholdsStyle":{
+              "mode":"area"
+            }
+          },
+          "mappings":[
+
+          ],
+          "thresholds":{
+            "mode":"absolute",
+            "steps":[
+              {
+                "color":"green"
+              },
+              {
+                "color":"red",
+                "value":80
+              }
+            ]
+          }
+        },
+        "overrides":[
+
+        ]
+      },
+      "gridPos":{
+        "h":17,
+        "w":12,
+        "x":12,
+        "y":0
+      },
+      "id":2,
+      "options":{
+        "legend":{
+          "calcs":[
+            "mean",
+            "max"
+          ],
+          "displayMode":"table",
+          "placement":"bottom",
+          "showLegend":true
+        },
+        "tooltip":{
+          "hideZeros":false,
+          "mode":"multi",
+          "sort":"none"
+        }
+      },
+      "pluginVersion":"12.0.2",
+      "targets":[
+        {
+          "datasource":{
+            "type":"prometheus",
+            "uid":"PBFA97CFB590B2093"
+          },
+          "editorMode":"code",
+          "expr":"sum(rate(http_request_duration_seconds_sum{job=\"genai_job\"}[5m])) by (method, handler) /\nsum(rate(http_request_duration_seconds_count{job=\"genai_job\"}[5m])) by (method, handler)",
+          "legendFormat":"{{method}} {{handler}}",
+          "range":true,
+          "refId":"A"
+        }
+      ],
+      "title":"Latency",
+      "type":"timeseries"
+    },
+    {
+      "datasource":{
+        "type":"prometheus",
+        "uid":"PBFA97CFB590B2093"
+      },
+      "fieldConfig":{
+        "defaults":{
+          "color":{
+            "mode":"palette-classic"
+          },
+          "custom":{
+            "axisBorderShow":false,
+            "axisCenteredZero":false,
+            "axisColorMode":"text",
+            "axisLabel":"",
+            "axisPlacement":"auto",
+            "barAlignment":0,
+            "barWidthFactor":0.6,
+            "drawStyle":"line",
+            "fillOpacity":0,
+            "gradientMode":"none",
+            "hideFrom":{
+              "legend":false,
+              "tooltip":false,
+              "viz":false
+            },
+            "insertNulls":false,
+            "lineInterpolation":"linear",
+            "lineWidth":1,
+            "pointSize":5,
+            "scaleDistribution":{
+              "type":"linear"
+            },
+            "showPoints":"auto",
+            "spanNulls":false,
+            "stacking":{
+              "group":"A",
+              "mode":"none"
+            },
+            "thresholdsStyle":{
+              "mode":"off"
+            }
+          },
+          "mappings":[
+
+          ],
+          "thresholds":{
+            "mode":"absolute",
+            "steps":[
+              {
+                "color":"green"
+              },
+              {
+                "color":"red",
+                "value":80
+              }
+            ]
+          }
+        },
+        "overrides":[
+
+        ]
+      },
+      "gridPos":{
+        "h":12,
+        "w":12,
+        "x":0,
+        "y":17
+      },
+      "id":4,
+      "options":{
+        "legend":{
+          "calcs":[
+
+          ],
+          "displayMode":"list",
+          "placement":"bottom",
+          "showLegend":true
+        },
+        "tooltip":{
+          "hideZeros":false,
+          "mode":"single",
+          "sort":"none"
+        }
+      },
+      "pluginVersion":"12.0.2",
+      "targets":[
+        {
+          "editorMode":"code",
+          "expr":"sum by (handler, method) (increase(http_requests_total{status=~\"4..\", job=\"genai_job\"}[5m]))",
+          "legendFormat":"{{method}} {{handler}}",
+          "range":true,
+          "refId":"A"
+        },
+        {
+          "datasource":{
+            "type":"prometheus",
+            "uid":"PBFA97CFB590B2093"
+          },
+          "editorMode":"code",
+          "expr":"sum by (handler, method) (increase(http_requests_total{status=~\"4..\", job=\"genai_job\"}[5m]))",
+          "hide":false,
+          "instant":false,
+          "legendFormat":"__auto",
+          "range":true,
+          "refId":"B"
+        }
+      ],
+      "title":"Errors",
+      "type":"timeseries"
+    },
+    {
+      "datasource":{
+        "type":"prometheus",
+        "uid":"PBFA97CFB590B2093"
+      },
+      "description":"This dashboard displays the average number of input and output tokens generated per request for each operation (completion, summarization, rephrase_text) over time. The values represent the mean input and output token count, calculated every 5 minutes, grouped by operation.",
+      "fieldConfig":{
+        "defaults":{
+          "color":{
+            "mode":"palette-classic"
+          },
+          "custom":{
+            "axisBorderShow":false,
+            "axisCenteredZero":false,
+            "axisColorMode":"text",
+            "axisLabel":"",
+            "axisPlacement":"auto",
+            "barAlignment":0,
+            "barWidthFactor":0.6,
+            "drawStyle":"line",
+            "fillOpacity":0,
+            "gradientMode":"none",
+            "hideFrom":{
+              "legend":false,
+              "tooltip":false,
+              "viz":false
+            },
+            "insertNulls":false,
+            "lineInterpolation":"linear",
+            "lineWidth":1,
+            "pointSize":5,
+            "scaleDistribution":{
+              "type":"linear"
+            },
+            "showPoints":"auto",
+            "spanNulls":false,
+            "stacking":{
+              "group":"A",
+              "mode":"none"
+            },
+            "thresholdsStyle":{
+              "mode":"off"
+            }
+          },
+          "mappings":[
+
+          ],
+          "thresholds":{
+            "mode":"absolute",
+            "steps":[
+              {
+                "color":"green"
+              },
+              {
+                "color":"red",
+                "value":80
+              }
+            ]
+          }
+        },
+        "overrides":[
+
+        ]
+      },
+      "gridPos":{
+        "h":12,
+        "w":12,
+        "x":12,
+        "y":17
+      },
+      "id":5,
+      "options":{
+        "legend":{
+          "calcs":[
+
+          ],
+          "displayMode":"list",
+          "placement":"bottom",
+          "showLegend":true
+        },
+        "tooltip":{
+          "hideZeros":false,
+          "mode":"single",
+          "sort":"none"
+        }
+      },
+      "pluginVersion":"12.0.2",
+      "targets":[
+        {
+          "editorMode":"code",
+          "exemplar":false,
+          "expr":"rate(llm_token_count_sum{type=\"output\"}[5m]) \n/\nrate(llm_token_count_count{type=\"output\"}[5m])",
+          "format":"time_series",
+          "instant":false,
+          "legendFormat":"output - {{operation}}",
+          "range":true,
+          "refId":"A"
+        },
+        {
+          "datasource":{
+            "type":"prometheus",
+            "uid":"PBFA97CFB590B2093"
+          },
+          "editorMode":"code",
+          "expr":"rate(llm_token_count_sum{type=\"input\"}[5m]) \n/\nrate(llm_token_count_count{type=\"input\"}[5m])",
+          "hide":false,
+          "instant":false,
+          "legendFormat":"input - {{operation}}",
+          "range":true,
+          "refId":"B"
+        }
+      ],
+      "title":"LLM API Token Analytics",
+      "type":"timeseries"
+    }
+  ],
+  "preload":false,
+  "refresh":"10s",
+  "schemaVersion":41,
+  "tags":[
+    "monitoring",
+    "alerts"
+  ],
+  "templating":{
+    "list":[
+
+    ]
+  },
+  "time":{
+    "from":"now-6h",
+    "to":"now"
+  },
+  "timepicker":{
+
+  },
+  "timezone":"browser",
+  "title":"GenAi System Metrics Dashboard",
+  "uid":"genai-metrics-dashboard1111",
+  "version":2
+}
diff --git a/docker/grafana/provisioning/dashboards/realtime-dashboard.json b/docker/grafana/provisioning/dashboards/realtime-dashboard.json
new file mode 100644
index 00000000..fac0edbf
--- /dev/null
+++ b/docker/grafana/provisioning/dashboards/realtime-dashboard.json
@@ -0,0 +1,593 @@
+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": {
+          "type": "grafana",
+          "uid": "-- Grafana --"
+        },
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      },
+      {
+        "datasource": {
+          "type": "prometheus",
+          "uid": "PBFA97CFB590B2093"
+        },
+        "enable": true,
+        "hide": false,
+        "iconColor": "red",
+        "name": "WebSocket error",
+        "tagKeys": "websocket_errors",
+        "target": {
+          "expr": "(\n  rate(websocket_read_errors[5m]) +\n  rate(websocket_write_errors[5m]) +\n  rate(websocket_upgrade_errors[5m])\n) > 0.1",
+          "interval": "",
+          "refId": "Anno"
+        },
+        "textFormat": "High WebSocket error rate detected - exceeding 0.1 errors per second",
+        "titleFormat": "WebSocket Error Rate"
+      }
+    ]
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "id": 3,
+  "links": [],
+  "panels": [
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "PBFA97CFB590B2093"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 10,
+        "x": 0,
+        "y": 0
+      },
+      "id": 5,
+      "options": {
+        "minVizHeight": 75,
+        "minVizWidth": 75,
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "showThresholdLabels": false,
+        "showThresholdMarkers": true,
+        "sizing": "auto"
+      },
+      "pluginVersion": "12.0.2",
+      "targets": [
+        {
+          "editorMode": "code",
+          "expr": "websocket_connections_active",
+          "legendFormat": "__auto",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Websocket Connections Active",
+      "type": "gauge"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "PBFA97CFB590B2093"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 15,
+        "w": 14,
+        "x": 10,
+        "y": 0
+      },
+      "id": 3,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "12.0.2",
+      "targets": [
+        {
+          "editorMode": "code",
+          "expr": "rate(websocket_connection_duration_sum[5m]) / rate(websocket_connection_duration_count[5m])",
+          "legendFormat": "__auto",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Connection Duration Summary",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "PBFA97CFB590B2093"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": [
+          {
+            "__systemRef": "hideSeriesFrom",
+            "matcher": {
+              "id": "byNames",
+              "options": {
+                "mode": "exclude",
+                "names": [
+                  "histogram_quantile(0.95, sum(rate(websocket_connection_duration_bucket[5m])) by (le))"
+                ],
+                "prefix": "All except:",
+                "readOnly": true
+              }
+            },
+            "properties": []
+          }
+        ]
+      },
+      "gridPos": {
+        "h": 11,
+        "w": 10,
+        "x": 0,
+        "y": 8
+      },
+      "id": 1,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "12.0.2",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "PBFA97CFB590B2093"
+          },
+          "disableTextWrap": false,
+          "editorMode": "code",
+          "expr": "rate(websocket_sent_messages[1m])",
+          "fullMetaSearch": false,
+          "includeNullMetadata": true,
+          "legendFormat": "Messages Sent",
+          "range": true,
+          "refId": "A",
+          "useBackend": false
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "PBFA97CFB590B2093"
+          },
+          "editorMode": "code",
+          "expr": "rate(websocket_received_messages[1m])",
+          "hide": false,
+          "instant": false,
+          "legendFormat": "Messages Received",
+          "range": true,
+          "refId": "B"
+        }
+      ],
+      "title": "Messages Sent/Received Rate",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "PBFA97CFB590B2093"
+      },
+      "description": "Distribution of WebSocket connection durations across different time buckets. Each bar represents a cumulative count of connections that lasted less than or equal to the specified duration (in seconds). For example, '60s' shows connections lasting up to 60 seconds, '120s' shows connections up to 120 seconds, and so on up to '+Inf' (unlimited duration).",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": [
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "Value"
+            },
+            "properties": [
+              {
+                "id": "noValue"
+              }
+            ]
+          }
+        ]
+      },
+      "gridPos": {
+        "h": 17,
+        "w": 14,
+        "x": 10,
+        "y": 15
+      },
+      "id": 4,
+      "options": {
+        "displayMode": "gradient",
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "maxVizHeight": 300,
+        "minVizHeight": 16,
+        "minVizWidth": 8,
+        "namePlacement": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "showUnfilled": true,
+        "sizing": "auto",
+        "valueMode": "color"
+      },
+      "pluginVersion": "12.0.2",
+      "targets": [
+        {
+          "disableTextWrap": false,
+          "editorMode": "code",
+          "exemplar": false,
+          "expr": "websocket_connection_duration_bucket{le=~\".+\"}",
+          "format": "table",
+          "fullMetaSearch": false,
+          "includeNullMetadata": true,
+          "instant": true,
+          "legendFormat": "{{le}}",
+          "range": true,
+          "refId": "A",
+          "useBackend": false
+        }
+      ],
+      "title": "Websocket Connection Duration",
+      "transformations": [
+        {
+          "id": "filterFieldsByName",
+          "options": {
+            "include": {
+              "names": [
+                "Time",
+                "60.0",
+                "120.0",
+                "300.0",
+                "600.0",
+                "900.0",
+                "1200.0",
+                "1800.0",
+                "+Inf"
+              ]
+            }
+          }
+        }
+      ],
+      "type": "bargauge"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "PBFA97CFB590B2093"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": [
+          {
+            "__systemRef": "hideSeriesFrom",
+            "matcher": {
+              "id": "byNames",
+              "options": {
+                "mode": "exclude",
+                "names": [
+                  "websocket_read_errors"
+                ],
+                "prefix": "All except:",
+                "readOnly": true
+              }
+            },
+            "properties": [
+              {
+                "id": "custom.hideFrom",
+                "value": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": true
+                }
+              }
+            ]
+          }
+        ]
+      },
+      "gridPos": {
+        "h": 13,
+        "w": 10,
+        "x": 0,
+        "y": 19
+      },
+      "id": 2,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "12.0.2",
+      "targets": [
+        {
+          "editorMode": "code",
+          "expr": "rate(websocket_read_errors[1m])",
+          "legendFormat": "websocket_read_errors",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "PBFA97CFB590B2093"
+          },
+          "editorMode": "code",
+          "expr": "rate(websocket_write_errors[1m])",
+          "hide": false,
+          "instant": false,
+          "legendFormat": "websocket_write_errors",
+          "range": true,
+          "refId": "B"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "PBFA97CFB590B2093"
+          },
+          "editorMode": "code",
+          "expr": "rate(websocket_upgrade_errors[1m])",
+          "hide": false,
+          "instant": false,
+          "legendFormat": "websocket_upgrade_errors",
+          "range": true,
+          "refId": "C"
+        }
+      ],
+      "title": "Websocket Error Rates",
+      "type": "timeseries"
+    }
+  ],
+  "preload": false,
+  "schemaVersion": 41,
+  "tags": [],
+  "templating": {
+    "list": []
+  },
+  "time": {
+    "from": "now-3h",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "browser",
+  "title": "Realtime System Metrics",
+  "uid": "275dd5ce-c9b5-4ed9-a6d0-cbac478716a1",
+  "version": 4
+}
\ No newline at end of file
diff --git a/docker/grafana/provisioning/dashboards/server-dashboard.json b/docker/grafana/provisioning/dashboards/server-dashboard.json
new file mode 100644
index 00000000..caa6f15f
--- /dev/null
+++ b/docker/grafana/provisioning/dashboards/server-dashboard.json
@@ -0,0 +1,394 @@
+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": {
+          "type": "grafana",
+          "uid": "-- Grafana --"
+        },
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "target": {
+          "limit": 100,
+          "matchAny": false,
+          "tags": [],
+          "type": "dashboard"
+        },
+        "type": "dashboard"
+      },
+      {
+        "datasource": {
+          "type": "prometheus",
+          "uid": "PBFA97CFB590B2093"
+        },
+        "enable": true,
+        "hide": false,
+        "iconColor": "orange",
+        "name": "Client Error Spike",
+        "tagKeys": "client-error, http-4xx",
+        "target": {
+          "expr": "sum(rate(http_server_requests_seconds_count{status=~\"4..\"}[5m])) > 0.1",
+          "interval": "",
+          "refId": "Anno"
+        },
+        "textFormat": "High rate of client errors detected, exceeding 0.1 requests per second",
+        "titleFormat": "Client Error Spike"
+      },
+      {
+        "datasource": {
+          "type": "prometheus",
+          "uid": "PBFA97CFB590B2093"
+        },
+        "enable": true,
+        "hide": false,
+        "iconColor": "red",
+        "name": "Server Error Spike",
+        "tagKeys": "server, error",
+        "target": {
+          "expr": "sum(rate(http_server_requests_seconds_count{status=~\"5..\"}[5m])) > 0.1",
+          "interval": "",
+          "refId": "Anno"
+        },
+        "textFormat": "Server error rate exceeded threshold (>0.1 req/s)",
+        "titleFormat": "Server Error Spike"
+      }
+    ]
+  },
+  "description": "Dashboard showing system metrics including request count, latency, and error rate.",
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "id": 1,
+  "links": [],
+  "panels": [
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "PBFA97CFB590B2093"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 20,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "area"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "orange",
+                "value": 70
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 17,
+        "w": 12,
+        "x": 0,
+        "y": 0
+      },
+      "id": 1,
+      "options": {
+        "legend": {
+          "calcs": [
+            "mean",
+            "max"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "12.0.2",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "PBFA97CFB590B2093"
+          },
+          "editorMode": "code",
+          "expr": "sum(rate(http_server_requests_seconds_count[5m])) by (method, uri)",
+          "legendFormat": "{{method}} {{uri}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Request Count",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "PBFA97CFB590B2093"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "Average Latency (seconds)",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 20,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "area"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 17,
+        "w": 12,
+        "x": 12,
+        "y": 0
+      },
+      "id": 2,
+      "options": {
+        "legend": {
+          "calcs": [
+            "mean",
+            "max"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "12.0.2",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "PBFA97CFB590B2093"
+          },
+          "editorMode": "code",
+          "expr": "    sum(rate(http_server_requests_seconds_sum[5m])) by (method, uri) /\nsum(rate(http_server_requests_seconds_count[5m])) by (method, uri)",
+          "legendFormat": "__auto",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Latency",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "PBFA97CFB590B2093"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 12,
+        "w": 12,
+        "x": 0,
+        "y": 17
+      },
+      "id": 4,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "12.0.2",
+      "targets": [
+        {
+          "editorMode": "code",
+          "expr": "sum(rate(http_server_requests_seconds_count{outcome=\"CLIENT_ERROR\"}[5m])) by (method, uri, status)",
+          "legendFormat": "__auto",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "PBFA97CFB590B2093"
+          },
+          "editorMode": "code",
+          "expr": "sum(rate(http_server_requests_seconds_count{status=~\"5..\"}[5m])) by (method, uri, status)",
+          "hide": false,
+          "instant": false,
+          "legendFormat": "__auto",
+          "range": true,
+          "refId": "B"
+        }
+      ],
+      "title": "Errors",
+      "type": "timeseries"
+    }
+  ],
+  "preload": false,
+  "refresh": "10s",
+  "schemaVersion": 41,
+  "tags": [
+    "monitoring",
+    "alerts"
+  ],
+  "templating": {
+    "list": []
+  },
+  "time": {
+    "from": "now-6h",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "browser",
+  "title": "Server System Metrics Dashboard",
+  "uid": "system-metrics-dashboard111112",
+  "version": 1
+}
\ No newline at end of file
diff --git a/docker/grafana/provisioning/datasources/prometheus.yml b/docker/grafana/provisioning/datasources/prometheus.yml
new file mode 100644
index 00000000..8049912b
--- /dev/null
+++ b/docker/grafana/provisioning/datasources/prometheus.yml
@@ -0,0 +1,8 @@
+apiVersion: 1
+
+datasources:
+  - name: Prometheus  # NOTE(review): dashboards reference datasource uid PBFA97CFB590B2093 but no uid is set here - confirm
+    type: prometheus
+    access: proxy
+    url: http://prometheus:9090  # assumes a host named "prometheus" is resolvable from Grafana - TODO confirm
+    isDefault: true  # marks this entry as the default datasource
\ No newline at end of file
diff --git a/docker/mailhog/Dockerfile b/docker/mailhog/Dockerfile
new file mode 100644
index 00000000..51730005
--- /dev/null
+++ b/docker/mailhog/Dockerfile
@@ -0,0 +1,25 @@
+FROM golang:1.18-alpine AS builder
+
+# Build stage: install git, then compile MailHog into /root/gocode/bin.
+RUN apk --no-cache add --virtual build-dependencies \
+    git \
+  && mkdir -p /root/gocode \
+  && export GOPATH=/root/gocode \
+  && go install github.com/mailhog/MailHog@latest
+
+FROM alpine:3
+# Add mailhog user/group with uid/gid 1000.
+# This is a workaround for boot2docker issue #581, see
+# https://github.com/boot2docker/boot2docker/issues/581
+RUN adduser -D -u 1000 mailhog
+
+COPY --from=builder /root/gocode/bin/MailHog /usr/local/bin/
+
+USER mailhog
+
+WORKDIR /home/mailhog
+
+ENTRYPOINT ["MailHog"]
+
+# Expose the SMTP and HTTP ports:
+EXPOSE 1025 8025
\ No newline at end of file
diff --git a/docker/prometheus/Dockerfile b/docker/prometheus/Dockerfile
new file mode 100644
index 00000000..9c042ad1
--- /dev/null
+++ b/docker/prometheus/Dockerfile
@@ -0,0 +1 @@
+FROM prom/prometheus:v3.5.0
\ No newline at end of file
diff --git a/docker/prometheus/config/alert.rules.yml b/docker/prometheus/config/alert.rules.yml
new file mode 100644
index 00000000..e7aa7f87
--- /dev/null
+++ b/docker/prometheus/config/alert.rules.yml
@@ -0,0 +1,11 @@
+groups:
+  - name: service-availability
+    rules:
+      - alert: Service Down
+        expr: up{job=~"server_job|genai_job|realtime_job"} == 0  # all three scrape jobs from prometheus.yml
+        for: 1m  # condition must hold for a full minute before the alert fires
+        labels:
+          severity: critical
+        annotations:
+          summary: "Service {{ $labels.job }} is down"
+          description: "Service {{ $labels.job }} on {{ $labels.instance }} has been down for more than 1 minute"
\ No newline at end of file
diff --git a/docker/prometheus/config/prometheus.yml b/docker/prometheus/config/prometheus.yml
new file mode 100644
index 00000000..ee883ff0
--- /dev/null
+++ b/docker/prometheus/config/prometheus.yml
@@ -0,0 +1,31 @@
+global:
+  scrape_interval: 15s  # applies to every job below (none overrides it)
+  evaluation_interval: 15s  # cadence for evaluating the rule files
+
+alerting:
+  alertmanagers:
+    - static_configs:
+        - targets:
+            - 'alertmanager:9093'
+
+rule_files:
+  - '/etc/prometheus/alert.rules.yml'
+
+scrape_configs:
+  - job_name: 'server_job'  # job names are matched by the up{job=~...} alert rule
+    metrics_path: '/actuator/prometheus'
+    static_configs:
+      - targets:
+          - 'server:9091'
+
+  - job_name: 'genai_job'
+    metrics_path: '/metrics'
+    static_configs:
+      - targets:
+          - 'genai:8000'
+
+  - job_name: 'realtime_job'
+    metrics_path: '/metrics'
+    static_configs:
+      - targets:
+          - 'realtime:9090'
\ No newline at end of file
diff --git a/genai/app/core/__init__.py b/genai/app/core/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/genai/app/main.py b/genai/app/main.py
index dc7b521a..01fbc3db 100644
--- a/genai/app/main.py
+++ b/genai/app/main.py
@@ -1,7 +1,10 @@
+from fastapi import FastAPI
+from prometheus_fastapi_instrumentator import Instrumentator
+from prometheus_client import Histogram
 import os
 import requests
 from typing import Any, List, Optional
-from fastapi import FastAPI, HTTPException, APIRouter
+from fastapi import HTTPException, APIRouter
 from pydantic import BaseModel
 from langchain.llms.base import LLM
 from langchain.callbacks.manager import CallbackManagerForLLMRun
@@ -11,6 +14,13 @@
 from fastapi.openapi.utils import get_openapi
 from dotenv import load_dotenv
 
+# Initialize FastAPI app
+app = FastAPI(
+    title="LLM Service",
+    description="OpenWebUI powered LLM service for text operations",
+    version="1.0.0",
+)
+
 # Setup logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -19,6 +29,8 @@
 
 router = APIRouter()
 
+Instrumentator().instrument(app).expose(app)
+
 # Environment configuration
 OPEN_WEB_UI_API_KEY = os.getenv("OPEN_WEB_UI_API_KEY")
 API_URL = os.getenv("API_URL")
@@ -26,6 +38,14 @@
 CLIENT_URL = os.getenv("CLIENT_URL")
 GENAI_URL = os.getenv("GENAI_URL")
 
+
+LLM_TOKEN_COUNT = Histogram(  # handlers observe word counts of the request text and the LLM result
+    "llm_token_count",
+    "Number of tokens in requests/responses",
+    labelnames=["operation", "type"],  # operation: endpoint name set by each handler; type: "input" or "output"
+)  # NOTE(review): no buckets= passed - confirm the library's default buckets suit token counts
+
+
 class OpenWebUILLM(LLM):
     api_url: str = API_URL
     api_key: str = OPEN_WEB_UI_API_KEY
@@ -84,14 +104,6 @@ def _call(
             raise Exception(f"API request failed: {str(e)}")
 
 
-# Initialize FastAPI app
-app = FastAPI(
-    title="LLM Service",
-    description="OpenWebUI powered LLM service for text operations",
-    version="1.0.0",
-)
-
-
 @app.get("/v3/api-docs", include_in_schema=False)
 def custom_openapi():
     return JSONResponse(
@@ -117,7 +129,7 @@ def custom_openapi():
 
 
 class TextRequest(BaseModel):
-    user_text: List[str]
+    user_text: str
 
 
 class TextResponse(BaseModel):
@@ -126,19 +138,29 @@ class TextResponse(BaseModel):
 
 @router.post("/completion", response_model=TextResponse)
 async def complete_text(request: TextRequest):
+    operation = "completion"
+
     try:
-        input_text = " ".join(request.user_text)
+        input_tokens = len(request.user_text.split(" "))
+        LLM_TOKEN_COUNT.labels(operation=operation, type="input").observe(input_tokens)
+
         prompt = f"""Complete the following text with exactly one natural sentence:
-        {input_text}
-        
+        {request.user_text}
+
         Rules:
         - ALWAYS start your response with the exact input text
         - Add only ONE sentence
         - Keep the style consistent
         - Make it coherent with the input
         """
-        logger.info(f"Processing completion request for text: {input_text}")
+        logger.info(f"Processing completion request for text: {request.user_text}")
         result = llm(prompt)
+
+        output_tokens = len(result.split())
+        LLM_TOKEN_COUNT.labels(operation=operation, type="output").observe(
+            output_tokens
+        )
+
         logger.info(f"Generated completion: {result}")
         return TextResponse(llm_response=result)
     except Exception as e:
@@ -148,11 +170,22 @@ async def complete_text(request: TextRequest):
 
 @router.post("/summarization", response_model=TextResponse)
 async def summarize_text(request: TextRequest):
+    operation = "summarization"
+
     try:
+        input_tokens = len(request.user_text.split())  # split on any whitespace
+        LLM_TOKEN_COUNT.labels(operation=operation, type="input").observe(input_tokens)
+
         prompt = f"""Summarize the following text concisely:
-        {' '.join(request.user_text)}
+        {request.user_text}
         """
         result = llm(prompt)
+
+        output_tokens = len(result.split())
+        LLM_TOKEN_COUNT.labels(operation=operation, type="output").observe(
+            output_tokens
+        )
+
         return TextResponse(llm_response=result)
     except Exception as e:
         logger.error(f"Summarization error: {str(e)}")
@@ -161,21 +194,30 @@ async def summarize_text(request: TextRequest):
 
 @router.post("/rephrase", response_model=TextResponse)
 async def rephrase_text(request: TextRequest):
+    operation = "rephrase_text"
     logger.info(f"Received rephrase request: {request}")
+
     try:
-        input_text = " ".join(request.user_text)
-        word_count = len(input_text.split())
+        input_tokens = len(request.user_text.split())  # split on any whitespace
+        LLM_TOKEN_COUNT.labels(operation=operation, type="input").observe(input_tokens)
+
+        word_count = len(request.user_text.split())
         prompt = f"""Rephrase the following text:
-        {input_text}
-        
+        {request.user_text}
+
         Rules:
         - Keep EXACTLY {word_count} words
         - Maintain the original meaning
         - Use similar tone and style
         - Make it sound natural
         """
-        logger.info(f"Received rephrase request: {input_text}")
+        logger.info(f"Received rephrase request: {request.user_text}")
         result = llm(prompt)
+
+        output_tokens = len(result.split())
+        LLM_TOKEN_COUNT.labels(operation=operation, type="output").observe(
+            output_tokens
+        )
         # Ensure exact word count
         result_words = result.split()
         if len(result_words) > word_count:
diff --git a/genai/app/services/__init__.py b/genai/app/services/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/genai/app/test.py b/genai/app/test.py
index 5acc29fd..02f65146 100644
--- a/genai/app/test.py
+++ b/genai/app/test.py
@@ -3,27 +3,29 @@
 
 client = TestClient(app)
 
+
 def test_health_check():
-    response =  client.get("/health")
+    response = client.get("/health")
     assert response.status_code == 200
     assert "status" in response.json()
 
 
 def test_completion():
-    payload = {"user_text": ["This is a test input."]}
+    payload = {"user_text": "This is a test input."}
     response = client.post("/completion", json=payload)
     assert response.status_code == 200
     assert "llm_response" in response.json()
 
+
 def test_summarization():
-    payload = {"user_text": ["This is a long sentence that needs summarizing."]}
+    payload = {"user_text": "This is a long sentence that needs summarizing."}
     response = client.post("/summarization", json=payload)
     assert response.status_code == 200
     assert "llm_response" in response.json()
 
 
 def test_rephrase():
-    payload = {"user_text": ["This is a sample sentence."]}
+    payload = {"user_text": "This is a sample sentence."}
     response = client.post("/rephrase", json=payload)
     assert response.status_code == 200
-    assert "llm_response" in response.json()
\ No newline at end of file
+    assert "llm_response" in response.json()
diff --git a/genai/app/utils/__init__.py b/genai/app/utils/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/genai/requirements.txt b/genai/requirements.txt
index f0bf846a..a56d35c1 100644
--- a/genai/requirements.txt
+++ b/genai/requirements.txt
@@ -1,7 +1,9 @@
 annotated-types==0.7.0
+anyio==4.9.0
 authlib==1.3.1
 certifi==2025.4.26
 cffi==1.17.1
+charset-normalizer==3.4.2
 click==8.1.8
 cryptography==43.0.3
 deprecation==2.1.0
@@ -9,6 +11,7 @@ dnspython==2.7.0
 email-validator==2.2.0
 fastapi==0.115.12
 fastapi-cli==0.0.7
+greenlet==3.2.3
 grpcio==1.71.0
 grpcio-health-checking==1.71.0
 grpcio-tools==1.71.0
@@ -19,11 +22,21 @@ httpx==0.28.1
 idna==3.10
 iniconfig==2.1.0
 jinja2==3.1.6
+jsonpatch==1.33
+jsonpointer==3.0.0
+langchain==0.3.26
+langchain-core==0.3.69
+langchain-text-splitters==0.3.8
+langsmith==0.4.8
 markdown-it-py==3.0.0
 markupsafe==3.0.2
 mdurl==0.1.2
+orjson==3.11.0
 packaging==23.2
+pip==25.0.1
 pluggy==1.5.0
+prometheus-client==0.22.1
+prometheus-fastapi-instrumentator==7.1.0
 protobuf==5.29.4
 pycparser==2.22
 pydantic==2.11.4
@@ -35,21 +48,24 @@ python-dotenv==1.1.0
 python-multipart==0.0.20
 pyyaml==6.0.2
 requests==2.31.0
+requests-toolbelt==1.0.0
 rich==14.0.0
 rich-toolkit==0.14.5
 ruff==0.11.8
 setuptools==80.4.0
 shellingham==1.5.4
 sniffio==1.3.1
+sqlalchemy==2.0.41
 starlette==0.46.2
+tenacity==9.1.2
 typer==0.15.3
 typing-extensions==4.13.2
 typing-inspection==0.4.0
+urllib3==2.5.0
 uvicorn==0.34.2
 uvloop==0.21.0
 validators==0.34.0
 watchfiles==1.0.5
 weaviate-client==4.14.1
 websockets==15.0.1
-langchain>=0.1.0
-langchain-core>=0.1.10
+zstandard==0.23.0
diff --git a/infrastructure/whiteboard-app/Chart.lock b/infrastructure/whiteboard-app/Chart.lock
index e5dc3a08..00b613af 100644
--- a/infrastructure/whiteboard-app/Chart.lock
+++ b/infrastructure/whiteboard-app/Chart.lock
@@ -9,4 +9,4 @@ dependencies:
   repository: https://charts.bitnami.com/bitnami
   version: 21.2.12
 digest: sha256:68d94162b9c62e8d173c984b419f1837de111fe9d75c381a1555bc09860e543b
-generated: "2025-07-16T13:59:46.643364+02:00"
+generated: "2025-07-19T17:14:37.691057+02:00"
diff --git a/infrastructure/whiteboard-observability/.helmignore b/infrastructure/whiteboard-observability/.helmignore
new file mode 100644
index 00000000..0e8a0eb3
--- /dev/null
+++ b/infrastructure/whiteboard-observability/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/infrastructure/whiteboard-observability/Chart.lock b/infrastructure/whiteboard-observability/Chart.lock
new file mode 100644
index 00000000..afdfd431
--- /dev/null
+++ b/infrastructure/whiteboard-observability/Chart.lock
@@ -0,0 +1,12 @@
+dependencies:
+- name: grafana
+  repository: https://grafana.github.io/helm-charts
+  version: 9.2.10
+- name: prometheus
+  repository: https://prometheus-community.github.io/helm-charts
+  version: 27.28.0
+- name: mailhog
+  repository: https://codecentric.github.io/helm-charts
+  version: 5.8.0
+digest: sha256:b1512e01eadc92c03de33e061fdf24f4ba22e910166fbe57fd8805f0ffca1e9d
+generated: "2025-07-20T13:30:29.991761+02:00"
diff --git a/infrastructure/whiteboard-observability/Chart.yaml b/infrastructure/whiteboard-observability/Chart.yaml
new file mode 100644
index 00000000..78717bdf
--- /dev/null
+++ b/infrastructure/whiteboard-observability/Chart.yaml
@@ -0,0 +1,16 @@
+apiVersion: v2
+name: whiteboard-observability
+description: "Observability stack (Prometheus, Grafana, MailHog) for the whiteboard application"
+type: application
+version: 0.1.0  # version of this chart itself
+appVersion: "1.16.0"  # NOTE(review): looks like the `helm create` scaffold default — confirm
+dependencies:
+  - name: grafana
+    version: 9.2.10
+    repository: "https://grafana.github.io/helm-charts"
+  - name: prometheus
+    version: 27.28.0
+    repository: "https://prometheus-community.github.io/helm-charts"
+  - name: mailhog
+    version: 5.8.0
+    repository: "https://codecentric.github.io/helm-charts"
diff --git a/infrastructure/whiteboard-observability/charts/grafana-9.2.10.tgz b/infrastructure/whiteboard-observability/charts/grafana-9.2.10.tgz
new file mode 100644
index 00000000..d9cbb75e
Binary files /dev/null and b/infrastructure/whiteboard-observability/charts/grafana-9.2.10.tgz differ
diff --git a/infrastructure/whiteboard-observability/charts/mailhog-5.8.0.tgz b/infrastructure/whiteboard-observability/charts/mailhog-5.8.0.tgz
new file mode 100644
index 00000000..9cdfd149
Binary files /dev/null and b/infrastructure/whiteboard-observability/charts/mailhog-5.8.0.tgz differ
diff --git a/infrastructure/whiteboard-observability/charts/prometheus-27.28.0.tgz b/infrastructure/whiteboard-observability/charts/prometheus-27.28.0.tgz
new file mode 100644
index 00000000..bf979b48
Binary files /dev/null and b/infrastructure/whiteboard-observability/charts/prometheus-27.28.0.tgz differ
diff --git a/infrastructure/whiteboard-observability/files/grafana/provisioning/dashboards/dashboards.yml b/infrastructure/whiteboard-observability/files/grafana/provisioning/dashboards/dashboards.yml
new file mode 100644
index 00000000..0e8f7220
--- /dev/null
+++ b/infrastructure/whiteboard-observability/files/grafana/provisioning/dashboards/dashboards.yml
@@ -0,0 +1,23 @@
+apiVersion: 1
+
+providers:  # one file-based provider per provisioned dashboard JSON
+  - name: "Server System Metrics Dashboard"
+    type: file
+    editable: true
+    updateIntervalSeconds: 10  # interval, in seconds, at which the path below is re-scanned
+    options:
+      path: /etc/grafana/provisioning/dashboards/server-dashboard.json
+
+  - name: "GenAi System Metrics Dashboard"
+    type: file
+    editable: true
+    updateIntervalSeconds: 10
+    options:
+      path: /etc/grafana/provisioning/dashboards/genai-dashboard.json
+
+  - name: "Realtime System Metrics Dashboard"
+    type: file
+    editable: true
+    updateIntervalSeconds: 10
+    options:
+      path: /etc/grafana/provisioning/dashboards/realtime-dashboard.json
\ No newline at end of file
diff --git a/infrastructure/whiteboard-observability/files/grafana/provisioning/dashboards/genai-dashboard.json b/infrastructure/whiteboard-observability/files/grafana/provisioning/dashboards/genai-dashboard.json
new file mode 100644
index 00000000..bc3a1fc6
--- /dev/null
+++ b/infrastructure/whiteboard-observability/files/grafana/provisioning/dashboards/genai-dashboard.json
@@ -0,0 +1,522 @@
+{
+  "annotations":{
+    "list":[
+      {
+        "builtIn":1,
+        "datasource":{
+          "type":"grafana",
+          "uid":"-- Grafana --"
+        },
+        "enable":true,
+        "hide":true,
+        "iconColor":"rgba(0, 211, 255, 1)",
+        "name":"Annotations & Alerts",
+        "type":"dashboard"
+      },
+      {
+        "datasource":{
+          "type":"prometheus",
+          "uid":"PBFA97CFB590B2093"
+        },
+        "enable":true,
+        "hide":false,
+        "iconColor":"orange",
+        "name":"Client Error Spike",
+        "target":{
+          "expr":"sum(rate(http_requests_total{status=~\"4xx\"}[5m])) > 0.1",
+          "interval":"",
+          "refId":"Anno"
+        },
+        "textFormat":"High rate of client errors detected, exceeding 0.1 requests per second",
+        "titleFormat":"Client Error Spike"
+      },
+      {
+        "datasource":{
+          "type":"prometheus",
+          "uid":"PBFA97CFB590B2093"
+        },
+        "enable":true,
+        "hide":false,
+        "iconColor":"red",
+        "name":"Server Error Spike",
+        "tagKeys":"server, error",
+        "target":{
+          "expr":"sum(rate(http_requests_total{status=~\"5xx\"}[5m])) > 0.1",
+          "interval":"",
+          "refId":"Anno"
+        },
+        "textFormat":"Server error rate exceeded threshold (>0.1 req/s)",
+        "titleFormat":"Server Error Spike"
+      }
+    ]
+  },
+  "description":"Dashboard showing system metrics including request count, latency, and error rate.",
+  "editable":true,
+  "fiscalYearStartMonth":0,
+  "graphTooltip":0,
+  "id":3,
+  "links":[
+
+  ],
+  "panels":[
+    {
+      "datasource":{
+        "type":"prometheus",
+        "uid":"PBFA97CFB590B2093"
+      },
+      "fieldConfig":{
+        "defaults":{
+          "color":{
+            "mode":"palette-classic"
+          },
+          "custom":{
+            "axisBorderShow":false,
+            "axisCenteredZero":false,
+            "axisColorMode":"text",
+            "axisLabel":"",
+            "axisPlacement":"auto",
+            "barAlignment":0,
+            "barWidthFactor":0.6,
+            "drawStyle":"line",
+            "fillOpacity":20,
+            "gradientMode":"none",
+            "hideFrom":{
+              "legend":false,
+              "tooltip":false,
+              "viz":false
+            },
+            "insertNulls":false,
+            "lineInterpolation":"smooth",
+            "lineWidth":2,
+            "pointSize":5,
+            "scaleDistribution":{
+              "type":"linear"
+            },
+            "showPoints":"auto",
+            "spanNulls":false,
+            "stacking":{
+              "group":"A",
+              "mode":"none"
+            },
+            "thresholdsStyle":{
+              "mode":"area"
+            }
+          },
+          "mappings":[
+
+          ],
+          "thresholds":{
+            "mode":"absolute",
+            "steps":[
+              {
+                "color":"green"
+              },
+              {
+                "color":"orange",
+                "value":70
+              },
+              {
+                "color":"red",
+                "value":80
+              }
+            ]
+          }
+        },
+        "overrides":[
+
+        ]
+      },
+      "gridPos":{
+        "h":17,
+        "w":12,
+        "x":0,
+        "y":0
+      },
+      "id":1,
+      "options":{
+        "legend":{
+          "calcs":[
+            "mean",
+            "max"
+          ],
+          "displayMode":"table",
+          "placement":"bottom",
+          "showLegend":true
+        },
+        "tooltip":{
+          "hideZeros":false,
+          "mode":"multi",
+          "sort":"none"
+        }
+      },
+      "pluginVersion":"12.0.2",
+      "targets":[
+        {
+          "datasource":{
+            "type":"prometheus",
+            "uid":"PBFA97CFB590B2093"
+          },
+          "editorMode":"code",
+          "expr":"sum by (handler, method, status) (increase(http_requests_total{job=\"genai_job\"}[5m]))",
+          "legendFormat":"{{method}} {{status}} {{handler}}",
+          "range":true,
+          "refId":"A"
+        }
+      ],
+      "title":"Request Count",
+      "type":"timeseries"
+    },
+    {
+      "datasource":{
+        "type":"prometheus",
+        "uid":"PBFA97CFB590B2093"
+      },
+      "fieldConfig":{
+        "defaults":{
+          "color":{
+            "mode":"palette-classic"
+          },
+          "custom":{
+            "axisBorderShow":false,
+            "axisCenteredZero":false,
+            "axisColorMode":"text",
+            "axisLabel":"Average Latency (seconds)",
+            "axisPlacement":"auto",
+            "barAlignment":0,
+            "barWidthFactor":0.6,
+            "drawStyle":"line",
+            "fillOpacity":20,
+            "gradientMode":"none",
+            "hideFrom":{
+              "legend":false,
+              "tooltip":false,
+              "viz":false
+            },
+            "insertNulls":false,
+            "lineInterpolation":"smooth",
+            "lineWidth":2,
+            "pointSize":5,
+            "scaleDistribution":{
+              "type":"linear"
+            },
+            "showPoints":"auto",
+            "spanNulls":false,
+            "stacking":{
+              "group":"A",
+              "mode":"none"
+            },
+            "thresholdsStyle":{
+              "mode":"area"
+            }
+          },
+          "mappings":[
+
+          ],
+          "thresholds":{
+            "mode":"absolute",
+            "steps":[
+              {
+                "color":"green"
+              },
+              {
+                "color":"red",
+                "value":80
+              }
+            ]
+          }
+        },
+        "overrides":[
+
+        ]
+      },
+      "gridPos":{
+        "h":17,
+        "w":12,
+        "x":12,
+        "y":0
+      },
+      "id":2,
+      "options":{
+        "legend":{
+          "calcs":[
+            "mean",
+            "max"
+          ],
+          "displayMode":"table",
+          "placement":"bottom",
+          "showLegend":true
+        },
+        "tooltip":{
+          "hideZeros":false,
+          "mode":"multi",
+          "sort":"none"
+        }
+      },
+      "pluginVersion":"12.0.2",
+      "targets":[
+        {
+          "datasource":{
+            "type":"prometheus",
+            "uid":"PBFA97CFB590B2093"
+          },
+          "editorMode":"code",
+          "expr":"sum(rate(http_request_duration_seconds_sum{job=\"genai_job\"}[5m])) by (method, handler) /\nsum(rate(http_request_duration_seconds_count{job=\"genai_job\"}[5m])) by (method, handler)",
+          "legendFormat":"{{method}} {{handler}}",
+          "range":true,
+          "refId":"A"
+        }
+      ],
+      "title":"Latency",
+      "type":"timeseries"
+    },
+    {
+      "datasource":{
+        "type":"prometheus",
+        "uid":"PBFA97CFB590B2093"
+      },
+      "fieldConfig":{
+        "defaults":{
+          "color":{
+            "mode":"palette-classic"
+          },
+          "custom":{
+            "axisBorderShow":false,
+            "axisCenteredZero":false,
+            "axisColorMode":"text",
+            "axisLabel":"",
+            "axisPlacement":"auto",
+            "barAlignment":0,
+            "barWidthFactor":0.6,
+            "drawStyle":"line",
+            "fillOpacity":0,
+            "gradientMode":"none",
+            "hideFrom":{
+              "legend":false,
+              "tooltip":false,
+              "viz":false
+            },
+            "insertNulls":false,
+            "lineInterpolation":"linear",
+            "lineWidth":1,
+            "pointSize":5,
+            "scaleDistribution":{
+              "type":"linear"
+            },
+            "showPoints":"auto",
+            "spanNulls":false,
+            "stacking":{
+              "group":"A",
+              "mode":"none"
+            },
+            "thresholdsStyle":{
+              "mode":"off"
+            }
+          },
+          "mappings":[
+
+          ],
+          "thresholds":{
+            "mode":"absolute",
+            "steps":[
+              {
+                "color":"green"
+              },
+              {
+                "color":"red",
+                "value":80
+              }
+            ]
+          }
+        },
+        "overrides":[
+
+        ]
+      },
+      "gridPos":{
+        "h":12,
+        "w":12,
+        "x":0,
+        "y":17
+      },
+      "id":4,
+      "options":{
+        "legend":{
+          "calcs":[
+
+          ],
+          "displayMode":"list",
+          "placement":"bottom",
+          "showLegend":true
+        },
+        "tooltip":{
+          "hideZeros":false,
+          "mode":"single",
+          "sort":"none"
+        }
+      },
+      "pluginVersion":"12.0.2",
+      "targets":[
+        {
+          "editorMode":"code",
+          "expr":"sum by (handler, method) (increase(http_requests_total{status=~\"4..\", job=\"genai_job\"}[5m]))",
+          "legendFormat":"{{method}} {{handler}}",
+          "range":true,
+          "refId":"A"
+        },
+        {
+          "datasource":{
+            "type":"prometheus",
+            "uid":"PBFA97CFB590B2093"
+          },
+          "editorMode":"code",
+          "expr":"sum by (handler, method) (increase(http_requests_total{status=~\"4..\", job=\"genai_job\"}[5m]))",
+          "hide":false,
+          "instant":false,
+          "legendFormat":"__auto",
+          "range":true,
+          "refId":"B"
+        }
+      ],
+      "title":"Errors",
+      "type":"timeseries"
+    },
+    {
+      "datasource":{
+        "type":"prometheus",
+        "uid":"PBFA97CFB590B2093"
+      },
+      "description":"This panel displays the average number of input and output tokens per request for each operation (completion, summarization, rephrase_text) over time. Each value is the mean token count over a 5-minute rate window, grouped by operation.",
+      "fieldConfig":{
+        "defaults":{
+          "color":{
+            "mode":"palette-classic"
+          },
+          "custom":{
+            "axisBorderShow":false,
+            "axisCenteredZero":false,
+            "axisColorMode":"text",
+            "axisLabel":"",
+            "axisPlacement":"auto",
+            "barAlignment":0,
+            "barWidthFactor":0.6,
+            "drawStyle":"line",
+            "fillOpacity":0,
+            "gradientMode":"none",
+            "hideFrom":{
+              "legend":false,
+              "tooltip":false,
+              "viz":false
+            },
+            "insertNulls":false,
+            "lineInterpolation":"linear",
+            "lineWidth":1,
+            "pointSize":5,
+            "scaleDistribution":{
+              "type":"linear"
+            },
+            "showPoints":"auto",
+            "spanNulls":false,
+            "stacking":{
+              "group":"A",
+              "mode":"none"
+            },
+            "thresholdsStyle":{
+              "mode":"off"
+            }
+          },
+          "mappings":[
+
+          ],
+          "thresholds":{
+            "mode":"absolute",
+            "steps":[
+              {
+                "color":"green"
+              },
+              {
+                "color":"red",
+                "value":80
+              }
+            ]
+          }
+        },
+        "overrides":[
+
+        ]
+      },
+      "gridPos":{
+        "h":12,
+        "w":12,
+        "x":12,
+        "y":17
+      },
+      "id":5,
+      "options":{
+        "legend":{
+          "calcs":[
+
+          ],
+          "displayMode":"list",
+          "placement":"bottom",
+          "showLegend":true
+        },
+        "tooltip":{
+          "hideZeros":false,
+          "mode":"single",
+          "sort":"none"
+        }
+      },
+      "pluginVersion":"12.0.2",
+      "targets":[
+        {
+          "editorMode":"code",
+          "exemplar":false,
+          "expr":"rate(llm_token_count_sum{type=\"output\"}[5m]) \n/\nrate(llm_token_count_count{type=\"output\"}[5m])",
+          "format":"time_series",
+          "instant":false,
+          "legendFormat":"output - {{operation}}",
+          "range":true,
+          "refId":"A"
+        },
+        {
+          "datasource":{
+            "type":"prometheus",
+            "uid":"PBFA97CFB590B2093"
+          },
+          "editorMode":"code",
+          "expr":"rate(llm_token_count_sum{type=\"input\"}[5m]) \n/\nrate(llm_token_count_count{type=\"input\"}[5m])",
+          "hide":false,
+          "instant":false,
+          "legendFormat":"input - {{operation}}",
+          "range":true,
+          "refId":"B"
+        }
+      ],
+      "title":"LLM API Token Analytics",
+      "type":"timeseries"
+    }
+  ],
+  "preload":false,
+  "refresh":"10s",
+  "schemaVersion":41,
+  "tags":[
+    "monitoring",
+    "alerts"
+  ],
+  "templating":{
+    "list":[
+
+    ]
+  },
+  "time":{
+    "from":"now-6h",
+    "to":"now"
+  },
+  "timepicker":{
+
+  },
+  "timezone":"browser",
+  "title":"GenAi System Metrics Dashboard",
+  "uid":"genai-metrics-dashboard1111",
+  "version":2
+}
diff --git a/infrastructure/whiteboard-observability/files/grafana/provisioning/dashboards/realtime-dashboard.json b/infrastructure/whiteboard-observability/files/grafana/provisioning/dashboards/realtime-dashboard.json
new file mode 100644
index 00000000..fac0edbf
--- /dev/null
+++ b/infrastructure/whiteboard-observability/files/grafana/provisioning/dashboards/realtime-dashboard.json
@@ -0,0 +1,593 @@
+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": {
+          "type": "grafana",
+          "uid": "-- Grafana --"
+        },
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      },
+      {
+        "datasource": {
+          "type": "prometheus",
+          "uid": "PBFA97CFB590B2093"
+        },
+        "enable": true,
+        "hide": false,
+        "iconColor": "red",
+        "name": "WebSocket error",
+        "tagKeys": "websocket_errors",
+        "target": {
+          "expr": "(\n  rate(websocket_read_errors[5m]) +\n  rate(websocket_write_errors[5m]) +\n  rate(websocket_upgrade_errors[5m])\n) > 0.1",
+          "interval": "",
+          "refId": "Anno"
+        },
+        "textFormat": "High WebSocket error rate detected - exceeding 0.1 errors per second",
+        "titleFormat": "WebSocket Error Rate"
+      }
+    ]
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "id": 3,
+  "links": [],
+  "panels": [
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "PBFA97CFB590B2093"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 10,
+        "x": 0,
+        "y": 0
+      },
+      "id": 5,
+      "options": {
+        "minVizHeight": 75,
+        "minVizWidth": 75,
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "showThresholdLabels": false,
+        "showThresholdMarkers": true,
+        "sizing": "auto"
+      },
+      "pluginVersion": "12.0.2",
+      "targets": [
+        {
+          "editorMode": "code",
+          "expr": "websocket_connections_active",
+          "legendFormat": "__auto",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Websocket Connections Active",
+      "type": "gauge"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "PBFA97CFB590B2093"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 15,
+        "w": 14,
+        "x": 10,
+        "y": 0
+      },
+      "id": 3,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "12.0.2",
+      "targets": [
+        {
+          "editorMode": "code",
+          "expr": "rate(websocket_connection_duration_sum[5m]) / rate(websocket_connection_duration_count[5m])",
+          "legendFormat": "__auto",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Connection Duration Summary",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "PBFA97CFB590B2093"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": [
+          {
+            "__systemRef": "hideSeriesFrom",
+            "matcher": {
+              "id": "byNames",
+              "options": {
+                "mode": "exclude",
+                "names": [
+                  "histogram_quantile(0.95, sum(rate(websocket_connection_duration_bucket[5m])) by (le))"
+                ],
+                "prefix": "All except:",
+                "readOnly": true
+              }
+            },
+            "properties": []
+          }
+        ]
+      },
+      "gridPos": {
+        "h": 11,
+        "w": 10,
+        "x": 0,
+        "y": 8
+      },
+      "id": 1,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "12.0.2",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "PBFA97CFB590B2093"
+          },
+          "disableTextWrap": false,
+          "editorMode": "code",
+          "expr": "rate(websocket_sent_messages[1m])",
+          "fullMetaSearch": false,
+          "includeNullMetadata": true,
+          "legendFormat": "Messages Sent",
+          "range": true,
+          "refId": "A",
+          "useBackend": false
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "PBFA97CFB590B2093"
+          },
+          "editorMode": "code",
+          "expr": "rate(websocket_received_messages[1m])",
+          "hide": false,
+          "instant": false,
+          "legendFormat": "Messages Received",
+          "range": true,
+          "refId": "B"
+        }
+      ],
+      "title": "Messages Sent/Received Rate",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "PBFA97CFB590B2093"
+      },
+      "description": "Distribution of WebSocket connection durations across different time buckets. Each bar represents a cumulative count of connections that lasted less than or equal to the specified duration (in seconds). For example, '60s' shows connections lasting up to 60 seconds, '120s' shows connections up to 120 seconds, and so on up to '+Inf' (unlimited duration).",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": [
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "Value"
+            },
+            "properties": [
+              {
+                "id": "noValue"
+              }
+            ]
+          }
+        ]
+      },
+      "gridPos": {
+        "h": 17,
+        "w": 14,
+        "x": 10,
+        "y": 15
+      },
+      "id": 4,
+      "options": {
+        "displayMode": "gradient",
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "maxVizHeight": 300,
+        "minVizHeight": 16,
+        "minVizWidth": 8,
+        "namePlacement": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "showUnfilled": true,
+        "sizing": "auto",
+        "valueMode": "color"
+      },
+      "pluginVersion": "12.0.2",
+      "targets": [
+        {
+          "disableTextWrap": false,
+          "editorMode": "code",
+          "exemplar": false,
+          "expr": "websocket_connection_duration_bucket{le=~\".+\"}",
+          "format": "table",
+          "fullMetaSearch": false,
+          "includeNullMetadata": true,
+          "instant": true,
+          "legendFormat": "{{le}}",
+          "range": true,
+          "refId": "A",
+          "useBackend": false
+        }
+      ],
+      "title": "Websocket Connection Duration",
+      "transformations": [
+        {
+          "id": "filterFieldsByName",
+          "options": {
+            "include": {
+              "names": [
+                "Time",
+                "60.0",
+                "120.0",
+                "300.0",
+                "600.0",
+                "900.0",
+                "1200.0",
+                "1800.0",
+                "+Inf"
+              ]
+            }
+          }
+        }
+      ],
+      "type": "bargauge"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "PBFA97CFB590B2093"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": [
+          {
+            "__systemRef": "hideSeriesFrom",
+            "matcher": {
+              "id": "byNames",
+              "options": {
+                "mode": "exclude",
+                "names": [
+                  "websocket_read_errors"
+                ],
+                "prefix": "All except:",
+                "readOnly": true
+              }
+            },
+            "properties": [
+              {
+                "id": "custom.hideFrom",
+                "value": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                }
+              }
+            ]
+          }
+        ]
+      },
+      "gridPos": {
+        "h": 13,
+        "w": 10,
+        "x": 0,
+        "y": 19
+      },
+      "id": 2,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "12.0.2",
+      "targets": [
+        {
+          "editorMode": "code",
+          "expr": "rate(websocket_read_errors[1m])",
+          "legendFormat": "websocket_read_errors",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "PBFA97CFB590B2093"
+          },
+          "editorMode": "code",
+          "expr": "rate(websocket_write_errors[1m])",
+          "hide": false,
+          "instant": false,
+          "legendFormat": "websocket_write_errors",
+          "range": true,
+          "refId": "B"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "PBFA97CFB590B2093"
+          },
+          "editorMode": "code",
+          "expr": "rate(websocket_upgrade_errors[1m])",
+          "hide": false,
+          "instant": false,
+          "legendFormat": "websocket_upgrade_errors",
+          "range": true,
+          "refId": "C"
+        }
+      ],
+      "title": "Websocket Error Rates",
+      "type": "timeseries"
+    }
+  ],
+  "preload": false,
+  "schemaVersion": 41,
+  "tags": [],
+  "templating": {
+    "list": []
+  },
+  "time": {
+    "from": "now-3h",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "browser",
+  "title": "Realtime System Metrics",
+  "uid": "275dd5ce-c9b5-4ed9-a6d0-cbac478716a1",
+  "version": 4
+}
\ No newline at end of file
diff --git a/infrastructure/whiteboard-observability/files/grafana/provisioning/dashboards/server-dashboard.json b/infrastructure/whiteboard-observability/files/grafana/provisioning/dashboards/server-dashboard.json
new file mode 100644
index 00000000..caa6f15f
--- /dev/null
+++ b/infrastructure/whiteboard-observability/files/grafana/provisioning/dashboards/server-dashboard.json
@@ -0,0 +1,394 @@
+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": {
+          "type": "grafana",
+          "uid": "-- Grafana --"
+        },
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "target": {
+          "limit": 100,
+          "matchAny": false,
+          "tags": [],
+          "type": "dashboard"
+        },
+        "type": "dashboard"
+      },
+      {
+        "datasource": {
+          "type": "prometheus",
+          "uid": "PBFA97CFB590B2093"
+        },
+        "enable": true,
+        "hide": false,
+        "iconColor": "orange",
+        "name": "Client Error Spike",
+        "tagKeys": "client-error, http-4xx",
+        "target": {
+          "expr": "sum(rate(http_server_requests_seconds_count{status=~\"4..\"}[5m])) > 0.1",
+          "interval": "",
+          "refId": "Anno"
+        },
+        "textFormat": "High rate of client errors detected, exceeding 0.1 requests per second",
+        "titleFormat": "Client Error Spike"
+      },
+      {
+        "datasource": {
+          "type": "prometheus",
+          "uid": "PBFA97CFB590B2093"
+        },
+        "enable": true,
+        "hide": false,
+        "iconColor": "red",
+        "name": "Server Error Spike",
+        "tagKeys": "server, error",
+        "target": {
+          "expr": "sum(rate(http_server_requests_seconds_count{status=~\"5..\"}[5m])) > 0.1",
+          "interval": "",
+          "refId": "Anno"
+        },
+        "textFormat": "Server error rate exceeded threshold (>0.1 req/s)",
+        "titleFormat": "Server Error Spike"
+      }
+    ]
+  },
+  "description": "Dashboard showing system metrics including request count, latency, and error rate.",
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "id": 1,
+  "links": [],
+  "panels": [
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "PBFA97CFB590B2093"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 20,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "area"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "orange",
+                "value": 70
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 17,
+        "w": 12,
+        "x": 0,
+        "y": 0
+      },
+      "id": 1,
+      "options": {
+        "legend": {
+          "calcs": [
+            "mean",
+            "max"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "12.0.2",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "PBFA97CFB590B2093"
+          },
+          "editorMode": "code",
+          "expr": "sum(rate(http_server_requests_seconds_count[5m])) by (method, uri)",
+          "legendFormat": "{{method}} {{uri}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Request Count",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "PBFA97CFB590B2093"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "Average Latency (seconds)",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 20,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "area"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 17,
+        "w": 12,
+        "x": 12,
+        "y": 0
+      },
+      "id": 2,
+      "options": {
+        "legend": {
+          "calcs": [
+            "mean",
+            "max"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "12.0.2",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "PBFA97CFB590B2093"
+          },
+          "editorMode": "code",
+          "expr": "    sum(rate(http_server_requests_seconds_sum[5m])) by (method, uri) /\nsum(rate(http_server_requests_seconds_count[5m])) by (method, uri)",
+          "legendFormat": "__auto",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Latency",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "PBFA97CFB590B2093"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 12,
+        "w": 12,
+        "x": 0,
+        "y": 17
+      },
+      "id": 4,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "12.0.2",
+      "targets": [
+        {
+          "editorMode": "code",
+          "expr": "sum(rate(http_server_requests_seconds_count{outcome=\"CLIENT_ERROR\"}[5m])) by (method, uri, status)",
+          "legendFormat": "__auto",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "PBFA97CFB590B2093"
+          },
+          "editorMode": "code",
+          "expr": "sum(rate(http_server_requests_seconds_count{status=~\"5..\"}[5m])) by (method, uri, status)",
+          "hide": false,
+          "instant": false,
+          "legendFormat": "__auto",
+          "range": true,
+          "refId": "B"
+        }
+      ],
+      "title": "Errors",
+      "type": "timeseries"
+    }
+  ],
+  "preload": false,
+  "refresh": "10s",
+  "schemaVersion": 41,
+  "tags": [
+    "monitoring",
+    "alerts"
+  ],
+  "templating": {
+    "list": []
+  },
+  "time": {
+    "from": "now-6h",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "browser",
+  "title": "Server System Metrics Dashboard",
+  "uid": "system-metrics-dashboard111112",
+  "version": 1
+}
\ No newline at end of file
diff --git a/infrastructure/whiteboard-observability/files/grafana/provisioning/datasources/prometheus.yml b/infrastructure/whiteboard-observability/files/grafana/provisioning/datasources/prometheus.yml
new file mode 100644
index 00000000..20ab3d19
--- /dev/null
+++ b/infrastructure/whiteboard-observability/files/grafana/provisioning/datasources/prometheus.yml
@@ -0,0 +1,14 @@
+# Grafana datasource provisioning file. The grafana-datasources-configmap
+# template renders it through Helm's tpl, so the url below is expanded with
+# the release name at install time.
+apiVersion: 1
+
+datasources:
+  - name: Prometheus
+    # The provisioned dashboards reference this datasource by uid; pin it here
+    # so the link does not depend on how Grafana derives a uid from the name.
+    uid: 'PBFA97CFB590B2093'
+    type: prometheus
+    access: proxy
+    url: '{{ printf "http://%s-prometheus-server" .Release.Name }}'
+    isDefault: true
\ No newline at end of file
diff --git a/infrastructure/whiteboard-observability/files/prometheus/alert.rules.yml b/infrastructure/whiteboard-observability/files/prometheus/alert.rules.yml
new file mode 100644
index 00000000..502c3530
--- /dev/null
+++ b/infrastructure/whiteboard-observability/files/prometheus/alert.rules.yml
@@ -0,0 +1,11 @@
+groups:
+  - name: service-availability  # availability alerts for the scraped application services
+    rules:
+      - alert: Service Down  # raised when the up metric of a matched job is 0
+        expr: up{job=~"server_job|genai_job|realtime_job"} == 0
+        for: 1m  # the expression must stay true for a full minute before the alert fires
+        labels:
+          severity: critical
+        annotations:
+          summary: "Service {{ $labels.job }} is down"
+          description: "Service {{ $labels.job }} on {{ $labels.instance }} has been down for more than 1 minute"
\ No newline at end of file
diff --git a/infrastructure/whiteboard-observability/files/prometheus/prometheus.yml b/infrastructure/whiteboard-observability/files/prometheus/prometheus.yml
new file mode 100644
index 00000000..c6f4c4d5
--- /dev/null
+++ b/infrastructure/whiteboard-observability/files/prometheus/prometheus.yml
@@ -0,0 +1,39 @@
+# Prometheus server configuration. Rendered through Helm's tpl by the
+# prometheus-configmap template, so the quoted template actions below are
+# replaced with release- and values-specific hosts.
+global:
+  scrape_interval: 15s
+  evaluation_interval: 15s
+
+alerting:
+  alertmanagers:
+    - static_configs:
+        - targets:
+            - '{{ printf "%s-alertmanager" .Release.Name }}:9093'
+
+rule_files:
+  - "/etc/prometheus/alert.rules.yml"
+
+# Job names are matched by the Service Down rule in alert.rules.yml; keep them
+# in sync when adding or renaming a job.
+scrape_configs:
+  - job_name: 'server_job'
+    metrics_path: '/actuator/prometheus'
+    scheme: https
+    static_configs:
+      - targets:
+          - '{{ .Values.server.url }}'
+
+  # NOTE(review): no scheme is set for the two jobs below, so they are scraped
+  # with the Prometheus default scheme (http) - confirm that is intended.
+  - job_name: 'genai_job'
+    metrics_path: '/metrics'
+    static_configs:
+      - targets:
+          - '{{ .Values.genai.url }}'
+
+  - job_name: 'realtime_job'
+    metrics_path: '/metrics'
+    static_configs:
+      - targets:
+          - '{{ .Values.realtime.url }}'
\ No newline at end of file
diff --git a/infrastructure/whiteboard-observability/production.values.yaml b/infrastructure/whiteboard-observability/production.values.yaml
new file mode 100644
index 00000000..f3ab5686
--- /dev/null
+++ b/infrastructure/whiteboard-observability/production.values.yaml
@@ -0,0 +1,92 @@
+grafana:
+  service:
+    port: 3000  # same port number the Grafana ingress rule in this file targets
+  rbac:
+    create: false
+    namespaced: true
+  adminUser: admin
+  adminPassword: admin  # NOTE(review): default credentials committed in plain text while Grafana is reachable through the ingress in this file; move to a Secret
+  extraVolumes:  # ConfigMaps rendered by templates/grafana-configmap.yaml
+    - name: grafana-datasources-config
+      configMap:
+        name: grafana-datasources-configmap
+    - name: grafana-dashboards-config
+      configMap:
+        name: grafana-dashboards-configmap
+  extraVolumeMounts:  # mounted onto the Grafana provisioning directories
+    - name: grafana-datasources-config
+      mountPath: /etc/grafana/provisioning/datasources
+    - name: grafana-dashboards-config
+      mountPath: /etc/grafana/provisioning/dashboards
+
+prometheus:
+  rbac:
+    create: false
+  kube-state-metrics:
+    enabled: false
+  prometheus-node-exporter:
+    enabled: false
+  server:
+    extraVolumes:  # ConfigMap rendered by templates/prometheus-configmap.yaml
+      - name: prometheus-config
+        configMap:
+          name: prometheus-configmap
+    extraVolumeMounts:
+      - name: prometheus-config
+        mountPath: /etc/prometheus  # provides prometheus.yml and alert.rules.yml from the ConfigMap
+    defaultFlagsOverride:
+      - '--config.file=/etc/prometheus/prometheus.yml'
+      - '--storage.tsdb.path=/prometheus'
+      - '--web.console.libraries=/etc/prometheus/console_libraries'
+      - '--web.console.templates=/etc/prometheus/consoles'
+      - '--web.enable-lifecycle'
+  alertmanager:
+    config:
+      global:
+        smtp_smarthost: 'whiteboard-observability-production-mailhog:1025'  # NOTE(review): hard-coded service name; presumably assumes the release is called whiteboard-observability-production - confirm
+        smtp_from: 'alertmanager@whiteboard.student.k8s.aet.cit.tum.de'
+        smtp_require_tls: false
+      route:
+        receiver: 'mailhog-alerts'  # must match a name under receivers
+        group_by: [ 'alertname' ]
+        group_wait: 10s
+        group_interval: 1m
+        repeat_interval: 30m
+      receivers:
+        - name: 'mailhog-alerts'
+          email_configs:
+            - to: 'teamserverdown@whiteboard.student.k8s.aet.cit.tum.de'
+              from: 'alertmanager@whiteboard.student.k8s.aet.cit.tum.de'
+              smarthost: 'whiteboard-observability-production-mailhog:1025'
+              send_resolved: true
+
+ingress:
+  enabled: true  # templates/ingress.yaml renders nothing when this is false
+  className: "nginx"
+  annotations:
+    cert-manager.io/cluster-issuer: "letsencrypt-prod"
+    nginx.ingress.kubernetes.io/rewrite-target: /
+    nginx.ingress.kubernetes.io/use-forwarded-headers: "true"
+    nginx.ingress.kubernetes.io/proxy-buffer-size: "8k"
+  tls:
+    hosts:  # entries are passed through tpl by the ingress template
+      - '{{ .Values.metrics.url }}'  # metrics.url is not defined in this file; presumably supplied at deploy time - confirm
+      - "mailhog.whiteboard.student.k8s.aet.cit.tum.de"
+    secretName: '{{ .Values.namespace }}-whiteboard-observability-devops25-tls'  # namespace is likewise not defined in this file
+  rules:
+    - host: '{{ .Values.metrics.url }}'
+      paths:
+        - path: /
+          pathType: Prefix
+          service:
+            name: '{{ printf "%s-grafana" .Release.Name }}'
+            port:
+              number: 3000  # matches grafana.service.port
+    - host: "mailhog.whiteboard.student.k8s.aet.cit.tum.de"
+      paths:
+        - path: /
+          pathType: Prefix
+          service:
+            name: '{{ printf "%s-mailhog" .Release.Name }}'
+            port:
+              number: 8025
\ No newline at end of file
diff --git a/infrastructure/whiteboard-observability/staging.values.yaml b/infrastructure/whiteboard-observability/staging.values.yaml
new file mode 100644
index 00000000..e772c1bb
--- /dev/null
+++ b/infrastructure/whiteboard-observability/staging.values.yaml
@@ -0,0 +1,92 @@
+grafana:
+  service:
+    port: 3000  # same port number the Grafana ingress rule in this file targets
+  rbac:
+    create: false
+    namespaced: true
+  adminUser: admin
+  adminPassword: admin  # NOTE(review): default credentials committed in plain text while Grafana is reachable through the ingress in this file; move to a Secret
+  extraVolumes:  # ConfigMaps rendered by templates/grafana-configmap.yaml
+    - name: grafana-datasources-config
+      configMap:
+        name: grafana-datasources-configmap
+    - name: grafana-dashboards-config
+      configMap:
+        name: grafana-dashboards-configmap
+  extraVolumeMounts:  # mounted onto the Grafana provisioning directories
+    - name: grafana-datasources-config
+      mountPath: /etc/grafana/provisioning/datasources
+    - name: grafana-dashboards-config
+      mountPath: /etc/grafana/provisioning/dashboards
+
+prometheus:
+  rbac:
+    create: false
+  kube-state-metrics:
+    enabled: false
+  prometheus-node-exporter:
+    enabled: false
+  server:
+    extraVolumes:  # ConfigMap rendered by templates/prometheus-configmap.yaml
+      - name: prometheus-config
+        configMap:
+          name: prometheus-configmap
+    extraVolumeMounts:
+      - name: prometheus-config
+        mountPath: /etc/prometheus  # provides prometheus.yml and alert.rules.yml from the ConfigMap
+    defaultFlagsOverride:
+      - '--config.file=/etc/prometheus/prometheus.yml'
+      - '--storage.tsdb.path=/prometheus'
+      - '--web.console.libraries=/etc/prometheus/console_libraries'
+      - '--web.console.templates=/etc/prometheus/consoles'
+      - '--web.enable-lifecycle'
+  alertmanager:
+    config:
+      global:
+        smtp_smarthost: 'whiteboard-observability-staging-mailhog:1025'  # NOTE(review): hard-coded service name; presumably assumes the release is called whiteboard-observability-staging - confirm
+        smtp_from: 'alertmanager@staging.whiteboard.student.k8s.aet.cit.tum.de'
+        smtp_require_tls: false
+      route:
+        receiver: 'mailhog-alerts'  # must match a name under receivers
+        group_by: [ 'alertname' ]
+        group_wait: 10s
+        group_interval: 1m
+        repeat_interval: 30m
+      receivers:
+        - name: 'mailhog-alerts'
+          email_configs:
+            - to: 'teamserverdown@staging.whiteboard.student.k8s.aet.cit.tum.de'
+              from: 'alertmanager@staging.whiteboard.student.k8s.aet.cit.tum.de'
+              smarthost: 'whiteboard-observability-staging-mailhog:1025'
+              send_resolved: true
+
+ingress:
+  enabled: true  # templates/ingress.yaml renders nothing when this is false
+  className: "nginx"
+  annotations:
+    cert-manager.io/cluster-issuer: "letsencrypt-prod"
+    nginx.ingress.kubernetes.io/rewrite-target: /
+    nginx.ingress.kubernetes.io/use-forwarded-headers: "true"
+    nginx.ingress.kubernetes.io/proxy-buffer-size: "8k"
+  tls:
+    hosts:  # entries are passed through tpl by the ingress template
+      - '{{ .Values.metrics.url }}'  # metrics.url is not defined in this file; presumably supplied at deploy time - confirm
+      - "staging.mailhog.whiteboard.student.k8s.aet.cit.tum.de"
+    secretName: '{{ .Values.namespace }}-whiteboard-observability-devops25-tls'  # namespace is likewise not defined in this file
+  rules:
+    - host: '{{ .Values.metrics.url }}'
+      paths:
+        - path: /
+          pathType: Prefix
+          service:
+            name: '{{ printf "%s-grafana" .Release.Name }}'
+            port:
+              number: 3000  # matches grafana.service.port
+    - host: "staging.mailhog.whiteboard.student.k8s.aet.cit.tum.de"
+      paths:
+        - path: /
+          pathType: Prefix
+          service:
+            name: '{{ printf "%s-mailhog" .Release.Name }}'
+            port:
+              number: 8025
\ No newline at end of file
diff --git a/infrastructure/whiteboard-observability/templates/grafana-configmap.yaml b/infrastructure/whiteboard-observability/templates/grafana-configmap.yaml
new file mode 100644
index 00000000..3ce9f9b0
--- /dev/null
+++ b/infrastructure/whiteboard-observability/templates/grafana-configmap.yaml
@@ -0,0 +1,24 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: grafana-datasources-configmap  # referenced by grafana.extraVolumes in the values files
+data:  # the datasource file is rendered with tpl so its release-name expression is expanded
+  prometheus.yml: |-
+{{- $dataSourceConfig := .Files.Get "files/grafana/provisioning/datasources/prometheus.yml" }}
+{{ tpl $dataSourceConfig . | indent 4 }}
+
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: grafana-dashboards-configmap  # referenced by grafana.extraVolumes in the values files
+data:  # dashboards.yml goes through tpl; the JSON dashboards are embedded verbatim, presumably because their legend placeholders use the same double-brace syntax
+  dashboards.yml: |-
+{{- $dashboardsConfig := .Files.Get "files/grafana/provisioning/dashboards/dashboards.yml" }}
+{{ tpl $dashboardsConfig . | indent 4 }}
+  server-dashboard.json: |-
+{{ .Files.Get "files/grafana/provisioning/dashboards/server-dashboard.json" | indent 4 }}
+  genai-dashboard.json: |-
+{{ .Files.Get "files/grafana/provisioning/dashboards/genai-dashboard.json" | indent 4 }}
+  realtime-dashboard.json: |-
+{{ .Files.Get "files/grafana/provisioning/dashboards/realtime-dashboard.json" | indent 4 }}
\ No newline at end of file
diff --git a/infrastructure/whiteboard-observability/templates/ingress.yaml b/infrastructure/whiteboard-observability/templates/ingress.yaml
new file mode 100644
index 00000000..f7616cd6
--- /dev/null
+++ b/infrastructure/whiteboard-observability/templates/ingress.yaml
@@ -0,0 +1,40 @@
+{{- /*
+  Ingress for the observability stack. Hosts, the TLS secret name and backend
+  service names come from .Values.ingress and are passed through tpl so the
+  values files may contain template expressions.
+*/ -}}
+{{- if .Values.ingress.enabled -}}
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: "whiteboard-observability-ingress"
+  {{- $annotations := .Values.ingress.annotations | default dict }}
+  {{- if $annotations }}
+  annotations:
+    {{- toYaml $annotations | nindent 4 }}
+  {{- end }}
+spec:
+  tls:
+    - hosts:
+      {{- range .Values.ingress.tls.hosts }}
+        - {{ tpl . $ | quote }}
+      {{- end }}
+      secretName: {{ tpl .Values.ingress.tls.secretName $ | quote }}
+  {{- /* Honour ingress.className from the values; fall back to the previously hard-coded class. */}}
+  ingressClassName: {{ .Values.ingress.className | default "nginx" | quote }}
+  rules:
+    {{- range .Values.ingress.rules }}
+    - host: {{ tpl .host $ | quote }}
+      http:
+        paths:
+          {{- range .paths }}
+          - path: {{ .path | quote }}
+            pathType: {{ .pathType }}
+            backend:
+              service:
+                name: {{ tpl .service.name $ | quote }}
+                port:
+                  number: {{ .service.port.number }}
+          {{- end }}
+    {{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/infrastructure/whiteboard-observability/templates/prometheus-configmap.yaml b/infrastructure/whiteboard-observability/templates/prometheus-configmap.yaml
new file mode 100644
index 00000000..bd09c3ca
--- /dev/null
+++ b/infrastructure/whiteboard-observability/templates/prometheus-configmap.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: prometheus-configmap  # referenced by prometheus.server.extraVolumes in the values files
+data:  # prometheus.yml goes through tpl; alert.rules.yml is embedded verbatim, presumably so its $labels placeholders reach Prometheus unrendered
+  prometheus.yml: |-
+{{- $config := .Files.Get "files/prometheus/prometheus.yml" }}
+{{ tpl $config . | indent 4 }}
+  alert.rules.yml: |-
+{{ .Files.Get "files/prometheus/alert.rules.yml" | indent 4 }}
\ No newline at end of file
diff --git a/realtime/go.mod b/realtime/go.mod
index 2beec722..dc18154a 100644
--- a/realtime/go.mod
+++ b/realtime/go.mod
@@ -17,6 +17,7 @@ require (
 
 require (
 	github.com/KyleBanks/depth v1.2.1 // indirect
+	github.com/beorn7/perks v1.0.1 // indirect
 	github.com/bytedance/sonic v1.13.3 // indirect
 	github.com/bytedance/sonic/loader v0.2.4 // indirect
 	github.com/cespare/xxhash/v2 v2.3.0 // indirect
@@ -42,8 +43,13 @@ require (
 	github.com/mattn/go-isatty v0.0.20 // indirect
 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
 	github.com/modern-go/reflect2 v1.0.2 // indirect
+	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
 	github.com/pelletier/go-toml/v2 v2.2.4 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
+	github.com/prometheus/client_golang v1.22.0 // indirect
+	github.com/prometheus/client_model v0.6.1 // indirect
+	github.com/prometheus/common v0.62.0 // indirect
+	github.com/prometheus/procfs v0.15.1 // indirect
 	github.com/rogpeppe/go-internal v1.14.1 // indirect
 	github.com/sagikazarmark/locafero v0.7.0 // indirect
 	github.com/sourcegraph/conc v0.3.0 // indirect
diff --git a/realtime/go.sum b/realtime/go.sum
index 4c1b29a8..5c224bc6 100644
--- a/realtime/go.sum
+++ b/realtime/go.sum
@@ -1,5 +1,7 @@
 github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
 github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
+github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
+github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
 github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
 github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
 github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
@@ -83,10 +85,20 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w
 github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
 github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
 github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
 github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4=
 github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q=
+github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0=
+github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
+github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
+github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ2Io=
+github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I=
+github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
+github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
 github.com/redis/go-redis/v9 v9.11.0 h1:E3S08Gl/nJNn5vkxd2i78wZxWAPNZgUNTp8WIJUAiIs=
 github.com/redis/go-redis/v9 v9.11.0/go.mod h1:huWgSWd8mW6+m0VPhJjSSQ+d6Nh1VICQ6Q5lHuCH/Iw=
 github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
diff --git a/realtime/pkg/api/handler/whiteboard.go b/realtime/pkg/api/handler/whiteboard.go
index f72240ce..ac5fafb1 100644
--- a/realtime/pkg/api/handler/whiteboard.go
+++ b/realtime/pkg/api/handler/whiteboard.go
@@ -2,20 +2,24 @@ package handler
 
 import (
 	"context"
+	"github.com/AET-DevOps25/team-server-down/pkg/api/metrics"
 	"github.com/AET-DevOps25/team-server-down/pkg/mq"
 	"github.com/gin-gonic/gin"
 	"github.com/gorilla/websocket"
 	"log"
 	"net/http"
+	"time"
 )
 
 type WhiteboardHandler struct {
-	mq *mq.RedisMQ
+	mq      *mq.RedisMQ
+	metrics *metrics.Metrics
 }
 
-func NewWhiteboardHandler(redisMQ *mq.RedisMQ) *WhiteboardHandler {
+func NewWhiteboardHandler(redisMQ *mq.RedisMQ, metrics *metrics.Metrics) *WhiteboardHandler {
 	return &WhiteboardHandler{
-		redisMQ,
+		mq:      redisMQ,
+		metrics: metrics,
 	}
 }
 
@@ -33,10 +37,20 @@ func (wh *WhiteboardHandler) GetWhiteboardEvents(c *gin.Context) {
 	conn, err := upgrader.Upgrade(c.Writer, c.Request, nil)
 	if err != nil {
 		log.Printf("WebSocket upgrade failed: %v", err)
+		wh.metrics.WebsocketUpgradeErrors.Inc()
 		return
 	}
 	defer conn.Close()
 
+	wh.metrics.WebsocketConnectionsActive.Inc()
+	defer wh.metrics.WebsocketConnectionsActive.Dec()
+
+	start := time.Now()
+	defer func() {
+		duration := time.Since(start).Seconds()
+		wh.metrics.WebsocketConnectionDuration.Observe(duration)
+	}()
+
 	ctx, cancel := context.WithCancel(c.Request.Context())
 	defer cancel()
 
@@ -79,9 +93,11 @@ func (wh *WhiteboardHandler) GetWhiteboardEvents(c *gin.Context) {
 				return
 			}
 			if err := conn.WriteMessage(websocket.TextMessage, msg); err != nil {
+				wh.metrics.WebsocketWriteErrors.Inc()
 				cancel()
 				return
 			}
+			wh.metrics.WebsocketSentMessages.Inc()
 		}
 	}
 }
@@ -91,15 +107,29 @@ func (wh *WhiteboardHandler) PublishWhiteboardEvents(c *gin.Context) {
 
 	conn, err := upgrader.Upgrade(c.Writer, c.Request, nil)
 	if err != nil {
+		log.Printf("WebSocket upgrade failed: %v", err)
+		wh.metrics.WebsocketUpgradeErrors.Inc()
 		return
 	}
 	defer conn.Close()
 
+	wh.metrics.WebsocketConnectionsActive.Inc()
+	defer wh.metrics.WebsocketConnectionsActive.Dec()
+
+	start := time.Now()
+	defer func() {
+		duration := time.Since(start).Seconds()
+		wh.metrics.WebsocketConnectionDuration.Observe(duration)
+	}()
+
 	for {
 		_, message, err := conn.ReadMessage()
 		if err != nil {
+			wh.metrics.WebsocketReadErrors.Inc()
 			break
 		}
+		wh.metrics.WebsocketReceivedMessages.Inc()
+
 		err = wh.mq.Publish(whiteboardId, string(message))
 		if err != nil {
 			log.Printf("Failed to publish message: %v", err)
diff --git a/realtime/pkg/api/metrics/metrics.go b/realtime/pkg/api/metrics/metrics.go
new file mode 100644
index 00000000..1b6c197d
--- /dev/null
+++ b/realtime/pkg/api/metrics/metrics.go
@@ -0,0 +1,82 @@
+// Package metrics declares the Prometheus collectors exposed by the realtime
+// service.
+package metrics
+
+import "github.com/prometheus/client_golang/prometheus"
+
+// Metrics bundles the WebSocket collectors so they can be passed around as a
+// single dependency: one gauge for open connections, one histogram for
+// connection lifetime, and counters for upgrade/read/write errors and for
+// sent/received messages.
+//
+// NOTE(review): Prometheus naming guidance recommends a "_total" suffix on
+// counters and a unit suffix such as "_seconds" on durations. The names are
+// left untouched here; check which dashboards and alert rules query them
+// before renaming.
+type Metrics struct {
+	WebsocketConnectionsActive  prometheus.Gauge
+	WebsocketConnectionDuration prometheus.Histogram
+	WebsocketUpgradeErrors      prometheus.Counter
+	WebsocketReadErrors         prometheus.Counter
+	WebsocketWriteErrors        prometheus.Counter
+	WebsocketSentMessages       prometheus.Counter
+	WebsocketReceivedMessages   prometheus.Counter
+}
+
+// NewMetrics builds every collector in Metrics and registers all of them with
+// reg before returning. Registration uses MustRegister, which panics when a
+// collector cannot be registered (for example on a duplicate name).
+func NewMetrics(reg *prometheus.Registry) *Metrics {
+	m := &Metrics{
+		WebsocketConnectionsActive: prometheus.NewGauge(prometheus.GaugeOpts{
+			Name: "websocket_connections_active",
+			Help: "Number of active websocket connections",
+		}),
+		WebsocketConnectionDuration: prometheus.NewHistogram(prometheus.HistogramOpts{
+			Name: "websocket_connection_duration",
+			Help: "Duration of websocket connections",
+			// Upper bucket bounds in seconds; anything longer than 30 min
+			// lands in the implicit +Inf bucket.
+			Buckets: []float64{
+				60,   // 1 min
+				120,  // 2 min
+				300,  // 5 min
+				600,  // 10 min
+				900,  // 15 min
+				1200, // 20 min
+				1800, // 30 min
+			},
+		}),
+		WebsocketUpgradeErrors: prometheus.NewCounter(prometheus.CounterOpts{
+			Name: "websocket_upgrade_errors",
+			Help: "Number of websocket upgrade errors",
+		}),
+		WebsocketReadErrors: prometheus.NewCounter(prometheus.CounterOpts{
+			Name: "websocket_read_errors",
+			Help: "Number of websocket read errors",
+		}),
+		WebsocketWriteErrors: prometheus.NewCounter(prometheus.CounterOpts{
+			Name: "websocket_write_errors",
+			Help: "Number of websocket write errors",
+		}),
+		WebsocketSentMessages: prometheus.NewCounter(prometheus.CounterOpts{
+			Name: "websocket_sent_messages",
+			Help: "Number of sent websocket messages",
+		}),
+		WebsocketReceivedMessages: prometheus.NewCounter(prometheus.CounterOpts{
+			Name: "websocket_received_messages",
+			Help: "Number of received websocket messages",
+		}),
+	}
+
+	reg.MustRegister(
+		m.WebsocketConnectionsActive,
+		m.WebsocketConnectionDuration,
+		m.WebsocketUpgradeErrors,
+		m.WebsocketReadErrors,
+		m.WebsocketWriteErrors,
+		m.WebsocketSentMessages,
+		m.WebsocketReceivedMessages,
+	)
+	return m
+}
diff --git a/realtime/pkg/api/metrics/provider.go b/realtime/pkg/api/metrics/provider.go
new file mode 100644
index 00000000..a9839105
--- /dev/null
+++ b/realtime/pkg/api/metrics/provider.go
@@ -0,0 +1,15 @@
+package metrics
+
+import "github.com/prometheus/client_golang/prometheus"
+
+// ProvideRegistry returns a freshly created Prometheus registry. It is listed
+// as a provider in the wire injector in pkg/di.
+func ProvideRegistry() *prometheus.Registry {
+	return prometheus.NewRegistry()
+}
+
+// ProvideMetrics is the wire provider for Metrics; it delegates to NewMetrics,
+// which registers the collectors with reg.
+func ProvideMetrics(reg *prometheus.Registry) *Metrics {
+	return NewMetrics(reg)
+}
diff --git a/realtime/pkg/api/server.go b/realtime/pkg/api/server.go
index 6b0965e1..a78ad579 100644
--- a/realtime/pkg/api/server.go
+++ b/realtime/pkg/api/server.go
@@ -4,6 +4,8 @@ import (
 	_ "github.com/AET-DevOps25/team-server-down/cmd/api/docs"
 	"github.com/AET-DevOps25/team-server-down/pkg/api/handler"
 	"github.com/gin-gonic/gin"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
 	swaggerFiles "github.com/swaggo/files"
 	ginSwagger "github.com/swaggo/gin-swagger"
 )
@@ -12,14 +14,20 @@ type Server struct {
 	engine *gin.Engine
 }
 
-func NewServer(rootHandler *handler.RootHandler, whiteboardHandler *handler.WhiteboardHandler) *Server {
+func NewServer(
+	rootHandler *handler.RootHandler,
+	whiteboardHandler *handler.WhiteboardHandler,
+	reg *prometheus.Registry,
+) *Server {
 	engine := gin.New()
 
-	engine.Use(gin.Logger())
+	engine.Use(gin.Logger(), gin.Recovery())
 
 	engine.GET("/", rootHandler.GetRoot)
 	engine.GET("/swagger/*any", ginSwagger.WrapHandler(swaggerFiles.Handler))
 
+	engine.GET("/metrics", gin.WrapH(promhttp.HandlerFor(reg, promhttp.HandlerOpts{})))
+
 	engine.GET("/ws/whiteboard/:whiteboardId/subscribe", whiteboardHandler.GetWhiteboardEvents)
 	engine.GET("/ws/whiteboard/:whiteboardId/publish", whiteboardHandler.PublishWhiteboardEvents)
 	return &Server{engine: engine}
diff --git a/realtime/pkg/di/wire.go b/realtime/pkg/di/wire.go
index cf875224..684ba831 100644
--- a/realtime/pkg/di/wire.go
+++ b/realtime/pkg/di/wire.go
@@ -6,6 +6,7 @@ package di
 import (
 	http "github.com/AET-DevOps25/team-server-down/pkg/api"
 	"github.com/AET-DevOps25/team-server-down/pkg/api/handler"
+	"github.com/AET-DevOps25/team-server-down/pkg/api/metrics"
 	"github.com/AET-DevOps25/team-server-down/pkg/config"
 	"github.com/AET-DevOps25/team-server-down/pkg/mq"
 	"github.com/google/wire"
@@ -17,6 +18,8 @@ func InitializeAPI(cfg config.Config) (*http.Server, error) {
 		handler.NewRootHandler,
 		handler.NewWhiteboardHandler,
 		mq.NewRedisMQ,
+		metrics.ProvideRegistry,
+		metrics.ProvideMetrics,
 	)
 
 	return &http.Server{}, nil
diff --git a/realtime/pkg/di/wire_gen.go b/realtime/pkg/di/wire_gen.go
index 96a6092b..736234d1 100644
--- a/realtime/pkg/di/wire_gen.go
+++ b/realtime/pkg/di/wire_gen.go
@@ -9,6 +9,7 @@ package di
 import (
 	"github.com/AET-DevOps25/team-server-down/pkg/api"
 	"github.com/AET-DevOps25/team-server-down/pkg/api/handler"
+	"github.com/AET-DevOps25/team-server-down/pkg/api/metrics"
 	"github.com/AET-DevOps25/team-server-down/pkg/config"
 	"github.com/AET-DevOps25/team-server-down/pkg/mq"
 )
@@ -18,7 +19,9 @@ import (
 func InitializeAPI(cfg config.Config) (*http.Server, error) {
 	rootHandler := handler.NewRootHandler()
 	redisMQ := mq.NewRedisMQ(cfg)
-	whiteboardHandler := handler.NewWhiteboardHandler(redisMQ)
-	server := http.NewServer(rootHandler, whiteboardHandler)
+	registry := metrics.ProvideRegistry()
+	metricsMetrics := metrics.ProvideMetrics(registry)
+	whiteboardHandler := handler.NewWhiteboardHandler(redisMQ, metricsMetrics)
+	server := http.NewServer(rootHandler, whiteboardHandler, registry)
 	return server, nil
 }
diff --git a/server/build.gradle b/server/build.gradle
index ca2cb201..0209c94f 100644
--- a/server/build.gradle
+++ b/server/build.gradle
@@ -32,6 +32,9 @@ dependencies {
     implementation 'org.springframework.boot:spring-boot-starter-security'
     implementation 'org.hibernate.validator:hibernate-validator'
     implementation 'com.auth0:java-jwt:4.5.0'
+    implementation 'org.springframework.boot:spring-boot-starter-actuator'
+    implementation 'io.micrometer:micrometer-core'
+    implementation 'io.micrometer:micrometer-registry-prometheus'
     testImplementation "org.mockito:mockito-core"
     testImplementation "org.mockito:mockito-junit-jupiter"
     testImplementation 'org.springframework.boot:spring-boot-starter-test'
diff --git a/server/src/main/java/de/tum/cit/aet/devops/teamserverdown/security/JWTAuthenticationFilter.java b/server/src/main/java/de/tum/cit/aet/devops/teamserverdown/security/JWTAuthenticationFilter.java
index 51e12d9b..452d6cb8 100644
--- a/server/src/main/java/de/tum/cit/aet/devops/teamserverdown/security/JWTAuthenticationFilter.java
+++ b/server/src/main/java/de/tum/cit/aet/devops/teamserverdown/security/JWTAuthenticationFilter.java
@@ -51,6 +51,10 @@ protected void doFilterInternal(
   @Override
   protected boolean shouldNotFilter(HttpServletRequest request) {
     String path = request.getServletPath();
-    return path.startsWith("/v3/api-docs") || path.startsWith("/swagger-ui");
+    // Bypass JWT only for API docs, Swagger UI and the Prometheus scrape endpoint;
+    // every other actuator path still goes through this filter.
+    return path.startsWith("/v3/api-docs")
+        || path.startsWith("/swagger-ui")
+        || path.startsWith("/actuator/prometheus");
   }
 }
diff --git a/server/src/main/resources/application.yaml b/server/src/main/resources/application.yaml
index 868c171a..b0e5bd1d 100644
--- a/server/src/main/resources/application.yaml
+++ b/server/src/main/resources/application.yaml
@@ -24,4 +24,15 @@ spring:
         dialect: org.hibernate.dialect.PostgreSQLDialect
   flyway:
     enabled: true
-    validate-on-migrate: true
\ No newline at end of file
+    validate-on-migrate: true
+
+# Actuator: expose only the Prometheus scrape endpoint over HTTP.
+management:
+  endpoints:
+    web:
+      exposure:
+        include:
+          - prometheus
+  endpoint:
+    prometheus:
+      access: unrestricted