fix: Mark alert block as yaml to fix syntax error (#954)

JeffLuoo · web-flow · commit cd83f1c69478 · 2025-06-10T14:37:02.000-07:00
TESTED=preview with local mkdocs
diff --git a/site-src/guides/metrics.md b/site-src/guides/metrics.md
@@ -107,54 +107,54 @@ A template alert rule is available at [alert.yaml](../../tools/alerts/alert.yaml
 
 #### High Inference Request Latency P99
 
-```
+```yaml
 alert: HighInferenceRequestLatencyP99
 expr: histogram_quantile(0.99, rate(inference_model_request_duration_seconds_bucket[5m])) > 10.0 # Adjust threshold as needed (e.g., 10.0 seconds)
 for: 5m
 annotations:
-  title: 'High latency (P99) for model {{ $labels.model_name }}'
-  description: 'The 99th percentile request duration for model {{ $labels.model_name }} and target model {{ $labels.target_model_name }} has been consistently above 10.0 seconds for 5 minutes.'
+  title: 'High latency (P99) for model {% raw %}{{ $labels.model_name }}{% endraw %}'
+  description: 'The 99th percentile request duration for model {% raw %}{{ $labels.model_name }}{% endraw %} and target model {% raw %}{{ $labels.target_model_name }}{% endraw %} has been consistently above 10.0 seconds for 5 minutes.'
 labels:
   severity: 'warning'
 ```
 
 #### High Inference Error Rate
 
-```
+```yaml
 alert: HighInferenceErrorRate
 expr: sum by (model_name) (rate(inference_model_request_error_total[5m])) / sum by (model_name) (rate(inference_model_request_total[5m])) > 0.05 # Adjust threshold as needed (e.g., 5% error rate)
 for: 5m
 annotations:
-  title: 'High error rate for model {{ $labels.model_name }}'
-  description: 'The error rate for model {{ $labels.model_name }} and target model {{ $labels.target_model_name }} has been consistently above 5% for 5 minutes.'
+  title: 'High error rate for model {% raw %}{{ $labels.model_name }}{% endraw %}'
+  description: 'The error rate for model {% raw %}{{ $labels.model_name }}{% endraw %} has been consistently above 5% for 5 minutes.'
 labels:
   severity: 'critical'
   impact: 'availability'
 ```
 
 #### High Inference Pool Queue Average Size
 
-```
+```yaml
 alert: HighInferencePoolAvgQueueSize
 expr: inference_pool_average_queue_size > 50 # Adjust threshold based on expected queue size
 for: 5m
 annotations:
-  title: 'High average queue size for inference pool {{ $labels.name }}'
-  description: 'The average number of requests pending in the queue for inference pool {{ $labels.name }} has been consistently above 50 for 5 minutes.'
+  title: 'High average queue size for inference pool {% raw %}{{ $labels.name }}{% endraw %}'
+  description: 'The average number of requests pending in the queue for inference pool {% raw %}{{ $labels.name }}{% endraw %} has been consistently above 50 for 5 minutes.'
 labels:
   severity: 'critical'
   impact: 'performance'
 ```
 
 #### High Inference Pool Average KV Cache
 
-```
+```yaml
 alert: HighInferencePoolAvgKVCacheUtilization
 expr: inference_pool_average_kv_cache_utilization > 0.9 # 90% utilization
 for: 5m
 annotations:
-  title: 'High KV cache utilization for inference pool {{ $labels.name }}'
-  description: 'The average KV cache utilization for inference pool {{ $labels.name }} has been consistently above 90% for 5 minutes, indicating potential resource exhaustion.'
+  title: 'High KV cache utilization for inference pool {% raw %}{{ $labels.name }}{% endraw %}'
+  description: 'The average KV cache utilization for inference pool {% raw %}{{ $labels.name }}{% endraw %} has been consistently above 90% for 5 minutes, indicating potential resource exhaustion.'
 labels:
   severity: 'critical'
   impact: 'resource_exhaustion'