fix(KAR-577): too many requests error in vector pod (#7614)

olegbet · obetsun · web-flow · commit ee4eec92d2e1 · 2025-08-14T11:17:07.000Z
* fix(KAR-577): too many requests error in vector pod

Signed-off-by: obetsun <obetsun@redhat.com>

rh-pre-commit.version: 2.3.2
rh-pre-commit.check-secrets: ENABLED

* fix resources requests and reduce script behavior

Signed-off-by: obetsun <obetsun@redhat.com>

rh-pre-commit.version: 2.3.2
rh-pre-commit.check-secrets: ENABLED

---------

Co-authored-by: obetsun <obetsun@redhat.com>
diff --git a/components/vector-kubearchive-log-collector/development/loki-helm-values.yaml b/components/vector-kubearchive-log-collector/development/loki-helm-values.yaml
@@ -55,6 +55,15 @@ loki:
         index:
           prefix: loki_index_
           period: 24h
+  # Configure ingestion limits to handle Vector's data volume
+  limits_config:
+    ingestion_rate_mb: 16          # Increase from default 4MB to 16MB per second
+    ingestion_burst_size_mb: 32    # Allow bursts up to 32MB
+    max_line_size: 256KB           # Allow larger log lines
+    max_streams_per_user: 10000    # Allow more streams per tenant
+    max_global_streams_per_user: 50000
+    reject_old_samples: false      # Don't reject old samples
+    reject_old_samples_max_age: 168h  # 7 days; only enforced when reject_old_samples is true
   # Required storage configuration for Helm chart
   storage:
     type: filesystem
diff --git a/components/vector-kubearchive-log-collector/development/vector-helm-values.yaml b/components/vector-kubearchive-log-collector/development/vector-helm-values.yaml
@@ -118,7 +118,8 @@ customConfig:
       # Direct connection to Loki service (no gateway)
       endpoint: "http://vector-kubearchive-log-collector-loki.product-kubearchive-logging.svc.cluster.local:3100"
       encoding:
-        codec: "json"
+        codec: "text"  # Use text instead of json to avoid metadata issues
+        except_fields: ["tmp"]  # Exclude temporary fields
       auth:
         strategy: "basic"
         user: "${LOKI_USERNAME}"
@@ -127,10 +128,12 @@ customConfig:
       request:
         headers:
           X-Scope-OrgID: kubearchive
+        timeout_secs: 60  # Per-request timeout in seconds
       batch:
-        max_bytes: 10485760
-        timeout_secs: 300 
-      compression: "none"
+        max_bytes: 2097152  # Reduce to 2 MiB, well below Loki's ingestion rate limit
+        max_events: 1000   # Limit number of events per batch
+        timeout_secs: 10   # Send batches more frequently
+      compression: "gzip"  # Enable compression to reduce data size
       labels:
         job: "vector"
         pod_id: "{{`{{ pod_id }}`}}"
@@ -140,7 +143,7 @@ customConfig:
       buffer:
         type: "memory"
         max_events: 10000
-        when_full: "block"
+        when_full: "drop_newest"  # Drop incoming events when the buffer is full instead of blocking (logs can be lost)
 env:
   - name: LOKI_USERNAME
     valueFrom:
diff --git a/components/vector-kubearchive-log-collector/staging/stone-stg-rh01/loki-helm-values.yaml b/components/vector-kubearchive-log-collector/staging/stone-stg-rh01/loki-helm-values.yaml
@@ -53,6 +53,15 @@ loki:
         index:
           prefix: loki_index_
           period: 24h
+  # Configure ingestion limits to handle Vector's data volume
+  limits_config:
+    ingestion_rate_mb: 16          # Increase from default 4MB to 16MB per second
+    ingestion_burst_size_mb: 32    # Allow bursts up to 32MB
+    max_line_size: 256KB           # Allow larger log lines
+    max_streams_per_user: 10000    # Allow more streams per tenant
+    max_global_streams_per_user: 50000
+    reject_old_samples: false      # Don't reject old samples
+    reject_old_samples_max_age: 168h  # 7 days; only enforced when reject_old_samples is true
   # Required storage configuration for Helm chart
   storage:
     type: s3
diff --git a/components/vector-kubearchive-log-collector/staging/stone-stg-rh01/vector-helm-values.yaml b/components/vector-kubearchive-log-collector/staging/stone-stg-rh01/vector-helm-values.yaml
@@ -2,11 +2,11 @@
 role: Agent
 resources:
   requests:
-    cpu: 200m
-    memory: 1024Mi
+    cpu: 512m
+    memory: 4096Mi
   limits:
-    cpu: 1000m
-    memory: 2048Mi
+    cpu: 2000m
+    memory: 4096Mi
 customConfig:
   data_dir: /vector-data-dir
   api:
@@ -20,6 +20,7 @@ customConfig:
       glob_minimum_cooldown_ms: 500
       max_line_bytes: 3145728
       auto_partial_merge: true
+      extra_label_selector: "app.kubernetes.io/managed-by in (tekton-pipelines,pipelinesascode.tekton.dev)"
   transforms:
     reduce_events:
       type: reduce
@@ -92,24 +93,9 @@ customConfig:
           .log_type = "application"
         }
         # --- End: Cronjob Specific Handling ---
-        # Handling general Kubernetes labels
-        if exists(.tmp.kubernetes.pod_labels) {
-          .pod_labels = .tmp.kubernetes.pod_labels
-        } else {
-          .pod_labels = "no_labels"
-        }
-        # General message field handling
-        if exists(.tmp.message) {
-          .message = to_string(del(.tmp.message)) ?? "no_message"
-        } else {
-          .message = "no_message"
-        }
-        # Basic data sanitization to prevent 400 errors
-        # Truncate very long messages
-        if length(.message) > 32768 {
-          .message = slice!(.message, 0, 32768) + "...[TRUNCATED]"
-        }
         # Clean up temporary fields
+        .container = del(.tmp.kubernetes.container_name)
+        .message = del(.tmp.message)
         del(.tmp)
   sinks:
     loki: