Skip to content

Commit 6df25a4

Browse files
Merge pull request #190 from DrDroidLab/prateek/fix/memoryissue
Fixed deep-copy memory leak and updated worker count
2 parents 305811b + 63717bf commit 6df25a4

File tree

3 files changed

+25
-29
lines changed

3 files changed

+25
-29
lines changed

helm/charts/celery_worker/templates/deployment.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ spec:
6868
- name: CELERY_QUEUE
6969
value: "celery"
7070
- name: CELERY_WORKER_COUNT
71-
value: "4"
71+
value: "2"
7272
- name: CELERY_BROKER_CONNECTION_RETRY_ON_STARTUP
7373
value: "true"
7474
- name: CELERY_BROKER_CONNECTION_MAX_RETRIES
@@ -127,7 +127,7 @@ spec:
127127
- name: CELERY_QUEUE
128128
value: "exec"
129129
- name: CELERY_WORKER_COUNT
130-
value: "4"
130+
value: "3"
131131
- name: CELERY_BROKER_CONNECTION_RETRY_ON_STARTUP
132132
value: "true"
133133
- name: CELERY_BROKER_CONNECTION_MAX_RETRIES
@@ -186,7 +186,7 @@ spec:
186186
- name: CELERY_QUEUE
187187
value: "asset_extraction"
188188
- name: CELERY_WORKER_COUNT
189-
value: "3"
189+
value: "1"
190190
- name: CELERY_BROKER_CONNECTION_RETRY_ON_STARTUP
191191
value: "true"
192192
- name: CELERY_BROKER_CONNECTION_MAX_RETRIES

helm/values.yaml

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -48,29 +48,29 @@ celery-worker:
4848
# Resource configuration for celery worker containers
4949
resources:
5050
scheduler:
51-
# Lightweight task scheduler - handles task distribution and scheduling
51+
# Lightweight task scheduler - handles polling tasks (reduced from 4 to 2 workers)
5252
requests:
53-
cpu: "200m"
53+
cpu: "100m"
5454
memory: "256Mi"
5555
limits:
56-
cpu: "800m"
57-
memory: "896Mi"
56+
cpu: "500m"
57+
memory: "1000Mi"
5858
taskExecutor:
59-
# Task executor for high-priority tasks - handles execution of critical tasks
59+
# Task executor for high-priority tasks - handles execution of critical tasks (reduced from 4 to 3 workers)
6060
requests:
61-
cpu: "250m"
62-
memory: "256Mi"
61+
cpu: "200m"
62+
memory: "512Mi"
6363
limits:
64-
cpu: "1500m"
65-
memory: "2Gi"
64+
cpu: "1000m"
65+
memory: "1536Mi"
6666
assetExtractor:
67-
# Asset extraction worker - handles long-running asset discovery and extraction tasks
67+
# Asset extraction worker - handles long-running asset discovery (reduced from 3 to 1 worker)
6868
requests:
69-
cpu: "250m"
70-
memory: "256Mi"
69+
cpu: "200m"
70+
memory: "512Mi"
7171
limits:
72-
cpu: "1500m"
73-
memory: "2Gi"
72+
cpu: "1000m"
73+
memory: "1536Mi"
7474

7575
redis:
76-
image: redis:7.2
76+
image: redis:7.2

playbooks_engine/tasks.py

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import logging
2-
import copy
32

43
import requests
54
from celery import shared_task
@@ -94,10 +93,9 @@ def _execute_asset_refresh_task(playbook_task_execution_log):
9493
logger.error(f'_execute_asset_refresh_task:: Error during asset refresh: {str(e)}')
9594
result = PlaybookTaskResult(error=StringValue(value=str(e)))
9695

97-
# Create processed log in the same format as normal playbook tasks
98-
processed_log = copy.deepcopy(playbook_task_execution_log)
96+
# Create processed log in the same format as normal playbook tasks (shallow copy is sufficient)
9997
result_dict = proto_to_dict(result)
100-
processed_log['result'] = result_dict
98+
processed_log = {**playbook_task_execution_log, 'result': result_dict}
10199

102100
# Send results using existing playbook infrastructure
103101
drd_cloud_host = settings.DRD_CLOUD_API_HOST
@@ -261,19 +259,17 @@ def execute_task_and_send_result(playbook_task_execution_log):
261259
try:
262260
# Execute task
263261
results = _execute_playbook_task(task_proto, time_range, global_variable_set)
264-
265-
# Create processed logs
262+
263+
# Create processed logs (using shallow copy - deep copy is unnecessary since we only add 'result')
266264
for result in results:
267-
current_log_copy = copy.deepcopy(playbook_task_execution_log)
268265
result_dict = proto_to_dict(result)
269-
current_log_copy['result'] = result_dict
266+
current_log_copy = {**playbook_task_execution_log, 'result': result_dict}
270267
processed_logs.append(current_log_copy)
271-
268+
272269
except Exception as e:
273270
logger.error(f'execute_task_and_send_result:: Error while executing tasks: {str(e)}')
274-
current_log_copy = copy.deepcopy(playbook_task_execution_log)
275271
error_result = PlaybookTaskResult(error=StringValue(value=str(e)))
276-
current_log_copy['result'] = proto_to_dict(error_result)
272+
current_log_copy = {**playbook_task_execution_log, 'result': proto_to_dict(error_result)}
277273
processed_logs.append(current_log_copy)
278274

279275
# Send results

0 commit comments

Comments (0)