Skip to content

Commit 06537b4

Browse files
committed
Add configurable concurrency parameters to synthesis and evaluation services
- Added max_concurrent_topics field to SynthesisRequest (1-100, default: 5)
- Added max_workers field to EvaluationRequest (1-100, default: 4)
- Updated all synthesis services to use request.max_concurrent_topics
- Updated all evaluator services to use request.max_workers
- Added validation constraints to prevent invalid values
- Updated example payloads in main.py to include new parameters
- Services now respect API-configurable concurrency limits while maintaining defaults

This allows users to optimize performance based on their infrastructure and workload requirements via API parameters.
1 parent 00eb07a commit 06537b4

File tree

6 files changed

+35
-14
lines changed

6 files changed

+35
-14
lines changed

app/main.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1262,6 +1262,7 @@ async def get_example_payloads(use_case:UseCase):
12621262
"technique": "sft",
12631263
"topics": ["python_basics", "data_structures"],
12641264
"is_demo": True,
1265+
"max_concurrent_topics": 5,
12651266
"examples": [
12661267
{
12671268
"question": "How do you create a list in Python and add elements to it?",
@@ -1288,6 +1289,7 @@ async def get_example_payloads(use_case:UseCase):
12881289
"technique": "sft",
12891290
"topics": ["basic_queries", "joins"],
12901291
"is_demo": True,
1292+
"max_concurrent_topics": 5,
12911293
"schema": "CREATE TABLE users (id INT PRIMARY KEY, name VARCHAR(100), email VARCHAR(255));\nCREATE TABLE orders (id INT PRIMARY KEY, user_id INT, amount DECIMAL(10,2), FOREIGN KEY (user_id) REFERENCES users(id));",
12921294
"examples":[
12931295
{
@@ -1316,6 +1318,7 @@ async def get_example_payloads(use_case:UseCase):
13161318
"topics": ["topic 1", "topic 2"],
13171319
"custom_prompt": "Give your instructions here",
13181320
"is_demo": True,
1321+
"max_concurrent_topics": 5,
13191322

13201323
"examples":[
13211324
{

app/models/request_models.py

Lines changed: 16 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -138,7 +138,13 @@ class SynthesisRequest(BaseModel):
138138
example_path: Optional[str] = None
139139
schema: Optional[str] = None # Added schema field
140140
custom_prompt: Optional[str] = None
141-
display_name: Optional[str] = None
141+
display_name: Optional[str] = None
142+
max_concurrent_topics: Optional[int] = Field(
143+
default=5,
144+
ge=1,
145+
le=100,
146+
description="Maximum number of concurrent topics to process (1-100)"
147+
)
142148

143149
# Optional model parameters with defaults
144150
model_params: Optional[ModelParameters] = Field(
@@ -156,7 +162,7 @@ class SynthesisRequest(BaseModel):
156162
"technique": "sft",
157163
"topics": ["python_basics", "data_structures"],
158164
"is_demo": True,
159-
165+
"max_concurrent_topics": 5
160166

161167
}
162168
}
@@ -209,6 +215,12 @@ class EvaluationRequest(BaseModel):
209215
display_name: Optional[str] = None
210216
output_key: Optional[str] = 'Prompt'
211217
output_value: Optional[str] = 'Completion'
218+
max_workers: Optional[int] = Field(
219+
default=4,
220+
ge=1,
221+
le=100,
222+
description="Maximum number of worker threads for parallel evaluation (1-100)"
223+
)
212224

213225
# Export configuration
214226
export_type: str = "local" # "local" or "s3"
@@ -227,7 +239,8 @@ class EvaluationRequest(BaseModel):
227239
"inference_type": "aws_bedrock",
228240
"import_path": "qa_pairs_llama3-1-70b-instruct-v1:0_20241114_212837_test.json",
229241
"import_type": "local",
230-
"export_type":"local"
242+
"export_type":"local",
243+
"max_workers": 4
231244

232245
}
233246
}

app/services/evaluator_legacy_service.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ class EvaluatorLegacyService:
2424
def __init__(self, max_workers: int = 4):
2525
self.bedrock_client = get_bedrock_client()
2626
self.db = DatabaseManager()
27-
self.max_workers = max_workers
27+
self.max_workers = max_workers # Default max workers (configurable via request)
2828
self.guard = ContentGuardrail()
2929
self._setup_logging()
3030

@@ -155,7 +155,8 @@ def evaluate_topic(self, topic: str, qa_pairs: List[Dict], model_handler, reques
155155
failed_pairs = []
156156

157157
try:
158-
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
158+
max_workers = request.max_workers or self.max_workers
159+
with ThreadPoolExecutor(max_workers=max_workers) as executor:
159160
try:
160161
evaluate_func = partial(
161162
self.evaluate_single_pair,
@@ -287,8 +288,9 @@ def evaluate_results(self, request: EvaluationRequest, job_name=None,is_demo: bo
287288
# Add to appropriate topic list
288289
transformed_data['results'][topic].append(qa_pair)
289290

290-
self.logger.info(f"Processing {len(transformed_data['results'])} topics with {self.max_workers} workers")
291-
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
291+
max_workers = request.max_workers or self.max_workers
292+
self.logger.info(f"Processing {len(transformed_data['results'])} topics with {max_workers} workers")
293+
with ThreadPoolExecutor(max_workers=max_workers) as executor:
292294
future_to_topic = {
293295
executor.submit(
294296
self.evaluate_topic,

app/services/evaluator_service.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -21,10 +21,10 @@
2121
class EvaluatorService:
2222
"""Service for evaluating freeform data rows using Claude with parallel processing (Freeform technique only)"""
2323

24-
def __init__(self, max_workers: int = 4):
24+
def __init__(self, max_workers: int = 5):
2525
self.bedrock_client = get_bedrock_client()
2626
self.db = DatabaseManager()
27-
self.max_workers = max_workers
27+
self.max_workers = max_workers # Default max workers (configurable via request)
2828
self.guard = ContentGuardrail()
2929
self._setup_logging()
3030

@@ -143,7 +143,8 @@ def evaluate_rows(self, rows: List[Dict[str, Any]], model_handler, request: Eval
143143
failed_rows = []
144144

145145
try:
146-
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
146+
max_workers = request.max_workers or self.max_workers
147+
with ThreadPoolExecutor(max_workers=max_workers) as executor:
147148
try:
148149
evaluate_func = partial(
149150
self.evaluate_single_row,

app/services/synthesis_legacy_service.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@
3636
class SynthesisLegacyService:
3737
"""Legacy service for generating synthetic QA pairs (SFT and Custom_Workflow only)"""
3838
QUESTIONS_PER_BATCH = 5 # Maximum questions per batch
39-
MAX_CONCURRENT_TOPICS = 5 # Limit concurrent I/O operations
39+
MAX_CONCURRENT_TOPICS = 5 # Default limit for concurrent I/O operations (configurable via request)
4040

4141

4242
def __init__(self):
@@ -313,7 +313,8 @@ async def generate_examples(self, request: SynthesisRequest , job_name = None, i
313313

314314
# Create thread pool
315315
loop = asyncio.get_event_loop()
316-
with ThreadPoolExecutor(max_workers=self.MAX_CONCURRENT_TOPICS) as executor:
316+
max_workers = request.max_concurrent_topics or self.MAX_CONCURRENT_TOPICS
317+
with ThreadPoolExecutor(max_workers=max_workers) as executor:
317318
topic_futures = [
318319
loop.run_in_executor(
319320
executor,

app/services/synthesis_service.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@
3636
class SynthesisService:
3737
"""Service for generating synthetic freeform data (Freeform technique only)"""
3838
QUESTIONS_PER_BATCH = 5 # Maximum questions per batch
39-
MAX_CONCURRENT_TOPICS = 5 # Limit concurrent I/O operations
39+
MAX_CONCURRENT_TOPICS = 5 # Default limit for concurrent I/O operations (configurable via request)
4040

4141

4242
def __init__(self):
@@ -368,7 +368,8 @@ async def generate_freeform(self, request: SynthesisRequest, job_name=None, is_d
368368

369369
# Create thread pool
370370
loop = asyncio.get_event_loop()
371-
with ThreadPoolExecutor(max_workers=self.MAX_CONCURRENT_TOPICS) as executor:
371+
max_workers = request.max_concurrent_topics or self.MAX_CONCURRENT_TOPICS
372+
with ThreadPoolExecutor(max_workers=max_workers) as executor:
372373
topic_futures = [
373374
loop.run_in_executor(
374375
executor,

0 commit comments

Comments (0)