@@ -1151,6 +1151,116 @@ def _handle_generate_data(self, user_input: str):
11511151 ]
11521152 )
11531153
def _generate_synthetic_examples(
    self, task_description: str, num_examples: int
) -> list[dict[str, Any]]:
    """Generate synthetic training examples using the connected LLM.

    Builds a prompt asking for a JSON array of examples, sends it through
    ``self.llm_connector.generate_response`` and parses the reply with
    ``self._extract_json_from_response``.

    Args:
        task_description: Description of the task the examples are for.
        num_examples: Number of examples requested in the prompt. The size
            of the returned list is whatever the model produced; it is not
            enforced here.

    Returns:
        The dictionary entries of the parsed JSON array. The prompt asks for
        'input' and 'output' keys, but the keys are not validated here.
        An empty list is returned when no model is connected, when the
        response contains no usable array, or when any step raises.
    """
    if not self.llm_connector or not self.llm_connector.current_model:
        logger.warning("No LLM connected for data generation")
        return []

    try:
        # Build prompt for data generation
        prompt = (
            f"Generate {num_examples} diverse, realistic training examples "
            f"for: {task_description} \n \n "
            "Requirements:\n "
            "1. Create varied, realistic examples that cover different scenarios\n "
            "2. Include edge cases and challenging examples\n "
            "3. Make inputs natural and outputs accurate\n "
            "4. Ensure diversity in topics, length, and complexity\n "
            "5. Format as JSON array with 'input' and 'output' keys\n \n "
            "Example format:\n "
            '[{"input": "example input text", "output": "expected output"}]\n \n '
            f"Task: {task_description} \n "
            f"Number of examples: {num_examples} \n \n "
            "Generate ONLY the JSON array, no explanations:"
        )

        # Generate with LLM
        response = self.llm_connector.generate_response(
            prompt=prompt,
            system_prompt=(
                "You are a data generation expert. "
                "Generate high-quality, diverse training examples in JSON format."
            ),
            context={},
        )

        # Extract JSON from response
        parsed = self._extract_json_from_response(response)
        if not isinstance(parsed, list):
            logger.warning("Failed to parse generated examples")
            return []

        # The model is free to emit strings or numbers inside the array;
        # only dictionaries satisfy the declared return type, and callers
        # that use ``.get()`` on each entry would otherwise crash.
        examples = [entry for entry in parsed if isinstance(entry, dict)]
        if not examples:
            logger.warning("Failed to parse generated examples")
            return []

        logger.info(f"Generated {len(examples)} examples")
        return examples

    except Exception as e:
        # Deliberate best-effort boundary: a failing LLM call must not
        # take down the interactive session, so report and return nothing.
        logger.error(f"Data generation failed: {e} ")
        return []
1211+
1212+ def _extract_json_from_response (self , response : str ) -> list [dict [str , Any ]] | None :
1213+ """Extract JSON array from LLM response."""
1214+ import json
1215+ import re
1216+
1217+ try :
1218+ # Try to find JSON array in response
1219+ json_match = re .search (r"\[[\s\S]*\]" , response )
1220+ if json_match :
1221+ json_str = json_match .group (0 )
1222+ return json .loads (json_str )
1223+
1224+ # Try parsing entire response
1225+ return json .loads (response )
1226+
1227+ except json .JSONDecodeError :
1228+ logger .warning ("Failed to parse JSON from response" )
1229+ return None
1230+
def _show_example_samples(
    self, examples: list[dict[str, Any]], task_description: str | None = None
):
    """Display a sample of generated examples.

    Prints a header, a table with at most the first five examples, and a
    line with the count of examples that were not shown.

    Args:
        examples: Generated examples; the 'input' and 'output' values of
            each entry are shown. An entry that is not a dictionary is
            shown as its own input with an empty output.
        task_description: Task the examples were generated for. When it is
            missing or empty the header omits the "for: ..." part.
    """
    from rich.markup import escape
    from rich.table import Table

    max_rows = 5

    def _clip(value: Any, limit: int) -> str:
        """Stringify, shorten to `limit` characters, and escape for Rich."""
        text = str(value)
        if len(text) > limit:
            text = text[: limit - 3] + "..."
        # Generated text may contain "[...]" sequences that Rich would
        # otherwise interpret as style tags; escape after truncating so an
        # escape sequence is never cut in half.
        return escape(text)

    console.print()
    if task_description:
        console.print(
            f"[bold cyan]Generated Examples for: {escape(str(task_description))} [/bold cyan]"
        )
    else:
        console.print("[bold cyan]Generated Examples[/bold cyan]")
    console.print()

    # Show the first few examples in a table
    table = Table(show_header=True, header_style="bold magenta")
    table.add_column("Input", style="cyan", width=50)
    table.add_column("Output", style="green", width=30)

    for example in examples[:max_rows]:
        record = example if isinstance(example, dict) else {"input": example}
        table.add_row(
            _clip(record.get("input", ""), 100),
            _clip(record.get("output", ""), 50),
        )

    console.print(table)

    hidden = len(examples) - max_rows
    if hidden > 0:
        console.print()
        console.print(f"[dim]... and {hidden} more examples[/dim]")

    console.print()
1263+
11541264 def _handle_optimize (self , user_input : str ):
11551265 """Handle optimization request via natural language - routes to /optimize command."""
11561266 # Extract arguments from natural language
@@ -1389,11 +1499,6 @@ def _handle_explain(self, user_input: str):
13891499 "twostepadapter" : "TwoStepAdapter" ,
13901500 "two-step adapter" : "TwoStepAdapter" ,
13911501 "two step" : "TwoStepAdapter" ,
1392- # Concepts
1393- "signature" : "signature" ,
1394- "signatures" : "signature" ,
1395- "module" : "module" ,
1396- "modules" : "module" ,
13971502 "rag" : "rag" ,
13981503 "retrieval" : "rag" ,
13991504 "retrieval augmented generation" : "rag" ,
@@ -3154,18 +3259,18 @@ def _show_welcome_screen(console, context, config_manager):
31543259 # Create beautiful gradient colors for the ASCII art (purple → pink → orange)
31553260 # Using exact RGB colors matching the SVG gradient
31563261 gradient_colors = [
3157- (217 , 70 , 239 ), # #d946ef - Deep purple - "DSPY" starts here
3158- (217 , 70 , 239 ), # #d946ef - Purple
3159- (192 , 38 , 211 ), # #c026d3 - Purple
3160- (168 , 85 , 247 ), # #a855f7 - Bright purple
3161- (168 , 85 , 247 ), # #a855f7 - Purple transitioning to pink
3162- (236 , 72 , 153 ), # #ec4899 - Pink
3163- (236 , 72 , 153 ), # #ec4899 - Bright pink - middle section
3164- (244 , 63 , 94 ), # #f43f5e - Pink
3165- (244 , 63 , 94 ), # #f43f5e - Pink transitioning to orange
3166- (251 , 146 , 60 ), # #fb923c - Orange - "CODE" section
3167- (251 , 146 , 60 ), # #fb923c - Bright orange
3168- (251 , 146 , 60 ), # #fb923c - Orange end
3262+ (217 , 70 , 239 ), # #d946ef - Deep purple - "DSPY" starts here
3263+ (217 , 70 , 239 ), # #d946ef - Purple
3264+ (192 , 38 , 211 ), # #c026d3 - Purple
3265+ (168 , 85 , 247 ), # #a855f7 - Bright purple
3266+ (168 , 85 , 247 ), # #a855f7 - Purple transitioning to pink
3267+ (236 , 72 , 153 ), # #ec4899 - Pink
3268+ (236 , 72 , 153 ), # #ec4899 - Bright pink - middle section
3269+ (244 , 63 , 94 ), # #f43f5e - Pink
3270+ (244 , 63 , 94 ), # #f43f5e - Pink transitioning to orange
3271+ (251 , 146 , 60 ), # #fb923c - Orange - "CODE" section
3272+ (251 , 146 , 60 ), # #fb923c - Bright orange
3273+ (251 , 146 , 60 ), # #fb923c - Orange end
31693274 ]
31703275
31713276 # Show ASCII art with gradient in a panel
@@ -3251,161 +3356,3 @@ def execute(verbose: bool = False, debug: bool = False):
32513356 except Exception as e :
32523357 logger .error (f"Failed to start interactive mode: { e } " )
32533358 raise DSPyCLIError (f"Failed to start interactive mode: { e } " )
3254- import re
3255-
3256- # Extract number of examples
3257- num_match = re .search (
3258- r"(\d+)\s*(?:examples?|samples?|data points?)" , user_input , re .IGNORECASE
3259- )
3260- num_examples = int (num_match .group (1 )) if num_match else 20 # Default to 20
3261-
3262- # Limit to reasonable range
3263- num_examples = max (5 , min (num_examples , 100 ))
3264-
3265- # Extract task description
3266- task_keywords = ["for" , "about" , "on" , "regarding" ]
3267- task_description = None
3268-
3269- for keyword in task_keywords :
3270- if keyword in user_input .lower ():
3271- parts = user_input .lower ().split (keyword , 1 )
3272- if len (parts ) > 1 :
3273- task_description = parts [1 ].strip ()
3274- # Clean up
3275- task_description = re .sub (
3276- r"\d+\s*(?:examples?|samples?|data points?)" , "" , task_description
3277- ).strip ()
3278- break
3279-
3280- # If no task found, try to extract from common patterns
3281- if not task_description :
3282- # Look for task types
3283- task_types = {
3284- "sentiment" : "sentiment analysis" ,
3285- "classification" : "text classification" ,
3286- "question" : "question answering" ,
3287- "summarization" : "text summarization" ,
3288- "translation" : "translation" ,
3289- "email" : "email classification" ,
3290- "ner" : "named entity recognition" ,
3291- "qa" : "question answering" ,
3292- }
3293-
3294- for key , value in task_types .items ():
3295- if key in user_input .lower ():
3296- task_description = value
3297- break
3298-
3299- return task_description , num_examples
3300-
3301- def _generate_synthetic_examples (
3302- self , task_description : str , num_examples : int
3303- ) -> list [dict [str , Any ]]:
3304- """Generate synthetic training examples using LLM.
3305-
3306- Args:
3307- task_description: Description of the task
3308- num_examples: Number of examples to generate
3309-
3310- Returns:
3311- List of example dictionaries with 'input' and 'output' keys
3312- """
3313- if not self .llm_connector or not self .llm_connector .current_model :
3314- logger .warning ("No LLM connected for data generation" )
3315- return []
3316-
3317- try :
3318- # Build prompt for data generation
3319- prompt = f"""Generate { num_examples } diverse, realistic training examples for: { task_description }
3320-
3321- Requirements:
3322- 1. Create varied, realistic examples that cover different scenarios
3323- 2. Include edge cases and challenging examples
3324- 3. Make inputs natural and outputs accurate
3325- 4. Ensure diversity in topics, length, and complexity
3326- 5. Format as JSON array with 'input' and 'output' keys
3327-
3328- Example format:
3329- [
3330- {{"input": "example input text", "output": "expected output"}},
3331- {{"input": "another input", "output": "another output"}},
3332- ...
3333- ]
3334-
3335- Task: { task_description }
3336- Number of examples: { num_examples }
3337-
3338- Generate ONLY the JSON array, no explanations:"""
3339-
3340- # Generate with LLM
3341- response = self .llm_connector .generate_response (
3342- prompt = prompt ,
3343- system_prompt = "You are a data generation expert. Generate high-quality, diverse training examples in JSON format." ,
3344- context = {},
3345- )
3346-
3347- # Extract JSON from response
3348- examples = self ._extract_json_from_response (response )
3349-
3350- if examples and isinstance (examples , list ):
3351- logger .info (f"Generated { len (examples )} examples" )
3352- return examples
3353- else :
3354- logger .warning ("Failed to parse generated examples" )
3355- return []
3356-
3357- except Exception as e :
3358- logger .error (f"Data generation failed: { e } " )
3359- return []
3360-
3361- def _extract_json_from_response (self , response : str ) -> list [dict [str , Any ]] | None :
3362- """Extract JSON array from LLM response."""
3363- import json
3364- import re
3365-
3366- try :
3367- # Try to find JSON array in response
3368- json_match = re .search (r"\[[\s\S]*\]" , response )
3369- if json_match :
3370- json_str = json_match .group (0 )
3371- return json .loads (json_str )
3372-
3373- # Try parsing entire response
3374- return json .loads (response )
3375-
3376- except json .JSONDecodeError :
3377- logger .warning ("Failed to parse JSON from response" )
3378- return None
3379-
3380- def _show_example_samples (self , examples : list [dict [str , Any ]], task_description : str = None ):
3381- """Display a sample of generated examples."""
3382- from rich .table import Table
3383-
3384- console .print ()
3385- console .print (f"[bold cyan]Generated Examples for: { task_description } [/bold cyan]" )
3386- console .print ()
3387-
3388- # Show first 5 examples in a table
3389- table = Table (show_header = True , header_style = "bold magenta" )
3390- table .add_column ("Input" , style = "cyan" , width = 50 )
3391- table .add_column ("Output" , style = "green" , width = 30 )
3392-
3393- for i , example in enumerate (examples [:5 ]):
3394- input_text = str (example .get ("input" , "" ))
3395- output_text = str (example .get ("output" , "" ))
3396-
3397- # Truncate if too long
3398- if len (input_text ) > 100 :
3399- input_text = input_text [:97 ] + "..."
3400- if len (output_text ) > 50 :
3401- output_text = output_text [:47 ] + "..."
3402-
3403- table .add_row (input_text , output_text )
3404-
3405- console .print (table )
3406-
3407- if len (examples ) > 5 :
3408- console .print ()
3409- console .print (f"[dim]... and { len (examples ) - 5 } more examples[/dim]" )
3410-
3411- console .print ()
0 commit comments