#!/usr/bin/env python3
"""
System Prompt Library JSON Format Standardization Script

This script standardizes all JSON files in the system-prompts/json/ directory
to match the latest format (240925_format.json).

Usage:
    python standardize_json_format.py [--dry-run] [--backup] [--json-dir PATH]
"""

import json
import sys
import argparse
import shutil
from pathlib import Path
from datetime import datetime
from typing import Dict, Any, Optional

# Field mapping from old format to new format
FIELD_MAPPING = {
    # Basic info fields
    "agentname": "agent_name",
    "description": "Description",
    "systemprompt": "System Prompt",
    "chatgptlink": "ChatGPT Access URL",
    "creation_date": "Creation Date",

    # Boolean capability fields (old -> new)
    "is-agent": "Is Agent",
    "is-single-turn": "Single Turn (Workflow Type)",
    "structured-output-generation": "Structured Output (Workflow Type)",
    "image-generation": "Image Generation (Workflow Type)",
    "data-utility": "Data Utility (Category)",
    "personalised-system-prompt": "Personalised",  # carried over as-is; has no matching key in NEW_FORMAT_TEMPLATE

    # Schema fields
    "json-schema": "JSON Schema (Full)",
    "json-example": "JSON Schema (Example Value)",
}
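
# Example mapping (illustrative values): an old-format entry such as
#   {"agentname": "Example Agent", "is-agent": "true", "description": "Does a thing"}
# is rewritten as
#   {"agent_name": "Example Agent", "Is Agent": True, "Description": "Does a thing", ...}
# with every remaining field filled from NEW_FORMAT_TEMPLATE below.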

# Complete new format template with all fields.
# Key names (including spellings such as "Spech-To-Speech" and "Localtisation Notes")
# are kept as-is so that output keys line up with the reference 240925 format file.
NEW_FORMAT_TEMPLATE = {
    "agent_name": None,
    "Description": None,
    "One Line Summary": None,
    "Creation Date": None,
    "ChatGPT Access URL": None,
    "Utility Estimate": 0,
    "Test Entry": False,
    "JSON Schema (Full)": None,
    "JSON Schema (Example Value)": None,
    "Better As Tool": False,
    "Is Agent": False,
    "Single Turn (Workflow Type)": False,
    "External Tooling (Required)": False,
    "Structured Output (Workflow Type)": False,
    "Image Generation (Workflow Type)": False,
    "System Prompt": None,
    "Character (Type)": False,
    "Roleplay (Behavior)": False,
    "Voice First": False,
    "Writing Assistant": False,
    "Data Utility (Category)": False,
    "N8N Link": None,
    "RAG (Required)": False,
    "Vision (Req)": False,
    "Spech-To-Speech": False,
    "Video Input (Required)": False,
    "Audio (Required)": False,
    "TTS (Required)": False,
    "File Input (Req)": False,
    "Conversational": False,
    "Instructional": False,
    "Autonomous": False,
    "MCPs Used": None,
    "API Notes": None,
    "MCP Notes": None,
    "Local LLM Friendly?": False,
    "Local LLM Notes": None,
    "LLM Selection Notes": None,
    "Deep Research": False,
    "Update/Iteration": False,
    "Iteration Notes": None,
    "Use Case Outline": None,
    "PII Notes": None,
    "Cost Estimates": None,
    "Localtisation Notes": None,
    "Guardrails Notes": None,
    "Gemini URL": None
}
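
# Only the keys listed in FIELD_MAPPING are filled from old-format files; keys that are
# already present under their new names are preserved, and everything else keeps the
# defaults above.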

def normalize_boolean_value(value: Any) -> bool:
    """Convert various boolean representations to actual boolean."""
    if isinstance(value, bool):
        return value
    if isinstance(value, str):
        return value.lower() in ('true', '1', 'yes', 'on')
    if isinstance(value, (int, float)):
        return bool(value)
    return False
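
# Illustrative behaviour of normalize_boolean_value:
#   "Yes" -> True, 0 -> False, None -> False (anything unrecognised falls back to False).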

def normalize_date_value(value: Any) -> Optional[str]:
    """Normalize date values to YYYY-MM-DD format."""
    if value is None:
        return None

    if isinstance(value, str):
        # Handle various date formats
        if value.strip() == "":
            return None

        # If it's already in YYYY-MM-DD format, keep it
        if len(value) == 10 and value.count('-') == 2:
            try:
                datetime.strptime(value, '%Y-%m-%d')
                return value
            except ValueError:
                pass

        # Try to parse ISO format with timezone
        if '+' in value or 'T' in value:
            try:
                dt = datetime.fromisoformat(value.replace('Z', '+00:00'))
                return dt.strftime('%Y-%m-%d')
            except ValueError:
                pass

    return str(value) if value else None
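
# Illustrative behaviour of normalize_date_value:
#   "2024-09-25" is returned unchanged, "2024-09-25T10:30:00Z" becomes "2024-09-25",
#   "" becomes None, and any string that cannot be parsed is returned as-is.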

def standardize_json_file(file_path: Path, dry_run: bool = False) -> Dict[str, Any]:
    """
    Standardize a single JSON file to the new format.

    Args:
        file_path: Path to the JSON file
        dry_run: If True, don't write changes, just return what would be changed

    Returns:
        Dictionary with standardization results
        ("file", "success", "changes_made", "errors")
    """
    result = {
        "file": str(file_path),
        "success": False,
        "changes_made": [],
        "errors": []
    }
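    # Example of a populated result (illustrative values):
    #   {"file": "prompts/example.json", "success": True,
    #    "changes_made": ["Mapped agentname -> agent_name"], "errors": []}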

    try:
        # Read the existing file
        with open(file_path, 'r', encoding='utf-8') as f:
            old_data = json.load(f)

        # Start with the new format template
        new_data = NEW_FORMAT_TEMPLATE.copy()

        # Map old fields to new fields
        for old_field, new_field in FIELD_MAPPING.items():
            if old_field in old_data:
                old_value = old_data[old_field]

                # Handle boolean fields
                if new_field in ["Is Agent", "Single Turn (Workflow Type)",
                                 "Structured Output (Workflow Type)", "Image Generation (Workflow Type)",
                                 "Data Utility (Category)"]:
                    new_data[new_field] = normalize_boolean_value(old_value)
                    if old_value != new_data[new_field]:
                        result["changes_made"].append(f"Normalized boolean {old_field} -> {new_field}: {old_value} -> {new_data[new_field]}")

                # Handle date fields
                elif new_field == "Creation Date":
                    new_data[new_field] = normalize_date_value(old_value)
                    if old_value != new_data[new_field]:
                        result["changes_made"].append(f"Normalized date {old_field} -> {new_field}: {old_value} -> {new_data[new_field]}")

                # Handle regular fields
                else:
                    new_data[new_field] = old_value
                    result["changes_made"].append(f"Mapped {old_field} -> {new_field}")
        # Preserve fields that are already stored under their new-format names.
        # Values populated by the old-field mapping above are left untouched;
        # any other new-format key found in the source file is copied through
        # instead of being reset to the template default.
        for field in NEW_FORMAT_TEMPLATE.keys():
            if field in old_data and new_data[field] == NEW_FORMAT_TEMPLATE[field]:
                new_data[field] = old_data[field]
                result["changes_made"].append(f"Preserved existing field: {field}")

        # Special handling for fields that might need boolean normalization
        boolean_fields = [
            "Test Entry", "Better As Tool", "Is Agent", "Single Turn (Workflow Type)",
            "External Tooling (Required)", "Structured Output (Workflow Type)",
            "Image Generation (Workflow Type)", "Character (Type)", "Roleplay (Behavior)",
            "Voice First", "Writing Assistant", "Data Utility (Category)",
            "RAG (Required)", "Vision (Req)", "Spech-To-Speech", "Video Input (Required)",
            "Audio (Required)", "TTS (Required)", "File Input (Req)", "Conversational",
            "Instructional", "Autonomous", "Local LLM Friendly?", "Deep Research",
            "Update/Iteration"
        ]

        for field in boolean_fields:
            if field in new_data:
                old_value = new_data[field]
                new_data[field] = normalize_boolean_value(old_value)
                if old_value != new_data[field]:
                    result["changes_made"].append(f"Normalized boolean {field}: {old_value} -> {new_data[field]}")

        # Check if any changes were made
        if not result["changes_made"]:
            result["changes_made"].append("File already in correct format")

        # Write the standardized file
        if not dry_run:
            with open(file_path, 'w', encoding='utf-8') as f:
                json.dump(new_data, f, indent=2, ensure_ascii=False)

        result["success"] = True

    except json.JSONDecodeError as e:
        result["errors"].append(f"JSON decode error: {e}")
    except Exception as e:
        result["errors"].append(f"Unexpected error: {e}")

    return result

def main():
    parser = argparse.ArgumentParser(description="Standardize System Prompt Library JSON format")
    parser.add_argument("--dry-run", action="store_true",
                        help="Show what would be changed without making changes")
    parser.add_argument("--backup", action="store_true",
                        help="Create backup of original files before standardization")
    parser.add_argument("--json-dir", type=str,
                        default="../system-prompts/json",
                        help="Directory containing JSON files to standardize "
                             "(relative paths are resolved against this script's directory)")

    args = parser.parse_args()

    # Get the script directory and construct paths
    script_dir = Path(__file__).parent
    json_dir = script_dir / args.json_dir

    if not json_dir.exists():
        print(f"Error: JSON directory not found: {json_dir}")
        sys.exit(1)

    # Find all JSON files
    json_files = list(json_dir.glob("*.json"))
    if not json_files:
        print(f"No JSON files found in {json_dir}")
        sys.exit(1)

    print(f"Found {len(json_files)} JSON files to process")

    if args.dry_run:
        print("DRY RUN MODE - No files will be modified")

    if args.backup and not args.dry_run:
        backup_dir = json_dir / f"backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        backup_dir.mkdir(exist_ok=True)
        print(f"Creating backup in: {backup_dir}")

    # Process each file
    total_processed = 0
    total_errors = 0
    total_changes = 0

    for json_file in sorted(json_files):
        print(f"\nProcessing: {json_file.name}")

        # Create backup if requested
        if args.backup and not args.dry_run:
            backup_file = backup_dir / json_file.name
            shutil.copy2(json_file, backup_file)

        # Standardize the file
        result = standardize_json_file(json_file, dry_run=args.dry_run)

        if result["success"]:
            total_processed += 1
            if len(result["changes_made"]) > 1 or result["changes_made"][0] != "File already in correct format":
                total_changes += 1
                print("  ✓ Changes made:")
                for change in result["changes_made"]:
                    print(f"    - {change}")
            else:
                print("  ✓ Already in correct format")
        else:
            total_errors += 1
            print("  ✗ Errors:")
            for error in result["errors"]:
                print(f"    - {error}")

    # Summary
    print(f"\n{'='*60}")
    print("STANDARDIZATION SUMMARY")
    print(f"{'='*60}")
    print(f"Total files processed: {total_processed}")
    print(f"Files with changes: {total_changes}")
    print(f"Files with errors: {total_errors}")

    if args.dry_run:
        print("\nThis was a dry run. To apply changes, run without --dry-run flag.")
    elif args.backup:
        print(f"\nBackups created in: {backup_dir}")

    if total_errors > 0:
        sys.exit(1)

if __name__ == "__main__":
    main()