Skip to content

Commit dad3366

Browse files
commit
1 parent 1c07237 commit dad3366

File tree

2 files changed

+309
-0
lines changed

2 files changed

+309
-0
lines changed
File renamed without changes.

scripts/standardize_json_format.py

Lines changed: 309 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,309 @@
1+
#!/usr/bin/env python3
2+
"""
3+
System Prompt Library JSON Format Standardization Script
4+
5+
This script standardizes all JSON files in the system-prompts/json/ directory
6+
to match the latest format (240925_format.json).
7+
8+
Usage:
9+
    python standardize_json_format.py [--dry-run] [--backup] [--json-dir DIR]
10+
"""
11+
12+
import json
13+
import os
14+
import sys
15+
import argparse
16+
import shutil
17+
from pathlib import Path
18+
from datetime import datetime
19+
from typing import Dict, Any, Optional
20+
21+
# Field mapping from old format to new format.
#
# Keys are the legacy (lower-case / hyphenated) field names; values are the
# field names used by the new format. standardize_json_file() walks this
# table and copies every legacy key it finds to its new name.
#
# NOTE(review): "Personalised" is the only target that has no entry in
# NEW_FORMAT_TEMPLATE, so it only appears in output files that carried the
# legacy "personalised-system-prompt" key — confirm whether that is intended.
FIELD_MAPPING = {
    # Basic info fields
    "agentname": "agent_name",
    "description": "Description",
    "systemprompt": "System Prompt",
    "chatgptlink": "ChatGPT Access URL",
    "creation_date": "Creation Date",

    # Boolean capability fields (old -> new)
    "is-agent": "Is Agent",
    "is-single-turn": "Single Turn (Workflow Type)",
    "structured-output-generation": "Structured Output (Workflow Type)",
    "image-generation": "Image Generation (Workflow Type)",
    "data-utility": "Data Utility (Category)",
    "personalised-system-prompt": "Personalised",

    # Schema fields
    "json-schema": "JSON Schema (Full)",
    "json-example": "JSON Schema (Example Value)",
}
42+
43+
# Complete new format template with all fields.
#
# Every standardized file starts as a copy of this dict, so the key order
# here is the key order of the written JSON. Defaults are None for free-form
# fields, False for boolean flags and 0 for the numeric "Utility Estimate".
#
# NOTE(review): "Spech-To-Speech" and "Localtisation Notes" look like typos,
# but they are runtime keys that presumably mirror the reference format
# file — confirm against that file before renaming them.
NEW_FORMAT_TEMPLATE = {
    "agent_name": None,
    "Description": None,
    "One Line Summary": None,
    "Creation Date": None,
    "ChatGPT Access URL": None,
    "Utility Estimate": 0,
    "Test Entry": False,
    "JSON Schema (Full)": None,
    "JSON Schema (Example Value)": None,
    "Better As Tool": False,
    "Is Agent": False,
    "Single Turn (Workflow Type)": False,
    "External Tooling (Required)": False,
    "Structured Output (Workflow Type)": False,
    "Image Generation (Workflow Type)": False,
    "System Prompt": None,
    "Character (Type)": False,
    "Roleplay (Behavior)": False,
    "Voice First": False,
    "Writing Assistant": False,
    "Data Utility (Category)": False,
    "N8N Link": None,
    "RAG (Required)": False,
    "Vision (Req)": False,
    "Spech-To-Speech": False,
    "Video Input (Required)": False,
    "Audio (Required)": False,
    "TTS (Required)": False,
    "File Input (Req)": False,
    "Conversational": False,
    "Instructional": False,
    "Autonomous": False,
    "MCPs Used": None,
    "API Notes": None,
    "MCP Notes": None,
    "Local LLM Friendly?": False,
    "Local LLM Notes": None,
    "LLM Selection Notes": None,
    "Deep Research": False,
    "Update/Iteration": False,
    "Iteration Notes": None,
    "Use Case Outline": None,
    "PII Notes": None,
    "Cost Estimates": None,
    "Localtisation Notes": None,
    "Guardrails Notes": None,
    "Gemini URL": None,
}
93+
94+
def normalize_boolean_value(value: Any) -> bool:
    """Convert various boolean representations to an actual boolean.

    Args:
        value: The raw value read from a JSON document.

    Returns:
        * ``bool`` input is returned unchanged.
        * ``str`` input is True when, ignoring case and surrounding
          whitespace, it is one of ``true``, ``1``, ``yes`` or ``on``;
          any other string (including the empty string) is False.
        * ``int`` / ``float`` input follows normal Python truthiness.
        * Anything else (``None``, lists, dicts, ...) is False.
    """
    if isinstance(value, bool):
        return value
    if isinstance(value, str):
        # Strip first so hand-edited values such as " true " or "yes\n"
        # are not silently turned into False.
        return value.strip().lower() in ('true', '1', 'yes', 'on')
    if isinstance(value, (int, float)):
        return bool(value)
    return False
103+
104+
def normalize_date_value(value: Any) -> Optional[str]:
    """Normalize date values to YYYY-MM-DD format where possible.

    Args:
        value: The raw date value read from a JSON document.

    Returns:
        * ``None`` for ``None`` and for empty / whitespace-only strings.
        * ``YYYY-MM-DD`` for strings that parse as a plain date
          (``2024-01-15``, ``2024-1-5``) or as an ISO 8601 date-time
          (``2024-01-15T10:30:00Z``, ``2024-01-15 10:30:00+02:00``). For
          date-times the calendar date is taken as written, without any
          timezone conversion.
        * The whitespace-stripped input for strings that cannot be parsed,
          so unrecognised values are kept rather than discarded.
        * ``str(value)`` for truthy non-string values, ``None`` for falsy
          ones.
    """
    if value is None:
        return None

    if not isinstance(value, str):
        return str(value) if value else None

    text = value.strip()
    if not text:
        return None

    # Plain calendar date. strptime also accepts non-padded month/day, and
    # isoformat() re-emits it zero-padded.
    try:
        return datetime.strptime(text, '%Y-%m-%d').date().isoformat()
    except ValueError:
        pass

    # ISO 8601 date-time. Only a *trailing* Z is a UTC designator; rewrite
    # just that one character so older fromisoformat() versions accept it.
    iso_text = text[:-1] + '+00:00' if text[-1] in ('Z', 'z') else text
    try:
        return datetime.fromisoformat(iso_text).date().isoformat()
    except ValueError:
        # Not a format we understand: keep the (trimmed) original text.
        return text
131+
132+
def standardize_json_file(file_path: Path, dry_run: bool = False) -> Dict[str, Any]:
    """
    Standardize a single JSON file to the new format.

    The output always starts from NEW_FORMAT_TEMPLATE (so every template
    field is present, in template order) and is then filled in from the
    file:

    1. Legacy keys listed in FIELD_MAPPING are copied to their new names.
    2. Keys that are already in the new format are carried over unchanged.
       If a file contains both the legacy and the new spelling of a field,
       the legacy value wins.
    3. Every field whose template default is a bool is passed through
       normalize_boolean_value(); "Creation Date" is passed through
       normalize_date_value().

    Keys that are neither legacy nor new-format names are not written to
    the output; each one is reported in ``changes_made``.

    Running the function on its own output is a no-op apart from rewriting
    the file.

    Args:
        file_path: Path to the JSON file
        dry_run: If True, don't write changes, just return what would be changed

    Returns:
        Dictionary with standardization results:
        ``file`` (str path), ``success`` (bool), ``changes_made`` (list of
        human-readable messages; exactly ``["File already in correct
        format"]`` when nothing needed changing) and ``errors`` (list of
        messages, non-empty only when ``success`` is False).
    """
    result = {
        "file": str(file_path),
        "success": False,
        "changes_made": [],
        "errors": []
    }
    changes = result["changes_made"]

    try:
        # Read the existing file
        with open(file_path, 'r', encoding='utf-8') as f:
            old_data = json.load(f)

        # Valid JSON is not necessarily an object; report that explicitly
        # instead of failing later with an opaque TypeError.
        if not isinstance(old_data, dict):
            result["errors"].append(
                f"Expected a JSON object at the top level, got {type(old_data).__name__}"
            )
            return result

        # Start with the new format template
        new_data = NEW_FORMAT_TEMPLATE.copy()

        # Map old fields to new fields
        migrated = set()
        for old_field, new_field in FIELD_MAPPING.items():
            if old_field not in old_data:
                continue
            new_data[new_field] = old_data[old_field]
            migrated.add(new_field)
            changes.append(f"Mapped {old_field} -> {new_field}")
            if new_field in old_data:
                changes.append(f"Legacy {old_field} overrides existing {new_field}")

        # Carry over fields that are already in the new format. Mapping
        # targets must be included here too: skipping them would wipe
        # agent_name, Description, System Prompt, ... from any file that
        # has already been standardized.
        known_fields = list(NEW_FORMAT_TEMPLATE)
        known_fields += [
            target for target in FIELD_MAPPING.values()
            if target not in NEW_FORMAT_TEMPLATE
        ]
        for field in known_fields:
            if field in old_data and field not in migrated:
                new_data[field] = old_data[field]

        # Boolean fields are exactly those whose template default is a bool,
        # so the list cannot drift out of sync with the template.
        for field, default in NEW_FORMAT_TEMPLATE.items():
            if not isinstance(default, bool):
                continue
            raw_flag = new_data[field]
            new_data[field] = normalize_boolean_value(raw_flag)
            # Type check rather than !=, because 1 == True in Python.
            if not isinstance(raw_flag, bool):
                changes.append(f"Normalized boolean {field}: {raw_flag} -> {new_data[field]}")

        # Normalize the date regardless of which spelling supplied it
        if "Creation Date" in new_data:
            raw_date = new_data["Creation Date"]
            clean_date = normalize_date_value(raw_date)
            new_data["Creation Date"] = clean_date
            if raw_date != clean_date:
                changes.append(f"Normalized date Creation Date: {raw_date} -> {clean_date}")

        # Make dropped input visible instead of losing it silently
        recognized = set(known_fields) | set(FIELD_MAPPING)
        for key in old_data:
            if key not in recognized:
                changes.append(f"Dropped unrecognized field: {key}")

        # Template fields the file did not provide under either spelling
        added = [
            field for field in NEW_FORMAT_TEMPLATE
            if field not in old_data and field not in migrated
        ]
        if added:
            changes.append(f"Added {len(added)} missing field(s) with default values")

        if not changes and list(old_data) != list(new_data):
            changes.append("Reordered fields to match the template")

        # Check if any changes were made
        if not changes:
            changes.append("File already in correct format")

        # Write the standardized file. Serialize first so a serialization
        # failure cannot leave a truncated file behind.
        if not dry_run:
            serialized = json.dumps(new_data, indent=2, ensure_ascii=False)
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(serialized)

        result["success"] = True

    except json.JSONDecodeError as e:
        result["errors"].append(f"JSON decode error: {e}")
    except Exception as e:
        # Per-file boundary: record the failure so the caller can carry on
        # with the remaining files.
        result["errors"].append(f"Unexpected error: {e}")

    return result
224+
225+
def main(argv: Optional[list] = None) -> None:
    """Command-line entry point: standardize every ``*.json`` file in a directory.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the behaviour when run as a
            script; passing a list allows programmatic invocation.

    Options:
        --dry-run   Report what would change without writing anything.
        --backup    Copy each file into a timestamped ``backup_*``
                    sub-directory of the JSON directory before rewriting it.
        --json-dir  Directory to process. A relative path is resolved
                    against the directory containing this script.

    Exits with status 1 when the directory is missing, when it contains no
    JSON files, or when at least one file could not be processed.
    """
    parser = argparse.ArgumentParser(description="Standardize System Prompt Library JSON format")
    parser.add_argument("--dry-run", action="store_true",
                        help="Show what would be changed without making changes")
    parser.add_argument("--backup", action="store_true",
                        help="Create backup of original files before standardization")
    parser.add_argument("--json-dir", type=str,
                        default="../system-prompts/json",
                        help="Directory containing JSON files to standardize")

    args = parser.parse_args(argv)

    # Get the script directory and construct paths
    script_dir = Path(__file__).parent
    json_dir = script_dir / args.json_dir

    # is_dir() rather than exists(): a regular file at this path is not a
    # usable JSON directory either.
    if not json_dir.is_dir():
        print(f"Error: JSON directory not found: {json_dir}")
        sys.exit(1)

    # Find all JSON files (non-recursive, so backup sub-directories are skipped)
    json_files = list(json_dir.glob("*.json"))
    if not json_files:
        print(f"No JSON files found in {json_dir}")
        sys.exit(1)

    print(f"Found {len(json_files)} JSON files to process")

    if args.dry_run:
        print("DRY RUN MODE - No files will be modified")

    make_backups = args.backup and not args.dry_run
    if make_backups:
        backup_dir = json_dir / f"backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        backup_dir.mkdir(exist_ok=True)
        print(f"Creating backup in: {backup_dir}")

    # Process each file
    total_processed = 0
    total_errors = 0
    total_changes = 0

    for json_file in sorted(json_files):
        print(f"\nProcessing: {json_file.name}")

        # Create backup if requested. If the backup cannot be made, leave
        # the file untouched and move on rather than aborting the whole run.
        if make_backups:
            try:
                shutil.copy2(json_file, backup_dir / json_file.name)
            except OSError as exc:
                total_errors += 1
                print(" ✗ Errors:")
                print(f" - Backup failed, file left unchanged: {exc}")
                continue

        # Standardize the file
        result = standardize_json_file(json_file, dry_run=args.dry_run)

        if result["success"]:
            total_processed += 1
            if result["changes_made"] != ["File already in correct format"]:
                total_changes += 1
                print(" ✓ Changes made:")
                for change in result["changes_made"]:
                    print(f" - {change}")
            else:
                print(" ✓ Already in correct format")
        else:
            total_errors += 1
            print(" ✗ Errors:")
            for error in result["errors"]:
                print(f" - {error}")

    # Summary
    print(f"\n{'='*60}")
    print("STANDARDIZATION SUMMARY")
    print(f"{'='*60}")
    print(f"Total files processed: {total_processed}")
    print(f"Files with changes: {total_changes}")
    print(f"Files with errors: {total_errors}")

    if args.dry_run:
        print("\nThis was a dry run. To apply changes, run without --dry-run flag.")
    elif args.backup:
        print(f"\nBackups created in: {backup_dir}")

    if total_errors > 0:
        sys.exit(1)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)