Skip to content

Commit 3f23c52

Browse files
CLDR-18745 cldr_dynamic_prompter.py (#4989)
1 parent e7bc294 commit 3f23c52

File tree

1 file changed

+111
-29
lines changed

1 file changed

+111
-29
lines changed

tools/scripts/llm/ticket_valdator/cldr_dynamic_prompter.py

Lines changed: 111 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,14 @@
22
"""
33
cldr_dynamic_prompter.py
44
5-
What it does: prints a short, ticket-specific LLM prompt for a CLDR JIRA ticket.
6-
Usage:
7-
python cldr_dynamic_prompter.py CLDR-1234
8-
python cldr_dynamic_prompter.py CLDR-1234 --category "Software Bug"
9-
python cldr_dynamic_prompter.py CLDR-1234 --auto-category
5+
Print a short, ticket-specific LLM prompt for a CLDR JIRA ticket.
6+
Now supports --template to load the prompt from a separate file.
107
"""
118

129
import re
1310
import sys
1411
import argparse
12+
from pathlib import Path
1513
from typing import Optional, Dict, List
1614

1715
from jira import JIRA
@@ -21,24 +19,25 @@
2119
# CONFIG: paste credentials
2220
# =========================
2321
JIRA_SERVER = "https://unicode-org.atlassian.net"
24-
JIRA_USER_EMAIL = "MAIL" # <-- Replace with your email
25-
JIRA_API_TOKEN = "API OF JIRA" # <-- Replace with your API token
26-
22+
JIRA_USER_EMAIL = "YOUR MAIL !!!" # <-- your email
23+
JIRA_API_TOKEN = "JIRA API KEY" # <-- your API token
2724

2825
# optional; only needed if you use --auto-category
29-
OPENAI_API_KEY = "OpenAI API KEY!!"
30-
OPENAI_MODEL = "gpt-4o-mini"
26+
OPENAI_API_KEY = "OPEN AI API KEY" # keep empty if not using auto-category
27+
OPENAI_MODEL = "gpt-4o-mini"
3128

3229
CATEGORIES = ("Data Accuracy", "Documentation Issue", "Software Bug", "Feature Request")
3330

31+
# Default template path (relative to this file)
32+
DEFAULT_TEMPLATE = Path(__file__).parent / "templates" / "phase1_prompt.md"
33+
3434
# -----------------------------
3535
# Jira + parsing helpers
3636
# -----------------------------
3737

3838
def get_jira_client_from_config() -> JIRA:
3939
if not JIRA_USER_EMAIL or not JIRA_API_TOKEN:
40-
print("Error: fill JIRA_USER_EMAIL and JIRA_API_TOKEN in cldr_dynamic_prompter.py", file=sys.stderr)
41-
sys.exit(2)
40+
raise RuntimeError("JIRA creds missing: set JIRA_USER_EMAIL and JIRA_API_TOKEN in cldr_dynamic_prompter.py")
4241
return JIRA(server=JIRA_SERVER, basic_auth=(JIRA_USER_EMAIL, JIRA_API_TOKEN))
4342

4443
def _get_match(pattern: str, text: str, flags=0, default: str = "") -> str:
@@ -48,21 +47,29 @@ def _get_match(pattern: str, text: str, flags=0, default: str = "") -> str:
4847
def parse_report_text(report_text: str) -> Optional[Dict[str, str]]:
4948
"""Extract fields from cldr_ticket_reader.py's report string."""
5049
try:
51-
title = _get_match(r"^Title:\s*(.*)$", report_text, re.MULTILINE)
52-
reporter = _get_match(r"^Reporter:\s*(.*)$", report_text, re.MULTILINE, "N/A")
53-
priority = _get_match(r"^Priority:\s*(.*)$", report_text, re.MULTILINE, "N/A")
54-
components = _get_match(r"^Components:\s*(.*)$", report_text, re.MULTILINE, "None")
55-
labels = _get_match(r"^Labels:\s*(.*)$", report_text, re.MULTILINE, "None")
56-
desc = _get_match(r"Description:\s*\n---\n(.*?)\n---", report_text, re.DOTALL, "")
50+
title = _get_match(r"^Title:\s*(.*)$", report_text, re.MULTILINE)
51+
reporter = _get_match(r"^Reporter:\s*(.*)$", report_text, re.MULTILINE, "N/A")
52+
priority = _get_match(r"^Priority:\s*(.*)$", report_text, re.MULTILINE, "N/A")
53+
components = _get_match(r"^Components:\s*(.*)$", report_text, re.MULTILINE, "None")
54+
labels = _get_match(r"^Labels:\s*(.*)$", report_text, re.MULTILINE, "None")
55+
desc = _get_match(r"Description:\s*\n---\n(.*?)\n---", report_text, re.DOTALL, "")
5756
code_blocks = re.findall(r"\{code.*?\}(.*?)\{code\}", desc, re.DOTALL)
58-
has_code = bool(code_blocks)
57+
has_code = bool(code_blocks)
58+
links_block = _get_match(r"Connected Work Items:\n(.*?)\n\nDescription:", report_text, re.DOTALL, "")
59+
connected_items = []
60+
if links_block:
61+
for ln in links_block.splitlines():
62+
t = ln.strip().lstrip("-• ").strip()
63+
if t:
64+
connected_items.append(t)
5965
return {
6066
"title": title,
6167
"description": desc,
6268
"reporter": reporter,
6369
"priority": priority,
6470
"components": components,
6571
"labels": labels,
72+
"connected_items": connected_items,
6673
"has_code": has_code,
6774
}
6875
except Exception:
@@ -75,7 +82,7 @@ def fetch_ticket_data(ticket_key: str, jira_client: JIRA) -> Dict[str, str]:
7582
if data:
7683
return data
7784

78-
# fallback
85+
# fallback (rare)
7986
issue = jira_client.issue(ticket_key)
8087
title = getattr(issue.fields, "summary", "") or ""
8188
description = getattr(issue.fields, "description", "") or ""
@@ -91,6 +98,7 @@ def fetch_ticket_data(ticket_key: str, jira_client: JIRA) -> Dict[str, str]:
9198
"priority": priority,
9299
"components": components,
93100
"labels": labels,
101+
"connected_items": [],
94102
"has_code": has_code,
95103
}
96104

@@ -118,7 +126,7 @@ def auto_pick_category(title: str, description: str) -> str:
118126
cat = (resp.choices[0].message.content or "").strip().strip('"')
119127
return cat if cat in CATEGORIES else "Triage"
120128
except Exception as e:
121-
print(f"Auto-category failed, falling back to 'Triage': {e}", file=sys.stderr)
129+
print(f"[warn] Auto-category failed; using 'Triage': {e}", file=sys.stderr)
122130
return "Triage"
123131

124132
# -----------------------------
@@ -132,9 +140,9 @@ def _norm_list_field(s: str) -> List[str]:
132140

133141
def detect_topic(data: Dict[str, str]) -> str:
134142
title = (data.get("title") or "").lower()
135-
desc = (data.get("description") or "").lower()
143+
desc = (data.get("description") or "").lower()
136144
components = _norm_list_field(data.get("components"))
137-
labels = _norm_list_field(data.get("labels"))
145+
labels = _norm_list_field(data.get("labels"))
138146
text = " ".join([title, desc] + components + labels)
139147

140148
if any(k in text for k in ["intervalformatfallback", "datetime", "date time", "date-time", "skeleton", "pattern", "quotes", "apostrophe"]):
@@ -154,7 +162,7 @@ def detect_topic(data: Dict[str, str]) -> str:
154162
return "General/Locale Data"
155163

156164
# -----------------------------
157-
# Prompt builders
165+
# Built-in prompt builders (fallback if no template)
158166
# -----------------------------
159167

160168
def build_triage_prompt(data: Dict[str, str]) -> str:
@@ -266,16 +274,90 @@ def build_topic_prompt(category: str, topic: str, data: Dict[str, str]) -> str:
266274
}
267275
return base + topic_asks.get(topic, topic_asks["General/Locale Data"])
268276

277+
# -----------------------------
278+
# Template loading + rendering
279+
# -----------------------------
280+
281+
def load_template(path: Path) -> str:
    """Return the entire contents of the template file at *path*.

    The file is opened in text mode and decoded as UTF-8. Any error raised
    by ``open`` or ``read`` (for example a missing file) propagates to the
    caller unchanged.
    """
    with open(path, mode="r", encoding="utf-8") as template_file:
        contents = template_file.read()
    return contents
284+
285+
def render_template(tmpl: str, mapping: Dict[str, str]) -> str:
    """Replace ``{{KEY}}`` tokens in *tmpl* with values from *mapping*.

    A token is ``{{``, optional whitespace, a key made of ``A-Z``, ``0-9``
    and ``_``, optional whitespace, then ``}}``. Text that does not match
    this shape (for example ``{{lower_case}}``) is left untouched.

    Args:
        tmpl: Template text containing zero or more ``{{KEY}}`` tokens.
        mapping: Placeholder name -> replacement value. Values are expected
            to be strings, but any other value is converted with ``str()``
            so a stray int/bool does not abort rendering.

    Returns:
        The template with every recognised token substituted. Keys that are
        absent from *mapping*, or whose value is ``None``, render as an
        empty string.
    """
    def _sub(m):
        # The capture group cannot contain whitespace, so no strip is needed.
        value = mapping.get(m.group(1))
        # re.sub requires the callable to return str; coerce defensively and
        # treat None the same as a missing key.
        return "" if value is None else str(value)

    return re.sub(r"\{\{\s*([A-Z0-9_]+)\s*\}\}", _sub, tmpl)
291+
292+
def build_from_template(ticket_key: str, template_path: Path, jira: Optional[JIRA]) -> str:
    """Render the template at *template_path* for the ticket *ticket_key*.

    When a Jira client is supplied, the ticket fields are fetched with
    ``fetch_ticket_data``. If no client is given, or the fetch raises, a
    warning is written to stderr (fetch failure only) and the template is
    rendered with fixed placeholder values so a prompt is still produced.

    Args:
        ticket_key: Ticket identifier substituted for ``{{TICKET_ID}}``.
        template_path: Location of the template file to load.
        jira: Jira client, or ``None`` to skip fetching entirely.

    Returns:
        The rendered template text.
    """
    ticket = None
    if jira is not None:
        try:
            ticket = fetch_ticket_data(ticket_key, jira)
        except Exception as err:
            print(f"[warn] Jira fetch failed, rendering template with placeholders: {err}", file=sys.stderr)

    if ticket is not None:
        # Live data: flatten the connected-items list into one display string.
        linked = ticket.get("connected_items")
        connected_text = ", ".join(linked) if linked else "None"
        values = {
            "TICKET_ID": ticket_key,
            "TITLE": ticket["title"],
            "DESCRIPTION": ticket["description"],
            "COMPONENTS": ticket["components"],
            "LABELS": ticket["labels"],
            "REPORTER": ticket["reporter"],
            "PRIORITY": ticket["priority"],
            "CONNECTED_ITEMS": connected_text,
            "HAS_CODE_BLOCK": str(ticket["has_code"]).lower(),
        }
    else:
        # Offline / failed fetch: placeholder values keep the prompt printable.
        values = {
            "TICKET_ID": ticket_key,
            "TITLE": "(unavailable: Jira error)",
            "DESCRIPTION": "(unavailable: Jira error)",
            "COMPONENTS": "None",
            "LABELS": "None",
            "REPORTER": "N/A",
            "PRIORITY": "N/A",
            "CONNECTED_ITEMS": "None",
            "HAS_CODE_BLOCK": "false",
        }

    template_text = load_template(template_path)
    return render_template(template_text, values)
331+
269332
# -----------------------------
270333
# PUBLIC API for other tools
271334
# -----------------------------
272335

273-
def make_prompt(ticket_key: str, category: Optional[str] = None, auto_category: bool = False) -> str:
336+
def make_prompt(ticket_key: str,
337+
category: Optional[str] = None,
338+
auto_category: bool = False,
339+
template: Optional[str] = None) -> str:
274340
"""
275341
Return a ready-to-send prompt string for the given ticket.
276-
Other tools import and call this.
342+
If a template file exists (explicit path or default), use it; otherwise fall back to built-ins.
277343
"""
278-
jira = get_jira_client_from_config()
344+
# Try to get Jira; do not crash if it fails.
345+
jira = None
346+
try:
347+
jira = get_jira_client_from_config()
348+
except Exception as e:
349+
print(f"[warn] Jira not available: {e}", file=sys.stderr)
350+
351+
# Prefer template if provided or default exists
352+
template_path = Path(template) if template else DEFAULT_TEMPLATE
353+
if template_path.exists():
354+
return build_from_template(ticket_key, template_path, jira)
355+
356+
# Fallback to built-in prompts (requires Jira; will warn if missing)
357+
if jira is None:
358+
# Minimal fallback prompt if Jira is totally unavailable and no template is supplied
359+
return f"Ticket {ticket_key}: Jira unavailable and no template provided."
360+
279361
data = fetch_ticket_data(ticket_key, jira)
280362

281363
if category:
@@ -287,7 +369,6 @@ def make_prompt(ticket_key: str, category: Optional[str] = None, auto_category:
287369

288370
if chosen == "Triage":
289371
return build_triage_prompt(data)
290-
291372
topic = detect_topic(data)
292373
return build_topic_prompt(chosen, topic, data)
293374

@@ -301,11 +382,12 @@ def main():
301382
description="Print a ticket-specific LLM prompt for a CLDR JIRA ticket."
302383
)
303384
parser.add_argument("ticket_key", help="e.g., CLDR-12345")
385+
parser.add_argument("--template", help="Path to a prompt template (default: templates/phase1_prompt.md)")
304386
parser.add_argument("--category", choices=list(CATEGORIES) + ["Triage"])
305387
parser.add_argument("--auto-category", action="store_true")
306388
args = parser.parse_args()
307389

308-
prompt = make_prompt(args.ticket_key, category=args.category, auto_category=args.auto_category)
390+
prompt = make_prompt(args.ticket_key, category=args.category, auto_category=args.auto_category, template=args.template)
309391
print(prompt)
310392

311393
if __name__ == "__main__":

0 commit comments

Comments
 (0)