Skip to content

Commit 37d31da

Browse files
preetsojitra2712 and Squash Bot
authored and committed
CLDR-18745 cldr_rule_classifier.py
See unicode-org#4951
1 parent 5565730 commit 37d31da

File tree

1 file changed

+125
-0
lines changed

1 file changed

+125
-0
lines changed
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
import json
2+
import os
3+
import re
4+
from jira import JIRA
5+
6+
# Keyword tables for classify_ticket_with_rules_advanced.  Every keyword is
# lowercase because it is matched against lowercased ticket text.

# (score delta, keywords): each keyword found adds its delta to the priority score.
_PRIORITY_WEIGHTS = (
    (20, ("critical", "crash", "urgent", "blocker", "unusable")),
    (10, ("important", "major", "severe")),
    (-5, ("trivial", "minor", "cosmetic")),
)

# Order matters: the first component with a matching keyword wins.
_COMPONENT_KEYWORDS = (
    ("plural-rules", ("plural", "plurals", "cardinal", "ordinal")),
    ("date-time-formats", ("date", "time", "datetime", "timezone", "calendar", "format")),
    ("units", ("unit", "measurement", "gallon", "meter", "km", "celsius")),
    ("locale-data", ("locale", "country", "language", "territory", "subdivision")),
    ("charts", ("chart", "graph", "visualization")),
)

# Order matters: checked top to bottom, first match wins, default is "task".
_TICKET_TYPE_KEYWORDS = (
    ("bug", ("error", "bug", "fails", "broken", "crash")),
    ("feature", ("add", "create", "implement", "feature", "proposal")),
    ("enhancement", ("improve", "enhance", "update", "refactor")),
)

# Order matters: checked top to bottom, first match wins, default is "CLDR Ops".
_ROUTING_KEYWORDS = (
    ("CLDR Design WG", ("structure", "api", "icu", "icu4x", "design", "proposal", "architecture")),
    ("PMs", ("data", "accurate", "incorrect", "validate", "country", "language", "locale")),
)


def _mentions(text, keyword):
    """Return True if *keyword* occurs in *text* at the start of a word.

    A plain substring test fires inside unrelated words ("date" in "update",
    "icu" in "particular", "meter" in "parameter").  Requiring that the match
    is not preceded by a letter removes those false positives while still
    accepting inflected forms ("errors", "units", "enhancement") and tokens
    such as "5km" or "cldr_locale".
    """
    return re.search(r"(?<![a-z])" + re.escape(keyword), text) is not None


def _mentions_any(text, keywords):
    """Return True if at least one of *keywords* is mentioned in *text*."""
    return any(_mentions(text, keyword) for keyword in keywords)


def _first_match(text, table, default):
    """Return the label of the first (label, keywords) row matching *text*."""
    for label, keywords in table:
        if _mentions_any(text, keywords):
            return label
    return default


def classify_ticket_with_rules_advanced(title, description):
    """
    Classify a Jira ticket with keyword rules, a regex and a priority score.

    Args:
        title: Ticket summary; ``None`` is treated as an empty string.
        description: Ticket description; ``None`` is treated as an empty string.

    Returns:
        A dict ``{"classification_status": "valid_ticket", "details": {...}}``
        where ``details`` holds the keys ``is_cldr_related``, ``ticket_type``,
        ``component``, ``needs_engineering_work``, ``needs_language_specialist``,
        ``is_potential_duplicate``, ``potential_duplicate_of``, ``summary``,
        ``routing_group`` and ``priority``.

    Note:
        Any ``CLDR-<digits>`` key mentioned in the text is reported as a
        potential duplicate; the rule cannot tell a plain cross-reference from
        a real duplicate.
    """
    text = ((title or "") + " " + (description or "")).lower()

    # --- Initialize default classification ---
    classification = {
        "is_cldr_related": True,
        "ticket_type": "task",
        "component": "other",
        "needs_engineering_work": False,
        "needs_language_specialist": False,
        "is_potential_duplicate": False,
        "potential_duplicate_of": None,
        "summary": "Classification based on advanced rules."
    }

    # --- Rule 1: Use Regex to find duplicate ticket keys ---
    duplicate_match = re.search(r"cldr-\d+", text)
    if duplicate_match:
        classification["is_potential_duplicate"] = True
        classification["potential_duplicate_of"] = duplicate_match.group(0).upper()

    # --- Rule 2: Scoring System for Priority ---
    priority_score = sum(
        delta
        for delta, keywords in _PRIORITY_WEIGHTS
        for keyword in keywords
        if _mentions(text, keyword)
    )

    # --- Rule 3: Deduce Component from Keywords ---
    classification["component"] = _first_match(text, _COMPONENT_KEYWORDS, "other")

    # --- Rule 4: Determine Ticket Type ---
    classification["ticket_type"] = _first_match(text, _TICKET_TYPE_KEYWORDS, "task")

    # --- Rule 5: Set Routing Group based on content ---
    classification["routing_group"] = _first_match(text, _ROUTING_KEYWORDS, "CLDR Ops")

    # --- Make final decisions based on scores ---
    if priority_score > 15:
        classification["priority"] = "critical"
    elif priority_score > 5:
        classification["priority"] = "high"
    elif priority_score < 0:
        classification["priority"] = "low"
    else:
        classification["priority"] = "medium"

    return {
        "classification_status": "valid_ticket",
        "details": classification
    }
91+
92+
# --- Main execution block ---
93+
if __name__ == "__main__":
    # --- Configuration for Jira API Connection ---
    # Credentials come from the environment so that no secret has to be
    # written into (and possibly committed with) this file.
    JIRA_SERVER = os.environ.get("JIRA_SERVER", "https://unicode-org.atlassian.net")
    JIRA_USER_EMAIL = os.environ.get("JIRA_USER_EMAIL")
    JIRA_API_TOKEN = os.environ.get("JIRA_API_TOKEN")
    if not JIRA_USER_EMAIL or not JIRA_API_TOKEN:
        # SystemExit with a message prints it to stderr and exits with status 1.
        raise SystemExit(
            "Please set the JIRA_USER_EMAIL and JIRA_API_TOKEN environment variables."
        )

    # --- Get Jira Ticket Key from User Input ---
    # Strip stray whitespace so a pasted key with a trailing space still resolves.
    TICKET_KEY_TO_FETCH = input("Please enter the Jira Ticket Key (e.g., CLDR-12345): ").strip()
    if not TICKET_KEY_TO_FETCH:
        raise SystemExit("No Jira ticket key was entered.")

    print(f"\n--- Connecting to Jira server at: {JIRA_SERVER} ---")
    try:
        jira_connection = JIRA(server=JIRA_SERVER, basic_auth=(JIRA_USER_EMAIL, JIRA_API_TOKEN))
        print(" Connection successful!")

        print(f"\n--- Fetching ticket: {TICKET_KEY_TO_FETCH} ---")
        issue = jira_connection.issue(TICKET_KEY_TO_FETCH)
        ticket_summary = issue.fields.summary
        ticket_description = issue.fields.description
    except Exception as e:
        # Top-level boundary: report the failure and exit non-zero so that
        # shell callers can detect it instead of seeing a successful run.
        print(f" An error occurred: {e}")
        raise SystemExit(1)

    print(f"Title: {ticket_summary}")
    print(f"Description: {ticket_description if ticket_description else 'No description found.'}")

    # --- Generate and Print the Dynamic Classification ---
    print("\n" + "="*50)
    print("--- Dynamic Advanced Rule-Based Classification ---")
    print("="*50)

    advanced_classification = classify_ticket_with_rules_advanced(ticket_summary, ticket_description)

    print(json.dumps(advanced_classification, indent=2))

0 commit comments

Comments
 (0)