11from app .transcript import Transcript
22from app .logging import get_logger
3+ from app .services .global_tag_manager import GlobalTagManager
34import openai
45from app .config import settings
56
@@ -9,6 +10,7 @@ class CorrectionService:
910 def __init__ (self , provider = 'openai' , model = 'gpt-4o' ):
1011 self .provider = provider
1112 self .model = model
13+ self .tag_manager = GlobalTagManager ()
1214 if self .provider == 'openai' :
1315 self .client = openai
1416 self .client .api_key = settings .OPENAI_API_KEY
@@ -20,8 +22,9 @@ def process(self, transcript: Transcript, **kwargs):
2022 keywords = kwargs .get ('keywords' , [])
2123
2224 metadata = transcript .source .to_json ()
25+ global_context = self .tag_manager .get_correction_context ()
2326
24- prompt = self ._build_prompt (transcript .outputs ['raw' ], keywords , metadata )
27+ prompt = self ._build_enhanced_prompt (transcript .outputs ['raw' ], keywords , metadata , global_context )
2528
2629 # Call the LLM
2730 response = self .client .chat .completions .create (
@@ -34,29 +37,99 @@ def process(self, transcript: Transcript, **kwargs):
3437 transcript .outputs ['corrected_text' ] = corrected_text
3538 logger .info ("Correction complete." )
3639
37- def _build_prompt (self , text , keywords , metadata ):
40+ def _build_enhanced_prompt (self , text , keywords , metadata , global_context ):
3841 prompt = (
39- "You are a domain expert in Bitcoin and blockchain technologies.\n \n "
40- "The following transcript was generated using an automatic speech recognition (ASR) system. "
41- "Your task is to correct it based on the contextual metadata provided.\n \n "
42- "--- Contextual Metadata ---\n "
42+ "You are a transcript correction specialist with expertise in Bitcoin and blockchain terminology.\n \n "
43+ "The following transcript was generated by automatic speech recognition (ASR). Your task is to "
44+ "correct ONLY the obvious mistakes while keeping the transcript as close to the original as possible.\n \n "
45+ "DO NOT:\n "
46+ "- Rephrase or rewrite sentences\n "
47+ "- Change the speaker's style or tone\n "
48+ "- Add or remove content\n "
49+ "- Make major structural changes\n \n "
50+ "DO:\n "
51+ "- Fix spelling errors and typos\n "
52+ "- Correct misheard words using context\n "
53+ "- Fix technical terminology and proper names\n "
54+ "- Maintain the exact same flow and structure\n \n "
55+ "--- Current Video Metadata ---\n "
4356 )
4457
4558 if metadata .get ('title' ):
46- prompt += f"Title: { metadata ['title' ]} \n "
59+ prompt += f"Video Title: { metadata ['title' ]} \n "
4760 if metadata .get ('speakers' ):
4861 prompt += f"Speakers: { ', ' .join (metadata ['speakers' ])} \n "
4962 if metadata .get ('tags' ):
50- prompt += f"Tags: { ', ' .join (metadata ['tags' ])} \n "
63+ prompt += f"Video Tags: { ', ' .join (metadata ['tags' ])} \n "
64+ if metadata .get ('categories' ):
65+ prompt += f"Categories: { ', ' .join (metadata ['categories' ])} \n "
66+ if metadata .get ('youtube' , {}).get ('description' ):
67+ description = metadata ['youtube' ]['description' ][:200 ] + "..." if len (metadata ['youtube' ]['description' ]) > 200 else metadata ['youtube' ]['description' ]
68+ prompt += f"Description: { description } \n "
69+
70+ # Add global knowledge base context
71+ video_count = global_context .get ('video_count' , 0 )
72+ prompt += f"\n --- Global Bitcoin Knowledge Base (From { video_count } Transcripts) ---\n "
73+
74+ if global_context .get ('frequent_tags' ):
75+ frequent_tags = global_context ['frequent_tags' ][:15 ]
76+ prompt += f"Most Common Topics: { ', ' .join (frequent_tags )} \n "
77+
78+ if global_context .get ('technical_terms' ):
79+ tech_terms = global_context ['technical_terms' ][:20 ]
80+ prompt += f"Technical Terms to Recognize: { ', ' .join (tech_terms )} \n "
81+
82+ if global_context .get ('project_names' ):
83+ projects = global_context ['project_names' ][:15 ]
84+ prompt += f"Bitcoin Projects/Tools: { ', ' .join (projects )} \n "
85+
86+ if global_context .get ('common_speakers' ):
87+ speakers = global_context ['common_speakers' ][:10 ]
88+ prompt += f"Frequent Speakers: { ', ' .join (speakers )} \n "
89+
90+ if global_context .get ('common_categories' ):
91+ categories = global_context ['common_categories' ][:8 ]
92+ prompt += f"Common Content Categories: { ', ' .join (categories )} \n "
5193
52- prompt += "Please use this metadata to improve the accuracy of your corrections.\n "
94+ if global_context .get ('expertise_areas' ):
95+ areas = global_context ['expertise_areas' ][:8 ]
96+ prompt += f"Domain Expertise Areas: { ', ' .join (areas )} \n "
97+
98+ if global_context .get ('domain_context' ):
99+ prompt += f"Primary Domain Focus: { global_context ['domain_context' ]} \n "
100+
101+ # Add specific correction focus
102+ prompt += "\n --- Focus Areas for Correction ---\n "
103+ prompt += "Using the metadata and global knowledge, focus on correcting:\n "
104+ prompt += "1. Technical terms (ensure proper spelling and capitalization)\n "
105+ prompt += "2. Speaker names and project names (match known variations)\n "
106+ prompt += "3. Common ASR mishears (but, bit, big -> Bitcoin when context suggests it)\n "
107+ prompt += "4. Homophones and similar-sounding words in Bitcoin context\n "
108+ prompt += "5. Obvious typos and spelling mistakes\n \n "
109+ prompt += "IMPORTANT: Make minimal changes - only fix clear errors, don't improve the text.\n "
110+
111+ # Add tag variations for better recognition
112+ if global_context .get ('tag_variations' ):
113+ variations = global_context ['tag_variations' ]
114+ if variations :
115+ prompt += "\n --- Common Term Variations ---\n "
116+ for base_term , variants in list (variations .items ())[:5 ]:
117+ prompt += f"{ base_term } : { ', ' .join (variants )} \n "
53118
119+ # Add user-provided keywords
54120 if keywords :
55121 prompt += (
56- "\n Additionally, prioritize the following keywords. Ensure they are spelled, cased, and formatted correctly "
57- "whenever they appear in the transcript :\n - "
122+ "\n --- Additional Priority Keywords --- \n "
123+ "Pay special attention to these terms and ensure correct spelling/formatting :\n - "
58124 )
59125 prompt += "\n - " .join (keywords )
60126
61- prompt += f"\n \n --- Transcript Start ---\n \n { text .strip ()} \n \n --- Transcript End ---"
127+ prompt += f"\n \n --- Transcript Start ---\n \n { text .strip ()} \n \n --- Transcript End ---\n \n "
128+ prompt += "Return ONLY the corrected transcript. Make minimal changes - fix only obvious errors while "
129+ prompt += "preserving the original wording, sentence structure, and speaker's natural expression."
130+
62131 return prompt
132+
133+ def _build_prompt (self , text , keywords , metadata ):
134+ """Legacy method for backward compatibility"""
135+ return self ._build_enhanced_prompt (text , keywords , metadata , {})
0 commit comments