33Generate titles for podcast episodes using OpenAI API.
44
55This script takes a podcast page, finds the timestamp file from podcast-timestamps folder,
6- and finds the guest's page in _people folder to use their information.
6+ and generates a SEO-optimized title for the podcast episode.
77
88Usage:
99 python generate_title_podcasts.py <podcast_file> [--update] [--api-key YOUR_KEY]
2626
2727DEFAULT_PROMPT = """You are an SEO expert creating SEO-optimized titles for podcast episodes.
2828
29- You are given episode timestamps showing key topics and discussion flow and guest information (name, title, bio, expertise).
29+ You are given episode timestamps showing key topics and discussion flow.
3030
31- TASK: Based on the podcast timestamps and guest bio , generate a SEO-optimized title for the podcast episode.
31+ TASK: Based on the podcast timestamps, generate a SEO-optimized title for the podcast episode.
3232
3333REQUIREMENTS:
3434- Make it clear and useful for the reader to grasp the main topic of the episode and its value proposition
3535- Optimize for SEO with relevant keywords
3636- Should be under 100 characters
37- - Do not include guest's name in the title
38- - Focus primarily on the main themes and topics in the timestamps, not the guest's current role
37+ - Focus primarily on the main themes and topics in the timestamps
3938- Use action words and specific terms or concepts when appropriate
4039- Make it compelling enough that someone would click on it in search results
4140
4241TIMESTAMPS:
4342{timestamps_content}
4443
45- GUEST INFORMATION:
46- {guest_info}
47-
4844OUTPUT: Generate ONLY the title text.
4945"""
5046
@@ -84,73 +80,7 @@ def get_timestamps_file(podcast_file_path):
8480 return timestamp_file if timestamp_file .exists () else None
8581
8682
87- def get_guest_info (guest_short ):
88- """Get guest information from _people folder."""
89- script_dir = Path (__file__ ).parent
90- project_root = script_dir .parent
91- people_dir = project_root / '_people'
92-
93- guest_file = people_dir / f"{ guest_short } .md"
94-
95- if not guest_file .exists ():
96- return None
97-
98- with open (guest_file , 'r' , encoding = 'utf-8' ) as f :
99- content = f .read ()
100-
101- # Extract front matter
102- if content .startswith ('---\n ' ):
103- match = re .search (r'\n---\n' , content [4 :])
104- if match :
105- end_pos = match .start () + 4
106- frontmatter_text = content [4 :end_pos ]
107- rest_content = content [end_pos + 5 :].strip ()
108-
109- try :
110- frontmatter = yaml .safe_load (frontmatter_text )
111- # Combine front matter info with bio content
112- guest_info = {
113- 'name' : frontmatter .get ('title' , '' ),
114- 'short' : frontmatter .get ('short' , '' ),
115- 'picture' : frontmatter .get ('picture' , '' ),
116- 'linkedin' : frontmatter .get ('linkedin' , '' ),
117- 'twitter' : frontmatter .get ('twitter' , '' ),
118- 'web' : frontmatter .get ('web' , '' ),
119- 'bio' : rest_content
120- }
121- return guest_info
122- except yaml .YAMLError :
123- return None
124-
125- return None
126-
127-
128- def format_guest_info (guests_info ):
129- """Format guest information for the prompt."""
130- if not guests_info :
131- return "No guest information available."
132-
133- formatted = []
134- for guest in guests_info :
135- if guest :
136- info_parts = []
137- if guest .get ('name' ):
138- info_parts .append (f"Name: { guest ['name' ]} " )
139- if guest .get ('bio' ):
140- info_parts .append (f"Bio: { guest ['bio' ]} " )
141- if guest .get ('linkedin' ):
142- info_parts .append (f"LinkedIn: { guest ['linkedin' ]} " )
143- if guest .get ('twitter' ):
144- info_parts .append (f"Twitter: { guest ['twitter' ]} " )
145- if guest .get ('web' ):
146- info_parts .append (f"Website: { guest ['web' ]} " )
147-
148- formatted .append ("\n " .join (info_parts ))
149-
150- return "\n \n " .join (formatted ) if formatted else "No guest information available."
151-
152-
153- def generate_title (timestamps_content , guest_info , api_key = None ):
83+ def generate_title (timestamps_content , api_key = None ):
15484 """Generate title using OpenAI API."""
15585 # Initialize OpenAI client
15686 if api_key :
@@ -162,12 +92,10 @@ def generate_title(timestamps_content, guest_info, api_key=None):
16292 # Format the prompt with all the information
16393 prompt = DEFAULT_PROMPT .format (
16494 timestamps_content = timestamps_content ,
165- guest_info = guest_info
16695 )
16796
16897 print (f"Prompt size: { len (prompt )} characters" )
16998 print (f" - Timestamps: { len (timestamps_content )} characters" )
170- print (f" - Guest info: { len (guest_info )} characters" )
17199 print ()
172100
173101 # Call OpenAI API
@@ -303,11 +231,6 @@ def process_podcast_file(podcast_file: Path, api_key: str = None, update: bool =
303231 print ("Warning: Could not parse frontmatter from podcast file" , file = sys .stderr )
304232 frontmatter = {}
305233
306- # Get guest names from front matter
307- guests = frontmatter .get ('guests' , [])
308- if not guests :
309- print ("Warning: No guests found in front matter" , file = sys .stderr )
310-
311234 # Get timestamp file
312235 timestamp_file = get_timestamps_file (podcast_file )
313236 if not timestamp_file :
@@ -318,24 +241,12 @@ def process_podcast_file(podcast_file: Path, api_key: str = None, update: bool =
318241 with open (timestamp_file , 'r' , encoding = 'utf-8' ) as f :
319242 timestamps_content = f .read ().strip ()
320243
321- # Get guest information
322- guests_info = []
323- for guest_short in guests :
324- guest_info = get_guest_info (guest_short )
325- if guest_info :
326- print (f"Found guest info: { guest_short } " )
327- guests_info .append (guest_info )
328- else :
329- print (f"Warning: Guest info not found for: { guest_short } " , file = sys .stderr )
330-
331- formatted_guest_info = format_guest_info (guests_info )
332-
333244 print ()
334245 print ("Generating title..." )
335246 print ()
336247
337248 # Generate title
338- title = generate_title (timestamps_content , formatted_guest_info , api_key = api_key )
249+ title = generate_title (timestamps_content , api_key = api_key )
339250
340251 print (f"Generated title ({ len (title )} chars):" )
341252 print (f" { title } " )
0 commit comments