Skip to content

Commit e2658dd

Browse files
committed
updates to scripts
1 parent f05bf4f commit e2658dd

File tree

2 files changed

+7
-96
lines changed

2 files changed

+7
-96
lines changed

scripts/generate_intro_podcasts.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
REQUIREMENTS:
3434
- Length: 150-200 words
3535
- Prioritize the topic highlighted in the episode title when generating the intro
36-
- Should include opening hook with a main challenge/question the episode explores, guest credibility for Google's E-E-A-T, introduce the key topics, and provide a value proposition for the listener
36+
- Should include an opening hook with the main challenge/question the episode explores, the guest's background, an introduction to the key topics, and a value proposition for the listener
3737
- No marketer talk and hype, just focus on the content of the episode and the value it provides to the listener
3838
- Do not invent things that are not in the timestamps or the episode title
3939
- Naturally integrate SEO keywords if they are related to the episode

scripts/generate_title_podcasts.py

Lines changed: 6 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
Generate titles for podcast episodes using OpenAI API.
44
55
This script takes a podcast page, finds the timestamp file from podcast-timestamps folder,
6-
and finds the guest's page in _people folder to use their information.
6+
and generates an SEO-optimized title for the podcast episode.
77
88
Usage:
99
python generate_title_podcasts.py <podcast_file> [--update] [--api-key YOUR_KEY]
@@ -26,25 +26,21 @@
2626

2727
DEFAULT_PROMPT = """You are an SEO expert creating SEO-optimized titles for podcast episodes.
2828
29-
You are given episode timestamps showing key topics and discussion flow and guest information (name, title, bio, expertise).
29+
You are given episode timestamps showing key topics and discussion flow.
3030
31-
TASK: Based on the podcast timestamps and guest bio, generate a SEO-optimized title for the podcast episode.
31+
TASK: Based on the podcast timestamps, generate an SEO-optimized title for the podcast episode.
3232
3333
REQUIREMENTS:
3434
- Make it clear and useful for the reader to grasp the main topic of the episode and its value proposition
3535
- Optimize for SEO with relevant keywords
3636
- Should be under 100 characters
37-
- Do not include guest's name in the title
38-
- Focus primarily on the main themes and topics in the timestamps, not the guest's current role
37+
- Focus primarily on the main themes and topics in the timestamps
3938
- Use action words and specific terms or concepts when appropriate
4039
- Make it compelling enough that someone would click on it in search results
4140
4241
TIMESTAMPS:
4342
{timestamps_content}
4443
45-
GUEST INFORMATION:
46-
{guest_info}
47-
4844
OUTPUT: Generate ONLY the title text.
4945
"""
5046

@@ -84,73 +80,7 @@ def get_timestamps_file(podcast_file_path):
8480
return timestamp_file if timestamp_file.exists() else None
8581

8682

87-
def get_guest_info(guest_short):
88-
"""Get guest information from _people folder."""
89-
script_dir = Path(__file__).parent
90-
project_root = script_dir.parent
91-
people_dir = project_root / '_people'
92-
93-
guest_file = people_dir / f"{guest_short}.md"
94-
95-
if not guest_file.exists():
96-
return None
97-
98-
with open(guest_file, 'r', encoding='utf-8') as f:
99-
content = f.read()
100-
101-
# Extract front matter
102-
if content.startswith('---\n'):
103-
match = re.search(r'\n---\n', content[4:])
104-
if match:
105-
end_pos = match.start() + 4
106-
frontmatter_text = content[4:end_pos]
107-
rest_content = content[end_pos + 5:].strip()
108-
109-
try:
110-
frontmatter = yaml.safe_load(frontmatter_text)
111-
# Combine front matter info with bio content
112-
guest_info = {
113-
'name': frontmatter.get('title', ''),
114-
'short': frontmatter.get('short', ''),
115-
'picture': frontmatter.get('picture', ''),
116-
'linkedin': frontmatter.get('linkedin', ''),
117-
'twitter': frontmatter.get('twitter', ''),
118-
'web': frontmatter.get('web', ''),
119-
'bio': rest_content
120-
}
121-
return guest_info
122-
except yaml.YAMLError:
123-
return None
124-
125-
return None
126-
127-
128-
def format_guest_info(guests_info):
129-
"""Format guest information for the prompt."""
130-
if not guests_info:
131-
return "No guest information available."
132-
133-
formatted = []
134-
for guest in guests_info:
135-
if guest:
136-
info_parts = []
137-
if guest.get('name'):
138-
info_parts.append(f"Name: {guest['name']}")
139-
if guest.get('bio'):
140-
info_parts.append(f"Bio: {guest['bio']}")
141-
if guest.get('linkedin'):
142-
info_parts.append(f"LinkedIn: {guest['linkedin']}")
143-
if guest.get('twitter'):
144-
info_parts.append(f"Twitter: {guest['twitter']}")
145-
if guest.get('web'):
146-
info_parts.append(f"Website: {guest['web']}")
147-
148-
formatted.append("\n".join(info_parts))
149-
150-
return "\n\n".join(formatted) if formatted else "No guest information available."
151-
152-
153-
def generate_title(timestamps_content, guest_info, api_key=None):
83+
def generate_title(timestamps_content, api_key=None):
15484
"""Generate title using OpenAI API."""
15585
# Initialize OpenAI client
15686
if api_key:
@@ -162,12 +92,10 @@ def generate_title(timestamps_content, guest_info, api_key=None):
16292
# Format the prompt with all the information
16393
prompt = DEFAULT_PROMPT.format(
16494
timestamps_content=timestamps_content,
165-
guest_info=guest_info
16695
)
16796

16897
print(f"Prompt size: {len(prompt)} characters")
16998
print(f" - Timestamps: {len(timestamps_content)} characters")
170-
print(f" - Guest info: {len(guest_info)} characters")
17199
print()
172100

173101
# Call OpenAI API
@@ -303,11 +231,6 @@ def process_podcast_file(podcast_file: Path, api_key: str = None, update: bool =
303231
print("Warning: Could not parse frontmatter from podcast file", file=sys.stderr)
304232
frontmatter = {}
305233

306-
# Get guest names from front matter
307-
guests = frontmatter.get('guests', [])
308-
if not guests:
309-
print("Warning: No guests found in front matter", file=sys.stderr)
310-
311234
# Get timestamp file
312235
timestamp_file = get_timestamps_file(podcast_file)
313236
if not timestamp_file:
@@ -318,24 +241,12 @@ def process_podcast_file(podcast_file: Path, api_key: str = None, update: bool =
318241
with open(timestamp_file, 'r', encoding='utf-8') as f:
319242
timestamps_content = f.read().strip()
320243

321-
# Get guest information
322-
guests_info = []
323-
for guest_short in guests:
324-
guest_info = get_guest_info(guest_short)
325-
if guest_info:
326-
print(f"Found guest info: {guest_short}")
327-
guests_info.append(guest_info)
328-
else:
329-
print(f"Warning: Guest info not found for: {guest_short}", file=sys.stderr)
330-
331-
formatted_guest_info = format_guest_info(guests_info)
332-
333244
print()
334245
print("Generating title...")
335246
print()
336247

337248
# Generate title
338-
title = generate_title(timestamps_content, formatted_guest_info, api_key=api_key)
249+
title = generate_title(timestamps_content, api_key=api_key)
339250

340251
print(f"Generated title ({len(title)} chars):")
341252
print(f" {title}")

0 commit comments

Comments
 (0)