1+ import os
2+ import re
3+ import glob
4+ from datetime import datetime
5+ from googleapiclient .discovery import build
6+ from googleapiclient .errors import HttpError
7+ from isodate import parse_duration
8+ import yaml # For reading frontmatter of existing posts
9+ # zoneinfo is built-in for Python 3.9+
10+ # If using Python < 3.9, you'd need 'pytz' and different date handling
11+ from zoneinfo import ZoneInfo
12+
# --- Configuration ---
# The API key has no default; it must be supplied through the environment.
API_KEY = os.getenv("YOUTUBE_API_KEY")
# Channel to read uploads from; the environment overrides the built-in default.
CHANNEL_ID = os.getenv("YOUTUBE_CHANNEL_ID", "UCRkqSGyfZkhOzZIHjlgBXcQ")
POSTS_DIR = "_posts"  # Directory scanned for existing posts and written to
CATEGORY = "video"  # Value stored under "category" in each post's front matter

# Service name and version passed to googleapiclient's build().
YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION = "youtube", "v3"
21+
def get_youtube_service():
    """Build the YouTube API client.

    Returns:
        The service object produced by ``build``, or ``None`` (after
        printing an error) when ``API_KEY`` is empty or unset.
    """
    if API_KEY:
        return build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=API_KEY)
    print("Error: YOUTUBE_API_KEY environment variable not found.")
    return None
28+
def format_duration(iso_duration_str):
    """Render an ISO 8601 duration string as zero-padded ``MM:SS``.

    Minutes are not rolled over into hours, so a duration of one hour and
    five minutes is rendered as ``65:00``.

    Args:
        iso_duration_str: Duration text handed to ``parse_duration``.

    Returns:
        The formatted duration, or ``"00:00"`` (after printing a warning)
        when the value cannot be parsed or converted.
    """
    try:
        whole_seconds = int(parse_duration(iso_duration_str).total_seconds())
        mins, secs = divmod(whole_seconds, 60)
        return f"{mins:02d}:{secs:02d}"
    except Exception as err:
        print(f"Warning: Could not parse duration '{iso_duration_str}': {err}")
        return "00:00"
40+
41+
def sanitize_filename(title, max_length=80):
    """Create a safe, hyphen-separated filename slug from a title.

    The title is lower-cased, characters other than word characters,
    whitespace and hyphens are dropped, and every run of whitespace and/or
    hyphens collapses to a single hyphen.

    Args:
        title: Source text; an empty or ``None`` title is treated as
            ``"untitled-video"``.
        max_length: Maximum slug length in characters (non-negative).

    Returns:
        The slug, never starting or ending with a hyphen, or ``"video"``
        when nothing usable remains.
    """
    if not title:
        title = "untitled-video"
    slug = re.sub(r"[^\w\s-]", "", title.lower())
    # One pass covers both "whitespace -> hyphen" and "squeeze repeated hyphens".
    slug = re.sub(r"[\s-]+", "-", slug).strip("-")
    # Truncation can cut right after a separator, so strip again afterwards;
    # otherwise the slug (and the post filename) could end in "-".
    slug = slug[:max_length].rstrip("-")
    return slug or "video"
52+
def get_existing_video_ids(posts_dir):
    """Collect the ``VideoId`` values recorded in existing posts' front matter.

    Every ``*.md`` file directly inside ``posts_dir`` is read. A file
    contributes an ID only when it starts with ``---`` and its front matter
    parses to a mapping containing a non-null ``VideoId``. Problems with a
    single file are printed as warnings and do not stop the scan.

    Args:
        posts_dir: Directory holding the posts. It is created when missing.

    Returns:
        A set of video IDs as strings. Empty when there are no posts or
        when the directory could not be created.
    """
    existing_ids = set()
    if not os.path.exists(posts_dir):
        print(f"Posts directory '{posts_dir}' not found. Creating it.")
        try:
            os.makedirs(posts_dir)
        except OSError as e:
            print(f"Error creating directory {posts_dir}: {e}")
            return existing_ids  # Nothing to scan if the directory is unavailable

    for filepath in glob.glob(os.path.join(posts_dir, "*.md")):
        try:
            with open(filepath, "r", encoding="utf-8") as f:
                content = f.read()
            if not content.startswith("---"):
                continue  # No front matter block at all

            # parts[0] is the empty text before the opening fence, parts[1]
            # the front matter, so index 1 always exists here.
            frontmatter_str = content.split("---", 2)[1]
            if not frontmatter_str.strip():
                print(f"Warning: Empty frontmatter in {filepath}")
                continue

            frontmatter = yaml.safe_load(frontmatter_str)
            # A scalar or list here is not a usable post header; skip it
            # rather than failing on the key lookup.
            if not isinstance(frontmatter, dict):
                continue

            video_id = frontmatter.get("VideoId")
            if video_id is not None:
                # YAML may resolve an unquoted ID to a non-string (e.g. an
                # all-digit ID becomes an int). Callers compare against
                # string IDs, so normalise to str to avoid duplicate posts.
                existing_ids.add(str(video_id))
        except yaml.YAMLError as e:
            print(f"Warning: Could not parse YAML frontmatter for {filepath}: {e}")
        except Exception as e:
            # Deliberately best-effort: one unreadable post must not abort the scan.
            print(f"Warning: Could not process file {filepath}: {e}")
    return existing_ids
84+
def _first_description_line(description):
    """Return the first line of *description*, or a placeholder when it has no text."""
    lines = description.strip().splitlines() if description else []
    return lines[0] if lines else "No description available."


def _list_upload_ids(youtube, uploads_playlist_id):
    """Return every video ID in the uploads playlist, following pagination."""
    video_ids = []
    next_page_token = None
    while True:
        response = youtube.playlistItems().list(
            playlistId=uploads_playlist_id,
            part="contentDetails",
            maxResults=50,
            pageToken=next_page_token,
        ).execute()
        video_ids.extend(
            item["contentDetails"]["videoId"] for item in response.get("items", [])
        )
        next_page_token = response.get("nextPageToken")
        if not next_page_token:
            return video_ids


def fetch_channel_videos(youtube, channel_id):
    """Fetch details for every video in a channel's uploads playlist.

    Args:
        youtube: YouTube API service object (see ``get_youtube_service``).
        channel_id: ID of the channel whose uploads are listed.

    Returns:
        A list of dicts with the keys ``id``, ``title``, ``description``
        (first line only), ``full_description``, ``published_at`` and
        ``duration``. An empty list is returned when the channel or its
        videos cannot be found, or when an API call fails; the reason is
        printed in each case.
    """
    try:
        channel_response = youtube.channels().list(
            part="contentDetails",
            id=channel_id,
        ).execute()

        if not channel_response.get("items"):
            print(f"No channel found for ID: {channel_id}")
            return []

        uploads_playlist_id = (
            channel_response["items"][0]["contentDetails"]["relatedPlaylists"]["uploads"]
        )

        video_ids = _list_upload_ids(youtube, uploads_playlist_id)
        if not video_ids:
            print(f"No videos found in uploads playlist for channel {channel_id}")
            return []

        videos_data = []
        # Request details in batches of 50 IDs, the same size as a playlist page.
        for start in range(0, len(video_ids), 50):
            videos_response = youtube.videos().list(
                part="snippet,contentDetails",
                id=",".join(video_ids[start:start + 50]),
            ).execute()

            for video in videos_response.get("items", []):
                snippet = video.get("snippet", {})
                description = snippet.get("description", "No description available.")
                videos_data.append({
                    "id": video["id"],
                    "title": snippet.get("title", "Untitled Video"),
                    # A whitespace-only description has no first line; the
                    # helper returns a placeholder instead of raising, which
                    # would otherwise discard every video fetched so far.
                    "description": _first_description_line(description),
                    "full_description": description,
                    "published_at": snippet.get("publishedAt"),  # e.g. "2017-08-29T07:00:00Z"
                    "duration": video.get("contentDetails", {}).get("duration"),
                })

        return videos_data

    except HttpError as e:
        print(f"An HTTP error {e.resp.status} occurred: {e.content.decode()}")
        return []
    except Exception as e:
        print(f"An unexpected error occurred during YouTube API call: {e}")
        return []
150+
def create_jekyll_post(video_info, posts_dir, category):
    """Write a Jekyll post (front matter only) for one video.

    The file is named ``<YYYY-MM-DD>-<slug>.md``, where the date is taken
    from the ``published_at`` timestamp as given and the slug comes from
    ``sanitize_filename``. If that name is taken, ``-1``, ``-2``, ... is
    appended until a free name is found.

    Args:
        video_info: Dict with the keys ``id``, ``title``, ``description``,
            ``published_at`` and ``duration``.
        posts_dir: Directory the post is written to.
        category: Value stored under ``category`` in the front matter.

    Returns:
        ``True`` when the post was written; ``False`` when the video has no
        usable ``published_at`` value or the file could not be written.
    """
    video_id = video_info["id"]
    title = video_info["title"]
    published_at = video_info["published_at"]

    if not published_at:
        print(f"Warning: Video '{title}' (ID: {video_id}) has no published_at date. Skipping.")
        return False

    try:
        # datetime.fromisoformat() only accepts a trailing "Z" from Python
        # 3.11 on, so spell the UTC offset out explicitly.
        published_dt_utc = datetime.fromisoformat(published_at.replace("Z", "+00:00"))
    except ValueError as e:
        # A malformed timestamp affects this video only; report it like the
        # other failure paths instead of aborting the whole run.
        print(
            f"Warning: Video '{title}' (ID: {video_id}) has an unparseable "
            f"published_at date '{published_at}': {e}. Skipping."
        )
        return False
    post_date_str = published_dt_utc.strftime("%Y-%m-%d")

    slug = sanitize_filename(title)
    filepath = os.path.join(posts_dir, f"{post_date_str}-{slug}.md")

    # Secondary safeguard against clobbering another post: different videos
    # can share a publish date and produce the same slug.
    counter = 1
    while os.path.exists(filepath):
        print(f"Warning: File '{filepath}' already exists. Appending counter.")
        filepath = os.path.join(posts_dir, f"{post_date_str}-{slug}-{counter}.md")
        counter += 1

    frontmatter = {
        "layout": "post",
        "title": title,
        "description": video_info["description"],  # First line only
        "date": post_date_str,
        "category": category,
        "duration": format_duration(video_info["duration"]) if video_info["duration"] else "00:00",
        "VideoId": video_id,
    }

    try:
        with open(filepath, "w", encoding="utf-8") as f:
            f.write("---\n")
            yaml.dump(frontmatter, f, allow_unicode=True, sort_keys=False, default_flow_style=False)
            f.write("---\n\n")
        print(f"Created post: {filepath}")
        return True
    except Exception as e:
        print(f"Error writing file {filepath}: {e}")
        return False
199+
def main():
    """Create a Jekyll post for each channel video that has none yet.

    Stops early, with a printed message, when the API key or channel ID is
    missing, when the API client cannot be built, or when no videos come
    back. Otherwise videos are handled oldest first, those whose ID already
    appears in an existing post are skipped, and a summary is printed.
    """
    if not API_KEY:
        print("CRITICAL: YOUTUBE_API_KEY is not set in environment variables. Exiting.")
        return

    if not CHANNEL_ID:
        print("CRITICAL: YOUTUBE_CHANNEL_ID is not set. Please configure it. Exiting.")
        return

    print(f"Fetching videos for channel ID: {CHANNEL_ID}")
    youtube = get_youtube_service()
    if not youtube:
        print("Failed to initialize YouTube service. Exiting.")
        return

    videos = fetch_channel_videos(youtube, CHANNEL_ID)
    if not videos:
        print("No videos found or an error occurred while fetching.")
        # Say so explicitly when there is nothing on disk either, so the
        # run does not look like a silent no-op.
        if not (os.path.exists(POSTS_DIR) and os.listdir(POSTS_DIR)):
            print(f"The '{POSTS_DIR}' directory is empty or does not exist. No posts to compare against.")
        return

    known_ids = get_existing_video_ids(POSTS_DIR)
    print(f"Found {len(known_ids)} existing video posts by VideoId.")

    # Oldest first, so the log reads chronologically; videos without a
    # publish timestamp sort to the front.
    created = 0
    for video_info in sorted(videos, key=lambda v: v["published_at"] or ""):
        if video_info["id"] in known_ids:
            print(f"Skipping existing video: '{video_info['title']}' (ID: {video_info['id']})")
            continue
        print(f"Processing new video: '{video_info['title']}' (ID: {video_info['id']})")
        if create_jekyll_post(video_info, POSTS_DIR, CATEGORY):
            created += 1

    print("\n--- Summary ---")
    print(f"Total videos fetched from YouTube API: {len(videos)}")
    print(f"New Jekyll posts created: {created}")
    if created > 0:
        print(f"New posts are in the '{POSTS_DIR}' directory.")
    else:
        print("No new videos to post or all fetched videos already have corresponding posts.")
248+
if __name__ == "__main__":
    # Make sure the posts directory exists before main() lists or writes to it.
    if not os.path.exists(POSTS_DIR):
        print(f"Creating directory: {POSTS_DIR}")
        try:
            # exist_ok covers the directory appearing between the check and the call.
            os.makedirs(POSTS_DIR, exist_ok=True)
        except OSError as e:
            print(f"FATAL: Could not create posts directory '{POSTS_DIR}': {e}. Exiting.")
            # exit() is a site-module helper for interactive sessions and is
            # not guaranteed to exist; SystemExit gives the same exit status.
            raise SystemExit(1)
    main()
0 commit comments