Skip to content

Commit e893706

Browse files
authored
feat(workflows): add RSS-based blog post detection (#9)
Create reusable workflow that detects new blog posts by parsing RSS feed instead of filesystem, enabling detection from live site.
1 parent 9b1457f commit e893706

File tree

1 file changed

+194
-0
lines changed

1 file changed

+194
-0
lines changed
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
# Reusable workflow: reports the blog posts an RSS feed lists for a given date.
name: Detect Blog Post From RSS

on:
  # Triggered only when another workflow calls this one.
  workflow_call:
    inputs:
      # Feed location; the only mandatory input.
      rss_url:
        description: 'URL of the RSS feed to check'
        required: true
        type: string
      # Optional date override; when omitted the job picks the date itself.
      target_date:
        description: 'Date to look for posts (YYYY-MM-DD format). Defaults to current Eastern date.'
        required: false
        type: string

    # Each output re-exports the same-named output of the `detect` job.
    outputs:
      # Results covering every matching post.
      has_posts:
        description: 'Whether posts were found for the target date'
        value: ${{ jobs.detect.outputs.has_posts }}
      posts_json:
        description: 'JSON array of post objects with all metadata'
        value: ${{ jobs.detect.outputs.posts_json }}
      post_count:
        description: 'Number of posts found'
        value: ${{ jobs.detect.outputs.post_count }}

      # Convenience fields describing the first matching post only.
      post_title:
        description: 'Title of the first post (for single-post workflows)'
        value: ${{ jobs.detect.outputs.post_title }}
      post_url:
        description: 'URL of the first post'
        value: ${{ jobs.detect.outputs.post_url }}
      post_description:
        description: 'Description of the first post'
        value: ${{ jobs.detect.outputs.post_description }}
      post_hashtags:
        description: 'Hashtags from the first post categories'
        value: ${{ jobs.detect.outputs.post_hashtags }}
      post_image_url:
        description: 'Image URL from the first post enclosure'
        value: ${{ jobs.detect.outputs.post_image_url }}
jobs:
  # Downloads the feed, keeps the items whose pubDate falls on TARGET_DATE, and
  # publishes them as step outputs (the full list plus the first post's fields).
  detect:
    runs-on: ubuntu-latest
    outputs:
      has_posts: ${{ steps.parse.outputs.has_posts }}
      posts_json: ${{ steps.parse.outputs.posts_json }}
      post_count: ${{ steps.parse.outputs.post_count }}
      post_title: ${{ steps.parse.outputs.post_title }}
      post_url: ${{ steps.parse.outputs.post_url }}
      post_description: ${{ steps.parse.outputs.post_description }}
      post_hashtags: ${{ steps.parse.outputs.post_hashtags }}
      post_image_url: ${{ steps.parse.outputs.post_image_url }}
    steps:
      - name: Detect posts from RSS feed
        id: parse
        # Inputs reach the script as environment variables instead of being
        # spliced into its text, so caller-supplied values are never parsed
        # as shell code.
        env:
          RSS_URL: ${{ inputs.rss_url }}
          TARGET_DATE: ${{ inputs.target_date }}
        run: |
          # Write one step output using the delimiter syntax, so a value that
          # contains newlines cannot truncate itself or forge other outputs.
          #   $1 - output name, $2 - output value
          set_output() {
            local delimiter
            delimiter=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
            {
              echo "$1<<$delimiter"
              printf '%s\n' "$2"
              echo "$delimiter"
            } >> "$GITHUB_OUTPUT"
          }

          # Outputs shared by every "nothing to publish" exit path.
          report_no_posts() {
            {
              echo "has_posts=false"
              echo "post_count=0"
              echo "posts_json=[]"
            } >> "$GITHUB_OUTPUT"
          }

          # Determine target date (Eastern time)
          if [ -n "$TARGET_DATE" ]; then
            echo "Using provided target date: $TARGET_DATE"
          else
            TARGET_DATE=$(TZ="America/New_York" date +"%Y-%m-%d")
            echo "Using current Eastern date: $TARGET_DATE"
          fi
          # Exported before the parser runs; it reads the date from its environment.
          export TARGET_DATE

          echo "Fetching RSS from: $RSS_URL"
          FEED_FILE=$(mktemp)
          trap 'rm -f "$FEED_FILE"' EXIT
          # -f turns HTTP errors into failures and -L follows redirects. The
          # call sits inside the `if` so a failed download reaches the branch
          # below instead of aborting the step.
          if ! curl -fsSL -o "$FEED_FILE" "$RSS_URL" || [ ! -s "$FEED_FILE" ]; then
            echo "Failed to fetch RSS feed"
            report_no_posts
            exit 0
          fi

          # Parse RSS and find posts matching target date.
          # The heredoc occupies python's stdin (it carries the script), so the
          # feed is handed over as a file path argument rather than piped in.
          POSTS_JSON=$(python3 - "$FEED_FILE" << 'PYTHON_SCRIPT'
          import json
          import os
          import sys
          import xml.etree.ElementTree as ET
          from email.utils import parsedate_to_datetime

          target_date = os.environ.get('TARGET_DATE', '')


          def text_of(elem):
              """Return the element's text, or '' if the element or its text is missing."""
              return (elem.text or '') if elem is not None else ''


          try:
              root = ET.parse(sys.argv[1]).getroot()
          except (ET.ParseError, OSError):
              print(json.dumps([]))
              sys.exit(0)

          channel = root.find('channel')
          if channel is None:
              print(json.dumps([]))
              sys.exit(0)

          posts = []
          for item in channel.findall('item'):
              # RFC 822 date, e.g. "Wed, 25 Dec 2024 12:00:00 GMT" or "... +0000".
              # Items with a missing or unparseable pubDate are skipped.
              try:
                  pub_date = parsedate_to_datetime(text_of(item.find('pubDate')).strip())
              except (TypeError, ValueError):
                  continue

              # The date is compared as written in the feed; it is not converted
              # to Eastern time.
              post_date = pub_date.strftime('%Y-%m-%d')
              if post_date != target_date:
                  continue

              categories = [cat.text for cat in item.findall('category') if cat.text]
              enclosure = item.find('enclosure')
              posts.append({
                  'title': text_of(item.find('title')),
                  'url': text_of(item.find('link')),
                  'description': text_of(item.find('description')),
                  'categories': categories,
                  # One hashtag per category, with spaces removed.
                  'hashtags': ' '.join('#' + cat.replace(' ', '') for cat in categories),
                  'image_url': (enclosure.get('url') or '') if enclosure is not None else '',
                  'pub_date': post_date,
              })

          print(json.dumps(posts))
          PYTHON_SCRIPT
          )

          POST_COUNT=$(printf '%s' "$POSTS_JSON" | python3 -c "import sys, json; print(len(json.load(sys.stdin)))")

          if [ "$POST_COUNT" -eq 0 ]; then
            echo "No posts found for $TARGET_DATE"
            report_no_posts
            exit 0
          fi

          echo "Found $POST_COUNT post(s) for $TARGET_DATE"
          echo "has_posts=true" >> "$GITHUB_OUTPUT"
          echo "post_count=$POST_COUNT" >> "$GITHUB_OUTPUT"
          set_output posts_json "$POSTS_JSON"

          # Extract first post details for single-post workflows.
          #   $1 - key to read from the first post object
          first_post_field() {
            printf '%s' "$POSTS_JSON" |
              python3 -c 'import sys, json; print(json.load(sys.stdin)[0].get(sys.argv[1]) or "")' "$1"
          }

          POST_TITLE=$(first_post_field title)
          POST_URL=$(first_post_field url)
          POST_DESCRIPTION=$(first_post_field description)
          POST_HASHTAGS=$(first_post_field hashtags)
          POST_IMAGE_URL=$(first_post_field image_url)

          echo "Title: $POST_TITLE"
          echo "URL: $POST_URL"
          echo "Description: $POST_DESCRIPTION"
          echo "Hashtags: $POST_HASHTAGS"
          echo "Image URL: $POST_IMAGE_URL"

          set_output post_title "$POST_TITLE"
          set_output post_url "$POST_URL"
          set_output post_description "$POST_DESCRIPTION"
          set_output post_hashtags "$POST_HASHTAGS"
          set_output post_image_url "$POST_IMAGE_URL"

0 commit comments

Comments
 (0)