
Commit 8dc95a6

fix(workflows): use temp file for RSS parsing (#11)
- Use temp file instead of piping to avoid broken pipe errors
- Pass target_date as command line argument to Python (see the sketch below)
- Remove conflicting env block that could override shell variable
1 parent 5765956 commit 8dc95a6
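Why the argv change works: with `python3 -`, the interpreter reads the program itself from stdin (the heredoc), so stdin is no longer free to carry the RSS payload, and any arguments placed after `-` show up in `sys.argv`. The following is a minimal sketch of that calling convention only, using placeholder names and data rather than anything from this workflow:

# Hedged sketch of the `python3 - <args> << heredoc` pattern (placeholder values)
DATA_FILE=$(mktemp)                       # stand-in for RSS_FILE
printf '<rss><channel></channel></rss>' > "$DATA_FILE"

python3 - "2024-01-01" "$DATA_FILE" << 'PY'
import sys
# sys.argv[0] is "-"; the positional arguments follow it
target_date, data_file = sys.argv[1], sys.argv[2]
print(f"date={target_date} bytes={len(open(data_file, 'rb').read())}")
PY

rm -f "$DATA_FILE"                        # clean up, as the workflow now does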

File tree

1 file changed: +68 -72 lines changed


.github/workflows/detect-blog-post-from-rss.yml

Lines changed: 68 additions & 72 deletions
@@ -65,86 +65,84 @@ jobs:
           fi

           echo "Fetching RSS from: $RSS_URL"
-          RSS_CONTENT=$(curl -s "$RSS_URL")
+          RSS_FILE=$(mktemp)
+          curl -s "$RSS_URL" > "$RSS_FILE"

-          if [ -z "$RSS_CONTENT" ]; then
+          if [ ! -s "$RSS_FILE" ]; then
             echo "Failed to fetch RSS feed"
             echo "has_posts=false" >> $GITHUB_OUTPUT
             echo "post_count=0" >> $GITHUB_OUTPUT
+            rm -f "$RSS_FILE"
             exit 0
           fi

           # Parse RSS and find posts matching target date
-          # Extract items and filter by pubDate
-          POSTS_JSON=$(echo "$RSS_CONTENT" | python3 << 'PYTHON_SCRIPT'
-          import sys
-          import xml.etree.ElementTree as ET
-          import json
-          from email.utils import parsedate_to_datetime
-          import os
-
-          target_date = os.environ.get('TARGET_DATE', '')
-          rss_content = sys.stdin.read()
-
-          try:
-              root = ET.fromstring(rss_content)
-          except ET.ParseError as e:
-              print(json.dumps([]))
-              sys.exit(0)
-
-          posts = []
-          channel = root.find('channel')
-          if channel is None:
-              print(json.dumps([]))
-              sys.exit(0)
-
-          for item in channel.findall('item'):
-              pub_date_elem = item.find('pubDate')
-              if pub_date_elem is None:
-                  continue
-
-              pub_date_str = pub_date_elem.text
-              # Parse RFC 822 date format using email.utils (handles GMT correctly)
-              try:
-                  pub_date = parsedate_to_datetime(pub_date_str)
-              except (ValueError, TypeError):
-                  continue
-
-              post_date = pub_date.strftime('%Y-%m-%d')
-
-              if post_date != target_date:
-                  continue
-
-              title = item.find('title')
-              link = item.find('link')
-              description = item.find('description')
-              enclosure = item.find('enclosure')
-
-              # Get categories
-              categories = []
-              for cat in item.findall('category'):
-                  if cat.text:
-                      categories.append(cat.text)
-
-              # Convert categories to hashtags
-              hashtags = ' '.join(['#' + cat.replace(' ', '') for cat in categories])
-
-              post = {
-                  'title': title.text if title is not None else '',
-                  'url': link.text if link is not None else '',
-                  'description': description.text if description is not None else '',
-                  'categories': categories,
-                  'hashtags': hashtags,
-                  'image_url': enclosure.get('url') if enclosure is not None else '',
-                  'pub_date': post_date
-              }
-              posts.append(post)
-
-          print(json.dumps(posts))
-          PYTHON_SCRIPT
+          POSTS_JSON=$(python3 - "$TARGET_DATE" "$RSS_FILE" << 'PYTHON_SCRIPT'
+          import sys
+          import xml.etree.ElementTree as ET
+          import json
+          from email.utils import parsedate_to_datetime
+
+          target_date = sys.argv[1]
+          rss_file = sys.argv[2]
+
+          try:
+              tree = ET.parse(rss_file)
+              root = tree.getroot()
+          except ET.ParseError as e:
+              print(json.dumps([]))
+              sys.exit(0)
+
+          posts = []
+          channel = root.find('channel')
+          if channel is None:
+              print(json.dumps([]))
+              sys.exit(0)
+
+          for item in channel.findall('item'):
+              pub_date_elem = item.find('pubDate')
+              if pub_date_elem is None:
+                  continue
+
+              pub_date_str = pub_date_elem.text
+              try:
+                  pub_date = parsedate_to_datetime(pub_date_str)
+              except (ValueError, TypeError):
+                  continue
+
+              post_date = pub_date.strftime('%Y-%m-%d')
+
+              if post_date != target_date:
+                  continue
+
+              title = item.find('title')
+              link = item.find('link')
+              description = item.find('description')
+              enclosure = item.find('enclosure')
+
+              categories = []
+              for cat in item.findall('category'):
+                  if cat.text:
+                      categories.append(cat.text)
+
+              hashtags = ' '.join(['#' + cat.replace(' ', '') for cat in categories])
+
+              post = {
+                  'title': title.text if title is not None else '',
+                  'url': link.text if link is not None else '',
+                  'description': description.text if description is not None else '',
+                  'categories': categories,
+                  'hashtags': hashtags,
+                  'image_url': enclosure.get('url') if enclosure is not None else '',
+                  'pub_date': post_date
+              }
+              posts.append(post)
+
+          print(json.dumps(posts))
+          PYTHON_SCRIPT
           )

-          export TARGET_DATE="$TARGET_DATE"
+          rm -f "$RSS_FILE"

           POST_COUNT=$(echo "$POSTS_JSON" | python3 -c "import sys, json; print(len(json.load(sys.stdin)))")

@@ -186,5 +184,3 @@ jobs:
           echo "post_description=$POST_DESCRIPTION" >> $GITHUB_OUTPUT
           echo "post_hashtags=$POST_HASHTAGS" >> $GITHUB_OUTPUT
           echo "post_image_url=$POST_IMAGE_URL" >> $GITHUB_OUTPUT
-        env:
-          TARGET_DATE: ${{ inputs.target_date }}
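For a quick local check of the new flow, the changed step can be approximated outside GitHub Actions. Everything below is a hedged sketch: the feed URL and target date are placeholders, and the Python body mirrors only the temp-file plus date-filtering portion of the script added in this commit, not the full workflow output handling.

#!/usr/bin/env bash
# Hedged local smoke test; RSS_URL and TARGET_DATE are placeholders.
set -euo pipefail

RSS_URL="https://example.com/feed.xml"   # placeholder feed
TARGET_DATE="2024-01-01"                 # placeholder date (YYYY-MM-DD)

RSS_FILE=$(mktemp)
curl -s "$RSS_URL" > "$RSS_FILE"

# Same calling convention as the workflow: target date and file path via argv.
python3 - "$TARGET_DATE" "$RSS_FILE" << 'PYTHON_SCRIPT'
import json
import sys
import xml.etree.ElementTree as ET
from email.utils import parsedate_to_datetime

target_date, rss_file = sys.argv[1], sys.argv[2]
titles = []
try:
    channel = ET.parse(rss_file).getroot().find('channel')
except ET.ParseError:
    channel = None
if channel is not None:
    for item in channel.findall('item'):
        pub = item.find('pubDate')
        if pub is None or pub.text is None:
            continue
        try:
            post_date = parsedate_to_datetime(pub.text).strftime('%Y-%m-%d')
        except (ValueError, TypeError):
            continue
        if post_date != target_date:
            continue
        title = item.find('title')
        titles.append(title.text if title is not None else '')
print(json.dumps(titles))
PYTHON_SCRIPT

rm -f "$RSS_FILE"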
