|
# Reusable workflow: callers pass an RSS feed URL (and optionally a date) and
# read back the outputs declared below, all of which are produced by the
# `detect` job.
name: Detect Blog Post From RSS

on:
  workflow_call:
    inputs:
      # Feed to inspect.
      rss_url:
        description: "URL of the RSS feed to check"
        required: true
        type: string
      # Optional override for the date being searched.
      target_date:
        description: "Date to look for posts (YYYY-MM-DD format). Defaults to current Eastern date."
        required: false
        type: string
    outputs:
      # --- Aggregate results -------------------------------------------------
      has_posts:
        description: "Whether posts were found for the target date"
        value: ${{ jobs.detect.outputs.has_posts }}
      posts_json:
        description: "JSON array of post objects with all metadata"
        value: ${{ jobs.detect.outputs.posts_json }}
      post_count:
        description: "Number of posts found"
        value: ${{ jobs.detect.outputs.post_count }}
      # --- Convenience fields taken from the first matching post -------------
      post_title:
        description: "Title of the first post (for single-post workflows)"
        value: ${{ jobs.detect.outputs.post_title }}
      post_url:
        description: "URL of the first post"
        value: ${{ jobs.detect.outputs.post_url }}
      post_description:
        description: "Description of the first post"
        value: ${{ jobs.detect.outputs.post_description }}
      post_hashtags:
        description: "Hashtags from the first post categories"
        value: ${{ jobs.detect.outputs.post_hashtags }}
      post_image_url:
        description: "Image URL from the first post enclosure"
        value: ${{ jobs.detect.outputs.post_image_url }}
| 39 | + |
jobs:
  # Single job: download the feed, keep the items whose pubDate falls on the
  # target date, and publish them as step outputs for the workflow outputs.
  detect:
    runs-on: ubuntu-latest
    outputs:
      has_posts: ${{ steps.parse.outputs.has_posts }}
      posts_json: ${{ steps.parse.outputs.posts_json }}
      post_count: ${{ steps.parse.outputs.post_count }}
      post_title: ${{ steps.parse.outputs.post_title }}
      post_url: ${{ steps.parse.outputs.post_url }}
      post_description: ${{ steps.parse.outputs.post_description }}
      post_hashtags: ${{ steps.parse.outputs.post_hashtags }}
      post_image_url: ${{ steps.parse.outputs.post_image_url }}
    steps:
      - name: Detect posts from RSS feed
        id: parse
        # Caller-supplied inputs reach the script only as environment
        # variables. Expanding them inside the shell text would let a crafted
        # value inject commands into the script.
        env:
          RSS_URL: ${{ inputs.rss_url }}
          TARGET_DATE: ${{ inputs.target_date }}
        run: |
          # Determine target date (Eastern time) unless the caller supplied one.
          if [ -n "$TARGET_DATE" ]; then
            echo "Using provided target date: $TARGET_DATE"
          else
            TARGET_DATE=$(TZ="America/New_York" date +"%Y-%m-%d")
            echo "Using current Eastern date: $TARGET_DATE"
          fi
          # Export before the parser starts; it reads the date from its environment.
          export TARGET_DATE

          echo "Fetching RSS from: $RSS_URL"
          RSS_FILE=$(mktemp)
          trap 'rm -f "$RSS_FILE"' EXIT

          # -f makes HTTP errors a non-zero exit, -L follows redirects, and "--"
          # stops a URL that starts with "-" from being read as an option.
          # A failed or empty download is reported as "no posts" rather than
          # failing the job.
          if ! curl -fsSL -o "$RSS_FILE" -- "$RSS_URL" || [ ! -s "$RSS_FILE" ]; then
            echo "Failed to fetch RSS feed"
            {
              echo "has_posts=false"
              echo "post_count=0"
              echo "posts_json=[]"
            } >> "$GITHUB_OUTPUT"
            exit 0
          fi

          # The heredoc occupies Python's stdin (it is the program text), so the
          # feed is handed over as a file path argument instead of through a pipe.
          python3 - "$RSS_FILE" <<'PYTHON_SCRIPT'
          import json
          import os
          import secrets
          import sys
          import xml.etree.ElementTree as ET
          from email.utils import parsedate_to_datetime

          target_date = os.environ["TARGET_DATE"]
          output_path = os.environ["GITHUB_OUTPUT"]


          def set_output(name, value):
              """Append one step output; the delimiter form keeps multiline values intact."""
              delimiter = "ghadelimiter_" + secrets.token_hex(16)
              with open(output_path, "a", encoding="utf-8") as out:
                  out.write(f"{name}<<{delimiter}\n{value}\n{delimiter}\n")


          def text_of(element):
              """Return the element's text, or "" when the element is missing or empty."""
              if element is None or element.text is None:
                  return ""
              return element.text


          def report_no_posts():
              """Publish the empty result set and end the script successfully."""
              print(f"No posts found for {target_date}")
              set_output("has_posts", "false")
              set_output("post_count", "0")
              set_output("posts_json", "[]")
              sys.exit(0)


          try:
              root = ET.parse(sys.argv[1]).getroot()
          except ET.ParseError as err:
              # Content that is not well-formed XML is treated like an empty feed.
              print(f"Could not parse RSS feed: {err}", file=sys.stderr)
              report_no_posts()

          channel = root.find("channel")
          items = channel.findall("item") if channel is not None else []

          posts = []
          for item in items:
              raw_date = text_of(item.find("pubDate")).strip()
              if not raw_date:
                  continue
              try:
                  # Accepts RFC 822 dates with named zones ("GMT") and numeric
                  # offsets ("+0000") alike.
                  published = parsedate_to_datetime(raw_date)
              except (TypeError, ValueError):
                  continue

              # NOTE(review): this compares the calendar date exactly as written
              # in the feed; it is not converted to Eastern time. Confirm that
              # matches how the feed stamps its items.
              post_date = published.strftime("%Y-%m-%d")
              if post_date != target_date:
                  continue

              categories = [cat.text for cat in item.findall("category") if cat.text]
              enclosure = item.find("enclosure")
              posts.append({
                  "title": text_of(item.find("title")),
                  "url": text_of(item.find("link")),
                  "description": text_of(item.find("description")),
                  "categories": categories,
                  # Hashtags are the category names with spaces removed.
                  "hashtags": " ".join("#" + cat.replace(" ", "") for cat in categories),
                  "image_url": (enclosure.get("url") or "") if enclosure is not None else "",
                  "pub_date": post_date,
              })

          if not posts:
              report_no_posts()

          print(f"Found {len(posts)} post(s) for {target_date}")
          set_output("has_posts", "true")
          set_output("post_count", str(len(posts)))
          set_output("posts_json", json.dumps(posts))

          # First post details for single-post workflows.
          first = posts[0]
          print(f"Title: {first['title']}")
          print(f"URL: {first['url']}")
          print(f"Description: {first['description']}")
          print(f"Hashtags: {first['hashtags']}")
          print(f"Image URL: {first['image_url']}")

          set_output("post_title", first["title"])
          set_output("post_url", first["url"])
          set_output("post_description", first["description"])
          set_output("post_hashtags", first["hashtags"])
          set_output("post_image_url", first["image_url"])
          PYTHON_SCRIPT
0 commit comments