Skip to content

Commit f8f547a

Browse files
Add automated PDF checker for new time standards
- Add check-for-new-pdf.js script to detect new OSI Time Standards PDFs
- Add GitHub Actions workflow to run daily checks at 9 AM PT
- Dynamically detects newer year ranges (2025-2026, etc.) and URL changes
- Update JSON structure: use page-based title, add sourceUrl and generatedOn fields
- Script compares against stored data and exits appropriately for workflow control
1 parent 8e8e105 commit f8f547a

File tree

3 files changed

+299
-1
lines changed

3 files changed

+299
-1
lines changed

.github/workflows/check-pdf.yml

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
name: Check for New OSI Time Standards PDF

on:
  # Cron schedules are evaluated in UTC: 17:00 UTC is 9 AM PST / 10 AM PDT.
  schedule:
    - cron: '0 17 * * *'
  # Allow manual trigger
  workflow_dispatch:

jobs:
  check-and-process:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Check for new PDF
        id: check_pdf
        run: |
          echo "Checking for changes to OSI Time Standards..."
          echo "This will detect:"
          echo "  - Newer year ranges (e.g., 2025-2026, 2026-2027, etc.)"
          echo "  - URL changes for the same year (data corrections)"
          # check-for-new-pdf.js exit codes:
          #   0 = change detected, 1 = no change, anything else = unexpected error.
          # Capture the code explicitly instead of using continue-on-error, so a
          # crash or network failure is not mistaken for "no changes".
          exit_code=0
          node check-for-new-pdf.js || exit_code=$?
          echo "exit_code=${exit_code}" >> "$GITHUB_OUTPUT"
          if [ "${exit_code}" -gt 1 ]; then
            echo "::error::check-for-new-pdf.js failed with exit code ${exit_code}"
            exit "${exit_code}"
          fi

      - name: Evaluate check result
        id: evaluate
        run: |
          if [ "${{ steps.check_pdf.outputs.exit_code }}" = "0" ]; then
            echo "changes_detected=true" >> "$GITHUB_OUTPUT"
            echo "Check succeeded - changes detected (newer year or updated URL)"
          else
            echo "changes_detected=false" >> "$GITHUB_OUTPUT"
            echo "Check completed - no changes detected"
          fi

      - name: Log final status
        run: |
          echo "=========================================="
          echo "Final Workflow Status"
          echo "=========================================="
          echo "Check outcome: ${{ steps.check_pdf.outcome }}"
          echo "Check exit code: ${{ steps.check_pdf.outputs.exit_code }}"
          echo "Changes detected: ${{ steps.evaluate.outputs.changes_detected }}"
          if [ "${{ steps.evaluate.outputs.changes_detected }}" = "true" ]; then
            echo "Next: Workflow will continue to subsequent steps"
          else
            echo "Next: Workflow will end (no further action needed)"
          fi
          echo "=========================================="

      # This step only runs if changes were detected
      - name: Process changes (placeholder)
        if: steps.evaluate.outputs.changes_detected == 'true'
        run: |
          echo "=========================================="
          echo "CHANGES DETECTED - Running next steps"
          echo "=========================================="
          echo "This is where you would add steps to:"
          echo "  - Download the new/updated PDF"
          echo "  - Parse the PDF data"
          echo "  - Update swim_time_standards.json with new data and metadata"
          echo "  - Commit changes"
          echo "  - Or trigger another workflow"
          echo "=========================================="

          # Example: Trigger another workflow or job
          # You can add more steps here or use workflow_dispatch to trigger another action

      - name: Workflow complete
        run: |
          echo "Workflow execution complete"
          if [ "${{ steps.evaluate.outputs.changes_detected }}" = "false" ]; then
            echo "Ending workflow - will check again on next scheduled run"
          fi

check-for-new-pdf.js

Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
#!/usr/bin/env node
2+
3+
/**
4+
* Check for new Oregon Swimming Time Standards PDF
5+
* Dynamically detects newer year ranges or URL changes by comparing
6+
* against the stored data in swim_time_standards.json
7+
*/
8+
9+
const https = require('https');
10+
const fs = require('fs');
11+
const path = require('path');
12+
13+
// Page that is fetched and scanned for OSI Time Standards links.
const TARGET_URL = 'https://www.oregonswimming.org/page/competition/time-standards';
14+
// Stored standards data (title, sourceUrl, generatedOn), located next to this script.
const JSON_FILE = path.join(__dirname, 'swim_time_standards.json');
15+
16+
/**
 * Fetch a page over HTTPS and resolve with its body decoded as UTF-8.
 *
 * Follows up to `redirectsLeft` HTTP 3xx redirects, rejects on any other
 * non-200 status, and aborts when the connection stays idle for too long.
 *
 * @param {string} url - Absolute https:// URL to request.
 * @param {number} [redirectsLeft=5] - Remaining redirects that may be followed.
 * @returns {Promise<string>} Resolves with the response body.
 */
function fetchPage(url, redirectsLeft = 5) {
  const timeoutMs = 30000;

  return new Promise((resolve, reject) => {
    const request = https.get(url, (res) => {
      const { statusCode, statusMessage, headers } = res;

      if (statusCode >= 300 && statusCode < 400 && headers.location) {
        // Drain the body so the socket is released.
        res.resume();
        if (redirectsLeft <= 0) {
          reject(new Error(`Too many redirects while fetching ${url}`));
          return;
        }
        let nextUrl;
        try {
          // Location may be relative; resolve it against the current URL.
          nextUrl = new URL(headers.location, url).href;
        } catch (err) {
          reject(new Error(`Invalid redirect location: ${headers.location}`));
          return;
        }
        resolve(fetchPage(nextUrl, redirectsLeft - 1));
        return;
      }

      if (statusCode !== 200) {
        // Drain the body so the socket is released.
        res.resume();
        reject(new Error(`HTTP ${statusCode}: ${statusMessage}`));
        return;
      }

      // Decode in the stream so multi-byte characters split across chunks
      // are reassembled correctly.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => {
        data += chunk;
      });
      res.on('end', () => {
        resolve(data);
      });
      res.on('error', reject);
    });

    // Without a timeout a stalled connection would hang the check forever.
    request.setTimeout(timeoutMs, () => {
      request.destroy(new Error(`Request timed out after ${timeoutMs} ms`));
    });
    request.on('error', reject);
  });
}
37+
38+
/**
 * Load the stored time-standards metadata from JSON_FILE.
 *
 * The year range is parsed out of the stored title
 * (e.g. "2024-2025 OSI Time Standards"); the year fields are null when the
 * title contains no "YYYY-YYYY" range.
 *
 * @returns {{title: string, yearRange: (string|null), startYear: (number|null),
 *            endYear: (number|null), sourceUrl: *, generatedOn: *}}
 * @throws {Error} When the file cannot be read or parsed, or when it has no
 *                 string "title" field.
 */
function loadCurrentData() {
  let json;
  try {
    json = JSON.parse(fs.readFileSync(JSON_FILE, 'utf8'));
  } catch (error) {
    throw new Error(`Failed to read ${JSON_FILE}: ${error.message}`, { cause: error });
  }

  // Report a malformed file explicitly instead of failing with an opaque
  // TypeError from calling .match on a missing title.
  if (json === null || typeof json !== 'object' || typeof json.title !== 'string') {
    throw new Error(`Failed to read ${JSON_FILE}: missing or non-string "title" field`);
  }

  const years = parseYearRange(json.title);

  return {
    title: json.title,
    yearRange: years ? years.yearRange : null,
    startYear: years ? years.startYear : null,
    endYear: years ? years.endYear : null,
    sourceUrl: json.sourceUrl,
    generatedOn: json.generatedOn
  };
}
59+
60+
/**
 * Extract the first "YYYY-YYYY" year range found in a string.
 *
 * @param {string} yearStr - Text that may contain a range such as "2024-2025".
 * @returns {{yearRange: string, startYear: number, endYear: number}|null}
 *          The parsed range, or null when the input is not a string or
 *          contains no range.
 */
function parseYearRange(yearStr) {
  if (typeof yearStr !== 'string') return null;

  const match = /(\d{4})-(\d{4})/.exec(yearStr);
  if (match === null) return null;

  const [yearRange, start, end] = match;
  return {
    yearRange,
    startYear: Number.parseInt(start, 10),
    endYear: Number.parseInt(end, 10)
  };
}
70+
71+
/**
 * Decide whether a candidate season is later than the current one.
 *
 * Start years are compared first; the end year only breaks a tie between
 * identical start years.
 *
 * @param {number} currentStart - Start year of the stored season.
 * @param {number} currentEnd - End year of the stored season.
 * @param {number} newStart - Start year of the candidate season.
 * @param {number} newEnd - End year of the candidate season.
 * @returns {boolean} True when the candidate season is newer.
 */
function isNewerYear(currentStart, currentEnd, newStart, newEnd) {
  const startsLater = newStart > currentStart;
  const sameStartEndsLater = newStart === currentStart && newEnd > currentEnd;
  return startsLater || sameStartEndsLater;
}
77+
78+
/**
 * Turn an href taken from the page into an absolute URL.
 *
 * Absolute http(s) URLs are returned as-is (trimmed). Everything else
 * (root-relative, path-relative, protocol-relative) is resolved against
 * `baseUrl` with standard URL resolution rather than string concatenation.
 *
 * @param {string} url - The href value to normalize.
 * @param {string} [baseUrl='https://www.oregonswimming.org/'] - Base used to
 *        resolve non-absolute hrefs.
 * @returns {string} The absolute URL, or the trimmed input when it cannot be
 *          resolved.
 */
function normalizeUrl(url, baseUrl = 'https://www.oregonswimming.org/') {
  const trimmed = url.trim();

  // Leave already-absolute URLs untouched so they still compare equal to the
  // stored sourceUrl.
  if (/^https?:\/\//i.test(trimmed)) return trimmed;

  try {
    return new URL(trimmed, baseUrl).href;
  } catch (error) {
    // Unparseable href: hand it back unchanged rather than abort the scan.
    return trimmed;
  }
}
83+
84+
/**
 * Collect every anchor in the page that looks like an OSI Time Standards link.
 *
 * A link qualifies when its text plus href mention "OSI" and "time standards",
 * do not mention "championship", and contain a "YYYY-YYYY" year range.
 */
function extractOsiLinks(html) {
  const links = [];
  // `<a\b` keeps <abbr>/<article> out; the lazy body also captures anchors
  // whose text is wrapped in nested tags such as <span> or <strong>.
  const anchorRegex = /<a\b[^>]*?\shref\s*=\s*["']([^"']*)["'][^>]*>([\s\S]*?)<\/a>/gi;
  let match;

  while ((match = anchorRegex.exec(html)) !== null) {
    const href = match[1];
    // Strip nested markup and collapse whitespace to get the visible text.
    const text = match[2].replace(/<[^>]*>/g, ' ').replace(/\s+/g, ' ').trim();
    const combined = `${text} ${href}`;

    // Check if it's the OSI Time Standards (not Championships)
    const isOSI = /osi.*time.*standards/i.test(combined) ||
      /time.*standards.*osi/i.test(combined);
    const isChampionships = /championship/i.test(combined);
    const yearInfo = parseYearRange(combined);

    if (isOSI && !isChampionships && yearInfo) {
      links.push({
        text,
        href: normalizeUrl(href),
        ...yearInfo
      });
    }
  }

  return links;
}

/**
 * Compare the links found on the page with the stored data and return the
 * change to report, or null when nothing changed.
 */
function findChange(osiLinks, currentData) {
  const { startYear, endYear, sourceUrl } = currentData;

  // Case 1: newer year detected. Report the newest candidate, not merely the
  // first one in page order.
  const newerLinks = osiLinks.filter((link) =>
    isNewerYear(startYear, endYear, link.startYear, link.endYear));
  if (newerLinks.length > 0) {
    const newest = newerLinks.reduce(
      (best, link) =>
        (isNewerYear(best.startYear, best.endYear, link.startYear, link.endYear) ? link : best),
      newerLinks[0]
    );
    return { type: 'NEWER_YEAR', details: newest };
  }

  // Case 2: same year but different URL (data correction). Only a change when
  // the stored URL is no longer among the same-year links; otherwise a second
  // same-year link on the page would trigger a false positive.
  const sameYearLinks = osiLinks.filter((link) =>
    link.startYear === startYear && link.endYear === endYear);
  const storedUrlStillListed = sameYearLinks.some((link) => link.href === sourceUrl);
  if (sameYearLinks.length > 0 && !storedUrlStillListed) {
    return { type: 'URL_CHANGED', details: sameYearLinks[0] };
  }

  return null;
}

/**
 * Scan the fetched page for OSI Time Standards links and report whether a
 * newer season or a changed URL is available compared with the stored data.
 * Progress and the verdict are logged to the console.
 *
 * @param {string} html - Raw HTML of the time-standards page.
 * @param {{title: string, yearRange: (string|null), startYear: (number|null),
 *          endYear: (number|null), sourceUrl: string}} currentData - Stored
 *        metadata, as returned by loadCurrentData().
 * @returns {{changed: boolean, type: ('NEWER_YEAR'|'URL_CHANGED'|null),
 *            details: (Object|null)}} `details` is the link that triggered
 *          the change (text, href, yearRange, startYear, endYear).
 */
function checkForNewPDF(html, currentData) {
  console.log('='.repeat(60));
  console.log('Oregon Swimming Time Standards PDF Check');
  console.log('='.repeat(60));
  console.log(`Target URL: ${TARGET_URL}`);
  console.log(`Current title: "${currentData.title}"`);
  console.log(`Current year range: ${currentData.yearRange}`);
  console.log(`Current URL: ${currentData.sourceUrl}`);
  console.log('-'.repeat(60));

  // Extract all OSI Time Standards links from the page
  const osiLinks = extractOsiLinks(html);
  console.log(`Found ${osiLinks.length} OSI Time Standards link(s) on page\n`);
  if (osiLinks.length === 0) {
    // "No change" with zero links more likely means the page layout changed.
    console.warn('WARNING: No OSI Time Standards links found - the page layout may have changed');
  }

  // Check for changes
  const change = findChange(osiLinks, currentData);

  console.log('Search Results:');
  console.log();

  if (change === null) {
    console.log(`✗ NO CHANGES: Current version ${currentData.yearRange} is up to date`);
    console.log(`  URL: ${currentData.sourceUrl}`);
    console.log();
    console.log('-'.repeat(60));
    console.log('Status: NO CHANGES - Already have latest version');
    console.log('Action: Ending workflow');
    console.log('='.repeat(60));
    return {
      changed: false,
      type: null,
      details: null
    };
  }

  const { type, details } = change;
  if (type === 'NEWER_YEAR') {
    console.log(`✓ NEWER YEAR DETECTED!`);
    console.log(`  Previous: ${currentData.yearRange}`);
    console.log(`  New: ${details.yearRange}`);
    console.log(`  Link text: "${details.text}"`);
    console.log(`  URL: ${details.href}`);
  } else if (type === 'URL_CHANGED') {
    console.log(`✓ URL CHANGED (possible data correction)`);
    console.log(`  Year range: ${currentData.yearRange} (unchanged)`);
    console.log(`  Previous URL: ${currentData.sourceUrl}`);
    console.log(`  New URL: ${details.href}`);
    console.log(`  Link text: "${details.text}"`);
  }
  console.log();
  console.log('-'.repeat(60));
  console.log('Status: SUCCESS - Change detected');
  console.log('Action: Continuing to next steps...');
  console.log('='.repeat(60));
  return {
    changed: true,
    type,
    details
  };
}
191+
192+
/**
 * Entry point: load the stored data, fetch the time-standards page, and
 * report the result through the process exit code.
 *
 * Exit codes:
 *   0 - a change was detected (newer year range or changed URL)
 *   1 - no change
 *   2 - unexpected error (unreadable JSON, network failure, ...)
 *
 * The code is set via process.exitCode rather than process.exit() so that
 * pending console output is flushed before the process ends.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    // Load current data from JSON file
    console.log(`Reading current data from ${path.basename(JSON_FILE)}...`);
    const currentData = loadCurrentData();
    console.log(`✓ Loaded: "${currentData.title}" (${currentData.yearRange})\n`);

    // Fetch the webpage
    console.log('Fetching webpage...');
    const html = await fetchPage(TARGET_URL);
    // html.length counts UTF-16 code units of the decoded string, not raw bytes.
    console.log(`✓ Successfully fetched ${html.length} bytes\n`);

    // Check for changes
    const result = checkForNewPDF(html, currentData);

    // Exit code 0 if changes detected (success, can continue)
    // Exit code 1 if no changes (will end workflow)
    process.exitCode = result.changed ? 0 : 1;
  } catch (error) {
    console.error('='.repeat(60));
    console.error('ERROR: Failed to check for new PDF');
    console.error('-'.repeat(60));
    console.error(error.message);
    console.error('='.repeat(60));
    process.exitCode = 2; // Exit code 2 for unexpected errors
  }
}
220+
221+
main();

swim_time_standards.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
{
2-
"title": "2024-2025 Oregon Swimming Time Standards",
2+
"title": "2024-2025 OSI Time Standards",
3+
"sourceUrl": "https://www.oregonswimming.org/wzorlsc/UserFiles/Image/QuickUpload/2024-2025-osi-time-standards-full_040221.pdf",
4+
"generatedOn": "2025-10-24",
35
"ageGroups": [
46
{
57
"age": "8 & Under",

0 commit comments

Comments
 (0)