Skip to content

Commit ac21ce5

Browse files
authored
Update integration-doc-generator.yml
1 parent b160519 commit ac21ce5

File tree

1 file changed

+182
-99
lines changed

1 file changed

+182
-99
lines changed

.github/workflows/integration-doc-generator.yml

Lines changed: 182 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@ on:
1313
- 'true'
1414
- 'false'
1515

16+
# Run multiple times per day
17+
schedule:
18+
- cron: "0 */8 * * *" # Run every 8 hours (3 times per day)
19+
1620
# Event-based trigger - when changes are pushed to the integration-resources repo
1721
repository_dispatch:
1822
types: [integration_resources_updated]
@@ -26,137 +30,172 @@ jobs:
2630
uses: actions/checkout@v4
2731
with:
2832
token: ${{ secrets.REPO_ACCESS_TOKEN }} # Use a PAT with access to both repos
29-
fetch-depth: 0 # Fetch all history to check last commit
33+
34+
- name: Create exclusion list for new files
35+
run: |
36+
# Create list of SaaS integrations to exclude from new file detection
37+
cat > saas_exclusions.txt << EOF
38+
Asana_uqi_config.json
39+
AWS_Lambda_uqi_config.json
40+
Google_Docs_uqi_config.json
41+
Google_Sheets_uqi_config.json
42+
HubSpot_uqi_config.json
43+
Jira_uqi_config.json
44+
Salesforce_uqi_config.json
45+
Slack_uqi_config.json
46+
Twilio_uqi_config.json
47+
Zendesk_uqi_config.json
48+
EOF
3049
3150
- name: Ensure scripts directory exists
3251
run: |
3352
mkdir -p scripts
3453
if [ ! -f scripts/processed_files.txt ]; then
3554
touch scripts/processed_files.txt
3655
fi
37-
if [ ! -f scripts/last_commit_sha.txt ]; then
38-
touch scripts/last_commit_sha.txt
39-
fi
40-
41-
- name: Get last processed commit
42-
id: last_commit
43-
run: |
44-
if [ -s scripts/last_commit_sha.txt ]; then
45-
LAST_COMMIT=$(cat scripts/last_commit_sha.txt)
46-
echo "last_commit=$LAST_COMMIT" >> $GITHUB_ENV
47-
echo "Last processed commit: $LAST_COMMIT"
48-
else
49-
echo "No previous commit found, will process all files"
50-
echo "last_commit=" >> $GITHUB_ENV
56+
if [ ! -f scripts/file_hashes.json ]; then
57+
echo "{}" > scripts/file_hashes.json
5158
fi
5259
53-
- name: Fetch file list and metadata from integration-resources
60+
- name: Fetch file list with retry logic
61+
id: fetch_files
5462
run: |
55-
# Get the latest commit SHA from integration-resources repo
56-
LATEST_COMMIT=$(curl -s -H "Authorization: Bearer ${{ secrets.REPO_ACCESS_TOKEN }}" \
57-
-H "Accept: application/vnd.github+json" \
58-
https://api.github.com/repos/appsmithorg/integration-resources/commits/main | \
59-
jq -r '.sha' )
63+
MAX_RETRIES=5
64+
RETRY_COUNT=0
65+
SUCCESS=false
6066
61-
echo "Latest commit in integration-resources: $LATEST_COMMIT"
62-
echo "latest_commit=$LATEST_COMMIT" >> $GITHUB_ENV
67+
while [ $RETRY_COUNT -lt $MAX_RETRIES ] && [ "$SUCCESS" != "true" ]; do
68+
echo "Attempt $(($RETRY_COUNT + 1)) to fetch files from integration-resources..."
69+
70+
# Add a small delay between retries
71+
if [ $RETRY_COUNT -gt 0 ]; then
72+
SLEEP_TIME=$(( RETRY_COUNT * 5 ))
73+
echo "Waiting $SLEEP_TIME seconds before retry..."
74+
sleep $SLEEP_TIME
75+
fi
76+
77+
# Fetch file list with timeout
78+
curl -s --max-time 30 -H "Authorization: Bearer ${{ secrets.REPO_ACCESS_TOKEN }}" \
79+
-H "Accept: application/vnd.github+json" \
80+
https://api.github.com/repos/appsmithorg/integration-resources/contents/Generic%20UQI%20Creation/uqi_configs \
81+
-o response.json
82+
83+
# Check if we got a valid response
84+
if jq -e 'type == "array"' response.json > /dev/null 2>&1; then
85+
echo "✅ Successfully fetched file list"
86+
SUCCESS=true
87+
else
88+
echo "❌ Failed to get valid file list. Response:"
89+
cat response.json
90+
RETRY_COUNT=$((RETRY_COUNT + 1 ))
91+
fi
92+
done
6393
64-
# Get all files in the repo
65-
curl -s -H "Authorization: Bearer ${{ secrets.REPO_ACCESS_TOKEN }}" \
66-
-H "Accept: application/vnd.github+json" \
67-
https://api.github.com/repos/appsmithorg/integration-resources/contents/Generic%20UQI%20Creation/uqi_configs \
68-
-o response.json
69-
70-
# Validate it's an array (not an error message )
71-
if ! jq 'type == "array"' response.json | grep -q true; then
72-
echo "❌ GitHub API did not return a file list. Possible error:"
73-
cat response.json
74-
exit 1
94+
if [ "$SUCCESS" != "true" ]; then
95+
echo "❌ Failed to fetch file list after $MAX_RETRIES attempts"
96+
echo "files_found=false" >> $GITHUB_ENV
97+
exit 0 # Exit gracefully to allow workflow to continue
7598
fi
76-
77-
# Extract file names
99+
100+
# Extract file names and their SHA hashes
101+
jq -r '.[] | select(.type=="file") | [.name, .sha] | @tsv' response.json > latest_files_with_sha.txt
78102
jq -r '.[] | select(.type=="file") | .name' response.json > latest_files.txt
103+
104+
echo "files_found=true" >> $GITHUB_ENV
79105
80-
- name: Identify changed files since last commit
106+
- name: Identify new and modified files
81107
id: detect_changes
108+
if: env.files_found == 'true'
82109
run: |
110+
# Load previous file hashes
111+
PREV_HASHES=$(cat scripts/file_hashes.json)
112+
83113
# Force check all files if requested
84114
if [ "${{ github.event.inputs.force_check_all }}" == "true" ]; then
85115
echo "🔄 Force checking all files as requested"
86116
cat latest_files.txt > files_to_process.txt
87-
echo "files_found=true" >> $GITHUB_ENV
88-
elif [ -z "${{ env.last_commit }}" ]; then
89-
# No previous commit, process all files
90-
cat latest_files.txt > files_to_process.txt
91-
echo "files_found=true" >> $GITHUB_ENV
92117
else
93-
# Get files changed since last commit
94-
echo "🔍 Finding files changed since commit ${{ env.last_commit }}"
95-
96-
# Get list of files changed between commits
97-
CHANGED_FILES=$(curl -s -H "Authorization: Bearer ${{ secrets.REPO_ACCESS_TOKEN }}" \
98-
-H "Accept: application/vnd.github+json" \
99-
"https://api.github.com/repos/appsmithorg/integration-resources/compare/${{ env.last_commit }}...${{ env.latest_commit }}" | \
100-
jq -r '.files[] | select(.filename | startswith("Generic UQI Creation/uqi_configs/" )) | .filename | split("/") | last' | \
101-
grep -v "^$")
102-
103-
if [ -n "$CHANGED_FILES" ]; then
104-
echo "$CHANGED_FILES" > files_to_process.txt
105-
echo "🆕 Found files changed since last commit:"
106-
cat files_to_process.txt
107-
echo "files_found=true" >> $GITHUB_ENV
108-
else
109-
echo "✅ No files changed since last commit."
110-
echo "files_found=false" >> $GITHUB_ENV
111-
fi
112-
fi
113-
114-
# Check for existing documentation
115-
if [ "${{ env.files_found }}" == "true" ]; then
116-
echo "🔍 Checking for existing documentation..."
117-
FILTERED_FILES=""
118-
while IFS= read -r FILE_NAME; do
119-
INTEGRATION=$(echo "$FILE_NAME" | sed 's/_uqi_config\.json//' | tr '[:upper:]' '[:lower:]')
120-
DOC_PATH="website/docs/connect-data/reference/${INTEGRATION}.md"
121-
122-
if [ -f "$DOC_PATH" ]; then
123-
echo "🔄 Documentation exists for $INTEGRATION, will update"
118+
# Find new files (not in processed_files.txt) - excluding SaaS integrations; -F -x makes each exclusion an exact, literal file name instead of an unanchored regex
119+
NEW_FILES=$(comm -23 <(sort latest_files.txt) <(sort scripts/processed_files.txt) | grep -v -x -F -f saas_exclusions.txt || true)
120+
121+
# Check for modified files (SHA changed) - including SaaS integrations
122+
MODIFIED_FILES=""
123+
while IFS=$'\t' read -r FILE_NAME FILE_SHA; do
124+
PREV_SHA=$(echo "$PREV_HASHES" | jq -r --arg file "$FILE_NAME" '.[$file] // ""')
125+
if [ -n "$PREV_SHA" ] && [ "$PREV_SHA" != "$FILE_SHA" ] && grep -Fxq -- "$FILE_NAME" scripts/processed_files.txt; then  # -F -x: whole-line literal match, so "." in the name is not a regex wildcard
126+
echo "🔄 File modified: $FILE_NAME (SHA changed)"
127+
MODIFIED_FILES="$MODIFIED_FILES$FILE_NAME"$'\n'
124128
fi
125-
126-
FILTERED_FILES="${FILTERED_FILES}${FILE_NAME}"$'\n'
127-
done < files_to_process.txt
129+
done < latest_files_with_sha.txt
128130
129-
echo "$FILTERED_FILES" | grep -v "^$" > files_to_process.txt
131+
# Combine new and modified files; "|| true" because grep exits 1 when every line is filtered out, which would abort the step under "bash -e" whenever nothing changed
132+
{ echo "$NEW_FILES"; echo "$MODIFIED_FILES"; } | grep -v "^$" > files_to_process.txt || true
133+
fi
134+
135+
# Check if we have files to process
136+
if [ -s files_to_process.txt ]; then
137+
echo "🆕 Found files to process:"
138+
cat files_to_process.txt
139+
echo "changes_found=true" >> $GITHUB_ENV
140+
else
141+
echo "✅ No new or modified files to process."
142+
echo "changes_found=false" >> $GITHUB_ENV
130143
fi
131144
132145
# Count files to process
133146
FILE_COUNT=$(wc -l < files_to_process.txt || echo "0")
134147
echo "file_count=$FILE_COUNT" >> $GITHUB_ENV
135148
136149
- name: Exit if no files to process
137-
if: env.files_found != 'true' || env.file_count == '0'
150+
if: env.files_found != 'true' || env.changes_found != 'true'
138151
run: |
139-
echo "No files to process. Exiting."
152+
echo "No new integration updates found. Workflow completed successfully."
140153
exit 0
141154
142155
- name: Process files
156+
if: env.changes_found == 'true'
143157
run: |
144158
# Create a directory for generated docs
145159
mkdir -p generated_docs
146160
161+
# Update file hashes JSON for tracking changes
162+
HASHES_JSON=$(cat scripts/file_hashes.json)
163+
147164
# Process each file
148165
while IFS= read -r FILE_NAME; do
149166
echo "⏳ Processing: $FILE_NAME"
150167
151-
# Download the file
152-
FILE_URL="https://raw.githubusercontent.com/appsmithorg/integration-resources/main/Generic%20UQI%20Creation/uqi_configs/$FILE_NAME"
153-
curl -sSL "$FILE_URL" -o "input_file.json"
168+
# Download the file with retry logic
169+
MAX_RETRIES=3
170+
RETRY_COUNT=0
171+
DOWNLOAD_SUCCESS=false
172+
173+
while [ $RETRY_COUNT -lt $MAX_RETRIES ] && [ "$DOWNLOAD_SUCCESS" != "true" ]; do
174+
FILE_URL="https://raw.githubusercontent.com/appsmithorg/integration-resources/main/Generic%20UQI%20Creation/uqi_configs/$FILE_NAME"
175+
176+
if curl -fsSL --max-time 30 "$FILE_URL" -o "input_file.json" && [ -s "input_file.json" ]; then  # -f: an HTTP 404/5xx counts as a failed attempt instead of saving the error page as the config
177+
DOWNLOAD_SUCCESS=true
178+
else
179+
echo "⚠️ Failed to download $FILE_NAME, attempt $(($RETRY_COUNT + 1 ))"
180+
RETRY_COUNT=$((RETRY_COUNT + 1))
181+
sleep 3
182+
fi
183+
done
184+
185+
if [ "$DOWNLOAD_SUCCESS" != "true" ]; then
186+
echo "❌ Failed to download $FILE_NAME after $MAX_RETRIES attempts, skipping"
187+
continue
188+
fi
154189
155-
# Process with OpenAI API (using completion API, not chat )
190+
# Update hash in our tracking JSON (SHA looked up by exact match on the name column). NOTE(review): recorded before the OpenAI steps succeed, so a file skipped later still gets its new SHA saved - confirm intended
191+
FILE_SHA=$(awk -F'\t' -v name="$FILE_NAME" '$1 == name { print $2; exit }' latest_files_with_sha.txt)
192+
HASHES_JSON=$(echo "$HASHES_JSON" | jq --arg file "$FILE_NAME" --arg sha "$FILE_SHA" '.[$file] = $sha')
193+
194+
# Process with OpenAI API (using completion API, not chat)
156195
echo "🧠 Extracting information with OpenAI API..."
157196
158197
# Extract information using OpenAI API
159-
SYSTEM_PROMPT=$(cat .github/prompts/extract_prompt.txt || echo "Extract the key information from this integration configuration file.")
198+
SYSTEM_PROMPT=$(cat .github/prompts/extract_prompt.txt 2>/dev/null || echo "Extract the key information from this integration configuration file.")
160199
USER_CONTENT=$(cat input_file.json)
161200
162201
# Use OpenAI Completion API (not Chat API)
@@ -167,15 +206,37 @@ jobs:
167206
temperature: 0
168207
}')
169208
170-
curl -s https://api.openai.com/v1/completions \
171-
-H "Authorization: Bearer ${{ secrets.OPENAI_API_KEY }}" \
172-
-H "Content-Type: application/json" \
173-
-d "$PAYLOAD" | jq -r '.choices[0].text' > "extracted_info.md"
209+
# Call OpenAI API with retry logic
210+
MAX_RETRIES=3
211+
RETRY_COUNT=0
212+
API_SUCCESS=false
213+
214+
while [ $RETRY_COUNT -lt $MAX_RETRIES ] && [ "$API_SUCCESS" != "true" ]; do
215+
RESPONSE=$(curl -s --max-time 60 https://api.openai.com/v1/completions \
216+
-H "Authorization: Bearer ${{ secrets.OPENAI_API_KEY }}" \
217+
-H "Content-Type: application/json" \
218+
-d "$PAYLOAD" )
219+
220+
if echo "$RESPONSE" | jq -e '.choices[0].text' > /dev/null 2>&1; then
221+
echo "$RESPONSE" | jq -r '.choices[0].text' > "extracted_info.md"
222+
API_SUCCESS=true
223+
else
224+
echo "⚠️ OpenAI API error, attempt $(($RETRY_COUNT + 1))"
225+
echo "$RESPONSE"
226+
RETRY_COUNT=$((RETRY_COUNT + 1))
227+
sleep 5
228+
fi
229+
done
230+
231+
if [ "$API_SUCCESS" != "true" ]; then
232+
echo "❌ Failed to extract information after $MAX_RETRIES attempts, skipping"
233+
continue
234+
fi
174235
175236
# Generate documentation
176237
echo "📝 Generating documentation..."
177238
178-
SYSTEM_PROMPT=$(cat .github/prompts/generate_prompt.txt || echo "Generate comprehensive markdown documentation based on the extracted information." )
239+
SYSTEM_PROMPT=$(cat .github/prompts/generate_prompt.txt 2>/dev/null || echo "Generate comprehensive markdown documentation based on the extracted information.")
179240
EXTRACTED_CONTENT=$(cat extracted_info.md)
180241
181242
# Use OpenAI Completion API again
@@ -186,13 +247,35 @@ jobs:
186247
temperature: 0.3
187248
}')
188249
189-
curl -s https://api.openai.com/v1/completions \
190-
-H "Authorization: Bearer ${{ secrets.OPENAI_API_KEY }}" \
191-
-H "Content-Type: application/json" \
192-
-d "$PAYLOAD" | jq -r '.choices[0].text' > "generated_doc.md"
250+
# Call OpenAI API with retry logic
251+
MAX_RETRIES=3
252+
RETRY_COUNT=0
253+
API_SUCCESS=false
254+
255+
while [ $RETRY_COUNT -lt $MAX_RETRIES ] && [ "$API_SUCCESS" != "true" ]; do
256+
RESPONSE=$(curl -s --max-time 60 https://api.openai.com/v1/completions \
257+
-H "Authorization: Bearer ${{ secrets.OPENAI_API_KEY }}" \
258+
-H "Content-Type: application/json" \
259+
-d "$PAYLOAD" )
260+
261+
if echo "$RESPONSE" | jq -e '.choices[0].text' > /dev/null 2>&1; then
262+
echo "$RESPONSE" | jq -r '.choices[0].text' > "generated_doc.md"
263+
API_SUCCESS=true
264+
else
265+
echo "⚠️ OpenAI API error, attempt $(($RETRY_COUNT + 1))"
266+
echo "$RESPONSE"
267+
RETRY_COUNT=$((RETRY_COUNT + 1))
268+
sleep 5
269+
fi
270+
done
271+
272+
if [ "$API_SUCCESS" != "true" ]; then
273+
echo "❌ Failed to generate documentation after $MAX_RETRIES attempts, skipping"
274+
continue
275+
fi
193276
194277
# Prepare final path
195-
INTEGRATION=$(echo "$FILE_NAME" | sed 's/_uqi_config\.json//' | tr '[:upper:]' '[:lower:]' )
278+
INTEGRATION=$(echo "$FILE_NAME" | sed 's/_uqi_config\.json//' | tr '[:upper:]' '[:lower:]')
196279
FINAL_PATH="website/docs/connect-data/reference/${INTEGRATION}.md"
197280
mkdir -p "$(dirname "$FINAL_PATH")"
198281
@@ -210,11 +293,11 @@ jobs:
210293
echo "✅ Completed processing: $FILE_NAME"
211294
done < files_to_process.txt
212295
213-
# Save the latest commit SHA for next run
214-
echo "${{ env.latest_commit }}" > scripts/last_commit_sha.txt
296+
# Save updated hashes
297+
echo "$HASHES_JSON" > scripts/file_hashes.json
215298
216299
- name: Commit and open PR
217-
if: env.files_found == 'true' && env.file_count != '0'
300+
if: env.changes_found == 'true'
218301
uses: peter-evans/create-pull-request@v5
219302
with:
220303
token: ${{ secrets.REPO_ACCESS_TOKEN }}
@@ -225,8 +308,8 @@ jobs:
225308
add-paths: |
226309
website/docs/connect-data/reference/
227310
scripts/processed_files.txt
228-
scripts/last_commit_sha.txt
311+
scripts/file_hashes.json
229312
body: |
230313
This PR adds or updates integration reference documentation for **${{ env.file_count }}** integrations.
231314
232-
Generated from files changed since commit ${{ env.last_commit || 'initial' }} in the [integration-resources repository](https://github.com/appsmithorg/integration-resources/tree/main/Generic%20UQI%20Creation/uqi_configs ).
315+
Generated from the latest configuration files in the [integration-resources repository](https://github.com/appsmithorg/integration-resources/tree/main/Generic%20UQI%20Creation/uqi_configs ).

0 commit comments

Comments
 (0)