Skip to content

Commit e754dfb

Browse files
authored
Update test-doc-generator.yml
1 parent f95d431 commit e754dfb

File tree

1 file changed

+174
-41
lines changed

1 file changed

+174
-41
lines changed

.github/workflows/test-doc-generator.yml

Lines changed: 174 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -2,76 +2,164 @@ name: Test Doc Generator
22

33
on:
44
workflow_dispatch:
5+
inputs:
6+
target_branch:
7+
description: 'The branch in appsmith-docs to checkout and create PR against'
8+
required: true
9+
default: 'docs-staging' # Default to docs-staging
10+
type: string
511

612
jobs:
713
generate_docs:
814
runs-on: ubuntu-latest
915

1016
steps:
11-
- name: Checkout appsmith-docs
17+
- name: Checkout appsmith-docs target branch
1218
uses: actions/checkout@v4
1319
with:
1420
token: ${{ secrets.test_REPO_ACCESS_TOKEN }}
21+
ref: ${{ github.event.inputs.target_branch }} # Checkout the specified branch
22+
# Fetch depth 0 to get all history needed for base branch detection by create-pull-request
23+
fetch-depth: 0
1524

16-
- name: Create exclusion list
17-
run: echo > saas_exclusions.txt
25+
# No need for exclusion list step if not used
26+
# - name: Create exclusion list
27+
# run: echo > saas_exclusions.txt
1828

19-
- name: Ensure scripts directory exists
29+
- name: Ensure scripts directory and tracking files exist
2030
run: |
2131
mkdir -p scripts
32+
# Initialize tracking files if they don't exist in the checked-out branch
2233
[ -f scripts/processed_files.txt ] || touch scripts/processed_files.txt
2334
[ -f scripts/file_hashes.json ] || echo "{}" > scripts/file_hashes.json
2435
2536
- name: Fetch file list from test repo
2637
id: fetch_files
2738
run: |
28-
curl -s --max-time 30 -H "Authorization: Bearer ${{ secrets.test_REPO_ACCESS_TOKEN }}" \
39+
echo "Fetching files from source repo..."
40+
curl -s --max-time 60 -H "Authorization: Bearer ${{ secrets.test_REPO_ACCESS_TOKEN }}" \
2941
-H "Accept: application/vnd.github+json" \
3042
https://api.github.com/repos/harshilp24/integration-resources-test/contents/Generic%20UQI%20Creation/uqi_configs \
3143
-o response.json
3244
33-
jq -r '.[] | select(.type=="file") | [.name, .sha] | @tsv' response.json > latest_files_with_sha.txt
34-
jq -r '.[] | select(.type=="file") | .name' response.json > latest_files.txt
45+
if ! jq -e '.' response.json > /dev/null; then
46+
echo "Error: Invalid JSON received from GitHub API."
47+
cat response.json # Print response for debugging
48+
exit 1
49+
fi
3550
36-
echo "files_found=true" >> $GITHUB_ENV
51+
# Check if the response is an array (list of files) or an object (error message)
52+
if jq -e 'type == "array"' response.json > /dev/null; then
53+
jq -r '.[] | select(.type=="file") | [.name, .sha] | @tsv' response.json > latest_files_with_sha.txt
54+
jq -r '.[] | select(.type=="file") | .name' response.json > latest_files.txt
55+
echo "files_found=true" >> $GITHUB_ENV
56+
echo "Files list fetched successfully."
57+
else
58+
echo "Warning: Received non-array response from GitHub API (maybe empty dir or error?):"
59+
cat response.json
60+
# Create empty files to avoid errors downstream if dir is empty
61+
touch latest_files_with_sha.txt
62+
touch latest_files.txt
63+
echo "files_found=false" >> $GITHUB_ENV # Indicate no files found
64+
fi
3765
3866
- name: Identify new and modified files
3967
id: detect_changes
68+
# Only run if files were actually found in the source repo
69+
if: env.files_found == 'true'
4070
run: |
71+
echo "Identifying changes against branch: ${{ github.event.inputs.target_branch }}"
72+
# Read tracking files FROM THE CHECKED-OUT BRANCH
4173
PREV_HASHES=$(cat scripts/file_hashes.json)
42-
NEW_FILES=$(comm -23 <(sort latest_files.txt) <(sort scripts/processed_files.txt) || true)
43-
MODIFIED_FILES=""
44-
while IFS=$'\t' read -r FILE_NAME FILE_SHA; do
45-
PREV_SHA=$(echo "$PREV_HASHES" | jq -r --arg file "$FILE_NAME" '.[$file] // ""')
46-
if [ -n "$PREV_SHA" ] && [ "$PREV_SHA" != "$FILE_SHA" ] && grep -q "^$FILE_NAME$" scripts/processed_files.txt; then
47-
MODIFIED_FILES="$MODIFIED_FILES$FILE_NAME"$'\n'
74+
# Ensure processed_files.txt exists before sorting
75+
[ -f scripts/processed_files.txt ] || touch scripts/processed_files.txt
76+
77+
# Find files present in latest_files.txt but not in processed_files.txt
78+
comm -23 <(sort latest_files.txt) <(sort scripts/processed_files.txt) > new_files.tmp || true
79+
echo "--- New Files ---"
80+
cat new_files.tmp
81+
echo "-----------------"
82+
83+
MODIFIED_FILES_LIST="modified_files.tmp"
84+
touch $MODIFIED_FILES_LIST
85+
echo "--- Checking for Modifications ---" >&2 # Debug output to stderr
86+
# Walk the fetched "<name><TAB><sha>" listing and record every file that was
# already processed but whose SHA differs from the one stored in
# scripts/file_hashes.json (held in PREV_HASHES).
# IFS must be the ANSI-C quoted tab, $'\t', with no space after the dollar
# sign. Written as `IFS=$ '\t'`, bash assigns IFS="$" and then tries to run a
# command named `\t`; the loop condition fails and the body never executes,
# so no file is ever reported as modified.
while IFS=$'\t' read -r FILE_NAME FILE_SHA; do
  # Only files already listed in processed_files.txt can count as modified.
  if grep -q -x -F "$FILE_NAME" scripts/processed_files.txt; then
    PREV_SHA=$(echo "$PREV_HASHES" | jq -r --arg file "$FILE_NAME" '.[$file] // ""')
    echo "Checking: $FILE_NAME, Current SHA: $FILE_SHA, Previous SHA: $PREV_SHA" >&2
    # An empty PREV_SHA means no hash was recorded; that is not treated as a change.
    if [ -n "$PREV_SHA" ] && [ "$PREV_SHA" != "$FILE_SHA" ]; then
      echo "$FILE_NAME" >> "$MODIFIED_FILES_LIST"
      echo " -> Marked as modified." >&2
    fi
  fi
done < latest_files_with_sha.txt
50-
{ echo "$NEW_FILES"; echo "$MODIFIED_FILES"; } | grep -v "^$" > files_to_process.txt
97+
echo "--- Modified Files ---"
98+
cat $MODIFIED_FILES_LIST
99+
echo "----------------------"
100+
101+
# Combine new and modified files, ensuring uniqueness and removing empty lines
102+
cat new_files.tmp $MODIFIED_FILES_LIST | sort | uniq | grep -v '^$' > files_to_process.txt || true
103+
104+
echo "--- Files to Process ---"
105+
cat files_to_process.txt
106+
echo "------------------------"
107+
51108
if [ -s files_to_process.txt ]; then
52109
echo "changes_found=true" >> $GITHUB_ENV
110+
echo "Changes detected."
53111
else
54112
echo "changes_found=false" >> $GITHUB_ENV
113+
echo "No new or modified files detected."
55114
fi
115+
# Clean up temporary files
116+
rm -f new_files.tmp modified_files.tmp
56117
57118
# Informational step: logs why the run has nothing to do. It runs whenever
# env.changes_found is not 'true'.
- name: Exit if no files to process
  if: env.changes_found != 'true'
  env:
    # Pass the dispatch input through the environment instead of expanding
    # ${{ }} inside the script, so the branch name is never parsed as shell code.
    TARGET_BRANCH: ${{ github.event.inputs.target_branch }}
  run: |
    echo "No changes detected in source files relative to branch '${TARGET_BRANCH}'. Exiting."
    exit 0
60123
61124
- name: Process files with OpenAI
125+
# This step now correctly reads the initial hashes from the checked-out branch
126+
# and updates the local files, which are then committed in the next step.
127+
if: env.changes_found == 'true'
62128
run: |
63129
mkdir -p generated_docs
130+
# Read initial hashes from the checked-out branch state
64131
HASHES_JSON=$(cat scripts/file_hashes.json)
65132
PROCESSED_COUNT=0
66133
67134
while IFS= read -r FILE_NAME; do
135+
# Ensure FILE_NAME is not empty
136+
if [ -z "$FILE_NAME" ]; then
137+
continue
138+
fi
139+
68140
echo "⏳ Processing $FILE_NAME"
69-
FILE_URL="https://raw.githubusercontent.com/harshilp24/integration-resources-test/main/Generic%20UQI%20Creation/uqi_configs/$FILE_NAME"
70-
curl -sSL --max-time 30 "$FILE_URL" -o input_file.json
141+
# URL encode the filename for the URL
142+
ENCODED_FILE_NAME=$(printf '%s' "$FILE_NAME" | jq -sRr @uri)
143+
FILE_URL="https://raw.githubusercontent.com/harshilp24/integration-resources-test/main/Generic%20UQI%20Creation/uqi_configs/$ENCODED_FILE_NAME"
144+
echo "Fetching content from: $FILE_URL"
145+
# Download the config for this file. curl is tested directly in the `if`
# condition: the runner's default shell is `bash -e`, so a bare failing curl
# would abort the whole step before a separate `$?` check could run.
# -f makes HTTP errors (e.g. 404) return a non-zero exit status.
if ! curl -fsSL --max-time 60 "$FILE_URL" -o input_file.json; then
  echo "Error: Failed to download $FILE_NAME from $FILE_URL" >&2
  continue # Skip this file if download fails
fi
71150
72-
FILE_SHA=$(grep "$FILE_NAME" latest_files_with_sha.txt | cut -f2)
73-
HASHES_JSON=$(echo "$HASHES_JSON" | jq --arg file "$FILE_NAME" --arg sha "$FILE_SHA" '.[$file] = $sha')
151+
# Find the SHA for the current file from the fetched list
152+
# Look up the "<name><TAB><sha>" line for the current file.
# The tab must be written as $'\t' with no space after the dollar sign;
# `"$FILE_NAME"$ '\t'` makes grep search for "<name>$" and treat `\t` as a
# file to read. `|| true` keeps a no-match (grep exit status 1) from aborting
# the step under `bash -e`, so the warning branch below is reachable.
FILE_SHA_LINE=$(grep -F "$FILE_NAME"$'\t' latest_files_with_sha.txt || true)
if [ -z "$FILE_SHA_LINE" ]; then
  echo "Warning: Could not find SHA for $FILE_NAME in latest_files_with_sha.txt. Skipping hash update." >&2
else
  # The SHA is the second tab-separated field of the matched line.
  FILE_SHA=$(echo "$FILE_SHA_LINE" | cut -f2)
  echo "Updating hash for $FILE_NAME to $FILE_SHA"
  # Update the hash in the in-memory JSON object; it is written to disk after the loop.
  HASHES_JSON=$(echo "$HASHES_JSON" | jq --arg file "$FILE_NAME" --arg sha "$FILE_SHA" '.[$file] = $sha')
fi
74161
162+
# --- OpenAI Processing Start ---
75163
# Prompt 1: Extract Info
76164
SYSTEM_PROMPT=$(cat .github/prompts/extract_prompt.txt || echo "Extract important integration details.")
77165
USER_CONTENT=$(cat input_file.json)
@@ -89,14 +177,18 @@ jobs:
89177
temperature: 0
90178
}')
91179
92-
RESPONSE=$(curl -s https://api.openai.com/v1/chat/completions \
180+
RESPONSE1=$(curl -s https://api.openai.com/v1/chat/completions \
93181
-H "Authorization: Bearer ${{ secrets.OPENAI_API_KEY }}" \
94182
-H "Content-Type: application/json" \
95183
-d "$PAYLOAD")
96184
97-
echo "$RESPONSE" | jq '.'
98-
99-
echo "$RESPONSE" | jq -r '.choices[0].message.content' > extracted_info.md
185+
# Check for API errors
186+
if echo "$RESPONSE1" | jq -e '.error' > /dev/null; then
187+
echo "Error during OpenAI Prompt 1 for $FILE_NAME:" >&2
188+
echo "$RESPONSE1" | jq '.' >&2
189+
continue # Skip this file
190+
fi
191+
echo "$RESPONSE1" | jq -r '.choices[0].message.content' > extracted_info.md
100192
101193
# Prompt 2: Generate Markdown
102194
SYSTEM_PROMPT=$(cat .github/prompts/generate_prompt.txt || echo "Generate reference documentation in markdown.")
@@ -115,44 +207,85 @@ jobs:
115207
temperature: 0.3
116208
}')
117209
118-
RESPONSE=$(curl -s https://api.openai.com/v1/chat/completions \
210+
RESPONSE2=$(curl -s https://api.openai.com/v1/chat/completions \
119211
-H "Authorization: Bearer ${{ secrets.OPENAI_API_KEY }}" \
120212
-H "Content-Type: application/json" \
121213
-d "$PAYLOAD")
122214
123-
echo "$RESPONSE" | jq '.'
124-
125-
echo "$RESPONSE" | jq -r '.choices[0].message.content' > generated_doc.md
215+
# Check for API errors
216+
if echo "$RESPONSE2" | jq -e '.error' > /dev/null; then
217+
echo "Error during OpenAI Prompt 2 for $FILE_NAME:" >&2
218+
echo "$RESPONSE2" | jq '.' >&2
219+
continue # Skip this file
220+
fi
221+
echo "$RESPONSE2" | jq -r '.choices[0].message.content' > generated_doc.md
222+
# --- OpenAI Processing End ---
126223
224+
# Determine output path
127225
INTEGRATION=$(echo "$FILE_NAME" | sed 's/_uqi_config\.json//' | tr '[:upper:]' '[:lower:]')
128226
FINAL_PATH="website/docs/connect-data/reference/${INTEGRATION}.md"
129227
130228
mkdir -p "$(dirname "$FINAL_PATH")"
131229
cp generated_doc.md "$FINAL_PATH"
132-
cp generated_doc.md "generated_docs/${INTEGRATION}.md"
230+
# Optional: Keep a copy in a separate dir if needed for artifacts
231+
# cp generated_doc.md "generated_docs/${INTEGRATION}.md"
133232
134-
echo "$FILE_NAME" >> scripts/processed_files.txt
233+
# Add the successfully processed file to the list for this run
234+
echo "$FILE_NAME" >> processed_files_this_run.txt
135235
PROCESSED_COUNT=$((PROCESSED_COUNT + 1))
136-
echo "✅ Finished $FILE_NAME"
236+
echo "✅ Finished processing $FILE_NAME"
237+
137238
done < files_to_process.txt
138239
139-
echo "$HASHES_JSON" > scripts/file_hashes.json
240+
# Update the main tracking files with the results of this run
241+
# Append newly processed files to the persistent list
242+
if [ -f processed_files_this_run.txt ]; then
243+
cat processed_files_this_run.txt >> scripts/processed_files.txt
244+
# Ensure uniqueness and sort the persistent list
245+
sort -u scripts/processed_files.txt -o scripts/processed_files.txt
246+
rm processed_files_this_run.txt
247+
fi
248+
# Overwrite the persistent hash file with the updated JSON
249+
echo "$HASHES_JSON" | jq '.' > scripts/file_hashes.json
250+
140251
echo "processed_count=$PROCESSED_COUNT" >> $GITHUB_ENV
141-
echo "content_generated=true" >> $GITHUB_ENV
252+
if [ "$PROCESSED_COUNT" -gt 0 ]; then
253+
echo "content_generated=true" >> $GITHUB_ENV
254+
else
255+
echo "content_generated=false" >> $GITHUB_ENV
256+
fi
257+
# Clean up intermediate files
258+
rm -f input_file.json extracted_info.md generated_doc.md
142259
143-
- name: Commit and open PR
260+
- name: Commit and open PR against target branch
261+
# Only run if content was actually generated in the previous step
144262
if: env.content_generated == 'true'
145-
uses: peter-evans/create-pull-request@v5
263+
uses: peter-evans/create-pull-request@v6 # Use v6 for latest features/fixes
146264
with:
147265
token: ${{ secrets.test_REPO_ACCESS_TOKEN }}
148-
title: "test: generate integration docs from test repo"
149-
commit-message: "test: generated docs from harshilp24/integration-resources-test"
150-
branch: "test/docs-update-${{ github.run_id }}"
151-
base: main
266+
# Make title and commit message specific to the target branch
267+
title: "docs: update integration docs for ${{ github.event.inputs.target_branch }}"
268+
commit-message: "docs: automated generation for ${{ github.event.inputs.target_branch }}\n\nProcessed files based on changes in harshilp24/integration-resources-test."
269+
# Create a branch name that includes the target branch for clarity
270+
branch: "docs-update/${{ github.event.inputs.target_branch }}-${{ github.run_id }}"
271+
# Set the base branch for the PR to the target branch
272+
base: ${{ github.event.inputs.target_branch }}
273+
# Add the generated docs and the UPDATED tracking files
152274
add-paths: |
153275
website/docs/connect-data/reference/
154276
scripts/processed_files.txt
155277
scripts/file_hashes.json
278+
# Update PR body
156279
body: |
157-
✅ Test PR: Generated integration documentation from your test repo.
158-
Source: [harshilp24/integration-resources-test](https://github.com/harshilp24/integration-resources-test/tree/main/Generic%20UQI%20Creation/uqi_configs)
280+
✅ Automated PR: Generated/updated integration documentation based on changes in the source repository.
281+
282+
**Target Branch:** `${{ github.event.inputs.target_branch }}`
283+
**Source Repo:** [harshilp24/integration-resources-test](https://github.com/harshilp24/integration-resources-test/tree/main/Generic%20UQI%20Creation/uqi_configs)
284+
285+
This PR includes:
286+
- Updated markdown files in `website/docs/connect-data/reference/`
287+
- Updated tracking files in `scripts/` to reflect the processed state for this branch.
288+
# Optional: Add labels, assignees etc.
289+
# labels: automated-pr, documentation
290+
# assignees: your-github-username
291+

0 commit comments

Comments
 (0)