@@ -2,76 +2,164 @@ name: Test Doc Generator

 on:
   workflow_dispatch:
+    inputs:
+      target_branch:
+        description: 'The branch in appsmith-docs to checkout and create PR against'
+        required: true
+        default: 'docs-staging' # Default to docs-staging
+        type: string

 jobs:
   generate_docs:
     runs-on: ubuntu-latest

     steps:
-      - name: Checkout appsmith-docs
+      - name: Checkout appsmith-docs target branch
         uses: actions/checkout@v4
         with:
           token: ${{ secrets.test_REPO_ACCESS_TOKEN }}
+          ref: ${{ github.event.inputs.target_branch }} # Checkout the specified branch
+          # Fetch depth 0 to get all history needed for base branch detection by create-pull-request
+          fetch-depth: 0

-      - name: Create exclusion list
-        run: echo > saas_exclusions.txt
+      # No need for exclusion list step if not used
+      # - name: Create exclusion list
+      #   run: echo > saas_exclusions.txt

-      - name: Ensure scripts directory exists
+      - name: Ensure scripts directory and tracking files exist
         run: |
           mkdir -p scripts
+          # Initialize tracking files if they don't exist in the checked-out branch
           [ -f scripts/processed_files.txt ] || touch scripts/processed_files.txt
           [ -f scripts/file_hashes.json ] || echo "{}" > scripts/file_hashes.json

       - name: Fetch file list from test repo
         id: fetch_files
         run: |
-          curl -s --max-time 30 -H "Authorization: Bearer ${{ secrets.test_REPO_ACCESS_TOKEN }}" \
+          echo "Fetching files from source repo..."
+          curl -s --max-time 60 -H "Authorization: Bearer ${{ secrets.test_REPO_ACCESS_TOKEN }}" \
             -H "Accept: application/vnd.github+json" \
             https://api.github.com/repos/harshilp24/integration-resources-test/contents/Generic%20UQI%20Creation/uqi_configs \
             -o response.json

-          jq -r '.[] | select(.type=="file") | [.name, .sha] | @tsv' response.json > latest_files_with_sha.txt
-          jq -r '.[] | select(.type=="file") | .name' response.json > latest_files.txt
+          if ! jq -e '.' response.json > /dev/null; then
+            echo "Error: Invalid JSON received from GitHub API."
+            cat response.json # Print response for debugging
+            exit 1
+          fi

-          echo "files_found=true" >> $GITHUB_ENV
+          # Check if the response is an array (list of files) or an object (error message)
+          if jq -e 'type == "array"' response.json > /dev/null; then
+            jq -r '.[] | select(.type=="file") | [.name, .sha] | @tsv' response.json > latest_files_with_sha.txt
+            jq -r '.[] | select(.type=="file") | .name' response.json > latest_files.txt
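+            # Illustrative note: each line of latest_files_with_sha.txt is "<file name><TAB><blob sha>"
+            # as reported by the contents API, e.g. "mongodb_uqi_config.json<TAB>3f8a..." (example values).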
+            echo "files_found=true" >> $GITHUB_ENV
+            echo "Files list fetched successfully."
+          else
+            echo "Warning: Received non-array response from GitHub API (maybe empty dir or error?):"
+            cat response.json
+            # Create empty files to avoid errors downstream if dir is empty
+            touch latest_files_with_sha.txt
+            touch latest_files.txt
+            echo "files_found=false" >> $GITHUB_ENV # Indicate no files found
+          fi

       - name: Identify new and modified files
         id: detect_changes
+        # Only run if files were actually found in the source repo
+        if: env.files_found == 'true'
         run: |
+          echo "Identifying changes against branch: ${{ github.event.inputs.target_branch }}"
+          # Read tracking files FROM THE CHECKED-OUT BRANCH
           PREV_HASHES=$(cat scripts/file_hashes.json)
-          NEW_FILES=$(comm -23 <(sort latest_files.txt) <(sort scripts/processed_files.txt) || true)
-          MODIFIED_FILES=""
-          while IFS=$'\t' read -r FILE_NAME FILE_SHA; do
-            PREV_SHA=$(echo "$PREV_HASHES" | jq -r --arg file "$FILE_NAME" '.[$file] // ""')
-            if [ -n "$PREV_SHA" ] && [ "$PREV_SHA" != "$FILE_SHA" ] && grep -q "^$FILE_NAME$" scripts/processed_files.txt; then
-              MODIFIED_FILES="$MODIFIED_FILES$FILE_NAME"$'\n'
+          # Ensure processed_files.txt exists before sorting
+          [ -f scripts/processed_files.txt ] || touch scripts/processed_files.txt
+
+          # Find files present in latest_files.txt but not in processed_files.txt
+          comm -23 <(sort latest_files.txt) <(sort scripts/processed_files.txt) > new_files.tmp || true
+          echo "--- New Files ---"
+          cat new_files.tmp
+          echo "-----------------"
+
+          MODIFIED_FILES_LIST="modified_files.tmp"
+          touch $MODIFIED_FILES_LIST
+          echo "--- Checking for Modifications ---" >&2 # Debug output to stderr
+          while IFS=$'\t' read -r FILE_NAME FILE_SHA; do
+            # Check if the file is listed in processed_files.txt (meaning it's not new)
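+            # grep -F matches the name as a literal string and -x requires a whole-line match,
+            # so substring or regex-style matches do not count as "already processed".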
+            if grep -q -x -F "$FILE_NAME" scripts/processed_files.txt; then
+              PREV_SHA=$(echo "$PREV_HASHES" | jq -r --arg file "$FILE_NAME" '.[$file] // ""')
+              echo "Checking: $FILE_NAME, Current SHA: $FILE_SHA, Previous SHA: $PREV_SHA" >&2
+              if [ -n "$PREV_SHA" ] && [ "$PREV_SHA" != "$FILE_SHA" ]; then
+                echo "$FILE_NAME" >> $MODIFIED_FILES_LIST
+                echo " -> Marked as modified." >&2
+              fi
             fi
           done < latest_files_with_sha.txt
-          { echo "$NEW_FILES"; echo "$MODIFIED_FILES"; } | grep -v "^$" > files_to_process.txt
+          echo "--- Modified Files ---"
+          cat $MODIFIED_FILES_LIST
+          echo "----------------------"
+
+          # Combine new and modified files, ensuring uniqueness and removing empty lines
+          cat new_files.tmp $MODIFIED_FILES_LIST | sort | uniq | grep -v '^$' > files_to_process.txt || true
+
+          echo "--- Files to Process ---"
+          cat files_to_process.txt
+          echo "------------------------"
+
           if [ -s files_to_process.txt ]; then
             echo "changes_found=true" >> $GITHUB_ENV
+            echo "Changes detected."
           else
             echo "changes_found=false" >> $GITHUB_ENV
+            echo "No new or modified files detected."
           fi
+          # Clean up temporary files
+          rm -f new_files.tmp modified_files.tmp

       - name: Exit if no files to process
         if: env.changes_found != 'true'
-        run: exit 0
+        run: |
+          echo "No changes detected in source files relative to branch '${{ github.event.inputs.target_branch }}'. Exiting."
+          exit 0

       - name: Process files with OpenAI
+        # This step now correctly reads the initial hashes from the checked-out branch
+        # and updates the local files, which are then committed in the next step.
+        if: env.changes_found == 'true'
         run: |
           mkdir -p generated_docs
+          # Read initial hashes from the checked-out branch state
           HASHES_JSON=$(cat scripts/file_hashes.json)
           PROCESSED_COUNT=0

           while IFS= read -r FILE_NAME; do
+            # Ensure FILE_NAME is not empty
+            if [ -z "$FILE_NAME" ]; then
+              continue
+            fi
+
             echo "⏳ Processing $FILE_NAME"
-            FILE_URL="https://raw.githubusercontent.com/harshilp24/integration-resources-test/main/Generic%20UQI%20Creation/uqi_configs/$FILE_NAME"
-            curl -sSL --max-time 30 "$FILE_URL" -o input_file.json
+            # URL encode the filename for the URL
+            ENCODED_FILE_NAME=$(printf '%s' "$FILE_NAME" | jq -sRr @uri)
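+            # jq -sRr @uri percent-encodes the raw name, e.g. "my config.json" -> "my%20config.json" (illustrative).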
+            FILE_URL="https://raw.githubusercontent.com/harshilp24/integration-resources-test/main/Generic%20UQI%20Creation/uqi_configs/$ENCODED_FILE_NAME"
+            echo "Fetching content from: $FILE_URL"
+            if ! curl -fsSL --max-time 60 "$FILE_URL" -o input_file.json; then
+              echo "Error: Failed to download $FILE_NAME from $FILE_URL" >&2
+              continue # Skip this file if download fails
+            fi

-            FILE_SHA=$(grep "$FILE_NAME" latest_files_with_sha.txt | cut -f2)
-            HASHES_JSON=$(echo "$HASHES_JSON" | jq --arg file "$FILE_NAME" --arg sha "$FILE_SHA" '.[$file] = $sha')
+            # Find the SHA for the current file from the fetched list
+            FILE_SHA_LINE=$(grep -F "$FILE_NAME"$'\t' latest_files_with_sha.txt || true)
+            if [ -z "$FILE_SHA_LINE" ]; then
+              echo "Warning: Could not find SHA for $FILE_NAME in latest_files_with_sha.txt. Skipping hash update." >&2
+            else
+              FILE_SHA=$(echo "$FILE_SHA_LINE" | cut -f2)
+              echo "Updating hash for $FILE_NAME to $FILE_SHA"
+              # Update the hash in our JSON object
+              HASHES_JSON=$(echo "$HASHES_JSON" | jq --arg file "$FILE_NAME" --arg sha "$FILE_SHA" '.[$file] = $sha')
+            fi

+            # --- OpenAI Processing Start ---
             # Prompt 1: Extract Info
             SYSTEM_PROMPT=$(cat .github/prompts/extract_prompt.txt || echo "Extract important integration details.")
             USER_CONTENT=$(cat input_file.json)
@@ -89,14 +177,18 @@ jobs:
               temperature: 0
             }')

-            RESPONSE=$(curl -s https://api.openai.com/v1/chat/completions \
+            RESPONSE1=$(curl -s https://api.openai.com/v1/chat/completions \
               -H "Authorization: Bearer ${{ secrets.OPENAI_API_KEY }}" \
               -H "Content-Type: application/json" \
               -d "$PAYLOAD")

-            echo "$RESPONSE" | jq '.'
-
-            echo "$RESPONSE" | jq -r '.choices[0].message.content' > extracted_info.md
+            # Check for API errors
+            if echo "$RESPONSE1" | jq -e '.error' > /dev/null; then
+              echo "Error during OpenAI Prompt 1 for $FILE_NAME:" >&2
+              echo "$RESPONSE1" | jq '.' >&2
+              continue # Skip this file
+            fi
+            echo "$RESPONSE1" | jq -r '.choices[0].message.content' > extracted_info.md

             # Prompt 2: Generate Markdown
             SYSTEM_PROMPT=$(cat .github/prompts/generate_prompt.txt || echo "Generate reference documentation in markdown.")
@@ -115,44 +207,85 @@
               temperature: 0.3
             }')

-            RESPONSE=$(curl -s https://api.openai.com/v1/chat/completions \
+            RESPONSE2=$(curl -s https://api.openai.com/v1/chat/completions \
              -H "Authorization: Bearer ${{ secrets.OPENAI_API_KEY }}" \
              -H "Content-Type: application/json" \
              -d "$PAYLOAD")

-            echo "$RESPONSE" | jq '.'
-
-            echo "$RESPONSE" | jq -r '.choices[0].message.content' > generated_doc.md
+            # Check for API errors
+            if echo "$RESPONSE2" | jq -e '.error' > /dev/null; then
+              echo "Error during OpenAI Prompt 2 for $FILE_NAME:" >&2
+              echo "$RESPONSE2" | jq '.' >&2
+              continue # Skip this file
+            fi
+            echo "$RESPONSE2" | jq -r '.choices[0].message.content' > generated_doc.md
+            # --- OpenAI Processing End ---

+            # Determine output path
             INTEGRATION=$(echo "$FILE_NAME" | sed 's/_uqi_config\.json//' | tr '[:upper:]' '[:lower:]')
             FINAL_PATH="website/docs/connect-data/reference/${INTEGRATION}.md"

             mkdir -p "$(dirname "$FINAL_PATH")"
             cp generated_doc.md "$FINAL_PATH"
-            cp generated_doc.md "generated_docs/${INTEGRATION}.md"
+            # Optional: Keep a copy in a separate dir if needed for artifacts
+            # cp generated_doc.md "generated_docs/${INTEGRATION}.md"

-            echo "$FILE_NAME" >> scripts/processed_files.txt
+            # Add the successfully processed file to the list for this run
+            echo "$FILE_NAME" >> processed_files_this_run.txt
             PROCESSED_COUNT=$((PROCESSED_COUNT + 1))
-            echo "✅ Finished $FILE_NAME"
+            echo "✅ Finished processing $FILE_NAME"
+
           done < files_to_process.txt

-          echo "$HASHES_JSON" > scripts/file_hashes.json
+          # Update the main tracking files with the results of this run
+          # Append newly processed files to the persistent list
+          if [ -f processed_files_this_run.txt ]; then
+            cat processed_files_this_run.txt >> scripts/processed_files.txt
+            # Ensure uniqueness and sort the persistent list
+            sort -u scripts/processed_files.txt -o scripts/processed_files.txt
+            rm processed_files_this_run.txt
+          fi
+          # Overwrite the persistent hash file with the updated JSON
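+          # file_hashes.json ends up shaped like {"<file name>": "<blob sha>"},
+          # e.g. {"mongodb_uqi_config.json": "3f8a..."} (illustrative values).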
+          echo "$HASHES_JSON" | jq '.' > scripts/file_hashes.json
+
           echo "processed_count=$PROCESSED_COUNT" >> $GITHUB_ENV
-          echo "content_generated=true" >> $GITHUB_ENV
+          if [ "$PROCESSED_COUNT" -gt 0 ]; then
+            echo "content_generated=true" >> $GITHUB_ENV
+          else
+            echo "content_generated=false" >> $GITHUB_ENV
+          fi
+          # Clean up intermediate files
+          rm -f input_file.json extracted_info.md generated_doc.md

-      - name: Commit and open PR
+      - name: Commit and open PR against target branch
+        # Only run if content was actually generated in the previous step
         if: env.content_generated == 'true'
-        uses: peter-evans/create-pull-request@v5
+        uses: peter-evans/create-pull-request@v6 # Use v6 for latest features/fixes
         with:
           token: ${{ secrets.test_REPO_ACCESS_TOKEN }}
-          title: "test: generate integration docs from test repo"
-          commit-message: "test: generated docs from harshilp24/integration-resources-test"
-          branch: "test/docs-update-${{ github.run_id }}"
-          base: main
+          # Make title and commit message specific to the target branch
+          title: "docs: update integration docs for ${{ github.event.inputs.target_branch }}"
+          commit-message: "docs: automated generation for ${{ github.event.inputs.target_branch }}\n\nProcessed files based on changes in harshilp24/integration-resources-test."
+          # Create a branch name that includes the target branch for clarity
+          branch: "docs-update/${{ github.event.inputs.target_branch }}-${{ github.run_id }}"
+          # Set the base branch for the PR to the target branch
+          base: ${{ github.event.inputs.target_branch }}
+          # Add the generated docs and the UPDATED tracking files
           add-paths: |
             website/docs/connect-data/reference/
             scripts/processed_files.txt
             scripts/file_hashes.json
+          # Update PR body
           body: |
-            ✅ Test PR: Generated integration documentation from your test repo.
-            Source: [harshilp24/integration-resources-test](https://github.com/harshilp24/integration-resources-test/tree/main/Generic%20UQI%20Creation/uqi_configs)
+            ✅ Automated PR: Generated/updated integration documentation based on changes in the source repository.
+
+            **Target Branch:** `${{ github.event.inputs.target_branch }}`
+            **Source Repo:** [harshilp24/integration-resources-test](https://github.com/harshilp24/integration-resources-test/tree/main/Generic%20UQI%20Creation/uqi_configs)
+
+            This PR includes:
+            - Updated markdown files in `website/docs/connect-data/reference/`
+            - Updated tracking files in `scripts/` to reflect the processed state for this branch.
+          # Optional: Add labels, assignees etc.
+          # labels: automated-pr, documentation
+          # assignees: your-github-username
+