@@ -2,160 +2,76 @@ name: Test Doc Generator
22
33on :
44 workflow_dispatch :
5- inputs :
6- target_branch :
7- description : ' The branch in appsmith-docs to checkout and create PR against'
8- required : true
9- default : ' docs-staging' # Default to docs-staging
10- type : string
115
126jobs :
137 generate_docs :
148 runs-on : ubuntu-latest
159
1610 steps :
17- - name : Checkout appsmith-docs target branch
11+ - name : Checkout appsmith-docs
1812 uses : actions/checkout@v4
1913 with :
2014 token : ${{ secrets.test_REPO_ACCESS_TOKEN }}
21- ref : ${{ github.event.inputs.target_branch }} # Checkout the specified branch
22- # Fetch depth 0 to get all history needed for base branch detection by create-pull-request
23- fetch-depth : 0
2415
25- - name : Ensure scripts directory and tracking files exist
16+ - name : Create exclusion list
17+ run : echo > saas_exclusions.txt
18+
19+ - name : Ensure scripts directory exists
2620 run : |
2721 mkdir -p scripts
28- # Initialize tracking files if they don't exist in the checked-out branch
2922 [ -f scripts/processed_files.txt ] || touch scripts/processed_files.txt
3023 [ -f scripts/file_hashes.json ] || echo "{}" > scripts/file_hashes.json
3124
3225 - name : Fetch file list from test repo
3326 id : fetch_files
3427 run : |
35- echo "Fetching files from source repo..."
36- curl -s --max-time 60 -H "Authorization: Bearer ${{ secrets.test_REPO_ACCESS_TOKEN }}" \
28+ curl -s --max-time 30 -H "Authorization: Bearer ${{ secrets.test_REPO_ACCESS_TOKEN }}" \
3729 -H "Accept: application/vnd.github+json" \
3830 https://api.github.com/repos/harshilp24/integration-resources-test/contents/Generic%20UQI%20Creation/uqi_configs \
3931 -o response.json
4032
41- if ! jq -e '.' response.json > /dev/null; then
42- echo "Error: Invalid JSON received from GitHub API."
43- cat response.json # Print response for debugging
44- exit 1
45- fi
33+ jq -r '.[] | select(.type=="file") | [.name, .sha] | @tsv' response.json > latest_files_with_sha.txt
34+ jq -r '.[] | select(.type=="file") | .name' response.json > latest_files.txt
4635
47- # Check if the response is an array (list of files) or an object (error message)
48- if jq -e 'type == "array"' response.json > /dev/null; then
49- jq -r '.[] | select(.type=="file") | [.name, .sha] | @tsv' response.json > latest_files_with_sha.txt
50- jq -r '.[] | select(.type=="file") | .name' response.json > latest_files.txt
51- echo "files_found=true" >> $GITHUB_ENV
52- echo "Files list fetched successfully."
53- else
54- echo "Warning: Received non-array response from GitHub API (maybe empty dir or error?):"
55- cat response.json
56- # Create empty files to avoid errors downstream if dir is empty
57- touch latest_files_with_sha.txt
58- touch latest_files.txt
59- echo "files_found=false" >> $GITHUB_ENV # Indicate no files found
60- fi
36+ echo "files_found=true" >> $GITHUB_ENV
6137
6238 - name : Identify new and modified files
6339 id : detect_changes
64- # Only run if files were actually found in the source repo
65- if : env.files_found == 'true'
6640 run : |
67- echo "Identifying changes against branch: ${{ github.event.inputs.target_branch }}"
68- # Read tracking files FROM THE CHECKED-OUT BRANCH
6941 PREV_HASHES=$(cat scripts/file_hashes.json)
70- # Ensure processed_files.txt exists before sorting
71- [ -f scripts/processed_files.txt ] || touch scripts/processed_files.txt
72-
73- # Find files present in latest_files.txt but not in processed_files.txt
74- comm -23 <(sort latest_files.txt) <(sort scripts/processed_files.txt) > new_files.tmp || true
75- echo "--- New Files ---"
76- cat new_files.tmp
77- echo "-----------------"
78-
79- MODIFIED_FILES_LIST="modified_files.tmp"
80- touch $MODIFIED_FILES_LIST
81- echo "--- Checking for Modifications ---" >&2 # Debug output to stderr
42+ NEW_FILES=$(comm -23 <(sort latest_files.txt) <(sort scripts/processed_files.txt) || true)
43+ MODIFIED_FILES=""
8244 while IFS=$'\t' read -r FILE_NAME FILE_SHA; do
83- # Check if the file is listed in processed_files.txt (meaning it's not new)
84- if grep -q -x -F "$FILE_NAME" scripts/processed_files.txt; then
85- PREV_SHA=$(echo "$PREV_HASHES" | jq -r --arg file "$FILE_NAME" '.["$file"] // ""')
86- echo "Checking: $FILE_NAME, Current SHA: $FILE_SHA, Previous SHA: $PREV_SHA" >&2
87- if [ -n "$PREV_SHA" ] && [ "$PREV_SHA" != "$FILE_SHA" ]; then
88- echo "$FILE_NAME" >> $MODIFIED_FILES_LIST
89- echo " -> Marked as modified." >&2
90- fi
45+ PREV_SHA=$(echo "$PREV_HASHES" | jq -r --arg file "$FILE_NAME" '.[$file] // ""')
46+ if [ -n "$PREV_SHA" ] && [ "$PREV_SHA" != "$FILE_SHA" ] && grep -q "^$FILE_NAME$" scripts/processed_files.txt; then
47+ MODIFIED_FILES="$MODIFIED_FILES$FILE_NAME"$'\n'
9148 fi
9249 done < latest_files_with_sha.txt
93- echo "--- Modified Files ---"
94- cat $MODIFIED_FILES_LIST
95- echo "----------------------"
96-
97- # Combine new and modified files, ensuring uniqueness and removing empty lines
98- cat new_files.tmp $MODIFIED_FILES_LIST | sort | uniq | grep -v '^$' > files_to_process.txt || true
99-
100- echo "--- Files to Process ---"
101- cat files_to_process.txt
102- echo "------------------------"
103-
50+ { echo "$NEW_FILES"; echo "$MODIFIED_FILES"; } | grep -v "^$" > files_to_process.txt
10451 if [ -s files_to_process.txt ]; then
10552 echo "changes_found=true" >> $GITHUB_ENV
106- echo "Changes detected."
10753 else
10854 echo "changes_found=false" >> $GITHUB_ENV
109- echo "No new or modified files detected."
11055 fi
111- # Clean up temporary files
112- rm -f new_files.tmp modified_files.tmp
11356
11457 - name : Exit if no files to process
11558 if : env.changes_found != 'true'
116- run : |
117- echo "No changes detected in source files relative to branch '${{ github.event.inputs.target_branch }}'. Exiting."
118- exit 0
59+ run : exit 0
11960
12061 - name : Process files with OpenAI
121- # This step now correctly reads the initial hashes from the checked-out branch
122- # and updates the local files, which are then committed in the next step.
123- if : env.changes_found == 'true'
12462 run : |
12563 mkdir -p generated_docs
126- # Read initial hashes from the checked-out branch state
12764 HASHES_JSON=$(cat scripts/file_hashes.json)
12865 PROCESSED_COUNT=0
12966
13067 while IFS= read -r FILE_NAME; do
131- # Ensure FILE_NAME is not empty
132- if [ -z "$FILE_NAME" ]; then
133- continue
134- fi
135-
13668 echo "⏳ Processing $FILE_NAME"
137- # URL encode the filename for the URL
138- ENCODED_FILE_NAME=$(printf '%s' "$FILE_NAME" | jq -sRr @uri)
139- FILE_URL="https://raw.githubusercontent.com/harshilp24/integration-resources-test/main/Generic%20UQI%20Creation/uqi_configs/$ENCODED_FILE_NAME"
140- echo "Fetching content from: $FILE_URL"
141- curl -fsSL --max-time 60 "$FILE_URL" -o input_file.json
142- if [ $? -ne 0 ]; then
143- echo "Error: Failed to download $FILE_NAME from $FILE_URL" >&2
144- continue # Skip this file if download fails
145- fi
69+ FILE_URL="https://raw.githubusercontent.com/harshilp24/integration-resources-test/main/Generic%20UQI%20Creation/uqi_configs/$FILE_NAME"
70+ curl -sSL --max-time 30 "$FILE_URL" -o input_file.json
14671
147- # Find the SHA for the current file using awk (more robust than grep+ cut)
148- FILE_SHA =$(awk -F'\t' -v filename= "$FILE_NAME" '$1 == filename { print $2; exit }' latest_files_with_sha.txt )
72+ FILE_SHA=$(grep "$FILE_NAME" latest_files_with_sha.txt | cut -f2 )
73+ HASHES_JSON =$(echo "$HASHES_JSON" | jq --arg file "$FILE_NAME" --arg sha "$FILE_SHA" '.[$file] = $sha' )
14974
150- if [ -z "$FILE_SHA" ]; then
151- echo "Warning: Could not find SHA for '$FILE_NAME' in latest_files_with_sha.txt. Skipping hash update." >&2
152- else
153- echo "Updating hash for $FILE_NAME to $FILE_SHA"
154- # Update the hash in our JSON object
155- HASHES_JSON=$(echo "$HASHES_JSON" | jq --arg file "$FILE_NAME" --arg sha "$FILE_SHA" '.["$file"] = $sha')
156- fi
157-
158- # --- OpenAI Processing Start ---
15975 # Prompt 1: Extract Info
16076 SYSTEM_PROMPT=$(cat .github/prompts/extract_prompt.txt || echo "Extract important integration details.")
16177 USER_CONTENT=$(cat input_file.json)
@@ -173,18 +89,14 @@ jobs:
17389 temperature: 0
17490 }')
17591
176- RESPONSE1 =$(curl -s https://api.openai.com/v1/chat/completions \
177- -H "Authorization: Bearer ${{ secrets.OPENAI_API_KEY }}" \
92+ RESPONSE =$(curl -s https://api.openai.com/v1/chat/completions \
93+ -H "Authorization: Bearer ${{ secrets.test_OPENAI_API_KEY }}" \
17894 -H "Content-Type: application/json" \
17995 -d "$PAYLOAD")
18096
181- # Check for API errors
182- if echo "$RESPONSE1" | jq -e '.error' > /dev/null; then
183- echo "Error during OpenAI Prompt 1 for $FILE_NAME:" >&2
184- echo "$RESPONSE1" | jq '.' >&2
185- continue # Skip this file
186- fi
187- echo "$RESPONSE1" | jq -r '.choices[0].message.content' > extracted_info.md
97+ echo "$RESPONSE" | jq '.'
98+
99+ echo "$RESPONSE" | jq -r '.choices[0].message.content' > extracted_info.md
188100
189101 # Prompt 2: Generate Markdown
190102 SYSTEM_PROMPT=$(cat .github/prompts/generate_prompt.txt || echo "Generate reference documentation in markdown.")
@@ -203,85 +115,44 @@ jobs:
203115 temperature: 0.3
204116 }')
205117
206- RESPONSE2 =$(curl -s https://api.openai.com/v1/chat/completions \
207- -H "Authorization: Bearer ${{ secrets.OPENAI_API_KEY }}" \
118+ RESPONSE =$(curl -s https://api.openai.com/v1/chat/completions \
119+ -H "Authorization: Bearer ${{ secrets.test_OPENAI_API_KEY }}" \
208120 -H "Content-Type: application/json" \
209121 -d "$PAYLOAD")
210122
211- # Check for API errors
212- if echo "$RESPONSE2" | jq -e '.error' > /dev/null; then
213- echo "Error during OpenAI Prompt 2 for $FILE_NAME:" >&2
214- echo "$RESPONSE2" | jq '.' >&2
215- continue # Skip this file
216- fi
217- echo "$RESPONSE2" | jq -r '.choices[0].message.content' > generated_doc.md
218- # --- OpenAI Processing End ---
123+ echo "$RESPONSE" | jq '.'
124+
125+ echo "$RESPONSE" | jq -r '.choices[0].message.content' > generated_doc.md
219126
220- # Determine output path
221127 INTEGRATION=$(echo "$FILE_NAME" | sed 's/_uqi_config\.json//' | tr '[:upper:]' '[:lower:]')
222128 FINAL_PATH="website/docs/connect-data/reference/${INTEGRATION}.md"
223129
224130 mkdir -p "$(dirname "$FINAL_PATH")"
225131 cp generated_doc.md "$FINAL_PATH"
226- # Optional: Keep a copy in a separate dir if needed for artifacts
227- # cp generated_doc.md "generated_docs/${INTEGRATION}.md"
132+ cp generated_doc.md "generated_docs/${INTEGRATION}.md"
228133
229- # Add the successfully processed file to the list for this run
230- echo "$FILE_NAME" >> processed_files_this_run.txt
134+ echo "$FILE_NAME" >> scripts/processed_files.txt
231135 PROCESSED_COUNT=$((PROCESSED_COUNT + 1))
232- echo "✅ Finished processing $FILE_NAME"
233-
136+ echo "✅ Finished $FILE_NAME"
234137 done < files_to_process.txt
235138
236- # Update the main tracking files with the results of this run
237- # Append newly processed files to the persistent list
238- if [ -f processed_files_this_run.txt ]; then
239- cat processed_files_this_run.txt >> scripts/processed_files.txt
240- # Ensure uniqueness and sort the persistent list
241- sort -u scripts/processed_files.txt -o scripts/processed_files.txt
242- rm processed_files_this_run.txt
243- fi
244- # Overwrite the persistent hash file with the updated JSON
245- echo "$HASHES_JSON" | jq '.' > scripts/file_hashes.json
246-
139+ echo "$HASHES_JSON" > scripts/file_hashes.json
247140 echo "processed_count=$PROCESSED_COUNT" >> $GITHUB_ENV
248- if [ "$PROCESSED_COUNT" -gt 0 ]; then
249- echo "content_generated=true" >> $GITHUB_ENV
250- else
251- echo "content_generated=false" >> $GITHUB_ENV
252- fi
253- # Clean up intermediate files
254- rm -f input_file.json extracted_info.md generated_doc.md
141+ echo "content_generated=true" >> $GITHUB_ENV
255142
256- - name : Commit and open PR against target branch
257- # Only run if content was actually generated in the previous step
143+ - name : Commit and open PR
258144 if : env.content_generated == 'true'
259- uses : peter-evans/create-pull-request@v6 # Use v6 for latest features/fixes
145+ uses : peter-evans/create-pull-request@v5
260146 with :
261147 token : ${{ secrets.test_REPO_ACCESS_TOKEN }}
262- # Make title and commit message specific to the target branch
263- title : " docs: update integration docs for ${{ github.event.inputs.target_branch }}"
264- commit-message : " docs: automated generation for ${{ github.event.inputs.target_branch }}\n\n Processed files based on changes in harshilp24/integration-resources-test."
265- # Create a branch name that includes the target branch for clarity
266- branch : " docs-update/${{ github.event.inputs.target_branch }}-${{ github.run_id }}"
267- # Set the base branch for the PR to the target branch
268- base : ${{ github.event.inputs.target_branch }}
269- # Add the generated docs and the UPDATED tracking files
148+ title : " test: generate integration docs from test repo"
149+ commit-message : " test: generated docs from harshilp24/integration-resources-test"
150+ branch : " test/docs-update-${{ github.run_id }}"
151+ base : main
270152 add-paths : |
271153 website/docs/connect-data/reference/
272154 scripts/processed_files.txt
273155 scripts/file_hashes.json
274- # Update PR body
275156 body : |
276- ✅ Automated PR: Generated/updated integration documentation based on changes in the source repository.
277-
278- **Target Branch:** `${{ github.event.inputs.target_branch }}`
279- **Source Repo:** [harshilp24/integration-resources-test](https://github.com/harshilp24/integration-resources-test/tree/main/Generic%20UQI%20Creation/uqi_configs)
280-
281- This PR includes:
282- - Updated markdown files in `website/docs/connect-data/reference/`
283- - Updated tracking files in `scripts/` to reflect the processed state for this branch.
284- # Optional: Add labels, assignees etc.
285- # labels: automated-pr, documentation
286- # assignees: your-github-username
287-
157+ ✅ Test PR: Generated integration documentation from your test repo.
158+ Source: [harshilp24/integration-resources-test](https://github.com/harshilp24/integration-resources-test/tree/main/Generic%20UQI%20Creation/uqi_configs)
0 commit comments