Skip to content

Commit e605a97

Browse files
authored
Update test-doc-generator.yml
1 parent c4671ed commit e605a97

File tree

1 file changed

+43
-172
lines changed

1 file changed

+43
-172
lines changed

.github/workflows/test-doc-generator.yml

Lines changed: 43 additions & 172 deletions
Original file line number | Diff line number | Diff line change
@@ -2,160 +2,76 @@ name: Test Doc Generator
22

33
on:
44
workflow_dispatch:
5-
inputs:
6-
target_branch:
7-
description: 'The branch in appsmith-docs to checkout and create PR against'
8-
required: true
9-
default: 'docs-staging' # Default to docs-staging
10-
type: string
115

126
jobs:
137
generate_docs:
148
runs-on: ubuntu-latest
159

1610
steps:
17-
- name: Checkout appsmith-docs target branch
11+
- name: Checkout appsmith-docs
1812
uses: actions/checkout@v4
1913
with:
2014
token: ${{ secrets.test_REPO_ACCESS_TOKEN }}
21-
ref: ${{ github.event.inputs.target_branch }} # Checkout the specified branch
22-
# Fetch depth 0 to get all history needed for base branch detection by create-pull-request
23-
fetch-depth: 0
2415

25-
- name: Ensure scripts directory and tracking files exist
16+
- name: Create exclusion list
17+
run: echo > saas_exclusions.txt
18+
19+
- name: Ensure scripts directory exists
2620
run: |
2721
mkdir -p scripts
28-
# Initialize tracking files if they don't exist in the checked-out branch
2922
[ -f scripts/processed_files.txt ] || touch scripts/processed_files.txt
3023
[ -f scripts/file_hashes.json ] || echo "{}" > scripts/file_hashes.json
3124
3225
- name: Fetch file list from test repo
3326
id: fetch_files
3427
run: |
35-
echo "Fetching files from source repo..."
36-
curl -s --max-time 60 -H "Authorization: Bearer ${{ secrets.test_REPO_ACCESS_TOKEN }}" \
28+
curl -s --max-time 30 -H "Authorization: Bearer ${{ secrets.test_REPO_ACCESS_TOKEN }}" \
3729
-H "Accept: application/vnd.github+json" \
3830
https://api.github.com/repos/harshilp24/integration-resources-test/contents/Generic%20UQI%20Creation/uqi_configs \
3931
-o response.json
4032
41-
if ! jq -e '.' response.json > /dev/null; then
42-
echo "Error: Invalid JSON received from GitHub API."
43-
cat response.json # Print response for debugging
44-
exit 1
45-
fi
33+
jq -r '.[] | select(.type=="file") | [.name, .sha] | @tsv' response.json > latest_files_with_sha.txt
34+
jq -r '.[] | select(.type=="file") | .name' response.json > latest_files.txt
4635
47-
# Check if the response is an array (list of files) or an object (error message)
48-
if jq -e 'type == "array"' response.json > /dev/null; then
49-
jq -r '.[] | select(.type=="file") | [.name, .sha] | @tsv' response.json > latest_files_with_sha.txt
50-
jq -r '.[] | select(.type=="file") | .name' response.json > latest_files.txt
51-
echo "files_found=true" >> $GITHUB_ENV
52-
echo "Files list fetched successfully."
53-
else
54-
echo "Warning: Received non-array response from GitHub API (maybe empty dir or error?):"
55-
cat response.json
56-
# Create empty files to avoid errors downstream if dir is empty
57-
touch latest_files_with_sha.txt
58-
touch latest_files.txt
59-
echo "files_found=false" >> $GITHUB_ENV # Indicate no files found
60-
fi
36+
echo "files_found=true" >> $GITHUB_ENV
6137
6238
- name: Identify new and modified files
6339
id: detect_changes
64-
# Only run if files were actually found in the source repo
65-
if: env.files_found == 'true'
6640
run: |
67-
echo "Identifying changes against branch: ${{ github.event.inputs.target_branch }}"
68-
# Read tracking files FROM THE CHECKED-OUT BRANCH
6941
PREV_HASHES=$(cat scripts/file_hashes.json)
70-
# Ensure processed_files.txt exists before sorting
71-
[ -f scripts/processed_files.txt ] || touch scripts/processed_files.txt
72-
73-
# Find files present in latest_files.txt but not in processed_files.txt
74-
comm -23 <(sort latest_files.txt) <(sort scripts/processed_files.txt) > new_files.tmp || true
75-
echo "--- New Files ---"
76-
cat new_files.tmp
77-
echo "-----------------"
78-
79-
MODIFIED_FILES_LIST="modified_files.tmp"
80-
touch $MODIFIED_FILES_LIST
81-
echo "--- Checking for Modifications ---" >&2 # Debug output to stderr
42+
NEW_FILES=$(comm -23 <(sort latest_files.txt) <(sort scripts/processed_files.txt) || true)
43+
MODIFIED_FILES=""
8244
while IFS=$'\t' read -r FILE_NAME FILE_SHA; do
83-
# Check if the file is listed in processed_files.txt (meaning it's not new)
84-
if grep -q -x -F "$FILE_NAME" scripts/processed_files.txt; then
85-
PREV_SHA=$(echo "$PREV_HASHES" | jq -r --arg file "$FILE_NAME" '.["$file"] // ""')
86-
echo "Checking: $FILE_NAME, Current SHA: $FILE_SHA, Previous SHA: $PREV_SHA" >&2
87-
if [ -n "$PREV_SHA" ] && [ "$PREV_SHA" != "$FILE_SHA" ]; then
88-
echo "$FILE_NAME" >> $MODIFIED_FILES_LIST
89-
echo " -> Marked as modified." >&2
90-
fi
45+
PREV_SHA=$(echo "$PREV_HASHES" | jq -r --arg file "$FILE_NAME" '.[$file] // ""')
46+
if [ -n "$PREV_SHA" ] && [ "$PREV_SHA" != "$FILE_SHA" ] && grep -q "^$FILE_NAME$" scripts/processed_files.txt; then
47+
MODIFIED_FILES="$MODIFIED_FILES$FILE_NAME"$'\n'
9148
fi
9249
done < latest_files_with_sha.txt
93-
echo "--- Modified Files ---"
94-
cat $MODIFIED_FILES_LIST
95-
echo "----------------------"
96-
97-
# Combine new and modified files, ensuring uniqueness and removing empty lines
98-
cat new_files.tmp $MODIFIED_FILES_LIST | sort | uniq | grep -v '^$' > files_to_process.txt || true
99-
100-
echo "--- Files to Process ---"
101-
cat files_to_process.txt
102-
echo "------------------------"
103-
50+
{ echo "$NEW_FILES"; echo "$MODIFIED_FILES"; } | grep -v "^$" > files_to_process.txt
10451
if [ -s files_to_process.txt ]; then
10552
echo "changes_found=true" >> $GITHUB_ENV
106-
echo "Changes detected."
10753
else
10854
echo "changes_found=false" >> $GITHUB_ENV
109-
echo "No new or modified files detected."
11055
fi
111-
# Clean up temporary files
112-
rm -f new_files.tmp modified_files.tmp
11356
11457
- name: Exit if no files to process
11558
if: env.changes_found != 'true'
116-
run: |
117-
echo "No changes detected in source files relative to branch '${{ github.event.inputs.target_branch }}'. Exiting."
118-
exit 0
59+
run: exit 0
11960

12061
- name: Process files with OpenAI
121-
# This step now correctly reads the initial hashes from the checked-out branch
122-
# and updates the local files, which are then committed in the next step.
123-
if: env.changes_found == 'true'
12462
run: |
12563
mkdir -p generated_docs
126-
# Read initial hashes from the checked-out branch state
12764
HASHES_JSON=$(cat scripts/file_hashes.json)
12865
PROCESSED_COUNT=0
12966
13067
while IFS= read -r FILE_NAME; do
131-
# Ensure FILE_NAME is not empty
132-
if [ -z "$FILE_NAME" ]; then
133-
continue
134-
fi
135-
13668
echo "⏳ Processing $FILE_NAME"
137-
# URL encode the filename for the URL
138-
ENCODED_FILE_NAME=$(printf '%s' "$FILE_NAME" | jq -sRr @uri)
139-
FILE_URL="https://raw.githubusercontent.com/harshilp24/integration-resources-test/main/Generic%20UQI%20Creation/uqi_configs/$ENCODED_FILE_NAME"
140-
echo "Fetching content from: $FILE_URL"
141-
curl -fsSL --max-time 60 "$FILE_URL" -o input_file.json
142-
if [ $? -ne 0 ]; then
143-
echo "Error: Failed to download $FILE_NAME from $FILE_URL" >&2
144-
continue # Skip this file if download fails
145-
fi
69+
FILE_URL="https://raw.githubusercontent.com/harshilp24/integration-resources-test/main/Generic%20UQI%20Creation/uqi_configs/$FILE_NAME"
70+
curl -sSL --max-time 30 "$FILE_URL" -o input_file.json
14671
147-
# Find the SHA for the current file using awk (more robust than grep+cut)
148-
FILE_SHA=$(awk -F'\t' -v filename="$FILE_NAME" '$1 == filename { print $2; exit }' latest_files_with_sha.txt)
72+
FILE_SHA=$(grep "$FILE_NAME" latest_files_with_sha.txt | cut -f2)
73+
HASHES_JSON=$(echo "$HASHES_JSON" | jq --arg file "$FILE_NAME" --arg sha "$FILE_SHA" '.[$file] = $sha')
14974
150-
if [ -z "$FILE_SHA" ]; then
151-
echo "Warning: Could not find SHA for '$FILE_NAME' in latest_files_with_sha.txt. Skipping hash update." >&2
152-
else
153-
echo "Updating hash for $FILE_NAME to $FILE_SHA"
154-
# Update the hash in our JSON object
155-
HASHES_JSON=$(echo "$HASHES_JSON" | jq --arg file "$FILE_NAME" --arg sha "$FILE_SHA" '.["$file"] = $sha')
156-
fi
157-
158-
# --- OpenAI Processing Start ---
15975
# Prompt 1: Extract Info
16076
SYSTEM_PROMPT=$(cat .github/prompts/extract_prompt.txt || echo "Extract important integration details.")
16177
USER_CONTENT=$(cat input_file.json)
@@ -173,18 +89,14 @@ jobs:
17389
temperature: 0
17490
}')
17591
176-
RESPONSE1=$(curl -s https://api.openai.com/v1/chat/completions \
177-
-H "Authorization: Bearer ${{ secrets.OPENAI_API_KEY }}" \
92+
RESPONSE=$(curl -s https://api.openai.com/v1/chat/completions \
93+
-H "Authorization: Bearer ${{ secrets.test_OPENAI_API_KEY }}" \
17894
-H "Content-Type: application/json" \
17995
-d "$PAYLOAD")
18096
181-
# Check for API errors
182-
if echo "$RESPONSE1" | jq -e '.error' > /dev/null; then
183-
echo "Error during OpenAI Prompt 1 for $FILE_NAME:" >&2
184-
echo "$RESPONSE1" | jq '.' >&2
185-
continue # Skip this file
186-
fi
187-
echo "$RESPONSE1" | jq -r '.choices[0].message.content' > extracted_info.md
97+
echo "$RESPONSE" | jq '.'
98+
99+
echo "$RESPONSE" | jq -r '.choices[0].message.content' > extracted_info.md
188100
189101
# Prompt 2: Generate Markdown
190102
SYSTEM_PROMPT=$(cat .github/prompts/generate_prompt.txt || echo "Generate reference documentation in markdown.")
@@ -203,85 +115,44 @@ jobs:
203115
temperature: 0.3
204116
}')
205117
206-
RESPONSE2=$(curl -s https://api.openai.com/v1/chat/completions \
207-
-H "Authorization: Bearer ${{ secrets.OPENAI_API_KEY }}" \
118+
RESPONSE=$(curl -s https://api.openai.com/v1/chat/completions \
119+
-H "Authorization: Bearer ${{ secrets.test_OPENAI_API_KEY }}" \
208120
-H "Content-Type: application/json" \
209121
-d "$PAYLOAD")
210122
211-
# Check for API errors
212-
if echo "$RESPONSE2" | jq -e '.error' > /dev/null; then
213-
echo "Error during OpenAI Prompt 2 for $FILE_NAME:" >&2
214-
echo "$RESPONSE2" | jq '.' >&2
215-
continue # Skip this file
216-
fi
217-
echo "$RESPONSE2" | jq -r '.choices[0].message.content' > generated_doc.md
218-
# --- OpenAI Processing End ---
123+
echo "$RESPONSE" | jq '.'
124+
125+
echo "$RESPONSE" | jq -r '.choices[0].message.content' > generated_doc.md
219126
220-
# Determine output path
221127
INTEGRATION=$(echo "$FILE_NAME" | sed 's/_uqi_config\.json//' | tr '[:upper:]' '[:lower:]')
222128
FINAL_PATH="website/docs/connect-data/reference/${INTEGRATION}.md"
223129
224130
mkdir -p "$(dirname "$FINAL_PATH")"
225131
cp generated_doc.md "$FINAL_PATH"
226-
# Optional: Keep a copy in a separate dir if needed for artifacts
227-
# cp generated_doc.md "generated_docs/${INTEGRATION}.md"
132+
cp generated_doc.md "generated_docs/${INTEGRATION}.md"
228133
229-
# Add the successfully processed file to the list for this run
230-
echo "$FILE_NAME" >> processed_files_this_run.txt
134+
echo "$FILE_NAME" >> scripts/processed_files.txt
231135
PROCESSED_COUNT=$((PROCESSED_COUNT + 1))
232-
echo "✅ Finished processing $FILE_NAME"
233-
136+
echo "✅ Finished $FILE_NAME"
234137
done < files_to_process.txt
235138
236-
# Update the main tracking files with the results of this run
237-
# Append newly processed files to the persistent list
238-
if [ -f processed_files_this_run.txt ]; then
239-
cat processed_files_this_run.txt >> scripts/processed_files.txt
240-
# Ensure uniqueness and sort the persistent list
241-
sort -u scripts/processed_files.txt -o scripts/processed_files.txt
242-
rm processed_files_this_run.txt
243-
fi
244-
# Overwrite the persistent hash file with the updated JSON
245-
echo "$HASHES_JSON" | jq '.' > scripts/file_hashes.json
246-
139+
echo "$HASHES_JSON" > scripts/file_hashes.json
247140
echo "processed_count=$PROCESSED_COUNT" >> $GITHUB_ENV
248-
if [ "$PROCESSED_COUNT" -gt 0 ]; then
249-
echo "content_generated=true" >> $GITHUB_ENV
250-
else
251-
echo "content_generated=false" >> $GITHUB_ENV
252-
fi
253-
# Clean up intermediate files
254-
rm -f input_file.json extracted_info.md generated_doc.md
141+
echo "content_generated=true" >> $GITHUB_ENV
255142
256-
- name: Commit and open PR against target branch
257-
# Only run if content was actually generated in the previous step
143+
- name: Commit and open PR
258144
if: env.content_generated == 'true'
259-
uses: peter-evans/create-pull-request@v6 # Use v6 for latest features/fixes
145+
uses: peter-evans/create-pull-request@v5
260146
with:
261147
token: ${{ secrets.test_REPO_ACCESS_TOKEN }}
262-
# Make title and commit message specific to the target branch
263-
title: "docs: update integration docs for ${{ github.event.inputs.target_branch }}"
264-
commit-message: "docs: automated generation for ${{ github.event.inputs.target_branch }}\n\nProcessed files based on changes in harshilp24/integration-resources-test."
265-
# Create a branch name that includes the target branch for clarity
266-
branch: "docs-update/${{ github.event.inputs.target_branch }}-${{ github.run_id }}"
267-
# Set the base branch for the PR to the target branch
268-
base: ${{ github.event.inputs.target_branch }}
269-
# Add the generated docs and the UPDATED tracking files
148+
title: "test: generate integration docs from test repo"
149+
commit-message: "test: generated docs from harshilp24/integration-resources-test"
150+
branch: "test/docs-update-${{ github.run_id }}"
151+
base: main
270152
add-paths: |
271153
website/docs/connect-data/reference/
272154
scripts/processed_files.txt
273155
scripts/file_hashes.json
274-
# Update PR body
275156
body: |
276-
✅ Automated PR: Generated/updated integration documentation based on changes in the source repository.
277-
278-
**Target Branch:** `${{ github.event.inputs.target_branch }}`
279-
**Source Repo:** [harshilp24/integration-resources-test](https://github.com/harshilp24/integration-resources-test/tree/main/Generic%20UQI%20Creation/uqi_configs)
280-
281-
This PR includes:
282-
- Updated markdown files in `website/docs/connect-data/reference/`
283-
- Updated tracking files in `scripts/` to reflect the processed state for this branch.
284-
# Optional: Add labels, assignees etc.
285-
# labels: automated-pr, documentation
286-
# assignees: your-github-username
287-
157+
✅ Test PR: Generated integration documentation from your test repo.
158+
Source: [harshilp24/integration-resources-test](https://github.com/harshilp24/integration-resources-test/tree/main/Generic%20UQI%20Creation/uqi_configs)

0 commit comments

Comments
 (0)