Skip to content

Commit d3137f0

Browse files
committed
Add changelog context extraction and embedding generation scripts
This commit introduces several new scripts for generating changelog context and embeddings. Key additions include `extract_changelog_context.py`, which gathers repository context such as README content, module docstrings, project structure, and changelog history. The `create_changelog_embeddings.py` script generates embeddings for these context files, enhancing the efficiency of changelog entry generation. Additionally, a Dockerfile and entrypoint script are added to facilitate containerization of the application, along with a .dockerignore file to manage ignored files during the build process. GitHub workflows for testing and updating changelogs are also included, streamlining the CI/CD process for changelog management.
1 parent 644a102 commit d3137f0

13 files changed

+2111
-0
lines changed

.dockerignore

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
# Build-context exclusions. The Dockerfile copies the whole context with
# `COPY . .`, so everything NOT listed here ends up inside the image.

# Git
.git
.github
.gitignore

# Docker
.dockerignore
Dockerfile

# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
.pytest_cache/
.coverage
htmlcov/
.tox/
.nox/

# Virtual environments under their other common names (only `env/` was
# covered before); they are large and useless inside the image.
.venv/
venv/
ENV/

# Local environment files frequently hold API keys; never bake them into
# an image layer.
.env
.env.*

# DB files (unless you need them)
*.db

# Benchmark results
benchmark_results/

# VSCode
.vscode/

# Other
*.log
*.swp
.DS_Store
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
name: Test Changelog Scripts

# Triggered only when the changelog scripts or this workflow file change.
on:
  push:
    paths:
      - "scripts/**"
      - ".github/workflows/test-changelog-scripts.yml"
  pull_request:
    paths:
      - "scripts/**"
      - ".github/workflows/test-changelog-scripts.yml"

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install markdown matplotlib
          # Project-level requirements are optional; install them when present.
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi

      # Each test module is run as a module of the `scripts` package, one
      # step per module so a failure points at the script under test.
      - name: Run unit tests for extract_changelog_context.py
        run: python -m scripts.test_extract_changelog_context

      - name: Run unit tests for generate_changelog_entry.py
        run: python -m scripts.test_generate_changelog_entry

      - name: Run unit tests for generate_changelog_html.py
        run: python -m scripts.test_generate_changelog_html

      - name: Run integration tests
        run: python -m scripts.integration_test_changelog
Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
name: Update Changelog on PR

on:
  pull_request:
    # `closed` must be listed: without it the merge job's
    # `pull_request.merged == true` condition can never be satisfied.
    types: [opened, synchronize, reopened, closed]
    branches:
      - main

jobs:
  # Builds a changelog entry for the pull request, posts it as a PR comment
  # and publishes the generated files as the `changelog-assets` artifact.
  generate-pr-changelog:
    runs-on: ubuntu-latest
    # Skip pull requests that were closed without being merged.
    if: github.event.action != 'closed' || github.event.pull_request.merged == true
    permissions:
      contents: read
      pull-requests: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install openai markdown matplotlib numpy tiktoken

      - name: Get PR diff information
        id: get_pr_diff
        env:
          PR_NUMBER: ${{ github.event.pull_request.number }}
          PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
          PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
        run: |
          # Make sure the PR head commit exists locally. After a squash or
          # rebase merge it is no longer reachable from any branch, but the
          # pull/<n>/head ref still points at it.
          git fetch --no-tags origin "refs/pull/${PR_NUMBER}/head"

          # Commits in this PR
          git log --pretty=format:"%h - %s (%an)" "$PR_BASE_SHA..$PR_HEAD_SHA" > pr_commits.txt

          # Files changed in this PR
          git diff --name-status "$PR_BASE_SHA..$PR_HEAD_SHA" > pr_files_changed.txt

          # Per-file change statistics (for context)
          git diff --stat "$PR_BASE_SHA..$PR_HEAD_SHA" > pr_diff_stats.txt

          # Output for debugging
          echo "PR diff between: $PR_BASE_SHA and $PR_HEAD_SHA"

      - name: Analyze PR labels
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
        run: |
          gh pr view "$PR_NUMBER" --json labels -q '.labels[].name' > pr_labels.txt

      - name: Gather library context
        id: gather_context
        run: |
          # Run the context extraction script
          python scripts/extract_changelog_context.py

      - name: Create context embeddings
        id: create_embeddings
        env:
          # NOTE(review): a repository *variable* is stored in plain text and
          # takes precedence here; confirm the key should not live only in
          # secrets.
          OPENAI_API_KEY: ${{ vars.OPENAI_API_KEY || secrets.OPENAI_API_KEY }}
        run: |
          # Generate embeddings for all context files to reduce token usage
          python scripts/create_changelog_embeddings.py

          # Store embedding statistics for monitoring
          echo "Embedding stats:"
          python - <<'PY'
          import json
          from pathlib import Path

          data = json.loads(Path('changelog_embeddings.json').read_text())
          print(f'Total files embedded: {len(data["embeddings"])}')
          total_tokens = sum(data['token_counts'].values())
          print(f'Total tokens in original files: {total_tokens}')
          PY

      - name: Generate changelog entry for PR
        id: generate_pr_changelog
        env:
          OPENAI_API_KEY: ${{ vars.OPENAI_API_KEY || secrets.OPENAI_API_KEY }}
          PR_TITLE: ${{ github.event.pull_request.title }}
          PR_BODY: ${{ github.event.pull_request.body }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          REPO_NAME: ${{ github.repository }}
        run: |
          # Build the request body with jq from environment variables.
          # The PR title/body are attacker-controlled: expanding them with
          # ${{ }} inside this script would allow shell injection and would
          # also break the JSON on any quote or newline. jq escapes them.
          # The Chat Completions API defines no `context_embeddings` request
          # field, so the textual context from the diff step is sent instead.
          jq -n \
            --arg number "$PR_NUMBER" \
            --arg title "$PR_TITLE" \
            --arg body "$PR_BODY" \
            --rawfile commits pr_commits.txt \
            --rawfile stats pr_diff_stats.txt \
            '{
              model: "gpt-4o",
              messages: [
                {role: "system", content: "You are a changelog generator that creates detailed, structured entries for pull requests. Generate a concise changelog entry in markdown format for the PR."},
                {role: "user", content: ("Generate a changelog entry for PR #" + $number + " with title: \"" + $title + "\". The PR description is: \"" + $body + "\". Commits in this PR:\n" + $commits + "\n\nDiff summary:\n" + $stats)}
              ]
            }' > payload.json

          # Make the API call. A transport failure must not abort the step,
          # because the fallback below handles it.
          curl -sS -X POST https://api.openai.com/v1/chat/completions \
            -H "Content-Type: application/json" \
            -H "Authorization: Bearer $OPENAI_API_KEY" \
            --data-binary @payload.json \
            -o openai_response.json || true

          # `// empty` keeps the file empty on an error response; plain
          # `jq -r` would print the string "null" and defeat the -s check.
          jq -r '.choices[0].message.content // empty' openai_response.json \
            > pr_changelog_entry.md || true

          # Fallback to traditional method if the API approach fails
          if [ ! -s pr_changelog_entry.md ]; then
            echo "Embedding approach failed, falling back to traditional method..."
            # NOTE(review): presumably the script writes pr_changelog_entry.md
            # itself; only its stdout is captured here. Verify.
            python scripts/generate_changelog_entry.py > changelog_output.txt
          fi

          if [ ! -s pr_changelog_entry.md ]; then
            echo "::warning::pr_changelog_entry.md is empty after both generation paths"
          fi

          # Publish the entry as the step output `changelog`. Multi-line
          # values need the name<<DELIMITER form; the delimiter is randomized
          # so the content cannot terminate it early.
          delimiter="CHANGELOG_$(date +%s%N)_${RANDOM}"
          {
            echo "changelog<<${delimiter}"
            cat pr_changelog_entry.md
            echo ""
            echo "${delimiter}"
          } >> "$GITHUB_OUTPUT"

      - name: Generate visual changelog
        env:
          PR_TITLE: ${{ github.event.pull_request.title }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          REPO_NAME: ${{ github.repository }}
        run: |
          # Generate visual HTML changelog
          python scripts/generate_changelog_html.py

      - name: Create or update PR changelog file
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          EVENT_ACTION: ${{ github.event.action }}
        run: |
          # Keep a per-PR copy of the generated entry
          cp pr_changelog_entry.md "PR_${PR_NUMBER}_CHANGELOG.md"

          # Prepare comment with links to assets
          {
            echo "## Changelog Preview for this PR:"
            echo ""
            cat pr_changelog_entry.md
            echo ""
            echo "### Additional resources:"
            echo "- [Release Notes](RELEASE_NOTES.md)"
            echo ""
            echo "This will be automatically added to CHANGELOG.md when merged."
            echo ""
            echo "💡 *Generated using vector embeddings for efficient token usage*"
          } > pr_comment.txt

          # Add a comment to the PR with the changelog preview. A preview is
          # pointless once the PR has already been merged.
          if [ "$EVENT_ACTION" != "closed" ]; then
            gh pr comment "$PR_NUMBER" --body-file pr_comment.txt
          fi

      - name: Upload changelog assets
        # v3 of the artifact actions is deprecated; v4 is required.
        uses: actions/upload-artifact@v4
        with:
          name: changelog-assets
          path: |
            pr_changelog_entry.md
            RELEASE_NOTES.md
            changelog_visual.html
            impact_analysis.txt
            test_coverage_analysis.txt
            impact_analysis.json
            commit_categories.json
            commit_categories.txt
            changelog_embeddings.json

  # Prepends the generated entry to CHANGELOG.md on the base branch once the
  # pull request has been merged.
  update-changelog-on-merge:
    runs-on: ubuntu-latest
    # Only run this job when PR is merged to main
    if: github.event.pull_request.merged == true
    needs: generate-pr-changelog
    permissions:
      contents: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Check out the base branch itself. The default PR ref is a
          # detached HEAD, from which `git push` cannot update main.
          ref: ${{ github.event.pull_request.base.ref }}
          fetch-depth: 0
          # We need to use a token with write permissions to push to main
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Download changelog assets
        uses: actions/download-artifact@v4
        with:
          name: changelog-assets

      - name: Update CHANGELOG.md
        run: |
          NEW_ENTRY=$(cat pr_changelog_entry.md)
          # printf '%s' writes the text verbatim; `echo -e` would expand
          # backslash sequences that occur in the changelog content.
          if [ -f CHANGELOG.md ]; then
            EXISTING=$(cat CHANGELOG.md)
            printf '%s\n\n%s\n' "$NEW_ENTRY" "$EXISTING" > CHANGELOG.md
          else
            printf '# Changelog\n\n%s\n' "$NEW_ENTRY" > CHANGELOG.md
          fi

      - name: Commit and push changes
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
        run: |
          git config --global user.name 'github-actions[bot]'
          git config --global user.email 'github-actions[bot]@users.noreply.github.com'
          git add CHANGELOG.md

          # Also add release assets if available
          if [ -f RELEASE_NOTES.md ]; then
            git add RELEASE_NOTES.md
          fi
          if [ -f changelog_visual.html ]; then
            git add changelog_visual.html
          fi

          # `git commit` exits non-zero when nothing is staged; treat that
          # as success instead of failing the job.
          if git diff --cached --quiet; then
            echo "No changelog changes to commit."
            exit 0
          fi

          git commit -m "Update CHANGELOG.md with changes from PR #${PR_NUMBER}"
          git push

Dockerfile

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
# Base image: slim Python 3.11.
FROM python:3.11-slim

# Relative paths in the instructions below resolve against /app.
WORKDIR /app

# Install dependencies ahead of the sources so this layer is reused from
# cache until requirements.txt itself changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir --requirement requirements.txt

# Bring in the rest of the build context.
COPY . ./

# Place the entrypoint script at the filesystem root and make it executable.
COPY docker-entrypoint.sh /docker-entrypoint.sh
RUN chmod +x /docker-entrypoint.sh

# Exec-form entrypoint: the script is the container's main process.
ENTRYPOINT ["/docker-entrypoint.sh"]

0 commit comments

Comments
 (0)