diff --git a/.github/workflows/combined-check.yml b/.github/workflows/combined-check.yml new file mode 100644 index 0000000000..854244b868 --- /dev/null +++ b/.github/workflows/combined-check.yml @@ -0,0 +1,211 @@ +name: Combined Style and Grammar Check + +permissions: + contents: read + pull-requests: write + +on: + workflow_dispatch: + inputs: + pr_number: + description: 'Pull Request Number' + required: true + type: string + +jobs: + combined-check: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install requests markdown beautifulsoup4 + + - name: Fetch PR details + id: pr-details + run: | + PR_NUMBER=${{ github.event.inputs.pr_number }} + echo "PR_NUMBER=$PR_NUMBER" >> $GITHUB_ENV + + # Get the PR details using GitHub API + PR_DATA=$(curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + "https://api.github.com/repos/${{ github.repository }}/pulls/$PR_NUMBER") + + HEAD_SHA=$(echo "$PR_DATA" | jq -r .head.sha) + HEAD_REF=$(echo "$PR_DATA" | jq -r .head.ref) + BASE_SHA=$(echo "$PR_DATA" | jq -r .base.sha) + + echo "HEAD_SHA=$HEAD_SHA" >> $GITHUB_ENV + echo "HEAD_REF=$HEAD_REF" >> $GITHUB_ENV + echo "BASE_SHA=$BASE_SHA" >> $GITHUB_ENV + + - name: Get changed files and their patch data + id: changed-files + run: | + # Get the list of files changed in the PR with patch data + PR_FILES=$(curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + "https://api.github.com/repos/${{ github.repository }}/pulls/${{ env.PR_NUMBER }}/files") + + echo "$PR_FILES" > pr_files.json + + # Extract just the markdown files + MD_FILES=$(echo "$PR_FILES" | jq -r '.[] | select(.filename | endswith(".md") or endswith(".mdx")) | .filename') + + echo "Changed markdown files:" + echo "$MD_FILES" + echo "FILES<<EOF" >> $GITHUB_ENV + echo "$MD_FILES" >> $GITHUB_ENV + echo "EOF" >> $GITHUB_ENV + + - name: Run combined checks + run: | + # Create a directory for storing file contents + mkdir -p temp_files + + # Process each changed file + echo "$FILES" | while read -r file; do + if [ -z "$file" ]; then + continue + fi + + echo "Processing $file" + + # Get file content from the PR head + curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + "https://api.github.com/repos/${{ github.repository }}/contents/$file?ref=${{ env.HEAD_REF }}" | \ + jq -r '.content' | base64 --decode > "temp_files/$(basename "$file")" + + # Run style check + python3 tools/enhanced_style_check.py --file "temp_files/$(basename "$file")" --output "temp_files/$(basename "$file").style.json" + + # Run grammar check + python3 tools/grammar_check.py --file "temp_files/$(basename "$file")" --output "temp_files/$(basename "$file").grammar.json" + + # Combine results using the separate Python script + python3 tools/combine_results.py \ + "temp_files/$(basename "$file").style.json" \ + "temp_files/$(basename "$file").grammar.json" \ + "temp_files/$(basename "$file").combined.json" + done + + # Combine all suggestions + echo "[]" > all_suggestions.json + for json_file in temp_files/*.combined.json; do + if [ -f "$json_file" ]; then + # Merge JSON files + jq -s '.[0] + .[1]' all_suggestions.json "$json_file" > temp.json + mv temp.json all_suggestions.json + fi + done + + # Create individual comments for each suggestion + cat > create_comments.py << 'EOL' + import json + import requests + import os + 
import sys + import time + + # Load PR files data + with open('pr_files.json', 'r') as f: + pr_files = json.load(f) + + # Create a map of file paths + file_paths = {} + for file_data in pr_files: + basename = os.path.basename(file_data['filename']) + file_paths[basename] = file_data['filename'] + + # Load combined suggestions + with open('all_suggestions.json', 'r') as f: + suggestions = json.load(f) + + if not suggestions: + print("No issues found.") + sys.exit(0) + + # Get environment variables + pr_number = os.environ.get('PR_NUMBER') + repo = os.environ.get('GITHUB_REPOSITORY') + github_token = os.environ.get('GITHUB_TOKEN') + + # Count suggestion types: grammar_check.py prefixes its reasons with "Grammar:", + # so anything without that prefix is a style suggestion + style_count = sum(1 for sugg in suggestions if not sugg.get('reason', '').startswith("Grammar")) + grammar_count = sum(1 for sugg in suggestions if sugg.get('reason', '').startswith("Grammar")) + + print(f"Processing {len(suggestions)} suggestions: {style_count} style issues, {grammar_count} grammar issues") + + # Process each suggestion + for i, sugg in enumerate(suggestions): + # Get the full file path + basename = os.path.basename(sugg["file"]) + if basename in file_paths: + file_path = file_paths[basename] + else: + file_path = sugg["file"].replace('temp_files/', '') + + # Create a comment with suggestion + issue_type = "Grammar" if sugg.get('reason', '').startswith("Grammar") else "Style" + comment_body = f"**{issue_type} suggestion**: {sugg['reason']}\n\n```suggestion\n{sugg['suggested']}\n```" + + # Create the comment using GitHub API + url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}/comments" + headers = { + "Authorization": f"token {github_token}", + "Accept": "application/vnd.github.v3+json" + } + data = { + "body": comment_body, + "commit_id": os.environ.get('HEAD_SHA'), + "path": file_path, + "line": sugg["line"] + } + + print(f"Creating comment {i+1}/{len(suggestions)} for {file_path}:{sugg['line']}") + response = requests.post(url, headers=headers, json=data) + + if response.status_code >= 400: + print(f"Error creating comment: {response.status_code}") + print(response.text) + else: + print(f"Created comment: {response.status_code}") + + # Add a small delay to avoid rate limiting + time.sleep(1) + + print(f"Processed {len(suggestions)} suggestions.") + EOL + + # Run the Python script with environment variables + GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }} python3 create_comments.py + + - name: Create summary comment on PR + run: | + # Count suggestions by type (grammar reasons carry the "Grammar:" prefix) + STYLE_COUNT=$(jq '[.[] | select(.reason | startswith("Grammar") | not)] | length' all_suggestions.json) + GRAMMAR_COUNT=$(jq '[.[] | select(.reason | startswith("Grammar"))] | length' all_suggestions.json) + TOTAL_COUNT=$(jq 'length' all_suggestions.json) + + if [ "$TOTAL_COUNT" -gt 0 ]; then + # Create a summary comment + COMMENT="## Style and Grammar Check Results\n\nI found $TOTAL_COUNT issues that could be improved:\n- $STYLE_COUNT style issues\n- $GRAMMAR_COUNT grammar issues\n\nEach issue has been added as a suggestion comment that you can directly commit or dismiss." + + # Post the comment to the PR + curl -s -X POST \ + -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + -H "Accept: application/vnd.github.v3+json" \ + -d "{\"body\":\"$COMMENT\"}" \ + "https://api.github.com/repos/${{ github.repository }}/issues/${{ env.PR_NUMBER }}/comments" + else + echo "No issues found." 
+ fi diff --git a/.github/workflows/grammar-check.yml b/.github/workflows/grammar-check.yml new file mode 100644 index 0000000000..9361727bc5 --- /dev/null +++ b/.github/workflows/grammar-check.yml @@ -0,0 +1,193 @@ +name: Grammar Check + +permissions: + contents: read + pull-requests: write + +on: + workflow_dispatch: + inputs: + pr_number: + description: 'Pull Request Number' + required: true + type: string + +jobs: + grammar-check: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install requests markdown beautifulsoup4 + + - name: Fetch PR details + id: pr-details + run: | + PR_NUMBER=${{ github.event.inputs.pr_number }} + echo "PR_NUMBER=$PR_NUMBER" >> $GITHUB_ENV + + # Get the PR details using GitHub API + PR_DATA=$(curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + "https://api.github.com/repos/${{ github.repository }}/pulls/$PR_NUMBER") + + HEAD_SHA=$(echo "$PR_DATA" | jq -r .head.sha) + HEAD_REF=$(echo "$PR_DATA" | jq -r .head.ref) + BASE_SHA=$(echo "$PR_DATA" | jq -r .base.sha) + + echo "HEAD_SHA=$HEAD_SHA" >> $GITHUB_ENV + echo "HEAD_REF=$HEAD_REF" >> $GITHUB_ENV + echo "BASE_SHA=$BASE_SHA" >> $GITHUB_ENV + + - name: Get changed files and their patch data + id: changed-files + run: | + # Get the list of files changed in the PR with patch data + PR_FILES=$(curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + "https://api.github.com/repos/${{ github.repository }}/pulls/${{ env.PR_NUMBER }}/files") + + echo "$PR_FILES" > pr_files.json + + # Extract just the markdown files + MD_FILES=$(echo "$PR_FILES" | jq -r '.[] | select(.filename | endswith(".md") or endswith(".mdx")) | .filename') + + echo "Changed markdown files:" + echo "$MD_FILES" + echo "FILES<<EOF" >> $GITHUB_ENV + echo "$MD_FILES" >> $GITHUB_ENV + echo "EOF" >> $GITHUB_ENV + + - name: Check grammar and create suggestions + run: | + # Create a directory for storing file contents + mkdir -p temp_files + + # Process each changed file + echo "$FILES" | while read -r file; do + if [ -z "$file" ]; then + continue + fi + + echo "Processing $file" + + # Get file content from the PR head + curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + "https://api.github.com/repos/${{ github.repository }}/contents/$file?ref=${{ env.HEAD_REF }}" | \ + jq -r '.content' | base64 --decode > "temp_files/$(basename "$file")" + + # Run grammar check on the file + python3 tools/grammar_check.py --file "temp_files/$(basename "$file")" --output "temp_files/$(basename "$file").grammar.json" + done + + # Combine all suggestions + echo "[]" > all_grammar_suggestions.json + for json_file in temp_files/*.grammar.json; do + if [ -f "$json_file" ]; then + # Merge JSON files + jq -s '.[0] + .[1]' all_grammar_suggestions.json "$json_file" > temp.json + mv temp.json all_grammar_suggestions.json + fi + done + + # Create individual comments for each suggestion + cat > create_grammar_comments.py << 'EOL' + import json + import requests + import os + import sys + import time + + # Load PR files data + with open('pr_files.json', 'r') as f: + pr_files = json.load(f) + + # Create a map of file paths + file_paths = {} + for file_data in pr_files: + basename = os.path.basename(file_data['filename']) + file_paths[basename] = file_data['filename'] + + # Load grammar suggestions + with 
open('all_grammar_suggestions.json', 'r') as f: + suggestions = json.load(f) + + if not suggestions: + print("No grammar issues found.") + sys.exit(0) + + # Get environment variables + pr_number = os.environ.get('PR_NUMBER') + repo = os.environ.get('GITHUB_REPOSITORY') + github_token = os.environ.get('GITHUB_TOKEN') + + # Process each suggestion + for i, sugg in enumerate(suggestions): + # Get the full file path + basename = os.path.basename(sugg["file"]) + if basename in file_paths: + file_path = file_paths[basename] + else: + file_path = sugg["file"].replace('temp_files/', '') + + # Create a comment with suggestion + comment_body = f"**Grammar suggestion**: {sugg['reason']}\n\n```suggestion\n{sugg['suggested']}\n```" + + # Create the comment using GitHub API + url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}/comments" + headers = { + "Authorization": f"token {github_token}", + "Accept": "application/vnd.github.v3+json" + } + data = { + "body": comment_body, + "commit_id": os.environ.get('HEAD_SHA'), + "path": file_path, + "line": sugg["line"] + } + + print(f"Creating comment {i+1}/{len(suggestions)} for {file_path}:{sugg['line']}") + response = requests.post(url, headers=headers, json=data) + + if response.status_code >= 400: + print(f"Error creating comment: {response.status_code}") + print(response.text) + else: + print(f"Created comment: {response.status_code}") + + # Add a small delay to avoid rate limiting + time.sleep(1) + + print(f"Processed {len(suggestions)} grammar suggestions.") + EOL + + # Run the Python script with environment variables + GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }} python3 create_grammar_comments.py + + - name: Create summary comment on PR + run: | + # Count suggestions + SUGGESTION_COUNT=$(jq length all_grammar_suggestions.json) + + if [ "$SUGGESTION_COUNT" -gt 0 ]; then + # Create a summary comment + COMMENT="## Grammar Check Results\n\nI found $SUGGESTION_COUNT grammar issues that could be improved.\n\nEach issue has been added as a suggestion comment that you can directly commit or dismiss." + + # Post the comment to the PR + curl -s -X POST \ + -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + -H "Accept: application/vnd.github.v3+json" \ + -d "{\"body\":\"$COMMENT\"}" \ + "https://api.github.com/repos/${{ github.repository }}/issues/${{ env.PR_NUMBER }}/comments" + else + echo "No grammar issues found." 
+ fi diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml new file mode 100644 index 0000000000..c2be7bef35 --- /dev/null +++ b/.github/workflows/style-check.yml @@ -0,0 +1,194 @@ +name: Style Check + +permissions: + contents: read + pull-requests: write + +on: + workflow_dispatch: + inputs: + pr_number: + description: 'Pull Request Number' + required: true + type: string + +jobs: + style-check: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install requests spacy + python -m spacy download en_core_web_sm + + - name: Fetch PR details + id: pr-details + run: | + PR_NUMBER=${{ github.event.inputs.pr_number }} + echo "PR_NUMBER=$PR_NUMBER" >> $GITHUB_ENV + + # Get the PR details using GitHub API + PR_DATA=$(curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + "https://api.github.com/repos/${{ github.repository }}/pulls/$PR_NUMBER") + + HEAD_SHA=$(echo "$PR_DATA" | jq -r .head.sha) + HEAD_REF=$(echo "$PR_DATA" | jq -r .head.ref) + BASE_SHA=$(echo "$PR_DATA" | jq -r .base.sha) + + echo "HEAD_SHA=$HEAD_SHA" >> $GITHUB_ENV + echo "HEAD_REF=$HEAD_REF" >> $GITHUB_ENV + echo "BASE_SHA=$BASE_SHA" >> $GITHUB_ENV + + - name: Get changed files and their patch data + id: changed-files + run: | + # Get the list of files changed in the PR with patch data + PR_FILES=$(curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + "https://api.github.com/repos/${{ github.repository }}/pulls/${{ env.PR_NUMBER }}/files") + + echo "$PR_FILES" > pr_files.json + + # Extract just the markdown files + MD_FILES=$(echo "$PR_FILES" | jq -r '.[] | select(.filename | endswith(".md") or endswith(".mdx")) | .filename') + + echo "Changed markdown files:" + echo "$MD_FILES" + echo "FILES<<EOF" >> $GITHUB_ENV + echo "$MD_FILES" >> $GITHUB_ENV + echo "EOF" >> $GITHUB_ENV + + - name: Check style and create suggestions + run: | + # Create a directory for storing file contents + mkdir -p temp_files + + # Process each changed file + echo "$FILES" | while read -r file; do + if [ -z "$file" ]; then + continue + fi + + echo "Processing $file" + + # Get file content from the PR head + curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + "https://api.github.com/repos/${{ github.repository }}/contents/$file?ref=${{ env.HEAD_REF }}" | \ + jq -r '.content' | base64 --decode > "temp_files/$(basename "$file")" + + # Run style check on the file + python3 tools/enhanced_style_check.py --file "temp_files/$(basename "$file")" --output "temp_files/$(basename "$file").json" + done + + # Combine all suggestions + echo "[]" > all_suggestions.json + for json_file in temp_files/*.json; do + if [ -f "$json_file" ]; then + # Merge JSON files + jq -s '.[0] + .[1]' all_suggestions.json "$json_file" > temp.json + mv temp.json all_suggestions.json + fi + done + + # Create individual comments for each suggestion + cat > create_comments.py << 'EOL' + import json + import requests + import os + import sys + import time + + # Load PR files data + with open('pr_files.json', 'r') as f: + pr_files = json.load(f) + + # Create a map of file paths + file_paths = {} + for file_data in pr_files: + basename = os.path.basename(file_data['filename']) + file_paths[basename] = file_data['filename'] + + # Load style suggestions + with open('all_suggestions.json', 'r') as f: + 
suggestions = json.load(f) + + if not suggestions: + print("No style issues found.") + sys.exit(0) + + # Get environment variables + pr_number = os.environ.get('PR_NUMBER') + repo = os.environ.get('GITHUB_REPOSITORY') + github_token = os.environ.get('GITHUB_TOKEN') + + # Process each suggestion + for i, sugg in enumerate(suggestions): + # Get the full file path + basename = os.path.basename(sugg["file"]) + if basename in file_paths: + file_path = file_paths[basename] + else: + file_path = sugg["file"].replace('temp_files/', '') + + # Create a comment with suggestion + comment_body = f"**Style suggestion**: {sugg['reason']}\n\n```suggestion\n{sugg['suggested']}\n```" + + # Create the comment using GitHub API + url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}/comments" + headers = { + "Authorization": f"token {github_token}", + "Accept": "application/vnd.github.v3+json" + } + data = { + "body": comment_body, + "commit_id": os.environ.get('HEAD_SHA'), + "path": file_path, + "line": sugg["line"] + } + + print(f"Creating comment {i+1}/{len(suggestions)} for {file_path}:{sugg['line']}") + response = requests.post(url, headers=headers, json=data) + + if response.status_code >= 400: + print(f"Error creating comment: {response.status_code}") + print(response.text) + else: + print(f"Created comment: {response.status_code}") + + # Add a small delay to avoid rate limiting + time.sleep(1) + + print(f"Processed {len(suggestions)} style suggestions.") + EOL + + # Run the Python script with environment variables + GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }} python3 create_comments.py + + - name: Create summary comment on PR + run: | + # Count suggestions + SUGGESTION_COUNT=$(jq length all_suggestions.json) + + if [ "$SUGGESTION_COUNT" -gt 0 ]; then + # Create a summary comment + COMMENT="## Style Check Results\n\nI found $SUGGESTION_COUNT style issues that could be improved based on our writing guidelines.\n\nEach issue has been added as a suggestion comment that you can directly commit or dismiss." + + # Post the comment to the PR + curl -s -X POST \ + -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + -H "Accept: application/vnd.github.v3+json" \ + -d "{\"body\":\"$COMMENT\"}" \ + "https://api.github.com/repos/${{ github.repository }}/issues/${{ env.PR_NUMBER }}/comments" + else + echo "No style issues found." + fi diff --git a/content/learning-paths/cross-platform/example-style-test/index.md b/content/learning-paths/cross-platform/example-style-test/index.md new file mode 100644 index 0000000000..e8272a7c34 --- /dev/null +++ b/content/learning-paths/cross-platform/example-style-test/index.md @@ -0,0 +1,57 @@ +--- +title: "Utilizing Amazon Q for Arm Development" +weight: 1 +layout: "learningpathall" +--- + +## Overview + +In order to optimize your development workflow on Arm architecture, you should understand how to utilize Amazon Q effectively. This learning path will guide you through the process of setting up and utilizing Amazon Q for Arm-based development projects. 
+ +## Learning Objectives + +At this point in time, after completing this learning path, you will be able to: + +- Utilize Amazon Q to optimize code for Arm architecture +- Understand how the data is processed by Amazon Q +- Configure Amazon Q for the purpose of Arm development +- Implement best practices prior to deploying your application + +## Prerequisites + +Please note that before starting this learning path, you should have: + +- Basic knowledge of Arm architecture +- An AWS account with access to Amazon Q +- Due to the fact that Amazon Q requires authentication, you will need appropriate credentials + +## Setting Up Amazon Q + +To utilize Amazon Q for Arm development, follow these steps: + +1. Log in to your AWS account +2. Navigate to the Amazon Q service +3. The system optimizes the interface automatically based on your previous usage +4. Configure your preferences with regard to Arm development + +A large number of developers have found that Amazon Q can significantly enhance productivity when working with Arm-based systems. + +## Optimizing Code with Amazon Q + +Amazon Q is able to analyze your code and provide suggestions for optimization. The tool utilizes machine learning algorithms to identify areas where performance can be optimized. + +Subsequent to analyzing your code, Amazon Q will provide recommendations that you should implement to improve performance. + +## Best Practices + +In spite of Amazon Q's capabilities, it's important to follow these best practices: + +1. Always review the optimized code before deployment +2. Test the optimized code on actual Arm hardware +3. A majority of performance issues can be resolved by following Amazon Q's recommendations + +## Conclusion + +By utilizing Amazon Q for your Arm development projects, we can optimize your workflow and improve code quality. The tool was designed to help developers write more efficient code for Arm architecture. + +With respect to future updates, the Amazon Q team is continuously working to enhance the service's capabilities for Arm development. diff --git a/tools/README.md b/tools/README.md new file mode 100644 index 0000000000..03652935ac --- /dev/null +++ b/tools/README.md @@ -0,0 +1,161 @@ +# Style and Grammar Checking Tools for Arm Learning Paths + +This directory contains tools for checking and enforcing the writing style guidelines and grammar for Arm Learning Paths content. + +## Available Tools + +1. **enhanced_style_check.py**: Check markdown files against writing style guidelines and provide suggestions. Uses both rule-based checks and NLP-based passive voice detection. +2. **grammar_check.py**: Check markdown files for grammar issues using the LanguageTool API. +3. **combined_check.py**: Run both style and grammar checks and combine the results. +4. **github_suggestion.py**: Format style suggestions as GitHub review comments. +5. **style_rules.json**: JSON file containing style rules and patterns. +6. **combine_results.py**: Helper script used by the combined workflow to merge style and grammar results into a single file. +7. **style_check.py**: Basic rule-based style checker that fetches changed PR files and posts review comments through the GitHub API. + +## Local Testing + +### Prerequisites + +For basic functionality, no special dependencies are required. 
+ +For advanced passive voice detection, you'll need: +- spaCy library +- English language model for spaCy + +For grammar checking, you'll need: +- requests +- markdown +- beautifulsoup4 + +You can install these with: +```bash +pip install spacy requests markdown beautifulsoup4 +python -m spacy download en_core_web_sm +``` + +Or use the built-in installer for spaCy: +```bash +python3 tools/enhanced_style_check.py --install-spacy +``` + +### Usage + +#### Check style for a single file: + +```bash +python3 tools/enhanced_style_check.py --file path/to/file.md +``` + +#### Check grammar for a single file: + +```bash +python3 tools/grammar_check.py --file path/to/file.md +``` + +#### Run combined style and grammar check: + +```bash +python3 tools/combined_check.py --file path/to/file.md +``` + +#### Check all markdown files in a directory: + +```bash +python3 tools/combined_check.py --dir path/to/directory +``` + +## GitHub Actions Integration + +The style and grammar checkers are integrated with GitHub Actions and can be run manually on a PR: + +1. Go to the "Actions" tab in the GitHub repository +2. Select one of the following workflows: + - "Style Check" - for style issues only + - "Grammar Check" - for grammar issues only + - "Combined Style and Grammar Check" - for both style and grammar issues +3. Click "Run workflow" +4. Enter the PR number and click "Run workflow" + +The workflow will check the PR content and add review comments with suggestions that you can directly commit or dismiss. + +## Style Rules + +Style rules are defined in `style_rules.json`. Each rule has: + +- **pattern**: Regular expression pattern to match +- **replacement**: Text to replace the matched pattern +- **reason**: Explanation for the suggestion + +To add or modify rules, edit the `style_rules.json` file. + +### Types of Style Checks + +The style checker performs several types of checks: + +1. **Word Choice**: Replaces complex words with simpler alternatives (e.g., "utilize" → "use") +2. **Passive Voice**: Converts passive voice to active voice for clarity and directness +3. **Wordiness**: Simplifies wordy phrases (e.g., "in order to" → "to") +4. **Tone**: Replaces "we" with "you" to address the reader directly +5. **Neutrality**: Replaces phrases like "we recommend" with "it is recommended" for a more neutral tone + +## Passive Voice Detection + +The style checker uses two methods to detect passive voice: + +1. **spaCy NLP (Advanced)**: Uses spaCy's dependency parsing to identify passive voice constructions and suggest active voice alternatives. This method is more accurate but requires additional dependencies. + +2. **Regular Expressions (Basic)**: Falls back to regex patterns if spaCy is not available. This method is less accurate but has no dependencies. + +## Grammar Checking + +The grammar checker uses the LanguageTool API to identify grammar, spelling, and style issues. It: + +1. Extracts plain text from markdown files +2. Sends the text to the LanguageTool API +3. Maps the API responses back to the original markdown lines +4. Generates suggestions for fixing the issues + +## Examples + +### Word Choice Example: +Input: +```markdown +In order to utilize this feature, you should follow these steps. +``` + +Output suggestion: +```` +Use 'use' instead of 'utilize' for simplicity. + +```suggestion +To use this feature, follow these steps. +``` +```` + +### Passive Voice Example: +Input: +```markdown +The data is processed by the system. +``` + +Output suggestion: +```` +Convert passive voice to active voice for clarity and directness. + +```suggestion +The system processes the data. +``` +```` + +### Grammar Example: +Input: +```markdown +The system have many features. +``` + +Output suggestion: +```` +Grammar: Subject-verb agreement error: The subject 'system' is singular, so the verb should be 'has'. + +```suggestion +The system has many features. +``` +```` diff --git a/tools/combine_results.py b/tools/combine_results.py new file mode 100755 index 0000000000..852e377ecd --- /dev/null +++ b/tools/combine_results.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +""" +Helper script to combine style and grammar check results. +""" + +import json +import os +import sys + +def combine_results(style_file, grammar_file, output_file): + """Combine style and grammar check results.""" + # Load style suggestions + style_suggestions = [] + if os.path.exists(style_file): + with open(style_file, "r") as f: + style_suggestions = json.load(f) + + # Load grammar suggestions + grammar_suggestions = [] + if os.path.exists(grammar_file): + with open(grammar_file, "r") as f: + grammar_suggestions = json.load(f) + + # Combine suggestions + combined_suggestions = style_suggestions + grammar_suggestions + + # Sort by line number + combined_suggestions.sort(key=lambda x: x["line"]) + + # Save combined suggestions + with open(output_file, "w") as f: + json.dump(combined_suggestions, f, indent=2) + + print(f"Combined {len(style_suggestions)} style suggestions and {len(grammar_suggestions)} grammar suggestions.") + return len(combined_suggestions) + +if __name__ == "__main__": + if len(sys.argv) != 4: + print("Usage: python combine_results.py <style_file> <grammar_file> <output_file>") + sys.exit(1) + + style_file = sys.argv[1] + grammar_file = sys.argv[2] + output_file = sys.argv[3] + + combine_results(style_file, grammar_file, output_file) diff --git a/tools/combined_check.py b/tools/combined_check.py new file mode 100755 index 0000000000..d9bf09a300 --- /dev/null +++ b/tools/combined_check.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python3 +""" +Combined style and grammar checker for Arm Learning Paths content. +This script runs both style and grammar checks and combines the results. 
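+ +Example usage (a sketch; the paths are illustrative, not prescriptive): + + python3 tools/combined_check.py --file path/to/file.md + python3 tools/combined_check.py --dir path/to/directory --output combined_suggestions.json 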
+""" + +import argparse +import json +import os +import sys +import subprocess +from pathlib import Path + +def run_style_check(file_path, rules_file, output_file): + """Run the style checker on a file.""" + cmd = [ + "python3", "tools/enhanced_style_check.py", + "--file", file_path, + "--rules", rules_file, + "--output", output_file + ] + + try: + subprocess.run(cmd, check=True) + return True + except subprocess.CalledProcessError: + print(f"Error running style check on {file_path}") + return False + +def run_grammar_check(file_path, output_file): + """Run the grammar checker on a file.""" + cmd = [ + "python3", "tools/grammar_check.py", + "--file", file_path, + "--output", output_file + ] + + try: + subprocess.run(cmd, check=True) + return True + except subprocess.CalledProcessError: + print(f"Error running grammar check on {file_path}") + return False + +def combine_suggestions(style_file, grammar_file, output_file): + """Combine style and grammar suggestions into a single file.""" + style_suggestions = [] + grammar_suggestions = [] + + # Load style suggestions + if os.path.exists(style_file): + with open(style_file, "r") as f: + style_suggestions = json.load(f) + + # Load grammar suggestions + if os.path.exists(grammar_file): + with open(grammar_file, "r") as f: + grammar_suggestions = json.load(f) + + # Combine suggestions + combined_suggestions = style_suggestions + grammar_suggestions + + # Sort by file and line number + combined_suggestions.sort(key=lambda x: (x["file"], x["line"])) + + # Save combined suggestions + with open(output_file, "w") as f: + json.dump(combined_suggestions, f, indent=2) + + return len(combined_suggestions) + +def print_summary(style_count, grammar_count, combined_count): + """Print a summary of the checks.""" + print("\nCheck Summary:") + print("=" * 80) + print(f"Style issues found: {style_count}") + print(f"Grammar issues found: {grammar_count}") + print(f"Total issues found: {combined_count}") + print("=" * 80) + +def main(): + parser = argparse.ArgumentParser(description="Run combined style and grammar checks") + parser.add_argument("--file", help="Path to a specific markdown file to check") + parser.add_argument("--dir", help="Directory containing markdown files to check") + parser.add_argument("--rules", default="tools/style_rules.json", help="JSON file containing style rules") + parser.add_argument("--output", default="combined_suggestions.json", help="Output file for combined suggestions") + args = parser.parse_args() + + if not args.file and not args.dir: + print("Error: Please provide either --file or --dir argument") + sys.exit(1) + + style_count = 0 + grammar_count = 0 + combined_count = 0 + + # Check a specific file + if args.file: + if not os.path.isfile(args.file): + print(f"Error: File not found: {args.file}") + sys.exit(1) + + if not args.file.endswith((".md", ".mdx")): + print(f"Warning: {args.file} is not a markdown file. 
Checking anyway.") + + # Run style check + style_output = f"{args.file}.style.json" + if run_style_check(args.file, args.rules, style_output): + with open(style_output, "r") as f: + style_suggestions = json.load(f) + style_count = len(style_suggestions) + + # Run grammar check + grammar_output = f"{args.file}.grammar.json" + if run_grammar_check(args.file, grammar_output): + with open(grammar_output, "r") as f: + grammar_suggestions = json.load(f) + grammar_count = len(grammar_suggestions) + + # Combine results + combined_count = combine_suggestions(style_output, grammar_output, args.output) + + # Clean up temporary files + if os.path.exists(style_output): + os.remove(style_output) + if os.path.exists(grammar_output): + os.remove(grammar_output) + + # Check all markdown files in a directory + if args.dir: + if not os.path.isdir(args.dir): + print(f"Error: Directory not found: {args.dir}") + sys.exit(1) + + all_style_suggestions = [] + all_grammar_suggestions = [] + + for root, _, files in os.walk(args.dir): + for file in files: + if file.endswith((".md", ".mdx")): + file_path = os.path.join(root, file) + + # Run style check + style_output = f"{file_path}.style.json" + if run_style_check(file_path, args.rules, style_output): + with open(style_output, "r") as f: + style_suggestions = json.load(f) + all_style_suggestions.extend(style_suggestions) + + # Run grammar check + grammar_output = f"{file_path}.grammar.json" + if run_grammar_check(file_path, grammar_output): + with open(grammar_output, "r") as f: + grammar_suggestions = json.load(f) + all_grammar_suggestions.extend(grammar_suggestions) + + # Clean up temporary files + if os.path.exists(style_output): + os.remove(style_output) + if os.path.exists(grammar_output): + os.remove(grammar_output) + + # Save all suggestions + style_output = "all_style_suggestions.json" + with open(style_output, "w") as f: + json.dump(all_style_suggestions, f, indent=2) + + grammar_output = "all_grammar_suggestions.json" + with open(grammar_output, "w") as f: + json.dump(all_grammar_suggestions, f, indent=2) + + # Combine results + combined_count = combine_suggestions(style_output, grammar_output, args.output) + + style_count = len(all_style_suggestions) + grammar_count = len(all_grammar_suggestions) + + # Clean up temporary files + if os.path.exists(style_output): + os.remove(style_output) + if os.path.exists(grammar_output): + os.remove(grammar_output) + + # Print summary + print_summary(style_count, grammar_count, combined_count) + print(f"Combined suggestions saved to {args.output}") + +if __name__ == "__main__": + main() diff --git a/tools/enhanced_style_check.py b/tools/enhanced_style_check.py new file mode 100755 index 0000000000..d7d8c6c8e9 --- /dev/null +++ b/tools/enhanced_style_check.py @@ -0,0 +1,389 @@ +#!/usr/bin/env python3 +""" +Enhanced style checker for Arm Learning Paths content. +This script checks markdown files against writing style guidelines from a JSON file +and uses spaCy for passive voice detection. +""" + +import argparse +import json +import os +import re +import sys +from pathlib import Path + +# Import spaCy if available +try: + import spacy + SPACY_AVAILABLE = True + # Try to load the English model + try: + nlp = spacy.load("en_core_web_sm") + except: + print("Warning: spaCy model 'en_core_web_sm' not found. 
Will try to download it.") + try: + from spacy.cli import download + download("en_core_web_sm") + nlp = spacy.load("en_core_web_sm") + print("Successfully downloaded and loaded spaCy model.") + except: + print("Error: Could not download spaCy model. Passive voice detection will be limited.") + SPACY_AVAILABLE = False +except ImportError: + print("Warning: spaCy not installed. Using basic passive voice detection.") + SPACY_AVAILABLE = False + +def load_style_rules(rules_file): + """Load style rules from a JSON file.""" + try: + with open(rules_file, "r", encoding="utf-8") as f: + return json.load(f) + except Exception as e: + print(f"Error loading style rules: {e}") + return [] + +def is_in_code_block(lines, line_index): + """Check if the line is within a code block.""" + code_block_count = 0 + for i in range(line_index): + if re.match(r'^```', lines[i]): + code_block_count += 1 + + return code_block_count % 2 == 1 # Odd count means inside a code block + +def is_in_yaml_frontmatter(lines, line_index): + """Check if the line is within YAML frontmatter.""" + if line_index == 0 and lines[0].strip() == '---': + return True + + frontmatter_markers = 0 + for i in range(line_index): + if lines[i].strip() == '---': + frontmatter_markers += 1 + + # If we've seen an odd number of markers, we're in frontmatter + return frontmatter_markers % 2 == 1 + +def capitalize_if_at_start(original, replacement, match): + """Capitalize the replacement if it's at the start of a sentence.""" + # Check if the match is at the start of the string or after a period and space + start_of_sentence = match.start() == 0 or (match.start() > 1 and original[match.start()-2:match.start()] == '. ') + + if start_of_sentence and replacement and replacement[0].islower(): + return replacement[0].upper() + replacement[1:] + return replacement + +def detect_passive_voice_with_spacy(text): + """ + Detect passive voice using spaCy's dependency parsing. + Returns a list of (passive_text, suggested_active) tuples. + """ + if not SPACY_AVAILABLE: + return [] + + doc = nlp(text) + passive_constructions = [] + + for token in doc: + # Look for passive auxiliary verbs + if token.dep_ == "auxpass": + # Find the main verb + verb = token.head + + # Find the subject (usually nsubjpass) + subject = None + for child in verb.children: + if child.dep_ == "nsubjpass": + subject = child + break + + # Find the agent (usually introduced by "by") + agent = None + for child in verb.children: + if child.dep_ == "agent": + for agent_child in child.children: + if agent_child.dep_ == "pobj": + agent = agent_child + break + break + + # If we have both subject and agent, we can suggest an active voice alternative + if subject and agent: + # Extract the spans of text + passive_span = doc[max(0, subject.i - 1):min(len(doc), verb.i + 2)] + if agent.i > verb.i: + passive_span = doc[max(0, subject.i - 1):min(len(doc), agent.i + 1)] + + # Create active voice suggestion + active_suggestion = f"{agent.text} {verb.lemma_} {subject.text}" + + # Capitalize if at start of sentence + if passive_span.start == 0 or (passive_span.start > 1 and doc[passive_span.start-2].text == '.'): + active_suggestion = active_suggestion[0].upper() + active_suggestion[1:] + + passive_constructions.append((passive_span.text, active_suggestion)) + + return passive_constructions + +def fix_passive_voice(line): + """ + Fix passive voice constructions by swapping subject and object. + This is a more sophisticated approach than simple pattern replacement. 
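+ +Example (exercises the first hard-coded pattern below): + + >>> fix_passive_voice("The data is processed by the system.") + 'The system processes the data.' 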
+ """ + # Common passive voice patterns with specific replacements + passive_patterns = [ + (r'The data is processed by the system', r'The system processes the data'), + (r'The code is handled by the compiler', r'The compiler handles the code'), + (r'The configuration was managed by the user', r'The user managed the configuration'), + (r'The documentation was created by the team', r'The team created the documentation'), + (r'The results are generated by the algorithm', r'The algorithm generates the results'), + (r'The API is provided by the service', r'The service provides the API') + ] + + # Try each specific pattern first + for pattern, replacement in passive_patterns: + if re.search(pattern, line, re.IGNORECASE): + return re.sub(pattern, replacement, line, flags=re.IGNORECASE) + + # Generic patterns as fallback + generic_patterns = [ + # Present tense passive + (r'(\w+) is (\w+ed) by (\w+)', r'\3 \2s \1'), + (r'(\w+) are (\w+ed) by (\w+)', r'\3 \2 \1'), + # Past tense passive + (r'(\w+) was (\w+ed) by (\w+)', r'\3 \2 \1'), + (r'(\w+) were (\w+ed) by (\w+)', r'\3 \2 \1') + ] + + for pattern, replacement in generic_patterns: + if re.search(pattern, line): + return re.sub(pattern, replacement, line) + + return line + +def check_style(content, file_path, style_rules): + """Check content against style rules and return suggestions.""" + suggestions = [] + lines = content.split("\n") + + # First, check for passive voice using spaCy if available + if SPACY_AVAILABLE: + # Process each paragraph separately to maintain context + paragraphs = [] + current_paragraph = [] + + for i, line in enumerate(lines): + # Skip code blocks, YAML frontmatter, headings, and links + if (is_in_code_block(lines, i) or + is_in_yaml_frontmatter(lines, i) or + re.match(r'^#+\s', line) or + re.search(r'^\s*\[.*\]:\s*', line)): + # End the current paragraph if any + if current_paragraph: + paragraphs.append((current_paragraph[0], " ".join(current_paragraph[1]))) + current_paragraph = [] + continue + + # Skip empty lines - they end paragraphs + if not line.strip(): + if current_paragraph: + paragraphs.append((current_paragraph[0], " ".join(current_paragraph[1]))) + current_paragraph = [] + continue + + # Add line to current paragraph + if not current_paragraph: + current_paragraph = [i, [line]] + else: + current_paragraph[1].append(line) + + # Add the last paragraph if any + if current_paragraph: + paragraphs.append((current_paragraph[0], " ".join(current_paragraph[1]))) + + # Check each paragraph for passive voice + for start_line, paragraph_text in paragraphs: + passive_constructions = detect_passive_voice_with_spacy(paragraph_text) + + for passive_text, active_suggestion in passive_constructions: + # Find which line contains this passive construction + line_offset = 0 + for j, line in enumerate(lines[start_line:start_line + 10]): # Look at next 10 lines max + if passive_text in line: + line_index = start_line + j + suggestions.append({ + "file": file_path, + "line": line_index + 1, # 1-based line numbers + "original": line, + "suggested": line.replace(passive_text, active_suggestion), + "reason": "Convert passive voice to active voice for clarity and directness (detected by spaCy)." 
+ }) + break + + # Then check each line against style rules + for i, line in enumerate(lines): + # Skip code blocks and YAML frontmatter + if is_in_code_block(lines, i) or is_in_yaml_frontmatter(lines, i): + continue + + # Skip headings (lines starting with #) + if re.match(r'^#+\s', line): + continue + + # Skip links and image references + if re.search(r'^\s*\[.*\]:\s*', line): + continue + + # Check for passive voice using regex (as fallback) + if not SPACY_AVAILABLE: + passive_patterns = [ + r'\b(?:is|are|was|were)\s+\w+ed\s+by\b', + r'\b(?:is|are|was|were)\s+(?:handled|managed|created|generated|provided)\s+by\b' + ] + + for pattern in passive_patterns: + if re.search(pattern, line, re.IGNORECASE): + # Try to fix passive voice + fixed_line = fix_passive_voice(line) + if fixed_line != line: + suggestions.append({ + "file": file_path, + "line": i + 1, + "original": line, + "suggested": fixed_line, + "reason": "Convert passive voice to active voice for clarity and directness." + }) + # Only one suggestion per line to avoid conflicts + break + + # If we already have a suggestion for this line, skip further checks + if any(sugg["line"] == i + 1 for sugg in suggestions): + continue + + # Check against other style rules + for rule in style_rules: + matches = list(re.finditer(rule["pattern"], line, re.IGNORECASE)) + for match in matches: + # Create a suggestion + original = line + + # Get the matched text + matched_text = match.group(0) + + # Determine if replacement should be capitalized + replacement = rule["replacement"] + if match.start() == 0 or (match.start() >= 2 and line[match.start()-2:match.start()] == '. '): + if replacement and replacement[0].islower(): + replacement = replacement[0].upper() + replacement[1:] + + # Apply the replacement + suggested = line[:match.start()] + replacement + line[match.end():] + + if original != suggested: + suggestions.append({ + "file": file_path, + "line": i + 1, + "original": original, + "suggested": suggested, + "reason": rule["reason"], + }) + # Only one suggestion per line to avoid conflicts + break + + return suggestions + +def save_suggestions_to_file(suggestions, output_file="style_suggestions.json"): + """Save suggestions to a JSON file.""" + with open(output_file, "w") as f: + json.dump(suggestions, f, indent=2) + print(f"Saved suggestions to {output_file}") + +def print_suggestions(suggestions): + """Print suggestions in a readable format.""" + if not suggestions: + print("No style issues found.") + return + + print(f"\nFound {len(suggestions)} style issues:") + print("=" * 80) + + for i, sugg in enumerate(suggestions, 1): + print(f"Issue {i}:") + print(f"File: {sugg['file']}") + print(f"Line: {sugg['line']}") + print(f"Reason: {sugg['reason']}") + print(f"Original: {sugg['original']}") + print(f"Suggested: {sugg['suggested']}") + print("-" * 80) + +def main(): + parser = argparse.ArgumentParser(description="Check markdown files for style issues") + parser.add_argument("--file", help="Path to a specific markdown file to check") + parser.add_argument("--dir", help="Directory containing markdown files to check") + parser.add_argument("--rules", default="tools/style_rules.json", help="JSON file containing style rules") + parser.add_argument("--output", default="style_suggestions.json", help="Output file for suggestions") + parser.add_argument("--install-spacy", action="store_true", help="Install spaCy and download the English model") + args = parser.parse_args() + + # Install spaCy if requested + if args.install_spacy: + print("Installing spaCy 
and downloading the English model...") + import subprocess + subprocess.call([sys.executable, "-m", "pip", "install", "spacy"]) + subprocess.call([sys.executable, "-m", "spacy", "download", "en_core_web_sm"]) + print("Installation complete. Please run the script again without --install-spacy.") + sys.exit(0) + + if not args.file and not args.dir: + print("Error: Please provide either --file or --dir argument") + sys.exit(1) + + # Load style rules + style_rules = load_style_rules(args.rules) + if not style_rules: + print("Error: No style rules loaded. Check the rules file.") + sys.exit(1) + + print(f"Loaded {len(style_rules)} style rules from {args.rules}") + + all_suggestions = [] + + # Check a specific file + if args.file: + if not os.path.isfile(args.file): + print(f"Error: File not found: {args.file}") + sys.exit(1) + + if not args.file.endswith((".md", ".mdx")): + print(f"Warning: {args.file} is not a markdown file. Checking anyway.") + + with open(args.file, "r", encoding="utf-8") as f: + content = f.read() + + suggestions = check_style(content, args.file, style_rules) + all_suggestions.extend(suggestions) + print(f"Checked {args.file}: Found {len(suggestions)} style issues") + + # Check all markdown files in a directory + if args.dir: + if not os.path.isdir(args.dir): + print(f"Error: Directory not found: {args.dir}") + sys.exit(1) + + for root, _, files in os.walk(args.dir): + for file in files: + if file.endswith((".md", ".mdx")): + file_path = os.path.join(root, file) + with open(file_path, "r", encoding="utf-8") as f: + content = f.read() + + suggestions = check_style(content, file_path, style_rules) + all_suggestions.extend(suggestions) + print(f"Checked {file_path}: Found {len(suggestions)} style issues") + + # Print and save suggestions + print_suggestions(all_suggestions) + save_suggestions_to_file(all_suggestions, args.output) + +if __name__ == "__main__": + main() diff --git a/tools/github_suggestion.py b/tools/github_suggestion.py new file mode 100755 index 0000000000..ba13086767 --- /dev/null +++ b/tools/github_suggestion.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 +""" +GitHub suggestion formatter for Arm Learning Paths style checker. +This script takes the style suggestions JSON file and formats them as GitHub +review comments with proper suggestion syntax. 
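+ +Typical invocation, assuming the style checker has already written its default output file: + + python3 tools/github_suggestion.py --input style_suggestions.json 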
+""" + +import argparse +import json +import os +import sys +from pathlib import Path + +def format_github_suggestions(suggestions): + """Format suggestions as GitHub review comments.""" + github_comments = [] + + # Group suggestions by file + file_suggestions = {} + for sugg in suggestions: + file_path = sugg["file"] + if file_path not in file_suggestions: + file_suggestions[file_path] = [] + file_suggestions[file_path].append(sugg) + + # Format suggestions for each file + for file_path, file_suggs in file_suggestions.items(): + print(f"\nSuggestions for {file_path}:") + print("=" * 80) + + for sugg in file_suggs: + comment = ( + f"**Style suggestion**: {sugg['reason']}\n\n" + f"```suggestion\n{sugg['suggested']}\n```" + ) + github_comments.append({ + "path": file_path, + "line": sugg["line"], + "body": comment + }) + + print(f"Line {sugg['line']}:") + print(comment) + print("-" * 80) + + return github_comments + +def main(): + parser = argparse.ArgumentParser(description="Format style suggestions as GitHub review comments") + parser.add_argument("--input", default="style_suggestions.json", help="Input JSON file with style suggestions") + args = parser.parse_args() + + if not os.path.isfile(args.input): + print(f"Error: Input file not found: {args.input}") + sys.exit(1) + + with open(args.input, "r", encoding="utf-8") as f: + suggestions = json.load(f) + + if not suggestions: + print("No style suggestions found in the input file.") + sys.exit(0) + + github_comments = format_github_suggestions(suggestions) + + print(f"\nFormatted {len(github_comments)} GitHub review comments.") + print("\nThese comments can be used in the GitHub API to create review comments.") + print("In the GitHub Actions workflow, they will be submitted as a review on the PR.") + +if __name__ == "__main__": + main() diff --git a/tools/grammar_check.py b/tools/grammar_check.py new file mode 100755 index 0000000000..aeabd24cc4 --- /dev/null +++ b/tools/grammar_check.py @@ -0,0 +1,216 @@ +#!/usr/bin/env python3 +""" +Grammar checker for Arm Learning Paths content. +This script checks markdown files for grammar issues using the LanguageTool API. 
+""" + +import argparse +import json +import os +import re +import sys +import requests +from pathlib import Path +import html +import markdown +from bs4 import BeautifulSoup + +def is_in_code_block(lines, line_index): + """Check if the line is within a code block.""" + code_block_count = 0 + for i in range(line_index): + if re.match(r'^```', lines[i]): + code_block_count += 1 + + return code_block_count % 2 == 1 # Odd count means inside a code block + +def is_in_yaml_frontmatter(lines, line_index): + """Check if the line is within YAML frontmatter.""" + if line_index == 0 and lines[0].strip() == '---': + return True + + frontmatter_markers = 0 + for i in range(line_index): + if lines[i].strip() == '---': + frontmatter_markers += 1 + + # If we've seen an odd number of markers, we're in frontmatter + return frontmatter_markers % 2 == 1 + +def extract_text_from_markdown(content): + """Extract plain text from markdown, preserving line numbers.""" + lines = content.split('\n') + text_lines = [] + line_map = {} # Maps text line numbers to original markdown line numbers + + current_text_line = 0 + + for i, line in enumerate(lines): + # Skip code blocks, YAML frontmatter, and other non-prose content + if (is_in_code_block(lines, i) or + is_in_yaml_frontmatter(lines, i) or + re.match(r'^#+\s', line) or # Skip headings + re.match(r'^[*-]\s', line) or # Skip list items + re.match(r'^\s*```', line) or # Skip code block markers + re.match(r'^\s*\[.*\]:\s*', line) or # Skip link references + not line.strip()): # Skip empty lines + continue + + # Convert markdown to plain text for this line + html_content = markdown.markdown(line) + soup = BeautifulSoup(html_content, 'html.parser') + text_line = soup.get_text() + + if text_line.strip(): + text_lines.append(text_line) + line_map[current_text_line] = i + current_text_line += 1 + + return '\n'.join(text_lines), line_map + +def check_grammar(content, file_path): + """Check content for grammar issues using LanguageTool API.""" + # Extract plain text from markdown + text, line_map = extract_text_from_markdown(content) + + if not text.strip(): + return [] + + # Call LanguageTool API + url = "https://api.languagetool.org/v2/check" + params = { + 'text': text, + 'language': 'en-US', + 'enabledOnly': 'false' + } + + try: + response = requests.post(url, data=params) + response.raise_for_status() # Raise exception for HTTP errors + result = response.json() + except requests.exceptions.RequestException as e: + print(f"Error calling LanguageTool API: {e}") + return [] + + # Process results + suggestions = [] + lines = content.split('\n') + + for match in result.get('matches', []): + # Get the offset in the plain text + offset = match.get('offset', 0) + length = match.get('length', 0) + + # Find the line number in the plain text + text_line_num = text[:offset].count('\n') + + # Map back to the original markdown line number + if text_line_num in line_map: + md_line_num = line_map[text_line_num] + + # Get the original line + original_line = lines[md_line_num] + + # Get the context from the match + context = match.get('context', {}) + context_text = context.get('text', '') + context_offset = context.get('offset', 0) + context_length = context.get('length', 0) + + # Extract the problematic text + error_text = context_text[context_offset:context_offset + context_length] + + # Get the suggested replacements + replacements = match.get('replacements', []) + suggested_text = replacements[0].get('value', '') if replacements else '' + + # Create a suggestion + if suggested_text 
and error_text in original_line: + suggested_line = original_line.replace(error_text, suggested_text) + + suggestions.append({ + "file": file_path, + "line": md_line_num + 1, # 1-based line numbers + "original": original_line, + "suggested": suggested_line, + "reason": f"Grammar: {match.get('message', 'Grammar issue')}", + }) + + return suggestions + +def save_suggestions_to_file(suggestions, output_file="grammar_suggestions.json"): + """Save suggestions to a JSON file.""" + with open(output_file, "w") as f: + json.dump(suggestions, f, indent=2) + print(f"Saved grammar suggestions to {output_file}") + +def print_suggestions(suggestions): + """Print suggestions in a readable format.""" + if not suggestions: + print("No grammar issues found.") + return + + print(f"\nFound {len(suggestions)} grammar issues:") + print("=" * 80) + + for i, sugg in enumerate(suggestions, 1): + print(f"Issue {i}:") + print(f"File: {sugg['file']}") + print(f"Line: {sugg['line']}") + print(f"Reason: {sugg['reason']}") + print(f"Original: {sugg['original']}") + print(f"Suggested: {sugg['suggested']}") + print("-" * 80) + +def main(): + parser = argparse.ArgumentParser(description="Check markdown files for grammar issues") + parser.add_argument("--file", help="Path to a specific markdown file to check") + parser.add_argument("--dir", help="Directory containing markdown files to check") + parser.add_argument("--output", default="grammar_suggestions.json", help="Output file for suggestions") + args = parser.parse_args() + + if not args.file and not args.dir: + print("Error: Please provide either --file or --dir argument") + sys.exit(1) + + all_suggestions = [] + + # Check a specific file + if args.file: + if not os.path.isfile(args.file): + print(f"Error: File not found: {args.file}") + sys.exit(1) + + if not args.file.endswith((".md", ".mdx")): + print(f"Warning: {args.file} is not a markdown file. Checking anyway.") + + with open(args.file, "r", encoding="utf-8") as f: + content = f.read() + + suggestions = check_grammar(content, args.file) + all_suggestions.extend(suggestions) + print(f"Checked {args.file}: Found {len(suggestions)} grammar issues") + + # Check all markdown files in a directory + if args.dir: + if not os.path.isdir(args.dir): + print(f"Error: Directory not found: {args.dir}") + sys.exit(1) + + for root, _, files in os.walk(args.dir): + for file in files: + if file.endswith((".md", ".mdx")): + file_path = os.path.join(root, file) + with open(file_path, "r", encoding="utf-8") as f: + content = f.read() + + suggestions = check_grammar(content, file_path) + all_suggestions.extend(suggestions) + print(f"Checked {file_path}: Found {len(suggestions)} grammar issues") + + # Print and save suggestions + print_suggestions(all_suggestions) + save_suggestions_to_file(all_suggestions, args.output) + +if __name__ == "__main__": + main() diff --git a/tools/style_check.py b/tools/style_check.py new file mode 100755 index 0000000000..d10efae51a --- /dev/null +++ b/tools/style_check.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python3 +""" +Style checker for Arm Learning Paths content. +This script checks PR content against writing style guidelines and provides +replacement suggestions as GitHub commit suggestions. 
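+ +Example invocation (all values are placeholders; GITHUB_TOKEN must be set in the environment): + + GITHUB_TOKEN=<token> python3 tools/style_check.py --pr <pr-number> --head-sha <commit-sha> --repo <owner>/<repo> 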
+""" + +import argparse +import json +import os +import re +import sys +from pathlib import Path +import requests +import yaml +import markdown +from collections import defaultdict + +# Style rules based on Arm Learning Paths writing guidelines +STYLE_RULES = [ + { + "pattern": r"\b(?:utilize|utilization|utilizes|utilizing)\b", + "replacement": "use", + "reason": "Use 'use' instead of 'utilize' for simplicity." + }, + { + "pattern": r"\b(?:optimize|optimization|optimizes|optimizing)\b", + "replacement": "improve", + "reason": "Simplify language by replacing 'optimize' with 'improve'." + }, + { + "pattern": r"\bin order to\b", + "replacement": "to", + "reason": "Use 'to' instead of 'in order to' for conciseness." + }, + { + "pattern": r"\bplease note that\b", + "replacement": "", + "reason": "Remove 'please note that' for directness." + }, + { + "pattern": r"\bthe data (?:is|was) processed\b", + "replacement": "processed the data", + "reason": "Use active voice instead of passive voice." + }, + { + "pattern": r"\byou should\b", + "replacement": "", + "reason": "Be direct with instructions, avoid 'you should'." + }, + # Add more style rules based on the guidelines +] + +def get_changed_files(pr_number, repo, token): + """Get the list of files changed in the PR.""" + url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}/files" + headers = {"Authorization": f"token {token}"} + response = requests.get(url, headers=headers) + response.raise_for_status() + + return [file_info for file_info in response.json() + if file_info["filename"].endswith((".md", ".mdx"))] + +def get_file_content(repo, file_path, ref, token): + """Get the content of a file from GitHub.""" + url = f"https://api.github.com/repos/{repo}/contents/{file_path}?ref={ref}" + headers = {"Authorization": f"token {token}"} + response = requests.get(url, headers=headers) + + if response.status_code == 404: + return None + + response.raise_for_status() + content = response.json() + + if content.get("type") == "file": + import base64 + return base64.b64decode(content["content"]).decode("utf-8") + + return None + +def check_style(content, file_path): + """Check content against style rules and return suggestions.""" + suggestions = [] + lines = content.split("\n") + + for i, line in enumerate(lines): + for rule in STYLE_RULES: + matches = re.finditer(rule["pattern"], line, re.IGNORECASE) + for match in matches: + # Skip code blocks + if is_in_code_block(lines, i): + continue + + # Create a suggestion + original = line + suggested = re.sub(rule["pattern"], rule["replacement"], line, flags=re.IGNORECASE) + + if original != suggested: + suggestions.append({ + "file": file_path, + "line": i + 1, + "original": original, + "suggested": suggested, + "reason": rule["reason"], + "file_content": content + }) + # Only one suggestion per line to avoid conflicts + break + + return suggestions + +def is_in_code_block(lines, line_index): + """Check if the line is within a code block.""" + code_block_count = 0 + for i in range(line_index): + if re.match(r'^```', lines[i]): + code_block_count += 1 + + return code_block_count % 2 == 1 # Odd count means inside a code block + +def create_review_comment(repo, pr_number, suggestions, head_sha, token): + """Create review comments with suggestions on GitHub PR.""" + # Group suggestions by file + file_suggestions = defaultdict(list) + for suggestion in suggestions: + file_suggestions[suggestion["file"]].append(suggestion) + + # Create review comments + review_comments = [] + for file_path, file_suggs in 
+        for sugg in file_suggs:
+            comment = {
+                "path": sugg["file"],
+                "line": sugg["line"],
+                "body": f"**Style suggestion**: {sugg['reason']}\n\n"
+                        f"```suggestion\n{sugg['suggested']}\n```"
+            }
+            review_comments.append(comment)
+
+    # Submit the review
+    if review_comments:
+        url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}/reviews"
+        headers = {"Authorization": f"token {token}"}
+        data = {
+            "commit_id": head_sha,
+            "body": "## Style Check Results\n\nI found some style issues that could be improved based on our writing guidelines.",
+            "event": "COMMENT",
+            "comments": review_comments
+        }
+
+        response = requests.post(url, headers=headers, json=data)
+        response.raise_for_status()
+
+        print(f"Created {len(review_comments)} style suggestions.")
+    else:
+        print("No style issues found.")
+
+def save_suggestions_to_file(suggestions, output_file="style_suggestions.json"):
+    """Save suggestions to a JSON file for debugging."""
+    with open(output_file, "w") as f:
+        json.dump(suggestions, f, indent=2)
+    print(f"Saved suggestions to {output_file}")
+
+def main():
+    parser = argparse.ArgumentParser(description="Check PR content for style issues")
+    parser.add_argument("--pr", required=True, help="PR number")
+    parser.add_argument("--head-sha", required=True, help="Head commit SHA")
+    parser.add_argument("--repo", required=True, help="Repository name (owner/repo)")
+    parser.add_argument("--output", default="style_suggestions.json", help="Output file for suggestions")
+    args = parser.parse_args()
+
+    token = os.environ.get("GITHUB_TOKEN")
+    if not token:
+        print("Error: GITHUB_TOKEN environment variable not set")
+        sys.exit(1)
+
+    # Get changed files in the PR
+    try:
+        changed_files = get_changed_files(args.pr, args.repo, token)
+    except Exception as e:
+        print(f"Error getting changed files: {e}")
+        sys.exit(1)
+
+    all_suggestions = []
+
+    # Check each changed file
+    for file_info in changed_files:
+        file_path = file_info["filename"]
+        print(f"Checking {file_path}...")
+
+        try:
+            content = get_file_content(args.repo, file_path, args.head_sha, token)
+            if content:
+                suggestions = check_style(content, file_path)
+                all_suggestions.extend(suggestions)
+                print(f"Found {len(suggestions)} style issues in {file_path}")
+        except Exception as e:
+            print(f"Error processing {file_path}: {e}")
+
+    # Save suggestions to file
+    save_suggestions_to_file(all_suggestions, args.output)
+
+    # Create review comments
+    try:
+        create_review_comment(args.repo, args.pr, all_suggestions, args.head_sha, token)
+    except Exception as e:
+        print(f"Error creating review comments: {e}")
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/style_rules.json b/tools/style_rules.json
new file mode 100644
index 0000000000..ff0383d5a7
--- /dev/null
+++ b/tools/style_rules.json
@@ -0,0 +1,312 @@
+[
+    {
+        "pattern": "\\butilize\\b",
+        "replacement": "use",
+        "reason": "Use 'use' instead of 'utilize' for simplicity."
+    },
+    {
+        "pattern": "\\butilizes\\b",
+        "replacement": "uses",
+        "reason": "Use 'uses' instead of 'utilizes' for simplicity."
+    },
+    {
+        "pattern": "\\butilized\\b",
+        "replacement": "used",
+        "reason": "Use 'used' instead of 'utilized' for simplicity."
+    },
+    {
+        "pattern": "\\butilizing\\b",
+        "replacement": "using",
+        "reason": "Use 'using' instead of 'utilizing' for simplicity."
+    },
+    {
+        "pattern": "\\butilization\\b",
+        "replacement": "use",
+        "reason": "Use 'use' instead of 'utilization' for simplicity."
+ }, + { + "pattern": "\\boptimize\\b", + "replacement": "improve", + "reason": "Simplify language by replacing 'optimize' with 'improve'." + }, + { + "pattern": "\\boptimizes\\b", + "replacement": "improves", + "reason": "Simplify language by replacing 'optimizes' with 'improves'." + }, + { + "pattern": "\\boptimized\\b", + "replacement": "improved", + "reason": "Simplify language by replacing 'optimized' with 'improved'." + }, + { + "pattern": "\\boptimizing\\b", + "replacement": "improving", + "reason": "Simplify language by replacing 'optimizing' with 'improving'." + }, + { + "pattern": "\\boptimization\\b", + "replacement": "improvement", + "reason": "Simplify language by replacing 'optimization' with 'improvement'." + }, + { + "pattern": "\\boptimizations\\b", + "replacement": "improvements", + "reason": "Simplify language by replacing 'optimizations' with 'improvements'." + }, + { + "pattern": "\\bin order to\\b", + "replacement": "to", + "reason": "Use 'to' instead of 'in order to' for conciseness." + }, + { + "pattern": "\\bplease note that\\b", + "replacement": "", + "reason": "Remove 'please note that' for directness." + }, + { + "pattern": "\\byou should\\b", + "replacement": "", + "reason": "Be direct with instructions, avoid 'you should'." + }, + { + "pattern": "\\bis able to\\b", + "replacement": "can", + "reason": "Use 'can' instead of 'is able to' for simplicity." + }, + { + "pattern": "\\bare able to\\b", + "replacement": "can", + "reason": "Use 'can' instead of 'are able to' for simplicity." + }, + { + "pattern": "\\bwas able to\\b", + "replacement": "could", + "reason": "Use 'could' instead of 'was able to' for simplicity." + }, + { + "pattern": "\\bwere able to\\b", + "replacement": "could", + "reason": "Use 'could' instead of 'were able to' for simplicity." + }, + { + "pattern": "\\bdue to the fact that\\b", + "replacement": "because", + "reason": "Use 'because' instead of 'due to the fact that' for conciseness." + }, + { + "pattern": "\\bat this point in time\\b", + "replacement": "now", + "reason": "Use 'now' instead of 'at this point in time' for conciseness." + }, + { + "pattern": "\\bfor the purpose of\\b", + "replacement": "for", + "reason": "Use 'for' instead of 'for the purpose of' for conciseness." + }, + { + "pattern": "\\bprior to\\b", + "replacement": "before", + "reason": "Use 'before' instead of 'prior to' for simplicity." + }, + { + "pattern": "\\bsubsequent to\\b", + "replacement": "after", + "reason": "Use 'after' instead of 'subsequent to' for simplicity." + }, + { + "pattern": "\\ba large number of\\b", + "replacement": "many", + "reason": "Use 'many' instead of 'a large number of' for conciseness." + }, + { + "pattern": "\\ba majority of\\b", + "replacement": "most", + "reason": "Use 'most' instead of 'a majority of' for conciseness." + }, + { + "pattern": "\\bin spite of\\b", + "replacement": "despite", + "reason": "Use 'despite' instead of 'in spite of' for conciseness." + }, + { + "pattern": "\\bwith regard to\\b", + "replacement": "about", + "reason": "Use 'about' instead of 'with regard to' for conciseness." + }, + { + "pattern": "\\bwith respect to\\b", + "replacement": "about", + "reason": "Use 'about' instead of 'with respect to' for conciseness." + }, + { + "pattern": "\\bwe recommend\\b", + "replacement": "it is recommended", + "reason": "Use 'it is recommended' instead of 'we recommend' for a more neutral tone." 
+ }, + { + "pattern": "\\bwe suggest\\b", + "replacement": "it is suggested", + "reason": "Use 'it is suggested' instead of 'we suggest' for a more neutral tone." + }, + { + "pattern": "\\bwe advise\\b", + "replacement": "it is advised", + "reason": "Use 'it is advised' instead of 'we advise' for a more neutral tone." + }, + { + "pattern": "\\bwe will\\b", + "replacement": "you will", + "reason": "Use 'you will' instead of 'we will' to address the reader directly." + }, + { + "pattern": "\\bwe can\\b", + "replacement": "you can", + "reason": "Use 'you can' instead of 'we can' to address the reader directly." + }, + { + "pattern": "\\bwe have\\b", + "replacement": "you have", + "reason": "Use 'you have' instead of 'we have' to address the reader directly." + }, + { + "pattern": "\\bwe are\\b", + "replacement": "you are", + "reason": "Use 'you are' instead of 'we are' to address the reader directly." + }, + { + "pattern": "\\bwe need to\\b", + "replacement": "you need to", + "reason": "Use 'you need to' instead of 'we need to' to address the reader directly." + }, + { + "pattern": "\\bwe must\\b", + "replacement": "you must", + "reason": "Use 'you must' instead of 'we must' to address the reader directly." + }, + { + "pattern": "\\bwe should\\b", + "replacement": "you should", + "reason": "Use 'you should' instead of 'we should' to address the reader directly." + }, + { + "pattern": "\\bis processed by\\b", + "replacement": "processes", + "reason": "Use active voice instead of passive voice." + }, + { + "pattern": "\\bare processed by\\b", + "replacement": "process", + "reason": "Use active voice instead of passive voice." + }, + { + "pattern": "\\bwas processed by\\b", + "replacement": "processed", + "reason": "Use active voice instead of passive voice." + }, + { + "pattern": "\\bwere processed by\\b", + "replacement": "processed", + "reason": "Use active voice instead of passive voice." + }, + { + "pattern": "\\bis handled by\\b", + "replacement": "handles", + "reason": "Use active voice instead of passive voice." + }, + { + "pattern": "\\bare handled by\\b", + "replacement": "handle", + "reason": "Use active voice instead of passive voice." + }, + { + "pattern": "\\bwas handled by\\b", + "replacement": "handled", + "reason": "Use active voice instead of passive voice." + }, + { + "pattern": "\\bwere handled by\\b", + "replacement": "handled", + "reason": "Use active voice instead of passive voice." + }, + { + "pattern": "\\bis managed by\\b", + "replacement": "manages", + "reason": "Use active voice instead of passive voice." + }, + { + "pattern": "\\bare managed by\\b", + "replacement": "manage", + "reason": "Use active voice instead of passive voice." + }, + { + "pattern": "\\bwas managed by\\b", + "replacement": "managed", + "reason": "Use active voice instead of passive voice." + }, + { + "pattern": "\\bwere managed by\\b", + "replacement": "managed", + "reason": "Use active voice instead of passive voice." + }, + { + "pattern": "\\bis created by\\b", + "replacement": "creates", + "reason": "Use active voice instead of passive voice." + }, + { + "pattern": "\\bare created by\\b", + "replacement": "create", + "reason": "Use active voice instead of passive voice." + }, + { + "pattern": "\\bwas created by\\b", + "replacement": "created", + "reason": "Use active voice instead of passive voice." + }, + { + "pattern": "\\bwere created by\\b", + "replacement": "created", + "reason": "Use active voice instead of passive voice." 
+ }, + { + "pattern": "\\bis generated by\\b", + "replacement": "generates", + "reason": "Use active voice instead of passive voice." + }, + { + "pattern": "\\bare generated by\\b", + "replacement": "generate", + "reason": "Use active voice instead of passive voice." + }, + { + "pattern": "\\bwas generated by\\b", + "replacement": "generated", + "reason": "Use active voice instead of passive voice." + }, + { + "pattern": "\\bwere generated by\\b", + "replacement": "generated", + "reason": "Use active voice instead of passive voice." + }, + { + "pattern": "\\bis provided by\\b", + "replacement": "provides", + "reason": "Use active voice instead of passive voice." + }, + { + "pattern": "\\bare provided by\\b", + "replacement": "provide", + "reason": "Use active voice instead of passive voice." + }, + { + "pattern": "\\bwas provided by\\b", + "replacement": "provided", + "reason": "Use active voice instead of passive voice." + }, + { + "pattern": "\\bwere provided by\\b", + "replacement": "provided", + "reason": "Use active voice instead of passive voice." + } +]
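The rules in tools/style_rules.json mirror the hardcoded STYLE_RULES list in tools/style_check.py, expanded into one entry per word form. As a minimal sketch of how such a rules file can be consumed (the diff does not show which script loads this JSON, so the loader below is an assumption for illustration):

    import json
    import re

    def load_rules(path="tools/style_rules.json"):
        # Each entry has "pattern", "replacement", and "reason" keys.
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)

    def apply_rules(line, rules):
        # Apply every matching rule and collect the reasons for reporting.
        reasons = []
        for rule in rules:
            new_line = re.sub(rule["pattern"], rule["replacement"], line,
                              flags=re.IGNORECASE)
            if new_line != line:
                reasons.append(rule["reason"])
                line = new_line
        return line, reasons

    suggested, why = apply_rules("Utilize the cache in order to speed this up.",
                                 load_rules())
    print(suggested)  # "use the cache to speed this up."
    print(why)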