diff --git a/.github/codex/prompts/review_common.md b/.github/codex/prompts/review_common.md new file mode 100644 index 0000000000..3006f5416a --- /dev/null +++ b/.github/codex/prompts/review_common.md @@ -0,0 +1,89 @@ +# Code Review Instructions + +You are an expert software engineer conducting a code review. Review the provided pull request changes and respond in JSON format according to the schema. + +## Review Criteria + +### Critical Issues (Must Fix) + +#### 1. Security vulnerabilities +- Hardcoded secrets or credentials in code +- Unsafe use of eval/exec or system command execution +- Insecure deserialization (e.g., pickle) +- Missing input validation +- Unsafe file or system resource access + +#### 2. Bugs and logic errors +- NoneType errors (accessing attributes of None) +- Off-by-one errors in indexing or loops +- Race conditions in threading or async code +- Infinite loops +- Incorrect exception handling + +#### 3. Breaking changes +- Changes to function or method signatures +- Changes in behavior of functions or classes +- Removed or renamed public functions, classes, or modules +- Changes in the structure of returned data + +#### 4. Tensor/PyTorch Specific Issues +- **Hidden type conversions**: Implicit dtype/device casts without explicit `.to()` or `.type()` +- **Silent fallbacks**: Hidden fallback implementations or broad try/except that hide errors +- **Batch handling**: Missing support for batch dimension in tensor operations +- **Shape assumptions**: Hardcoded tensor shapes without validation or documentation +- **Device mismatches**: Operations between tensors on different devices without explicit movement + +### Warning Issues (Should Fix) + +#### 1. Code quality +- Code duplication (DRY violations) +- Magic numbers/strings +- Overly complex functions (cyclomatic complexity > 10) +- Deep nesting (more than 3 levels) +- Long functions (> 50 lines) + +#### 2. 
Best practices +- Missing error handling +- Poor naming conventions +- Inconsistent code style +- Lack of comments for complex logic +- Premature optimization + +#### 3. Testing gaps +- Missing unit tests for new functionality +- Missing edge cases in tests +- Low test coverage for changes +- Tests that don't actually assert anything + +#### 4. PyTorch Best Practices +- **Public APIs**: Data-related public APIs should use `torch.Tensor` as base type +- **Documentation**: Tensor shapes missing in docstrings (especially for batch-first semantics) +- **InputData/OutputData**: Missing tests for batch mode + multiple dtypes +- **Device handling**: Implicit assumptions about device placement +- **Gradient flow**: Missing `requires_grad` considerations for trainable parameters + +### Suggestions (Nice to Have) + +#### 1. Performance +- Inefficient algorithms +- Unnecessary database queries +- Missing caching opportunities +- Memory leaks + +#### 2. Maintainability +- Consider extracting reusable components +- Add more comprehensive documentation +- Improve separation of concerns +- Follow SOLID principles more closely + +#### 3. Future-proofing +- Add deprecation notices for removed features +- Consider backward compatibility +- Plan for scalability +- Add feature flags for risky changes + +#### 4. 
PyTorch Optimizations
+- Use in-place operations where appropriate
+- Consider `torch.jit.script` for performance-critical paths
+- Batch operations instead of loops
+- Use appropriate tensor layouts (contiguous vs strided)
diff --git a/.github/codex/review_schema.json b/.github/codex/review_schema.json
new file mode 100644
index 0000000000..42faf2ca30
--- /dev/null
+++ b/.github/codex/review_schema.json
@@ -0,0 +1,100 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "type": "object",
+  "required": ["approved", "summary", "issues", "strengths", "metrics"],
+
+  "properties": {
+    "approved": {
+      "type": "boolean",
+      "description": "set `true` if the PR is accepted to merge"
+    },
+
+    "summary": {
+      "type": "string",
+      "description": "make short comprehensive summary of the review"
+    },
+
+    "related_issue": {
+      "type": "integer",
+      "description": "print the number of the related issue"
+    },
+
+    "issues": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "required": ["severity", "category", "file", "title", "description", "suggestion"],
+
+        "properties": {
+          "severity": {
+            "type": "string",
+            "enum": ["critical", "warning", "suggestion"]
+          },
+
+          "category": {
+            "type": "string",
+            "enum": [
+              "security", "bug", "tensor", "api", "testing",
+              "quality", "performance", "documentation"
+            ]
+          },
+
+          "subcategory": {
+            "type": "string",
+            "enum": [
+              "device_mismatch", "dtype_conversion", "batch_handling",
+              "hidden_fallback", "missing_tests", "shape_documentation"
+            ]
+          },
+
+          "file": {
+            "type": "string",
+            "pattern": "^[\\w\\-/\\.]+\\.(py|js|ts|java|cpp)$"
+          },
+
+          "line": {
+            "type": "integer",
+            "minimum": 1
+          },
+
+          "title": {
+            "type": "string",
+            "maxLength": 100
+          },
+
+          "description": {
+            "type": "string",
+            "maxLength": 1000
+          },
+
+          "suggestion": {
+            "type": "string",
+            "maxLength": 1000
+          },
+
+          "code_example": {
+            "type": "string"
+          }
+        }
+      }
+    },
+
+    "metrics": {
+      "type": "object",
+      "required": ["files_reviewed", "critical_count", "warning_count", "suggestion_count"],
+
+      "properties": {
+        "files_reviewed": { "type": "integer", "minimum": 0 },
+        "critical_count": { "type": "integer", "minimum": 0 },
+        "warning_count": { "type": "integer", "minimum": 0 },
+        "suggestion_count": { "type": "integer", "minimum": 0 }
+      }
+    },
+
+    "strengths": {
+      "type": "array",
+      "items": { "type": "string" },
+      "description": "provide advantages of the reviewed PR"
+    }
+  }
+}
\ No newline at end of file
diff --git a/.github/workflows/codex-auto-review.yml b/.github/workflows/codex-auto-review.yml
new file mode 100644
index 0000000000..a489dac129
--- /dev/null
+++ b/.github/workflows/codex-auto-review.yml
@@ -0,0 +1,148 @@
+name: Automated Codex Reviews
+
+on:
+  pull_request:
+    branches: [ master ]
+    types: [ opened, synchronize, ready_for_review ]
+  workflow_dispatch:
+
+jobs:
+
+  codex-review:
+
+    runs-on: ubuntu-latest
+
+    permissions:
+      contents: read
+      pull-requests: write
+
+    steps:
+      - uses: actions/checkout@v5
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
+          # Full history is required: the three-dot diff below needs the merge base,
+          # which a shallow clone does not contain.
+          fetch-depth: 0
+
+      - name: Fetch refs
+        env:
+          # Passed through the environment so the ref is never parsed as shell code.
+          BASE_REF: ${{ github.event.pull_request.base.ref }}
+        run: |
+          git fetch origin "$BASE_REF"
+          git checkout "origin/$BASE_REF" -- .github/codex/
+          git diff "origin/$BASE_REF...HEAD" > pr.diff
+
+      - name: Select issue
+        id: select
+        env:
+          # gh is preinstalled on GitHub-hosted runners; it only needs GH_TOKEN in the
+          # environment of the step that actually invokes it.
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+        run: |
+          BODY=$(gh pr view "$PR_NUMBER" --json body -q .body)
+
+          ISSUE=$(echo "$BODY" | grep -oE '#[0-9]+' | head -1 | tr -d '#')
+
+          if [ -z "$ISSUE" ]; then
+            ISSUE=$(gh pr view "$PR_NUMBER" --json labels -q '.labels[].name' | \
+              grep '^issue/' | head -1 | cut -d'/' -f2)
+          fi
+
+          if [ -n "$ISSUE" ] && [ -f ".github/codex/prompts/review_${ISSUE}.md" ]; then
+            echo "issue_number=$ISSUE" >> "$GITHUB_OUTPUT"
+            echo "prompt_file=.github/codex/prompts/review_${ISSUE}.md" >> "$GITHUB_OUTPUT"
+          else
+            echo "issue_number=" >> "$GITHUB_OUTPUT"
+            echo "prompt_file=.github/codex/prompts/review_common.md" >> "$GITHUB_OUTPUT"
+          fi
+
+      # Action inputs are plain strings and are not evaluated by a shell, so the
+      # prompt is assembled here and handed to the action as a file.
+      - name: Build prompt
+        env:
+          PROMPT_FILE: ${{ steps.select.outputs.prompt_file }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          PR_TITLE: ${{ github.event.pull_request.title }}
+          PR_BODY: ${{ github.event.pull_request.body }}
+        run: |
+          {
+            echo "This is PR #${PR_NUMBER} for ${GITHUB_REPOSITORY}."
+            echo "========"
+            echo "Read the instructions for the current review below:"
+            echo "----"
+            cat "$PROMPT_FILE"
+            echo "========"
+            echo "Carefully analyze changes introduced by the PR below:"
+            echo "----"
+            cat pr.diff
+            echo "========"
+            echo "Pull request title and body:"
+            echo "----"
+            printf '%s\n' "$PR_TITLE" "$PR_BODY"
+          } > codex-prompt.md
+
+      - name: Run Codex Review
+        id: review
+        uses: openai/codex-action@v1
+        with:
+          openai-api-key: ${{ secrets.OPENAI_API_KEY }}
+          prompt-file: codex-prompt.md
+          output-file: codex-output.json
+          output-schema-file: .github/codex/review_schema.json
+          sandbox: workspace-write
+          safety-strategy: drop-sudo
+
+      - name: Validate JSON
+        id: validate
+        run: |
+          if [ ! -f codex-output.json ]; then
+            echo "approved=false" >> "$GITHUB_OUTPUT"
+            echo "No output file"
+            exit 0
+          fi
+
+          jq empty codex-output.json || exit 1
+
+          APPROVED=$(jq -r '.approved // false' codex-output.json)
+          echo "approved=$APPROVED" >> "$GITHUB_OUTPUT"
+
+          if [ "$APPROVED" = "false" ]; then
+            ISSUES=$(jq -r '.issues // [] | length' codex-output.json)
+            echo "FEEDBACK=Review failed: $ISSUES issues found. See codex-output.json" >> "$GITHUB_ENV"
+          fi
+
+      - name: Validate against schema
+        continue-on-error: false
+        run: |
+          npm install -g ajv-cli
+          ajv validate -s .github/codex/review_schema.json -d codex-output.json
+
+      - name: Post feedback
+        # Runs even after a failed step so the PR always gets a status comment.
+        if: ${{ always() && github.event_name == 'pull_request' }}
+        uses: actions/github-script@v7
+        env:
+          APPROVED: ${{ steps.validate.outputs.approved }}
+          ISSUE_NUMBER: ${{ steps.select.outputs.issue_number }}
+          PROMPT_FILE: ${{ steps.select.outputs.prompt_file }}
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            const fs = require('fs');
+            // Step outputs are read from the environment rather than being
+            // interpolated into the script source.
+            const { APPROVED, ISSUE_NUMBER, PROMPT_FILE } = process.env;
+            const status = APPROVED === 'true' ? 'Approved' : 'Failed';
+
+            const output = fs.existsSync('codex-output.json')
+              ? fs.readFileSync('codex-output.json', 'utf8')
+              : 'No output file generated';
+
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.payload.pull_request.number,
+              body: `## Codex Review \n**Status**: ${status}\n**Issue**: ${ISSUE_NUMBER || 'N/A'}\n**Prompt**: ${PROMPT_FILE || 'N/A'}\n\n**Output**:\n\`\`\`json\n${output}\n\`\`\``
+            });