Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
486b014
Add PTP Nightly Failure Detector GitHub Action
aneeshkp Sep 30, 2025
e565353
Add AI-powered CI failure detection and automated fixes documentation
aneeshkp Sep 30, 2025
501a545
Update PTP Nightly Failure Detector for production deployment
aneeshkp Sep 30, 2025
a4456c3
Correct AI analysis to focus on PTP Ginkgo test failures
aneeshkp Sep 30, 2025
751abdd
Use wildcards for OpenShift version in artifact URLs
aneeshkp Sep 30, 2025
f57db5c
Configure workflow to default to main branch for latest OpenShift
aneeshkp Sep 30, 2025
4dc11d6
Configure workflow to run daily at 8 AM EST and default to main branch
aneeshkp Sep 30, 2025
f815de5
Remove redundant nightly failure detector documentation
aneeshkp Sep 30, 2025
024f733
Fix main version handling - test version
aneeshkp Sep 30, 2025
cf4c1ed
Fix Prow API calls to use correct endpoints
aneeshkp Sep 30, 2025
c4c8844
Simplify API calls for testing - avoid 140MB response
aneeshkp Sep 30, 2025
734bb16
Fix PTP failure detector API calls and enable workflow testing
aneeshkp Sep 30, 2025
ba1d0cb
Replace workflow with clean version using repository script file
aneeshkp Sep 30, 2025
c744c4d
Simplify test mode to always detect failure for workflow testing
aneeshkp Sep 30, 2025
51794f7
Fix workflow labels to use standard GitHub labels
aneeshkp Sep 30, 2025
86bc9b8
Restore proper PTP labels now that they exist in repository
aneeshkp Sep 30, 2025
5dfed76
Implement complete AI-powered PTP triage system using CVE automation …
aneeshkp Sep 30, 2025
fbb40fc
Fix AI triage workflow dependencies and simplify for immediate testing
aneeshkp Sep 30, 2025
7f2a7a8
Fix Gemini model name for working AI analysis
aneeshkp Sep 30, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
241 changes: 241 additions & 0 deletions .github/workflows/ai-ptp-triage.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
name: AI-Powered PTP Triage

on:
issue_comment:
types: [created]

permissions:
contents: read
issues: write
pull-requests: read

jobs:
ai-triage:
runs-on: ubuntu-latest
if: |
github.event.issue.state == 'open' &&
contains(github.event.comment.body, '@ai-triage') &&
(contains(github.event.issue.title, 'PTP') || contains(github.event.issue.labels.*.name, 'ptp'))
timeout-minutes: 15

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Set up Node.js for MCP servers
uses: actions/setup-node@v4
with:
node-version: '18'

- name: Install dependencies
run: |
# Install GitHub CLI for issue operations
curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg
echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null
sudo apt update
sudo apt install gh -y

# Install Python dependencies for Gemini
pip install --upgrade pip
pip install google-generativeai requests

- name: Prepare AI analysis environment
run: |
echo "🤖 Setting up autonomous AI agent environment"
echo "Agent will analyze PTP failures using Gemini AI with issue context"

- name: Create Gemini CLI autonomous agent script
run: |
cat > gemini_agent.py << 'EOF'
#!/usr/bin/env python3
"""
Autonomous Gemini agent for PTP failure analysis
Architecture: GitHub Actions → Gemini AI → GitHub Issue Analysis
"""
import os
import json
import subprocess
import google.generativeai as genai
from typing import Dict, Any

class PTPFailureAgent:
def __init__(self, gemini_api_key: str, github_token: str):
self.gemini_api_key = gemini_api_key
self.github_token = github_token
genai.configure(api_key=gemini_api_key)
self.model = genai.GenerativeModel('gemini-pro')

def analyze_ptp_failure(self, repo: str, issue_number: str) -> str:
"""Main ReAct loop for autonomous PTP failure analysis"""

# Step 1: REASON - Understand the task
reasoning_prompt = f"""
You are an autonomous AI agent analyzing PTP CI failures.

TASK: Analyze PTP failure in issue #{issue_number} from {repo}

REASONING: I need to analyze the PTP failure information and provide:
1. Root cause analysis of the PTP timing issues
2. Specific investigation steps for the engineering team
3. Actionable recommendations for fixing the failure
4. Priority assessment based on PTP accuracy requirements

Focus on PTP-specific timing, synchronization, and hardware issues.
"""

# Step 2: ACT - Execute the analysis
print("🧠 Starting ReAct analysis loop...")

analysis = self._perform_analysis(repo, issue_number)
return analysis

def _perform_analysis(self, repo: str, issue_number: str) -> str:
"""Perform the actual PTP failure analysis"""

# Simplified analysis using available information
prompt = f"""
You are a PTP (Precision Time Protocol) expert analyzing CI test failures.

CONTEXT:
- Repository: {repo}
- Issue: #{issue_number}
- Job: e2e-telco5g-ptp-upstream (OpenShift CI)
- Focus: PTP timing synchronization for telecom/5G workloads

ANALYSIS FRAMEWORK:
1. **Root Cause Categories**:
- PTP daemon configuration (ptp4l, phc2sys)
- Hardware clock synchronization issues
- Network timing precision problems
- Test environment limitations
- Code regressions in PTP operator

2. **Investigation Steps**:
- Check PTP pod logs for sync failures
- Analyze clock offset measurements
- Verify PTP hardware capability
- Review recent code changes

3. **Priority Assessment**:
- High: Sync accuracy > 1μs deviation
- Medium: Intermittent sync issues
- Low: Test flakiness without timing impact

**Your Task**: Provide a comprehensive PTP failure analysis with:
- Root cause hypothesis
- Specific debugging steps
- Fix recommendations
- Priority level

Focus on actionable insights for PTP engineers.
"""

try:
response = self.model.generate_content(prompt)
return response.text
except Exception as e:
return f"Analysis failed: {str(e)}"

def post_github_comment(self, repo: str, issue_number: str, analysis: str) -> bool:
"""Post analysis to GitHub issue"""
comment_body = f"""## 🤖 Autonomous AI PTP Failure Analysis

{analysis}

---
**Analysis Details:**
- **Agent**: Gemini-powered autonomous AI agent
- **Specialization**: PTP timing synchronization for OpenShift/Kubernetes
- **Focus**: Precision Time Protocol failures in telecom/5G workloads
- **Trigger**: @ai-triage comment

**Next Steps:**
1. Review the analysis above
2. Follow the recommended investigation steps
3. Check PTP pod logs and timing measurements
4. Comment `@ai-create-fix` for automated fix proposals (coming soon)

---
*Generated by Autonomous Gemini Agent for PTP Failure Analysis*
"""

# Use GitHub CLI for posting comment
try:
cmd = [
'gh', 'issue', 'comment', issue_number,
'--repo', repo,
'--body', comment_body
]

result = subprocess.run(
cmd,
capture_output=True,
text=True,
env={**os.environ, 'GH_TOKEN': self.github_token}
)

if result.returncode == 0:
print("✅ Analysis posted to GitHub issue")
return True
else:
print(f"❌ Failed to post comment: {result.stderr}")
return False

except Exception as e:
print(f"❌ Error posting comment: {str(e)}")
return False

def main():
"""Main execution function"""
# Get environment variables
gemini_api_key = os.environ.get('GEMINI_API_KEY')
github_token = os.environ.get('GITHUB_TOKEN')
repo = os.environ.get('GITHUB_REPOSITORY')
issue_number = os.environ.get('ISSUE_NUMBER')

if not all([gemini_api_key, github_token, repo, issue_number]):
print("❌ Missing required environment variables")
print("Required: GEMINI_API_KEY, GITHUB_TOKEN, GITHUB_REPOSITORY, ISSUE_NUMBER")
return 1

try:
print(f"🚀 Starting PTP failure analysis for issue #{issue_number}")

# Initialize autonomous agent
agent = PTPFailureAgent(gemini_api_key, github_token)

# Perform analysis
analysis = agent.analyze_ptp_failure(repo, issue_number)

# Post results
success = agent.post_github_comment(repo, issue_number, analysis)

return 0 if success else 1

except Exception as e:
print(f"❌ Agent execution failed: {str(e)}")
return 1

if __name__ == "__main__":
exit(main())
EOF

chmod +x gemini_agent.py

- name: Run Autonomous Gemini Agent
env:
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_REPOSITORY: ${{ github.repository }}
ISSUE_NUMBER: ${{ github.event.issue.number }}
run: python gemini_agent.py

- name: React to trigger comment
if: always()
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
# Add a rocket reaction to the @ai-triage comment to show it was processed
gh api repos/${{ github.repository }}/issues/comments/${{ github.event.comment.id }}/reactions \
--method POST \
--field content='rocket' || echo "Could not add reaction (non-critical)"
Loading
Loading