ctrf-io
diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml
Lines changed: 1 addition & 0 deletions
diff --git a/README.md b/README.md
Lines changed: 8 additions & 24 deletions
diff --git a/__tests__/ctrf/report-preparation.test.ts b/__tests__/ctrf/report-preparation.test.ts
Lines changed: 1 addition & 0 deletions
diff --git a/action.yml b/action.yml
Lines changed: 6 additions & 0 deletions
diff --git a/badges/coverage.svg b/badges/coverage.svg
Lines changed: 1 addition & 1 deletion
diff --git a/ctrf-reports/ctrf-report.json b/ctrf-reports/ctrf-report.json
Lines changed: 8 additions & 1 deletion
diff --git a/dist/index.js b/dist/index.js
Lines changed: 50 additions & 0 deletions
diff --git a/dist/index.js.map b/dist/index.js.map
Lines changed: 1 addition & 1 deletion
diff --git a/dist/reports/ai-summary-report.hbs b/dist/reports/ai-summary-report.hbs
Lines changed: 61 additions & 0 deletions
@@ -143,6 +143,7 @@ jobs:
         with:
           report-path: './ctrf-reports/ctrf-report.json'
           ai-report: true
+          ai-summary-report: true
           annotate: false
         if: always()
   skipped-reports-test:
 
@@ -114,6 +114,7 @@ For more advanced usage, there are several inputs available.
     insights-report: false
     slowest-report: false
     ai-report: false
+    ai-summary-report: false
     skipped-report: false
     suite-folded-report: false
     suite-list-report: false
@@ -179,15 +180,15 @@ with the provider and any optional settings:
   uses: ctrf-io/github-test-reporter@v1
   with:
     report-path: './ctrf/*.json'
-    github-report: true
+    ai-summary-report: true
+    pull-request: true
     ai: |
       {
         "provider": "openai",
-        "model": "gpt-4",
-        "temperature": 0.7,
-        "maxTokens": 2000
+        "model": "gpt-4"
       }
   env:
+    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
     OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
   if: always()
 ```
@@ -223,30 +224,13 @@ All configuration parameters are specified at the root level (all optional excep
   "topP": 1,                       // Nucleus sampling
   "maxMessages": 10,               // Max failed tests to analyze
   "consolidate": true,             // Consolidate multiple failures
+  "additionalPromptContext": "...", // Additional prompt context
+  "additionalSystemPromptContext": "...", // Additional system prompt context
   "log": false,                    // Enable logging
   "deploymentId": "..."            // Azure OpenAI deployment ID (Azure only)
 }
 ```
 
-### Example with Claude
-
-```yaml
-- name: Publish Test Report with Claude AI
-  uses: ctrf-io/github-test-reporter@v1
-  with:
-    report-path: './ctrf/*.json'
-    github-report: true
-    ai: |
-      {
-        "provider": "claude",
-        "model": "claude-3-5-sonnet-20241022",
-        "maxTokens": 3000
-      }
-  env:
-    ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-  if: always()
-```
-
 ## Pull Requests
 
 You can add a pull request comment by using the `pull-request-report` input:
@@ -514,4 +498,4 @@ analyzing test outcomes across multiple platforms becomes more straightforward.
 ## Support Us
 
 If you find this project useful, consider giving it a GitHub star ⭐ It means a
-lot to us.
+lot to us.
@@ -502,6 +502,7 @@ function createSingleReportInputs(): Inputs {
     failedFoldedReport: false,
     previousResultsReport: false,
     aiReport: false,
+    aiSummaryReport: false,
     skippedReport: false,
     suiteFoldedReport: false,
     suiteListReport: false,
 
@@ -75,6 +75,12 @@ inputs:
     description: 'Include the AI analysis report.'
     required: false
     default: false
+  ai-summary-report:
+    description:
+      'Include the AI summary report with structured analysis (summary, code
+      issues, timeout issues, application issues, recommendations).'
+    required: false
+    default: false
   skipped-report:
     description: 'Include the skipped report.'
     required: false
 
@@ -96,7 +96,14 @@
       }
     ],
     "extra": {
-      "ai": "The test suite experienced failures due to issues related to timing and network configuration. The first test failed because the expected page title did not match the actual title within the specified timeout, suggesting a potential mismatch in the expected title pattern or insufficient timeout duration. The second test encountered a real network timeout instead of the simulated failure it was designed to test, indicating problems with the network setup or timeout settings in the test environment. These failures point to a need for reviewing and adjusting both the expected outcomes and the test environment configurations to better align with actual application behavior and network conditions."
+      "ai": "The test suite experienced failures due to issues related to timing and network configuration. The first test failed because the expected page title did not match the actual title within the specified timeout, suggesting a potential mismatch in the expected title pattern or insufficient timeout duration. The second test encountered a real network timeout instead of the simulated failure it was designed to test, indicating problems with the network setup or timeout settings in the test environment. These failures point to a need for reviewing and adjusting both the expected outcomes and the test environment configurations to better align with actual application behavior and network conditions.",
+      "aiSummary": {
+        "summary": "Three related test failures in the `addFooterDisplayFlags` function reveal inconsistent logic when handling the `includeFlakyReportAllFooter` flag across different flaky test scenarios with previous suite results. Two tests expect the flag to be `false` but receive `true`, while one expects `true` but receives `false`. These are not intermittent flakiness issues but consistent logic errors that have affected approximately 27% of test runs.",
+        "code_issues": "• The **addFooterDisplayFlags** function contains contradictory or inverted conditional logic when evaluating whether to set `includeFlakyReportAllFooter` based on flaky test presence across runs and previous results. The function appears to be setting the flag to the opposite of the expected value in multiple scenarios involving flaky test detection with previous suite results.\n• Logic for determining when flaky tests exist \"across all runs\" versus when they don't is either inverted or missing proper condition checks, causing the flag to be enabled when it should be disabled and vice versa in different test scenarios.\n• The combined scenario handling (flaky tests in current AND across all runs) is incorrectly evaluating conditions when merging current results with previous historical data, failing to properly suppress the footer flag when flaky tests are detected.",
+        "timeout_issues": "",
+        "application_issues": "• The test suite shows a consistent 27% failure rate across 52 runs for these specific flag-setting scenarios, indicating a persistent, reproducible bug rather than environmental or timing-related flakiness.",
+        "recommendations": "• Review the **addFooterDisplayFlags** function's conditional logic for setting `includeFlakyReportAllFooter`, specifically the conditions that check for flaky tests across all runs and in combination with previous results.\n• Verify all boolean comparisons and negations in the flaky test detection logic to ensure they are not inverted or contradictory.\n• Add explicit unit tests or debug traces to validate the flaky test count calculations when previous results are included to ensure accurate detection of flaky tests across runs.\n• Ensure the logic correctly distinguishes between three scenarios: (1) flaky tests exist across all runs with previous results, (2) no flaky tests exist across all runs with previous results, and (3) combined current and historical flaky tests, setting the flag appropriately for each case."
+      }
     }
   }
 }
Original file line number	Diff line number	Diff line change
`@@ -96,7 +96,14 @@`
`96`	`96`	`}`
`97`	`97`	`],`
`98`	`98`	`"extra": {`
`99`		- "ai": "The test suite experienced failures due to issues related to timing and network configuration. The first test failed because the expected page title did not match the actual title within the specified timeout, suggesting a potential mismatch in the expected title pattern or insufficient timeout duration. The second test encountered a real network timeout instead of the simulated failure it was designed to test, indicating problems with the network setup or timeout settings in the test environment. These failures point to a need for reviewing and adjusting both the expected outcomes and the test environment configurations to better align with actual application behavior and network conditions."
	`99`	+ "ai": "The test suite experienced failures due to issues related to timing and network configuration. The first test failed because the expected page title did not match the actual title within the specified timeout, suggesting a potential mismatch in the expected title pattern or insufficient timeout duration. The second test encountered a real network timeout instead of the simulated failure it was designed to test, indicating problems with the network setup or timeout settings in the test environment. These failures point to a need for reviewing and adjusting both the expected outcomes and the test environment configurations to better align with actual application behavior and network conditions.",
	`100`	`+ "aiSummary": {`
	`101`	+ "summary": "Three related test failures in the `addFooterDisplayFlags` function reveal inconsistent logic when handling the `includeFlakyReportAllFooter` flag across different flaky test scenarios with previous suite results. Two tests expect the flag to be `false` but receive `true`, while one expects `true` but receives `false`. These are not intermittent flakiness issues but consistent logic errors that have affected approximately 27% of test runs.",
	`102`	+ "code_issues": "• The addFooterDisplayFlags function contains contradictory or inverted conditional logic when evaluating whether to set `includeFlakyReportAllFooter` based on flaky test presence across runs and previous results. The function appears to be setting the flag to the opposite of the expected value in multiple scenarios involving flaky test detection with previous suite results.\n• Logic for determining when flaky tests exist \"across all runs\" versus when they don't is either inverted or missing proper condition checks, causing the flag to be enabled when it should be disabled and vice versa in different test scenarios.\n• The combined scenario handling (flaky tests in current AND across all runs) is incorrectly evaluating conditions when merging current results with previous historical data, failing to properly suppress the footer flag when flaky tests are detected.",
	`103`	`+ "timeout_issues": "",`
	`104`	`+ "application_issues": "• The test suite shows a consistent 27% failure rate across 52 runs for these specific flag-setting scenarios, indicating a persistent, reproducible bug rather than environmental or timing-related flakiness.",`
	`105`	+ "recommendations": "• Review the addFooterDisplayFlags function's conditional logic for setting `includeFlakyReportAllFooter`, specifically the conditions that check for flaky tests across all runs and in combination with previous results.\n• Verify all boolean comparisons and negations in the flaky test detection logic to ensure they are not inverted or contradictory.\n• Add explicit unit tests or debug traces to validate the flaky test count calculations when previous results are included to ensure accurate detection of flaky tests across runs.\n• Ensure the logic correctly distinguishes between three scenarios: (1) flaky tests exist across all runs with previous results, (2) no flaky tests exist across all runs with previous results, and (3) combined current and historical flaky tests, setting the flag appropriately for each case."
	`106`	`+ }`
`100`	`107`	`}`
`101`	`108`	`}`
`102`	`109`	`}`