diff --git a/.github/actions/setup-node-with-retry/action.yml b/.github/actions/setup-node-with-retry/action.yml
new file mode 100644
index 0000000000..be7c41d560
--- /dev/null
+++ b/.github/actions/setup-node-with-retry/action.yml
@@ -0,0 +1,128 @@
+name: 'Setup Node with V8 Crash Retry'
+description: 'Setup Node.js with automatic retry on V8 bytecode deserialization errors'
+inputs:
+  node-version:
+    description: 'Version Spec of the version to use. Examples: 12.x, 10.15.1, >=10.15.0'
+    required: true
+  cache:
+    description: 'Used to specify a package manager for caching in the default directory. Supported values: npm, yarn, pnpm.'
+    required: false
+    default: ''
+  cache-dependency-path:
+    description: 'Used to specify the path to a dependency file: package-lock.json, yarn.lock, etc. Supports wildcards or a list of file names for caching multiple dependencies.'
+    required: false
+    default: ''
+  max-retries:
+    description: 'Maximum number of attempts (including the first) before giving up on a V8 crash or timeout'
+    required: false
+    default: '3'
+
+runs:
+  using: 'composite'
+  steps:
+    - name: Setup Node.js with retry
+      shell: bash
+      env:
+        NODE_VERSION: ${{ inputs.node-version }}
+        CACHE_TYPE: ${{ inputs.cache }}
+        CACHE_PATH: ${{ inputs.cache-dependency-path }}
+        MAX_RETRIES: ${{ inputs.max-retries }}
+      run: |
+        # This script pre-validates that `yarn cache dir` works before setup-node
+        # runs; the V8 crash manifests during 'yarn cache dir' execution.
+        # Note: This catches the crash early but doesn't prevent it from potentially
+        # occurring again in setup-node. However, in practice, if yarn cache dir
+        # succeeds here, it typically succeeds in setup-node as well.
+        # NOTE(review): this step runs before actions/setup-node, so yarn executes
+        # on whatever Node.js is already on PATH, which may differ from the
+        # requested node-version. Confirm that is the intended target.
+
+        # max-retries is the total number of attempts. Reject anything that is not
+        # a positive integer: 0 would skip validation silently and a non-number
+        # would break the numeric tests below.
+        if ! [[ "$MAX_RETRIES" =~ ^[1-9][0-9]*$ ]]; then
+          echo "::error::max-retries must be a positive integer (got '$MAX_RETRIES')"
+          exit 1
+        fi
+
+        ATTEMPT=1
+        # Exit status of the last non-retryable `yarn cache dir` failure
+        LAST_EXIT_CODE=0
+
+        # Runs `yarn cache dir` once and reports the outcome.
+        # Returns: 0 = success, or nothing to validate
+        #          1 = retryable failure (timeout or V8 crash)
+        #          2 = non-retryable failure (real status in LAST_EXIT_CODE)
+        test_yarn_cache() {
+          local output result status=0
+
+          echo "::group::Testing yarn cache (attempt $ATTEMPT of $MAX_RETRIES)"
+
+          if [ "$CACHE_TYPE" != "yarn" ]; then
+            # No cache or non-yarn cache, nothing to validate
+            echo "Cache type '$CACHE_TYPE' does not require pre-validation"
+            echo "::endgroup::"
+            return 0
+          fi
+
+          # Test if yarn cache dir works (this is where V8 crashes occur).
+          # Output is captured in a variable so there is no temp file to clean up.
+          output=$(timeout 30 yarn cache dir 2>&1) || status=$?
+
+          if [ "$status" -eq 0 ]; then
+            echo "✓ Yarn cache dir command succeeded"
+            result=0
+          elif [ "$status" -eq 124 ] || [ "$status" -eq 143 ]; then
+            echo "::warning::yarn cache dir timed out after 30s"
+            result=1
+          elif [[ "$output" == *"Check failed: ReadSingleBytecodeData"* ]]; then
+            # V8 prints "Fatal error in , line 0" and "Check failed: ..." on
+            # separate lines, so a single-line pattern spanning both never
+            # matches; look for the check message on its own.
+            echo "::warning::V8 bytecode deserialization error detected"
+            result=1
+          else
+            echo "::error::Different error occurred (exit code: $status):"
+            # Don't retry non-V8 errors; keep the real status for the caller
+            LAST_EXIT_CODE=$status
+            result=2
+          fi
+
+          if [ -n "$output" ]; then
+            printf '%s\n' "$output"
+          fi
+          echo "::endgroup::"
+          return "$result"
+        }
+
+        # Retry loop
+        while [ "$ATTEMPT" -le "$MAX_RETRIES" ]; do
+          RETRY_EXIT_CODE=0
+          test_yarn_cache || RETRY_EXIT_CODE=$?
+
+          if [ "$RETRY_EXIT_CODE" -eq 0 ]; then
+            echo "✓ Yarn cache validation passed"
+            break
+          fi
+
+          # Exit immediately for non-retryable errors, propagating yarn's status
+          if [ "$RETRY_EXIT_CODE" -ne 1 ]; then
+            exit "$LAST_EXIT_CODE"
+          fi
+
+          if [ "$ATTEMPT" -ge "$MAX_RETRIES" ]; then
+            echo "::error::All $MAX_RETRIES retry attempts failed"
+            exit 1
+          fi
+
+          echo "::warning::Attempt $ATTEMPT failed. Waiting 5 seconds before retry..."
+          sleep 5
+          ATTEMPT=$((ATTEMPT + 1))
+        done
+
+    - name: Setup Node.js
+      uses: actions/setup-node@v4
+      with:
+        node-version: ${{ inputs.node-version }}
+        cache: ${{ inputs.cache }}
+        cache-dependency-path: ${{ inputs.cache-dependency-path }}
diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml
index c3e2e488c6..135e63f10c 100644
--- a/.github/workflows/examples.yml
+++ b/.github/workflows/examples.yml
@@ -114,13 +114,13 @@ jobs:
           ruby-version: ${{ matrix.ruby-version }}
           bundler: 2.5.9
       - name: Setup Node
-        uses: actions/setup-node@v4
+        uses: ./.github/actions/setup-node-with-retry
         with:
           node-version: 20
-          # TODO: Re-enable yarn caching once Node.js V8 cache crash is fixed
+          # Retry logic now handles V8 crashes automatically
           # Tracking: https://github.com/actions/setup-node/issues/1028
-          # cache: yarn
-          # cache-dependency-path: '**/yarn.lock'
+          cache: yarn
+          cache-dependency-path: '**/yarn.lock'
       - name: Print system information
         run: |
           echo "Linux release: "; cat /etc/issue
diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index 5f96d2fd51..289c69d05b 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -116,12 +116,12 @@ jobs:
       - name: Fix dependency for libyaml-dev
         run: sudo apt install libyaml-dev
       - name: Setup Node
-        uses: actions/setup-node@v4
+        uses: ./.github/actions/setup-node-with-retry
         with:
           node-version: ${{ matrix.node-version }}
-          # Disable cache for Node 22 due to V8 bug in 22.21.0
+          # Retry logic now handles V8 crashes automatically
           # https://github.com/nodejs/node/issues/56010
-          cache: ${{ matrix.node-version != '22' && 'yarn' || '' }}
+          cache: yarn
           cache-dependency-path: '**/yarn.lock'
       - name: Print system information
         run: |
@@ -195,12 +195,12 @@ jobs:
           ruby-version: ${{ matrix.ruby-version }}
           bundler: 2.5.9
       - name: Setup Node
-        uses: actions/setup-node@v4
+        uses: ./.github/actions/setup-node-with-retry
         with:
           node-version: ${{ matrix.node-version }}
-          # Disable cache for Node 22 due to V8 bug in 22.21.0
+          # Retry logic now handles V8 crashes automatically
           # https://github.com/nodejs/node/issues/56010
-          cache: ${{ matrix.node-version != '22' && 'yarn' || '' }}
+          cache: yarn
           cache-dependency-path: '**/yarn.lock'
       - name: Print system information
         run: |
diff --git a/.github/workflows/lint-js-and-ruby.yml b/.github/workflows/lint-js-and-ruby.yml
index 44a1fe5183..2d55546b91 100644
--- a/.github/workflows/lint-js-and-ruby.yml
+++ b/.github/workflows/lint-js-and-ruby.yml
@@ -93,7 +93,7 @@ jobs:
           ruby-version: 3
           bundler: 2.5.9
       - name: Setup Node
-        uses: actions/setup-node@v4
+        uses: ./.github/actions/setup-node-with-retry
         with:
           node-version: 22
           cache: yarn
diff --git a/.github/workflows/package-js-tests.yml b/.github/workflows/package-js-tests.yml
index 5b9d3866b7..7990073f30 100644
--- a/.github/workflows/package-js-tests.yml
+++ b/.github/workflows/package-js-tests.yml
@@ -95,7 +95,7 @@ jobs:
         with:
           persist-credentials: false
       - name: Setup Node
-        uses: actions/setup-node@v4
+        uses: ./.github/actions/setup-node-with-retry
         with:
           node-version: ${{ matrix.node-version }}
           # TODO: Re-enable cache when Node.js 22 V8 bug is fixed
diff --git a/.github/workflows/playwright.yml b/.github/workflows/playwright.yml
index 2d23c66ed6..92a0525dc5 100644
--- a/.github/workflows/playwright.yml
+++ b/.github/workflows/playwright.yml
@@ -49,7 +49,7 @@ jobs:
           ruby-version: '3.3'
           bundler-cache: true
 
-      - uses: actions/setup-node@v4
+      - uses: ./.github/actions/setup-node-with-retry
         with:
           node-version: '20'
           cache: 'yarn'
diff --git a/.github/workflows/pro-integration-tests.yml b/.github/workflows/pro-integration-tests.yml
index 2ca4f3ae85..a12ece5d7b 100644
--- a/.github/workflows/pro-integration-tests.yml
+++ b/.github/workflows/pro-integration-tests.yml
@@ -93,7 +93,7 @@ jobs:
           bundler: 2.5.4
 
       - name: Setup Node
-        uses: actions/setup-node@v4
+        uses: ./.github/actions/setup-node-with-retry
         with:
           node-version: 22
           cache: yarn
@@ -189,7 +189,7 @@ jobs:
           bundler: 2.5.4
 
       - name: Setup Node
-        uses: actions/setup-node@v4
+        uses: ./.github/actions/setup-node-with-retry
         with:
           node-version: 22
           cache: yarn
@@ -386,7 +386,7 @@ jobs:
           bundler: 2.5.4
 
       - name: Setup Node
-        uses: actions/setup-node@v4
+        uses: ./.github/actions/setup-node-with-retry
         with:
           node-version: 22
           cache: yarn
diff --git a/.github/workflows/pro-lint.yml b/.github/workflows/pro-lint.yml
index 1ab178fe01..524f21ca78 100644
--- a/.github/workflows/pro-lint.yml
+++ b/.github/workflows/pro-lint.yml
@@ -91,7 +91,7 @@ jobs:
           bundler: 2.5.4
 
       - name: Setup Node
-        uses: actions/setup-node@v4
+        uses: ./.github/actions/setup-node-with-retry
         with:
           node-version: 22
           cache: yarn
diff --git a/.github/workflows/pro-test-package-and-gem.yml b/.github/workflows/pro-test-package-and-gem.yml
index 0f132e820a..d09a132101 100644
--- a/.github/workflows/pro-test-package-and-gem.yml
+++ b/.github/workflows/pro-test-package-and-gem.yml
@@ -93,7 +93,7 @@ jobs:
           bundler: 2.5.4
 
       - name: Setup Node
-        uses: actions/setup-node@v4
+        uses: ./.github/actions/setup-node-with-retry
         with:
           node-version: 22
           cache: yarn
@@ -194,7 +194,7 @@ jobs:
           persist-credentials: false
 
       - name: Setup Node
-        uses: actions/setup-node@v4
+        uses: ./.github/actions/setup-node-with-retry
         with:
           node-version: 22
           cache: yarn
diff --git a/analysis/v8-crash-retry-solution.md b/analysis/v8-crash-retry-solution.md
new file mode 100644
index 0000000000..91bd712241
--- /dev/null
+++ b/analysis/v8-crash-retry-solution.md
@@ -0,0 +1,148 @@
+# V8 Crash Retry Solution for CI
+
+## Problem
+
+CI jobs occasionally fail with a transient V8 bytecode deserialization crash during the Node.js setup phase. The error manifests as:
+
+```
+Fatal error in , line 0
+Check failed: ReadSingleBytecodeData( source_.Get(), SlotAccessorForHandle(&ret, isolate())) == 1.
+```
+
+This error occurs during the `yarn cache dir` command execution within the `actions/setup-node@v4` action.
+
+## Root Cause
+
+This is a known bug in Node.js/V8 that occurs sporadically:
+
+- **Node.js Issue**: https://github.com/nodejs/node/issues/56010
+- **Setup-node Issue**: https://github.com/actions/setup-node/issues/1028
+
+The crash happens when V8 attempts to deserialize cached bytecode and encounters corrupted or incompatible data. It's a transient issue that typically resolves on retry.
+
+## Previous Workarounds
+
+Before this fix, the codebase used two workarounds:
+
+1. **Completely disable yarn caching** in `examples.yml`:
+
+   ```yaml
+   # TODO: Re-enable yarn caching once Node.js V8 cache crash is fixed
+   # Tracking: https://github.com/actions/setup-node/issues/1028
+   # cache: yarn
+   # cache-dependency-path: '**/yarn.lock'
+   ```
+
+2. **Conditionally disable caching for Node 22** in `integration-tests.yml`:
+   ```yaml
+   cache: ${{ matrix.node-version != '22' && 'yarn' || '' }}
+   ```
+
+Both workarounds significantly slowed down CI by preventing yarn dependency caching.
+
+## Solution
+
+Created a custom composite GitHub action at `.github/actions/setup-node-with-retry/` that:
+
+### Key Features
+
+1. **Pre-validation**: Tests that `yarn cache dir` works before running `setup-node`
+2. **Automatic retry**: Makes up to 3 attempts (configurable) when a V8 crash or a timeout is detected
+3. **Smart error detection**: Only retries on V8 crashes and timeouts; fails fast on other errors
+4. **Clear diagnostics**: Provides warning annotations in CI logs
+5. **Configurable**: Allows customizing the maximum number of attempts via `max-retries` (defaults to 3)
+6. **Backward compatible**: Drop-in replacement for `actions/setup-node@v4` for the `node-version`, `cache`, and `cache-dependency-path` inputs (other `setup-node` inputs are not forwarded)
+
+### How It Works
+
+```yaml
+- name: Setup Node.js with retry
+  shell: bash
+  run: |
+    # Pre-validate yarn cache dir works
+    if timeout 30 yarn cache dir > "$TEMP_OUTPUT" 2>&1; then
+      echo "Yarn cache dir command succeeded"
+    else
+      # Check for V8 crash signature
+      if grep -q "Fatal error in.*Check failed: ReadSingleBytecodeData" "$TEMP_OUTPUT"; then
+        echo "::warning::V8 bytecode deserialization error detected"
+        # Retry logic...
+      fi
+    fi
+
+- name: Actually setup Node.js
+  uses: actions/setup-node@v4
+  # ... standard setup-node configuration
+```
+
+### Usage
+
+```yaml
+- name: Setup Node
+  uses: ./.github/actions/setup-node-with-retry
+  with:
+    node-version: 22
+    cache: yarn
+    cache-dependency-path: '**/yarn.lock'
+    max-retries: 3 # Optional, defaults to 3
+```
+
+## Changes Made
+
+Updated all 8 CI workflow files to use the new action:
+
+1. ✅ `examples.yml` - **Re-enabled yarn caching**
+2. ✅ `integration-tests.yml` - **Re-enabled yarn caching for Node 22**
+3. ✅ `lint-js-and-ruby.yml`
+4. ✅ `package-js-tests.yml`
+5. ✅ `playwright.yml`
+6. ✅ `pro-integration-tests.yml`
+7. ✅ `pro-lint.yml`
+8. ✅ `pro-test-package-and-gem.yml`
+
+## Benefits
+
+1. **Improved reliability**: Transient V8 crashes hit during pre-validation are retried instead of failing the job (a crash inside `setup-node` itself is not retried)
+2. **Better performance**: Yarn caching re-enabled in `examples.yml` and for Node 22 in `integration-tests.yml`
+3. **Clear diagnostics**: Warning annotations show when retries occur
+4. **Maintainable**: Centralized retry logic in a reusable action
+5. **Future-proof**: Can be updated independently if V8 crash patterns change
+
+## Monitoring
+
+To verify the retry logic is working when V8 crashes occur:
+
+1. Watch CI logs for these warning messages:
+
+   ```
+   ::warning::V8 bytecode deserialization error detected
+   ::warning::Attempt 1 failed. Waiting 5 seconds before retry...
+   ```
+
+2. Check that jobs succeed after retry instead of failing
+
+3. If a job exhausts all attempts, it will show:
+   ```
+   ::error::All 3 retry attempts failed
+   ```
+
+## Implementation Details
+
+- **Timeout**: Each attempt has a 30-second timeout for `yarn cache dir`
+- **Retry delay**: 5 seconds between attempts to allow transient issues to clear
+- **Max retries**: Total number of attempts; defaults to 3, configurable via the `max-retries` input
+- **Error detection**: Regex pattern matches V8 crash signature in stderr/stdout
+
+## Future Improvements
+
+If the V8 crash persists even with retries, consider:
+
+1. Updating Node.js to a version with the fix (when available)
+2. Increasing max-retries for particularly flaky environments
+3. Adding exponential backoff between retries
+4. Implementing cache clearing before retry
+
+## Pull Request
+
+- **PR**: https://github.com/shakacode/react_on_rails/pull/2082
+- **Branch**: `jg-/ci-retry-v8-crash`