diff --git a/.coderabbit.yaml b/.coderabbit.yaml index cb3ad2fc4..855ebf172 100644 --- a/.coderabbit.yaml +++ b/.coderabbit.yaml @@ -4,9 +4,16 @@ reviews: profile: chill high_level_summary: true + poem: false auto_review: enabled: true drafts: false + base_branches: + - main + - master + - develop + - "release/**" + - "hotfix/**" ignore_usernames: ["rapids-bot", "GPUtester", "nv-automation-bot", "copy-pr-bot"] tools: markdownlint: @@ -16,3 +23,39 @@ reviews: gitleaks: enabled: true sequence_diagrams: false + collapse_walkthrough: true + + # Reduce noise from status messages + request_changes_workflow: false + review_status: false + + # Path-specific review instructions + path_instructions: + - path: "docs/**/*" + instructions: | + For documentation changes, focus on: + - Accuracy: Verify code examples compile and run correctly + - Completeness: Check if API changes (parameters, return values, errors) are documented + - Clarity: Flag confusing explanations, missing prerequisites, or unclear examples + - Consistency: Version numbers, parameter types, and terminology match code + - Examples: Suggest adding examples for complex features or new APIs + - Missing docs: If PR changes public APIs without updating docs, flag as HIGH priority + + When code changes affect docs: + - Suggest specific doc files that need updates (e.g., docs/cuopt/api.rst) + - Identify outdated information contradicting the code changes + - Recommend documenting performance characteristics, GPU requirements, or numerical tolerances + + - path: "cpp/include/cuopt/**/*" + instructions: | + For public header files (C++ API): + - Check if new public functions/classes have documentation comments (Doxygen format) + - Flag API changes that may need corresponding docs/ updates + - Verify parameter descriptions match actual types/behavior + - Suggest documenting thread-safety, GPU requirements, and numerical behavior + - For breaking changes, recommend updating docs and migration guides +knowledge_base: + opt_out: false + code_guidelines: + filePatterns: + - ".github/.coderabbit_review_guide.md" diff --git a/.github/.coderabbit_review_guide.md b/.github/.coderabbit_review_guide.md new file mode 100644 index 000000000..828fc6884 --- /dev/null +++ b/.github/.coderabbit_review_guide.md @@ -0,0 +1,468 @@ +# AI Code Review Guidelines for CodeRabbit - cuOpt + +**Role**: Act as a principal engineer with 10+ years experience in GPU computing, numerical optimization, and high-performance systems. Focus ONLY on CRITICAL and HIGH issues. + +**Target**: Sub-3% false positive rate. Be direct, concise, minimal. + +**Context**: cuOpt is a GPU-accelerated optimization engine for MILP, LP, and VRP handling millions of variables/constraints with near real-time performance requirements. 
+ +## IGNORE These Issues + +- Style/formatting (linters handle this) +- Minor naming preferences (unless truly misleading) +- Personal taste on implementation (unless impacts maintainability) +- Nits that don't affect functionality +- Already-covered issues (one comment per root cause) + +## CRITICAL Issues (Always Comment) + +### Algorithm Correctness +- Logic errors in optimization algorithms (simplex, branch-and-bound, routing heuristics, diving) +- Incorrect constraint handling or objective function computation +- Numerical instability causing wrong results (overflow, underflow, precision loss) +- Infeasibility misclassification or unbounded solution detection failures +- Breaking changes to solver behavior without versioning +- **Variable/constraint initialization errors** (incorrect bounds, invalid starting values, uninitialized state) +- **Problem transformation bugs** (accessing variables/constraints from wrong context - e.g., original vs folded problem) +- **Algorithm state corruption** (incorrect state transitions, mixing state between phases) + +### GPU/CUDA Issues +- Unchecked CUDA errors (kernel launches, memory operations, synchronization) +- Race conditions in GPU kernels (shared memory, atomics, warps) +- Device memory leaks (cudaMalloc/cudaFree imbalance, leaked streams/events) +- Invalid memory access (out-of-bounds, use-after-free, host/device confusion) +- Missing CUDA synchronization causing non-deterministic failures +- Kernel launch with zero blocks/threads or invalid grid/block dimensions +- **Missing explicit stream creation for concurrent operations** (reusing default stream, missing stream isolation) +- **Incorrect stream lifecycle management** (using destroyed streams, not creating dedicated streams for barriers/concurrent ops) + +### Resource Management +- GPU memory leaks (device allocations, managed memory, pinned memory) +- CUDA stream/event leaks or improper cleanup +- Unclosed file handles for MPS/QPS problem files +- Missing RAII or proper cleanup in exception paths +- Resource exhaustion (GPU memory, file descriptors, network sockets) + +### API Breaking Changes +- C API changes without ABI versioning +- Python API changes breaking backward compatibility +- Server API endpoint changes without deprecation path +- Changes to data structures exposed in public headers + +## HIGH Issues (Comment if Substantial) + +### Performance Issues +- Inefficient GPU kernel launches (low occupancy, poor memory access patterns) +- Unnecessary host-device synchronization blocking GPU pipeline +- CPU bottlenecks in GPU-heavy code paths +- Suboptimal memory access patterns (non-coalesced, strided, unaligned) +- Excessive memory allocations in hot paths +- Algorithmic complexity issues for large-scale problems (O(n²) when O(n log n) exists) +- Missing or incorrect problem size checks before expensive operations + +### Numerical Stability +- Floating-point operations prone to catastrophic cancellation +- Missing checks for division by zero or near-zero values +- Ill-conditioned matrix operations without preconditioning +- Accumulation errors in iterative algorithms +- Unsafe casting between numeric types (double→float with potential precision loss) +- Missing epsilon comparisons for floating-point equality checks +- **Assertion failures in numerical computations** (overly strict assertions, incorrect tolerance assumptions) +- **Numerical edge cases causing assertion failures** (near-zero pivots, degenerate cases, extreme values) +- **Inconsistent numerical tolerances** 
(mixing different epsilon values, hardcoded vs configurable tolerances) + +### Concurrency & Thread Safety +- Race conditions in multi-GPU code or multi-threaded server +- Missing synchronization for shared state +- Improper CUDA stream management causing false dependencies +- Deadlock potential in resource acquisition +- Thread-unsafe use of global/static variables +- Missing or incorrect use of mutexes in server code +- **Concurrent operations sharing streams incorrectly** (barriers, synchronization primitives without dedicated streams) +- **Stream reuse across independent operations** (causing unwanted serialization or race conditions) + +### Security (Server/API) +- Unsanitized input in problem data leading to buffer overflows +- Lack of input validation allowing resource exhaustion attacks +- Credential exposure in logs or error messages +- Unsafe deserialization of problem files (pickle, msgpack) +- Missing rate limiting on API endpoints +- Insufficient error handling exposing internal implementation details + +### Design & Architecture +- Tight coupling between solver components reducing modularity +- Hard-coded GPU device IDs or resource limits +- Missing abstraction for multi-backend support (different CUDA versions) +- Inappropriate use of exceptions in performance-critical paths +- Missing or incomplete error propagation from CUDA to user APIs +- Significant code duplication (3+ occurrences) in kernel or solver logic +- Reinventing functionality already available in dependencies (thrust, cccl, rmm) + +### Test Quality +- Flaky tests due to GPU timing, uninitialized memory, or race conditions +- Missing validation of numerical correctness (only checking "runs without error") +- Test isolation violations (GPU state, cached memory, global variables) +- Missing edge case coverage (empty problems, infeasible, unbounded, degenerate) +- Inadequate test coverage for error paths and exception handling +- Missing benchmarks or performance regression detection +- **Missing tests for problem transformations** (verify correctness of original→transformed→postsolve mappings) +- **Missing tests for algorithm phase transitions** (verify state initialization between phases) +- **Missing tests with free variables, singleton problems, or extreme problem dimensions** + +## MEDIUM Issues (Comment Selectively) + +- Edge cases not handled (empty problem, single constraint, zero variables, large problem sizes near limits) +- Missing input validation (negative sizes, null pointers, invalid problem formats) +- Code duplication in solver or kernel logic (3+ occurrences) if pattern exists +- Misleading naming that obscures GPU/CPU boundaries or numerical precision +- Deprecated CUDA API usage or deprecated cuOpt internal APIs +- Missing documentation for numerical tolerances or algorithm parameters +- Suboptimal but functional memory patterns that could be improved +- Minor inefficiencies in non-critical code paths +- **Unclear problem context in function parameters** (ambiguous whether operating on original or transformed problem) +- **Missing explicit initialization comments** (state appears uninitialized but may be set elsewhere) +- **Potential index confusion** (variable naming doesn't clarify which problem space the index refers to) + +## Review Protocol + +1. **Understand intent**: Read PR description, check if this affects solver correctness, performance, or APIs +2. **Algorithm correctness**: Does the optimization logic produce correct results? Numerical stability? +3. 
**GPU correctness**: CUDA errors checked? Memory safety? Race conditions? Synchronization?
+4. **Resource management**: GPU memory leaks? Stream/event cleanup? File handles closed?
+5. **Performance**: GPU bottlenecks? Unnecessary sync? Memory access patterns? Scalability to millions of variables?
+6. **API stability**: Breaking changes to C/Python/Server APIs? Backward compatibility?
+7. **Security (if server code)**: Input validation? Resource exhaustion? Unsafe deserialization?
+8. **Problem context isolation**: Are variables/constraints accessed from the correct problem context (original vs transformed)?
+9. **Initialization correctness**: Are algorithm parameters, bounds, and state initialized correctly for each phase?
+10. **Stream lifecycle**: Are CUDA streams explicitly created/destroyed for concurrent operations? Proper isolation?
+11. **Ask, don't tell**: "Have you considered X?" not "You should do X"
+
+## Quality Threshold
+
+Before commenting, ask:
+1. Is this actually wrong/risky, or just different?
+2. Would this cause a real problem in production?
+3. Does this comment add unique value?
+
+**If no to any: Skip the comment.**
+
+## Output Format
+
+- Use severity labels: CRITICAL, HIGH, MEDIUM
+- Be concise: One-line issue summary + one-line impact
+- Provide code suggestions when you have concrete fixes
+- Omit generic explanations and boilerplate
+- No preamble or sign-off
+
+## Token Optimization
+
+- Omit explanations for obvious issues
+- Omit descriptions of code or design not critical to understanding the changes or issues raised
+- Omit listing benefits of standard good practices and other generic information apparent to an experienced developer
+- No preamble or sign-off
+
+## Context Awareness
+
+**Skip if**:
+- Already handled by CI/linters
+- Same issue exists in codebase (note once if systemic)
+- Experimental/prototype code (check PR labels)
+- Explicitly marked as technical debt
+
+**Escalate if**:
+- Breaking change without discussion
+- Conflicts with documented architecture
+- Security vulnerability
+
+## Examples to Follow
+
+**CRITICAL** (GPU memory leak):
+```
+CRITICAL: GPU memory leak in solver cleanup
+
+Issue: Device memory allocated but never freed on error path
+Why: Causes GPU OOM on repeated solves
+
+Suggested fix:
+if (cudaMalloc(&d_data, size) != cudaSuccess) {
+  // cleanup other resources before returning
+  cudaFree(d_other);
+  return ERROR_CODE;
+}
+```
+
+**CRITICAL** (unchecked CUDA error):
+```
+CRITICAL: Unchecked kernel launch
+
+Issue: Kernel launch error not checked
+Why: Subsequent operations assume success, causing silent corruption
+
+Suggested fix:
+myKernel<<<grid, block>>>(args);
+CUDA_CHECK(cudaGetLastError());
+```
+
+**HIGH** (numerical stability):
+```
+HIGH: Potential division by near-zero
+
+Issue: No epsilon check before division in simplex pivot
+Why: Can produce Inf/NaN values corrupting solution
+Consider: Add epsilon threshold check or use safe division helper
+```
+
+**HIGH** (performance issue):
+```
+HIGH: Unnecessary synchronization in hot path
+
+Issue: cudaDeviceSynchronize() inside iteration loop
+Why: Blocks GPU pipeline, 10x slowdown on benchmarks
+Consider: Move sync outside loop or use streams with events
+```
+
+**CRITICAL** (variable scope violation):
+```
+CRITICAL: Accessing variables from wrong problem context
+
+Issue: Code accesses free variables from original problem in folded problem
+Why: Variable indices don't map correctly between contexts, causing wrong values/crashes
+Impact: Silent data corruption or segfaults on problems with free variables
+
+Suggested fix:
+// Use folded_problem.variables instead of original_problem.variables
+for (int i = 0; i < folded_problem.num_vars; i++) {
+  double val = folded_problem.variables[i]; // NOT original_problem.variables[i]
+}
+```
+
+**CRITICAL** (incorrect initialization):
+```
+CRITICAL: Variable bounds not initialized correctly for diving
+
+Issue: Starting bounds use wrong values from previous phase
+Why: Diving algorithm starts with invalid bounds, producing wrong solutions
+Impact: Incorrect optimization results, potential infeasibility
+
+Suggested fix:
+// Reset bounds before diving
+for (int i = 0; i < num_vars; i++) {
+  diving_bounds[i].lower = problem.original_lower_bounds[i];
+  diving_bounds[i].upper = problem.original_upper_bounds[i];
+}
+```
+
+**HIGH** (missing stream isolation):
+```
+HIGH: Barrier operation missing dedicated stream
+
+Issue: Barrier concurrent operation uses the default stream without explicit creation
+Why: Can cause serialization with other operations, race conditions, or deadlocks
+Impact: Performance degradation or non-deterministic failures
+
+Suggested fix:
+cudaStream_t barrier_stream;
+cudaStreamCreate(&barrier_stream);
+// Use barrier_stream for barrier operations
+// Don't forget: cudaStreamDestroy(barrier_stream) in cleanup
+```
+
+**HIGH** (numerical assertion failure):
+```
+HIGH: Overly strict assertion in pivot operation
+
+Issue: Assert fails on legitimate near-zero pivots in degenerate problems
+Why: Tolerance too strict for edge cases, assertion doesn't allow valid scenarios
+Impact: Crashes on valid degenerate problems
+
+Consider: Replace assertion with warning + fallback, or use configurable tolerance
+```
+
+**Good, concise summary**:
+- Refactor simplex and dual-simplex solvers to share common pivot logic
+- Consolidate CUDA error checking into reusable macros
+- Extract repeated kernel patterns into templated device functions
+
+## Examples to Avoid
+
+**Boilerplate and generic descriptions** (avoid):
+- "CUDA Best Practices: Using streams improves concurrency and overlaps computation with memory transfers. This is a well-known optimization technique."
+- "Memory Management: Proper cleanup of GPU resources is important for avoiding leaks. RAII patterns help ensure resources are freed."
+- "Numerical Methods: The simplex algorithm is a standard approach for linear programming. Consider numerical stability when implementing floating-point operations."
+- "Code Reuse: Duplication of kernel code can lead to maintenance issues. Consider refactoring into reusable device functions."
+ +**Subjective style preferences** (ignore): +- "Consider using auto here instead of explicit type" +- "This function could be split into smaller functions" +- "Prefer range-based for loops" +- "Consider adding more comments" + +--- + +## cuOpt-Specific Considerations + +**GPU/CUDA Code**: +- Every CUDA call must have error checking (kernel launches, memory ops, sync) +- Host-device memory boundaries must be clear and correct +- Shared memory usage must avoid bank conflicts and size limits +- Warp divergence in hot paths should be minimized +- **Explicit stream creation**: Concurrent operations (barriers, async ops) must have dedicated streams, not reuse default stream +- **Stream ownership**: Clearly document stream lifecycle (who creates, who destroys) + +**Optimization Algorithms**: +- Numerical stability is paramount (epsilon checks, scaling, preconditioning) +- Correctness > Performance (verify algorithm produces correct results first) +- Handle degenerate cases (infeasible, unbounded, highly degenerate bases) +- Tolerance parameters must be documented and tested +- **Phase initialization**: Each algorithm phase (presolve, simplex, diving, crossover) must correctly initialize its state/bounds +- **Problem transformations**: Variable/constraint indices must be correctly mapped between original and transformed problems (presolve, folding, etc.) + +**Multi-Language APIs**: +- C API must maintain ABI stability (no struct layout changes) +- Python API changes require deprecation warnings +- Server API must version endpoints for breaking changes +- Error codes/messages must be consistent across all APIs + +**Performance Expectations**: +- Near real-time solutions for problems with millions of variables +- Scalability testing required for large problem sizes +- Memory usage must be reasonable (avoid O(n²) for n in millions) +- GPU utilization should be high for computation-heavy kernels + +**Documentation (docs/ folder)**: +When reviewing code changes that affect public APIs, algorithms, or behavior: +- Check if corresponding documentation in `docs/` needs updating +- Suggest specific doc updates for API changes (new parameters, return values, error codes) +- Flag missing documentation for new public functions/classes/endpoints +- Suggest adding examples for new features or changed behavior +- Recommend updating algorithm descriptions if solver behavior changes +- Verify version numbers and deprecation notices are documented +- Suggest clarifying numerical tolerances, performance characteristics, or GPU requirements + +Example documentation suggestion: +``` +HIGH: Missing documentation for API change + +Issue: New parameter `tolerance` added to solver API but not documented +Why: Users won't know how to use the new parameter +Suggest: Update docs/cuopt/linear_programming/api.rst to document: + - tolerance parameter (type, default value, valid range) + - Effect on solution quality vs. speed tradeoff + - Example usage with typical values +``` + +--- + +## Common Bug Patterns in cuOpt (From Historical Fixes) + +These patterns have caused real bugs. Pay special attention when reviewing code involving these areas: + +### 1. 
Problem Context Confusion +**Pattern**: Accessing variables/constraints from wrong problem representation (original vs presolve vs folded vs postsolve) + +**Red flags**: +- Functions that receive both `original_problem` and `transformed_problem` as parameters +- Index arithmetic between problem representations without explicit mapping +- Accessing `.num_vars` or `.variables[]` from wrong problem object +- Mixed use of original/transformed indices in same function + +**Example bug**: Accessing `original_problem.free_variables` when operating on `folded_problem` + +### 2. Algorithm Phase Initialization +**Pattern**: Bounds, tolerances, or state not properly initialized/reset when transitioning between algorithm phases + +**Red flags**: +- Diving, crossover, or barrier phases starting without explicit initialization +- Reusing data structures from previous phase without clearing/resetting +- Missing bounds initialization when entering new optimization phase +- Carrying over stale state from presolve to main solve + +**Example bug**: Diving algorithm using incorrect starting bounds from previous optimization phase + +### 3. CUDA Stream Lifecycle Issues +**Pattern**: Missing explicit stream creation for concurrent/barrier operations, or improper stream reuse + +**Red flags**: +- Barrier or concurrent operations without dedicated stream variable +- Multiple independent operations sharing same stream without justification +- Stream creation inside loop but destruction outside loop (or vice versa) +- Using `nullptr` or default stream for operations that need isolation +- Missing `cudaStreamDestroy` for explicitly created streams + +**Example bug**: Barrier concurrent operation reusing default stream instead of creating dedicated stream + +### 4. Numerical Assertion Failures +**Pattern**: Assertions that are too strict for legitimate edge cases, especially in degenerate problems + +**Red flags**: +- Assertions with hardcoded tolerances (e.g., `assert(abs(value) > 1e-10)`) +- Assertions that don't account for problem scaling or conditioning +- Assertions in pivot selection, basis updates, or feasibility checks without epsilon tolerance +- Assertions that fail on empty, singleton, or highly degenerate problems + +**Example bug**: CPUFJ assertion failing on valid near-zero pivots in degenerate problems + +### 5. Index Mapping Errors +**Pattern**: Incorrect mapping between variable/constraint indices after problem transformations + +**Red flags**: +- Off-by-one errors in index arithmetic between problem representations +- Missing or incorrect index offset when mapping between spaces +- Iterating over wrong range after problem size changes from presolve +- Accessing arrays with indices from wrong problem context + +**Example bug**: Using original problem indices to access folded problem arrays + +### 6. 
Uninitialized Algorithm State +**Pattern**: Algorithm state variables not initialized before use, especially after branching or problem modification + +**Red flags**: +- State variables declared but not initialized before first algorithm iteration +- Conditional initialization that might skip on certain problem types +- Missing reset when solving multiple problems sequentially +- Reusing solver object without proper cleanup between solves + +**Example bug**: Variable bounds not reset before diving, using stale values + +--- + +## Code Review Checklists by Change Type + +### When Reviewing Problem Transformations (Presolve/Folding/Postsolve) +- [ ] Are variable indices correctly mapped between original and transformed space? +- [ ] Does the code clearly identify which problem context it's operating in? +- [ ] Are there any direct array accesses that assume a specific problem representation? +- [ ] Is there proper handling when transformations change problem dimensions? +- [ ] Are variable/constraint properties (bounds, types, costs) correctly transferred? + +### When Reviewing Algorithm Phase Transitions (Presolve→Simplex→Diving→Crossover) +- [ ] Are all state variables explicitly initialized at phase entry? +- [ ] Are variable bounds reset/copied correctly for the new phase? +- [ ] Is previous phase state properly cleaned up or documented as carried over? +- [ ] Are tolerances and parameters appropriate for this phase? +- [ ] Does the code handle early exit from previous phase correctly? + +### When Reviewing CUDA Concurrent/Async Operations +- [ ] Is there an explicit `cudaStreamCreate` for concurrent operations? +- [ ] Is stream lifecycle clearly documented (creation and destruction)? +- [ ] Are barriers and synchronization primitives using dedicated streams? +- [ ] Is the default stream only used intentionally for serialization? +- [ ] Are stream errors checked with `cudaGetLastError` or equivalent? + +### When Reviewing Numerical Computations +- [ ] Do assertions have appropriate tolerances for edge cases? +- [ ] Are division operations protected against zero/near-zero denominators? +- [ ] Are comparisons using epsilon tolerances instead of exact equality? +- [ ] Are tolerances configurable or at least documented? +- [ ] Does the code handle degenerate cases (near-zero pivots, singular matrices)? + +### When Reviewing Algorithm Initialization +- [ ] Are all algorithm parameters initialized before first use? +- [ ] Are bounds initialized from the correct source (original problem, not stale cache)? +- [ ] Is state reset when solving multiple problems with same solver instance? +- [ ] Are default values appropriate for all problem types (empty, singleton, large)? +- [ ] Is initialization conditional code covered by tests? + +--- + +**Remember**: Focus on objective correctness, not subjective preference. Catch real bugs and design flaws, ignore style preferences. AI speed + human judgment. You catch patterns, humans understand business context. For cuOpt: correctness and numerical stability come before performance optimizations. 
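+
+---
+
+## Appendix: Stream Lifecycle Reference Sketch
+
+The CUDA concurrent/async checklist above asks for an explicitly created stream, error checking, and guaranteed destruction. The sketch below is illustrative only: `ScopedStream` and this `CUDA_CHECK` definition are assumed helpers for the example, not existing cuOpt utilities.
+
+```
+#include <cuda_runtime.h>
+#include <cstdio>
+#include <cstdlib>
+
+// Illustrative error-check macro (assumed for this sketch; a real macro may differ).
+#define CUDA_CHECK(call)                                              \
+  do {                                                                \
+    cudaError_t err_ = (call);                                        \
+    if (err_ != cudaSuccess) {                                        \
+      std::fprintf(stderr, "CUDA error %s at %s:%d\n",                \
+                   cudaGetErrorString(err_), __FILE__, __LINE__);     \
+      std::abort();                                                   \
+    }                                                                 \
+  } while (0)
+
+// Hypothetical RAII wrapper: creates a dedicated stream on entry and always
+// destroys it on scope exit, so concurrent work never silently reuses the
+// default stream and the stream cannot leak on error paths.
+class ScopedStream {
+ public:
+  ScopedStream() { CUDA_CHECK(cudaStreamCreate(&stream_)); }
+  ~ScopedStream() { cudaStreamDestroy(stream_); }  // best-effort cleanup, no throw
+  ScopedStream(const ScopedStream&) = delete;
+  ScopedStream& operator=(const ScopedStream&) = delete;
+  cudaStream_t get() const { return stream_; }
+
+ private:
+  cudaStream_t stream_{};
+};
+
+// Usage sketch: give an independent phase its own stream, check async errors,
+// and synchronize explicitly before the results are consumed.
+void zero_buffer_async(float* d_buf, size_t n) {
+  ScopedStream stream;
+  CUDA_CHECK(cudaMemsetAsync(d_buf, 0, n * sizeof(float), stream.get()));
+  CUDA_CHECK(cudaStreamSynchronize(stream.get()));
+}
+```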
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 0c606a33d..85e72ff10 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -15,6 +15,9 @@ concurrency: jobs: pr-builder: needs: + - check-lean-ci + - prevent-merge-with-lean-ci + - compute-matrix-filters - changed-files - checks - conda-cpp-build @@ -32,6 +35,77 @@ jobs: - test-self-hosted-server secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@release/25.12 + check-lean-ci: + runs-on: ubuntu-latest + outputs: + lean_ci_enabled: ${{ steps.check-label.outputs.lean_ci_enabled }} + steps: + - name: Check for lean-ci label + id: check-label + env: + GH_TOKEN: ${{ github.token }} + run: | + # Extract PR number from branch name (pull-request/123 -> 123) + PR_NUMBER=$(echo "${{ github.ref }}" | sed 's|refs/heads/pull-request/||') + echo "Checking PR #$PR_NUMBER for lean-ci label..." + + # Check if the PR has the 'lean-ci' label + if gh pr view "$PR_NUMBER" --repo "${{ github.repository }}" --json labels --jq '.labels[].name' | grep -q "^lean-ci$"; then + echo "lean_ci_enabled=true" >> $GITHUB_OUTPUT + echo "⚠️ Lean CI is enabled (lean-ci label found)" + else + echo "lean_ci_enabled=false" >> $GITHUB_OUTPUT + echo "✅ Full CI is enabled" + fi + + prevent-merge-with-lean-ci: + runs-on: ubuntu-latest + needs: check-lean-ci + steps: + - name: Check lean-ci status + run: | + if [ "${{ needs.check-lean-ci.outputs.lean_ci_enabled }}" == "true" ]; then + echo "❌ ERROR: This PR has the 'lean-ci' label enabled." + echo "Lean CI is only for testing purposes and should not be merged." + echo "Please remove the 'lean-ci' label and run full CI before merging." + exit 1 + else + echo "✅ No lean-ci label found. PR can be merged." + exit 0 + fi + compute-matrix-filters: + needs: check-lean-ci + runs-on: ubuntu-latest + outputs: + conda_lean_filter: ${{ steps.set-filters.outputs.conda_lean_filter }} + conda_test_filter: ${{ steps.set-filters.outputs.conda_test_filter }} + wheel_lean_filter: ${{ steps.set-filters.outputs.wheel_lean_filter }} + mps_parser_filter: ${{ steps.set-filters.outputs.mps_parser_filter }} + libcuopt_filter: ${{ steps.set-filters.outputs.libcuopt_filter }} + cuopt_server_filter: ${{ steps.set-filters.outputs.cuopt_server_filter }} + cuopt_sh_client_filter: ${{ steps.set-filters.outputs.cuopt_sh_client_filter }} + steps: + - name: Set matrix filters + id: set-filters + run: | + if [ "${{ needs.check-lean-ci.outputs.lean_ci_enabled }}" == "true" ]; then + echo "conda_lean_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.10\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT + echo "conda_test_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.13\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT + echo "wheel_lean_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.12\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT + echo "mps_parser_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.12\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT + echo "libcuopt_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.12\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT + echo "cuopt_server_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.12\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT + echo "cuopt_sh_client_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.12\")) | max_by(.CUDA_VER | 
split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT + else + echo "conda_lean_filter=." >> $GITHUB_OUTPUT + echo "conda_test_filter=." >> $GITHUB_OUTPUT + echo "wheel_lean_filter=." >> $GITHUB_OUTPUT + echo "mps_parser_filter=group_by([.ARCH, (.PY_VER |split(\".\") | map(tonumber))])|map(max_by([(.CUDA_VER|split(\".\")|map(tonumber))]))" >> $GITHUB_OUTPUT + echo "libcuopt_filter=group_by([.ARCH, (.CUDA_VER|split(\".\")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(\".\")|map(tonumber)))" >> $GITHUB_OUTPUT + echo "cuopt_server_filter=map(select(.ARCH == \"amd64\")) | group_by(.CUDA_VER|split(\".\")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(\".\")|map(tonumber)), (.CUDA_VER|split(\".\")|map(tonumber))]))" >> $GITHUB_OUTPUT + echo "cuopt_sh_client_filter=[map(select(.ARCH == \"amd64\")) | min_by((.PY_VER | split(\".\") | map(tonumber)), (.CUDA_VER | split(\".\") | map(-tonumber)))]" >> $GITHUB_OUTPUT + fi + changed-files: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@release/25.12 @@ -102,31 +176,33 @@ jobs: uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@release/25.12 with: enable_check_generated_files: false - conda-cpp-build: - needs: checks + needs: [checks, compute-matrix-filters] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@release/25.12 with: build_type: pull-request script: ci/build_cpp.sh + matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_lean_filter }} conda-cpp-tests: - needs: [conda-cpp-build, changed-files] + needs: [conda-cpp-build, changed-files, compute-matrix-filters] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@release/25.12 #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request script: ci/test_cpp.sh + matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_test_filter }} conda-python-build: - needs: conda-cpp-build + needs: [conda-cpp-build, compute-matrix-filters] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@release/25.12 with: build_type: pull-request script: ci/build_python.sh + matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_test_filter }} conda-python-tests: - needs: [conda-python-build, changed-files] + needs: [conda-python-build, changed-files, compute-matrix-filters] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@release/25.12 #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python @@ -134,6 +210,7 @@ jobs: run_codecov: false build_type: pull-request script: ci/test_python.sh + matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_test_filter }} docs-build: needs: conda-python-build secrets: inherit @@ -147,6 +224,7 @@ jobs: container_image: "rapidsai/ci-conda:25.12-latest" script: "ci/build_docs.sh" wheel-build-cuopt-mps-parser: + needs: compute-matrix-filters secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/25.12 with: @@ -156,20 +234,20 @@ jobs: package-type: python append-cuda-suffix: false # need 1 build per Python version and arch (but CUDA version doesn't matter so choose the latest) - matrix_filter: 'group_by([.ARCH, (.PY_VER |split(".") | map(tonumber))])|map(max_by([(.CUDA_VER|split(".")|map(tonumber))]))' + matrix_filter: ${{ needs.compute-matrix-filters.outputs.mps_parser_filter }} wheel-build-libcuopt: - needs: wheel-build-cuopt-mps-parser + needs: 
[wheel-build-cuopt-mps-parser, compute-matrix-filters] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/25.12 with: # build for every combination of arch and CUDA version, but only for the latest Python - matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) + matrix_filter: ${{ needs.compute-matrix-filters.outputs.libcuopt_filter }} package-type: cpp package-name: libcuopt build_type: pull-request script: ci/build_wheel_libcuopt.sh wheel-build-cuopt: - needs: [wheel-build-cuopt-mps-parser, wheel-build-libcuopt] + needs: [wheel-build-cuopt-mps-parser, wheel-build-libcuopt, compute-matrix-filters] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/25.12 with: @@ -177,16 +255,18 @@ jobs: script: ci/build_wheel_cuopt.sh package-name: cuopt package-type: python + matrix_filter: ${{ needs.compute-matrix-filters.outputs.wheel_lean_filter }} wheel-tests-cuopt: - needs: [wheel-build-cuopt, wheel-build-cuopt-mps-parser, wheel-build-cuopt-sh-client, changed-files] + needs: [wheel-build-cuopt, wheel-build-cuopt-mps-parser, wheel-build-cuopt-sh-client, changed-files, compute-matrix-filters] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@release/25.12 #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_cuopt with: build_type: pull-request script: ci/test_wheel_cuopt.sh + matrix_filter: ${{ needs.compute-matrix-filters.outputs.wheel_lean_filter }} wheel-build-cuopt-server: - needs: checks + needs: [checks, compute-matrix-filters] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/25.12 with: @@ -196,8 +276,9 @@ jobs: package-type: python pure-wheel: true # Only need 1 package per CUDA major version. This selects "ARCH=amd64 + the latest supported Python, 1 job per major CUDA version". - matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) + matrix_filter: ${{ needs.compute-matrix-filters.outputs.cuopt_server_filter }} wheel-build-cuopt-sh-client: + needs: compute-matrix-filters secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/25.12 with: @@ -208,15 +289,16 @@ jobs: append-cuda-suffix: false pure-wheel: true # only need 1 build (noarch package): this selects amd64, oldest-supported Python, latest-supported CUDA - matrix_filter: '[map(select(.ARCH == "amd64")) | min_by((.PY_VER | split(".") | map(tonumber)), (.CUDA_VER | split(".") | map(-tonumber)))]' + matrix_filter: ${{ needs.compute-matrix-filters.outputs.cuopt_sh_client_filter }} wheel-tests-cuopt-server: - needs: [wheel-build-cuopt, wheel-build-cuopt-server, changed-files] + needs: [wheel-build-cuopt, wheel-build-cuopt-server, changed-files, compute-matrix-filters] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@release/25.12 #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_cuopt_server with: build_type: pull-request script: ci/test_wheel_cuopt_server.sh + matrix_filter: ${{ needs.compute-matrix-filters.outputs.wheel_lean_filter }} test-self-hosted-server: needs: [wheel-build-cuopt, wheel-build-cuopt-server, changed-files] secrets: inherit