diff --git a/.coderabbit.yaml b/.coderabbit.yaml index cb3ad2fc4..855ebf172 100644 --- a/.coderabbit.yaml +++ b/.coderabbit.yaml @@ -4,9 +4,16 @@ reviews: profile: chill high_level_summary: true + poem: false auto_review: enabled: true drafts: false + base_branches: + - main + - master + - develop + - "release/**" + - "hotfix/**" ignore_usernames: ["rapids-bot", "GPUtester", "nv-automation-bot", "copy-pr-bot"] tools: markdownlint: @@ -16,3 +23,39 @@ reviews: gitleaks: enabled: true sequence_diagrams: false + collapse_walkthrough: true + + # Reduce noise from status messages + request_changes_workflow: false + review_status: false + + # Path-specific review instructions + path_instructions: + - path: "docs/**/*" + instructions: | + For documentation changes, focus on: + - Accuracy: Verify code examples compile and run correctly + - Completeness: Check if API changes (parameters, return values, errors) are documented + - Clarity: Flag confusing explanations, missing prerequisites, or unclear examples + - Consistency: Version numbers, parameter types, and terminology match code + - Examples: Suggest adding examples for complex features or new APIs + - Missing docs: If PR changes public APIs without updating docs, flag as HIGH priority + + When code changes affect docs: + - Suggest specific doc files that need updates (e.g., docs/cuopt/api.rst) + - Identify outdated information contradicting the code changes + - Recommend documenting performance characteristics, GPU requirements, or numerical tolerances + + - path: "cpp/include/cuopt/**/*" + instructions: | + For public header files (C++ API): + - Check if new public functions/classes have documentation comments (Doxygen format) + - Flag API changes that may need corresponding docs/ updates + - Verify parameter descriptions match actual types/behavior + - Suggest documenting thread-safety, GPU requirements, and numerical behavior + - For breaking changes, recommend updating docs and migration guides +knowledge_base: + opt_out: false + code_guidelines: + filePatterns: + - ".github/.coderabbit_review_guide.md" diff --git a/.github/.coderabbit_review_guide.md b/.github/.coderabbit_review_guide.md new file mode 100644 index 000000000..828fc6884 --- /dev/null +++ b/.github/.coderabbit_review_guide.md @@ -0,0 +1,468 @@ +# AI Code Review Guidelines for CodeRabbit - cuOpt + +**Role**: Act as a principal engineer with 10+ years experience in GPU computing, numerical optimization, and high-performance systems. Focus ONLY on CRITICAL and HIGH issues. + +**Target**: Sub-3% false positive rate. Be direct, concise, minimal. + +**Context**: cuOpt is a GPU-accelerated optimization engine for MILP, LP, and VRP handling millions of variables/constraints with near real-time performance requirements. 
+ +## IGNORE These Issues + +- Style/formatting (linters handle this) +- Minor naming preferences (unless truly misleading) +- Personal taste on implementation (unless impacts maintainability) +- Nits that don't affect functionality +- Already-covered issues (one comment per root cause) + +## CRITICAL Issues (Always Comment) + +### Algorithm Correctness +- Logic errors in optimization algorithms (simplex, branch-and-bound, routing heuristics, diving) +- Incorrect constraint handling or objective function computation +- Numerical instability causing wrong results (overflow, underflow, precision loss) +- Infeasibility misclassification or unbounded solution detection failures +- Breaking changes to solver behavior without versioning +- **Variable/constraint initialization errors** (incorrect bounds, invalid starting values, uninitialized state) +- **Problem transformation bugs** (accessing variables/constraints from wrong context - e.g., original vs folded problem) +- **Algorithm state corruption** (incorrect state transitions, mixing state between phases) + +### GPU/CUDA Issues +- Unchecked CUDA errors (kernel launches, memory operations, synchronization) +- Race conditions in GPU kernels (shared memory, atomics, warps) +- Device memory leaks (cudaMalloc/cudaFree imbalance, leaked streams/events) +- Invalid memory access (out-of-bounds, use-after-free, host/device confusion) +- Missing CUDA synchronization causing non-deterministic failures +- Kernel launch with zero blocks/threads or invalid grid/block dimensions +- **Missing explicit stream creation for concurrent operations** (reusing default stream, missing stream isolation) +- **Incorrect stream lifecycle management** (using destroyed streams, not creating dedicated streams for barriers/concurrent ops) + +### Resource Management +- GPU memory leaks (device allocations, managed memory, pinned memory) +- CUDA stream/event leaks or improper cleanup +- Unclosed file handles for MPS/QPS problem files +- Missing RAII or proper cleanup in exception paths +- Resource exhaustion (GPU memory, file descriptors, network sockets) + +### API Breaking Changes +- C API changes without ABI versioning +- Python API changes breaking backward compatibility +- Server API endpoint changes without deprecation path +- Changes to data structures exposed in public headers + +## HIGH Issues (Comment if Substantial) + +### Performance Issues +- Inefficient GPU kernel launches (low occupancy, poor memory access patterns) +- Unnecessary host-device synchronization blocking GPU pipeline +- CPU bottlenecks in GPU-heavy code paths +- Suboptimal memory access patterns (non-coalesced, strided, unaligned) +- Excessive memory allocations in hot paths +- Algorithmic complexity issues for large-scale problems (O(n²) when O(n log n) exists) +- Missing or incorrect problem size checks before expensive operations + +### Numerical Stability +- Floating-point operations prone to catastrophic cancellation +- Missing checks for division by zero or near-zero values +- Ill-conditioned matrix operations without preconditioning +- Accumulation errors in iterative algorithms +- Unsafe casting between numeric types (double→float with potential precision loss) +- Missing epsilon comparisons for floating-point equality checks +- **Assertion failures in numerical computations** (overly strict assertions, incorrect tolerance assumptions) +- **Numerical edge cases causing assertion failures** (near-zero pivots, degenerate cases, extreme values) +- **Inconsistent numerical tolerances** 
(mixing different epsilon values, hardcoded vs configurable tolerances) + +### Concurrency & Thread Safety +- Race conditions in multi-GPU code or multi-threaded server +- Missing synchronization for shared state +- Improper CUDA stream management causing false dependencies +- Deadlock potential in resource acquisition +- Thread-unsafe use of global/static variables +- Missing or incorrect use of mutexes in server code +- **Concurrent operations sharing streams incorrectly** (barriers, synchronization primitives without dedicated streams) +- **Stream reuse across independent operations** (causing unwanted serialization or race conditions) + +### Security (Server/API) +- Unsanitized input in problem data leading to buffer overflows +- Lack of input validation allowing resource exhaustion attacks +- Credential exposure in logs or error messages +- Unsafe deserialization of problem files (pickle, msgpack) +- Missing rate limiting on API endpoints +- Insufficient error handling exposing internal implementation details + +### Design & Architecture +- Tight coupling between solver components reducing modularity +- Hard-coded GPU device IDs or resource limits +- Missing abstraction for multi-backend support (different CUDA versions) +- Inappropriate use of exceptions in performance-critical paths +- Missing or incomplete error propagation from CUDA to user APIs +- Significant code duplication (3+ occurrences) in kernel or solver logic +- Reinventing functionality already available in dependencies (thrust, cccl, rmm) + +### Test Quality +- Flaky tests due to GPU timing, uninitialized memory, or race conditions +- Missing validation of numerical correctness (only checking "runs without error") +- Test isolation violations (GPU state, cached memory, global variables) +- Missing edge case coverage (empty problems, infeasible, unbounded, degenerate) +- Inadequate test coverage for error paths and exception handling +- Missing benchmarks or performance regression detection +- **Missing tests for problem transformations** (verify correctness of original→transformed→postsolve mappings) +- **Missing tests for algorithm phase transitions** (verify state initialization between phases) +- **Missing tests with free variables, singleton problems, or extreme problem dimensions** + +## MEDIUM Issues (Comment Selectively) + +- Edge cases not handled (empty problem, single constraint, zero variables, large problem sizes near limits) +- Missing input validation (negative sizes, null pointers, invalid problem formats) +- Code duplication in solver or kernel logic (3+ occurrences) if pattern exists +- Misleading naming that obscures GPU/CPU boundaries or numerical precision +- Deprecated CUDA API usage or deprecated cuOpt internal APIs +- Missing documentation for numerical tolerances or algorithm parameters +- Suboptimal but functional memory patterns that could be improved +- Minor inefficiencies in non-critical code paths +- **Unclear problem context in function parameters** (ambiguous whether operating on original or transformed problem) +- **Missing explicit initialization comments** (state appears uninitialized but may be set elsewhere) +- **Potential index confusion** (variable naming doesn't clarify which problem space the index refers to) + +## Review Protocol + +1. **Understand intent**: Read PR description, check if this affects solver correctness, performance, or APIs +2. **Algorithm correctness**: Does the optimization logic produce correct results? Numerical stability? +3. 
**GPU correctness**: CUDA errors checked? Memory safety? Race conditions? Synchronization?
+4. **Resource management**: GPU memory leaks? Stream/event cleanup? File handles closed?
+5. **Performance**: GPU bottlenecks? Unnecessary sync? Memory access patterns? Scalability to millions of variables?
+6. **API stability**: Breaking changes to C/Python/Server APIs? Backward compatibility?
+7. **Security (if server code)**: Input validation? Resource exhaustion? Unsafe deserialization?
+8. **Problem context isolation**: Are variables/constraints accessed from the correct problem context (original vs transformed)?
+9. **Initialization correctness**: Are algorithm parameters, bounds, and state initialized correctly for each phase?
+10. **Stream lifecycle**: Are CUDA streams explicitly created/destroyed for concurrent operations? Proper isolation?
+11. **Ask, don't tell**: "Have you considered X?" not "You should do X"
+
+## Quality Threshold
+
+Before commenting, ask:
+1. Is this actually wrong/risky, or just different?
+2. Would this cause a real problem in production?
+3. Does this comment add unique value?
+
+**If no to any: Skip the comment.**
+
+## Output Format
+
+- Use severity labels: CRITICAL, HIGH, MEDIUM
+- Be concise: One-line issue summary + one-line impact
+- Provide code suggestions when you have concrete fixes
+- Omit generic explanations and boilerplate
+- No preamble or sign-off
+
+## Token Optimization
+
+- Omit explanations for obvious issues
+- Omit descriptions of code or design not critical to understanding the changes or issues raised
+- Omit listing benefits of standard good practices and other generic information apparent to an experienced developer
+- No preamble or sign-off
+
+## Context Awareness
+
+**Skip if**:
+- Already handled by CI/linters
+- Same issue exists in codebase (note once if systemic)
+- Experimental/prototype code (check PR labels)
+- Explicitly marked as technical debt
+
+**Escalate if**:
+- Breaking change without discussion
+- Conflicts with documented architecture
+- Security vulnerability
+
+## Examples to Follow
+
+**CRITICAL** (GPU memory leak):
+```
+CRITICAL: GPU memory leak in solver cleanup
+
+Issue: Device memory allocated but never freed on error path
+Why: Causes GPU OOM on repeated solves
+
+Suggested fix:
+if (cudaMalloc(&d_data, size) != cudaSuccess) {
+  // cleanup other resources before returning
+  cudaFree(d_other);
+  return ERROR_CODE;
+}
+```
+
+**CRITICAL** (unchecked CUDA error):
+```
+CRITICAL: Unchecked kernel launch
+
+Issue: Kernel launch error not checked
+Why: Subsequent operations assume success, causing silent corruption
+
+Suggested fix:
+myKernel<<<grid, block>>>(args);
+CUDA_CHECK(cudaGetLastError());
+```
+
+**HIGH** (numerical stability):
+```
+HIGH: Potential division by near-zero
+
+Issue: No epsilon check before division in simplex pivot
+Why: Can produce Inf/NaN values corrupting solution
+Consider: Add epsilon threshold check or use safe division helper
+```
+
+**HIGH** (performance issue):
+```
+HIGH: Unnecessary synchronization in hot path
+
+Issue: cudaDeviceSynchronize() inside iteration loop
+Why: Blocks GPU pipeline, 10x slowdown on benchmarks
+Consider: Move sync outside loop or use streams with events
+```
+
+**CRITICAL** (variable scope violation):
+```
+CRITICAL: Accessing variables from wrong problem context
+
+Issue: Code accesses free variables from original problem in folded problem
+Why: Variable indices don't map correctly between contexts, causing wrong values/crashes
+Impact: Silent data corruption or segfaults on problems with free variables
+
+Suggested fix:
+// Use folded_problem.variables instead of original_problem.variables
+for (int i = 0; i < folded_problem.num_vars; i++) {
+  double val = folded_problem.variables[i]; // NOT original_problem.variables[i]
+}
+```
+
+**CRITICAL** (incorrect initialization):
+```
+CRITICAL: Variable bounds not initialized correctly for diving
+
+Issue: Starting bounds use wrong values from previous phase
+Why: Diving algorithm starts with invalid bounds, producing wrong solutions
+Impact: Incorrect optimization results, potential infeasibility
+
+Suggested fix:
+// Reset bounds before diving
+for (int i = 0; i < num_vars; i++) {
+  diving_bounds[i].lower = problem.original_lower_bounds[i];
+  diving_bounds[i].upper = problem.original_upper_bounds[i];
+}
+```
+
+**HIGH** (missing stream isolation):
+```
+HIGH: Barrier operation missing dedicated stream
+
+Issue: Barrier concurrent operation uses the default stream without explicit creation
+Why: Can cause serialization with other operations, race conditions, or deadlocks
+Impact: Performance degradation or non-deterministic failures
+
+Suggested fix:
+cudaStream_t barrier_stream;
+cudaStreamCreate(&barrier_stream);
+// Use barrier_stream for barrier operations
+// Don't forget: cudaStreamDestroy(barrier_stream) in cleanup
+```
+
+**HIGH** (numerical assertion failure):
+```
+HIGH: Overly strict assertion in pivot operation
+
+Issue: Assert fails on legitimate near-zero pivots in degenerate problems
+Why: Tolerance too strict for edge cases, assertion doesn't allow valid scenarios
+Impact: Crashes on valid degenerate problems
+
+Consider: Replace assertion with warning + fallback, or use configurable tolerance
+```
+
+**Good, concise summary**:
+- Refactor simplex and dual-simplex solvers to share common pivot logic
+- Consolidate CUDA error checking into reusable macros
+- Extract repeated kernel patterns into templated device functions
+
+## Examples to Avoid
+
+**Boilerplate and generic descriptions** (avoid):
+- "CUDA Best Practices: Using streams improves concurrency and overlaps computation with memory transfers. This is a well-known optimization technique."
+- "Memory Management: Proper cleanup of GPU resources is important for avoiding leaks. RAII patterns help ensure resources are freed."
+- "Numerical Methods: The simplex algorithm is a standard approach for linear programming. Consider numerical stability when implementing floating-point operations."
+- "Code Reuse: Duplication of kernel code can lead to maintenance issues. Consider refactoring into reusable device functions."
+ +**Subjective style preferences** (ignore): +- "Consider using auto here instead of explicit type" +- "This function could be split into smaller functions" +- "Prefer range-based for loops" +- "Consider adding more comments" + +--- + +## cuOpt-Specific Considerations + +**GPU/CUDA Code**: +- Every CUDA call must have error checking (kernel launches, memory ops, sync) +- Host-device memory boundaries must be clear and correct +- Shared memory usage must avoid bank conflicts and size limits +- Warp divergence in hot paths should be minimized +- **Explicit stream creation**: Concurrent operations (barriers, async ops) must have dedicated streams, not reuse default stream +- **Stream ownership**: Clearly document stream lifecycle (who creates, who destroys) + +**Optimization Algorithms**: +- Numerical stability is paramount (epsilon checks, scaling, preconditioning) +- Correctness > Performance (verify algorithm produces correct results first) +- Handle degenerate cases (infeasible, unbounded, highly degenerate bases) +- Tolerance parameters must be documented and tested +- **Phase initialization**: Each algorithm phase (presolve, simplex, diving, crossover) must correctly initialize its state/bounds +- **Problem transformations**: Variable/constraint indices must be correctly mapped between original and transformed problems (presolve, folding, etc.) + +**Multi-Language APIs**: +- C API must maintain ABI stability (no struct layout changes) +- Python API changes require deprecation warnings +- Server API must version endpoints for breaking changes +- Error codes/messages must be consistent across all APIs + +**Performance Expectations**: +- Near real-time solutions for problems with millions of variables +- Scalability testing required for large problem sizes +- Memory usage must be reasonable (avoid O(n²) for n in millions) +- GPU utilization should be high for computation-heavy kernels + +**Documentation (docs/ folder)**: +When reviewing code changes that affect public APIs, algorithms, or behavior: +- Check if corresponding documentation in `docs/` needs updating +- Suggest specific doc updates for API changes (new parameters, return values, error codes) +- Flag missing documentation for new public functions/classes/endpoints +- Suggest adding examples for new features or changed behavior +- Recommend updating algorithm descriptions if solver behavior changes +- Verify version numbers and deprecation notices are documented +- Suggest clarifying numerical tolerances, performance characteristics, or GPU requirements + +Example documentation suggestion: +``` +HIGH: Missing documentation for API change + +Issue: New parameter `tolerance` added to solver API but not documented +Why: Users won't know how to use the new parameter +Suggest: Update docs/cuopt/linear_programming/api.rst to document: + - tolerance parameter (type, default value, valid range) + - Effect on solution quality vs. speed tradeoff + - Example usage with typical values +``` + +--- + +## Common Bug Patterns in cuOpt (From Historical Fixes) + +These patterns have caused real bugs. Pay special attention when reviewing code involving these areas: + +### 1. 
Problem Context Confusion +**Pattern**: Accessing variables/constraints from wrong problem representation (original vs presolve vs folded vs postsolve) + +**Red flags**: +- Functions that receive both `original_problem` and `transformed_problem` as parameters +- Index arithmetic between problem representations without explicit mapping +- Accessing `.num_vars` or `.variables[]` from wrong problem object +- Mixed use of original/transformed indices in same function + +**Example bug**: Accessing `original_problem.free_variables` when operating on `folded_problem` + +### 2. Algorithm Phase Initialization +**Pattern**: Bounds, tolerances, or state not properly initialized/reset when transitioning between algorithm phases + +**Red flags**: +- Diving, crossover, or barrier phases starting without explicit initialization +- Reusing data structures from previous phase without clearing/resetting +- Missing bounds initialization when entering new optimization phase +- Carrying over stale state from presolve to main solve + +**Example bug**: Diving algorithm using incorrect starting bounds from previous optimization phase + +### 3. CUDA Stream Lifecycle Issues +**Pattern**: Missing explicit stream creation for concurrent/barrier operations, or improper stream reuse + +**Red flags**: +- Barrier or concurrent operations without dedicated stream variable +- Multiple independent operations sharing same stream without justification +- Stream creation inside loop but destruction outside loop (or vice versa) +- Using `nullptr` or default stream for operations that need isolation +- Missing `cudaStreamDestroy` for explicitly created streams + +**Example bug**: Barrier concurrent operation reusing default stream instead of creating dedicated stream + +### 4. Numerical Assertion Failures +**Pattern**: Assertions that are too strict for legitimate edge cases, especially in degenerate problems + +**Red flags**: +- Assertions with hardcoded tolerances (e.g., `assert(abs(value) > 1e-10)`) +- Assertions that don't account for problem scaling or conditioning +- Assertions in pivot selection, basis updates, or feasibility checks without epsilon tolerance +- Assertions that fail on empty, singleton, or highly degenerate problems + +**Example bug**: CPUFJ assertion failing on valid near-zero pivots in degenerate problems + +### 5. Index Mapping Errors +**Pattern**: Incorrect mapping between variable/constraint indices after problem transformations + +**Red flags**: +- Off-by-one errors in index arithmetic between problem representations +- Missing or incorrect index offset when mapping between spaces +- Iterating over wrong range after problem size changes from presolve +- Accessing arrays with indices from wrong problem context + +**Example bug**: Using original problem indices to access folded problem arrays + +### 6. 
Uninitialized Algorithm State +**Pattern**: Algorithm state variables not initialized before use, especially after branching or problem modification + +**Red flags**: +- State variables declared but not initialized before first algorithm iteration +- Conditional initialization that might skip on certain problem types +- Missing reset when solving multiple problems sequentially +- Reusing solver object without proper cleanup between solves + +**Example bug**: Variable bounds not reset before diving, using stale values + +--- + +## Code Review Checklists by Change Type + +### When Reviewing Problem Transformations (Presolve/Folding/Postsolve) +- [ ] Are variable indices correctly mapped between original and transformed space? +- [ ] Does the code clearly identify which problem context it's operating in? +- [ ] Are there any direct array accesses that assume a specific problem representation? +- [ ] Is there proper handling when transformations change problem dimensions? +- [ ] Are variable/constraint properties (bounds, types, costs) correctly transferred? + +### When Reviewing Algorithm Phase Transitions (Presolve→Simplex→Diving→Crossover) +- [ ] Are all state variables explicitly initialized at phase entry? +- [ ] Are variable bounds reset/copied correctly for the new phase? +- [ ] Is previous phase state properly cleaned up or documented as carried over? +- [ ] Are tolerances and parameters appropriate for this phase? +- [ ] Does the code handle early exit from previous phase correctly? + +### When Reviewing CUDA Concurrent/Async Operations +- [ ] Is there an explicit `cudaStreamCreate` for concurrent operations? +- [ ] Is stream lifecycle clearly documented (creation and destruction)? +- [ ] Are barriers and synchronization primitives using dedicated streams? +- [ ] Is the default stream only used intentionally for serialization? +- [ ] Are stream errors checked with `cudaGetLastError` or equivalent? + +### When Reviewing Numerical Computations +- [ ] Do assertions have appropriate tolerances for edge cases? +- [ ] Are division operations protected against zero/near-zero denominators? +- [ ] Are comparisons using epsilon tolerances instead of exact equality? +- [ ] Are tolerances configurable or at least documented? +- [ ] Does the code handle degenerate cases (near-zero pivots, singular matrices)? + +### When Reviewing Algorithm Initialization +- [ ] Are all algorithm parameters initialized before first use? +- [ ] Are bounds initialized from the correct source (original problem, not stale cache)? +- [ ] Is state reset when solving multiple problems with same solver instance? +- [ ] Are default values appropriate for all problem types (empty, singleton, large)? +- [ ] Is initialization conditional code covered by tests? + +--- + +**Remember**: Focus on objective correctness, not subjective preference. Catch real bugs and design flaws, ignore style preferences. AI speed + human judgment. You catch patterns, humans understand business context. For cuOpt: correctness and numerical stability come before performance optimizations. 
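+
+---
+
+## Appendix: Stream Lifecycle Reference Sketch
+
+The CUDA concurrent/async checklist above asks for an explicitly created stream, error checking, and guaranteed destruction. The sketch below is illustrative only: `ScopedStream` and this `CUDA_CHECK` definition are assumed helpers for the example, not existing cuOpt utilities.
+
+```
+#include <cuda_runtime.h>
+#include <cstdio>
+#include <cstdlib>
+
+// Illustrative error-check macro (assumed for this sketch; a real macro may differ).
+#define CUDA_CHECK(call)                                              \
+  do {                                                                \
+    cudaError_t err_ = (call);                                        \
+    if (err_ != cudaSuccess) {                                        \
+      std::fprintf(stderr, "CUDA error %s at %s:%d\n",                \
+                   cudaGetErrorString(err_), __FILE__, __LINE__);     \
+      std::abort();                                                   \
+    }                                                                 \
+  } while (0)
+
+// Hypothetical RAII wrapper: creates a dedicated stream on entry and always
+// destroys it on scope exit, so concurrent work never silently reuses the
+// default stream and the stream cannot leak on error paths.
+class ScopedStream {
+ public:
+  ScopedStream() { CUDA_CHECK(cudaStreamCreate(&stream_)); }
+  ~ScopedStream() { cudaStreamDestroy(stream_); }  // best-effort cleanup, no throw
+  ScopedStream(const ScopedStream&) = delete;
+  ScopedStream& operator=(const ScopedStream&) = delete;
+  cudaStream_t get() const { return stream_; }
+
+ private:
+  cudaStream_t stream_{};
+};
+
+// Usage sketch: give an independent phase its own stream, check async errors,
+// and synchronize explicitly before the results are consumed.
+void zero_buffer_async(float* d_buf, size_t n) {
+  ScopedStream stream;
+  CUDA_CHECK(cudaMemsetAsync(d_buf, 0, n * sizeof(float), stream.get()));
+  CUDA_CHECK(cudaStreamSynchronize(stream.get()));
+}
+```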
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 0c606a33d..85e72ff10 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -15,6 +15,9 @@ concurrency: jobs: pr-builder: needs: + - check-lean-ci + - prevent-merge-with-lean-ci + - compute-matrix-filters - changed-files - checks - conda-cpp-build @@ -32,6 +35,77 @@ jobs: - test-self-hosted-server secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@release/25.12 + check-lean-ci: + runs-on: ubuntu-latest + outputs: + lean_ci_enabled: ${{ steps.check-label.outputs.lean_ci_enabled }} + steps: + - name: Check for lean-ci label + id: check-label + env: + GH_TOKEN: ${{ github.token }} + run: | + # Extract PR number from branch name (pull-request/123 -> 123) + PR_NUMBER=$(echo "${{ github.ref }}" | sed 's|refs/heads/pull-request/||') + echo "Checking PR #$PR_NUMBER for lean-ci label..." + + # Check if the PR has the 'lean-ci' label + if gh pr view "$PR_NUMBER" --repo "${{ github.repository }}" --json labels --jq '.labels[].name' | grep -q "^lean-ci$"; then + echo "lean_ci_enabled=true" >> $GITHUB_OUTPUT + echo "⚠️ Lean CI is enabled (lean-ci label found)" + else + echo "lean_ci_enabled=false" >> $GITHUB_OUTPUT + echo "✅ Full CI is enabled" + fi + + prevent-merge-with-lean-ci: + runs-on: ubuntu-latest + needs: check-lean-ci + steps: + - name: Check lean-ci status + run: | + if [ "${{ needs.check-lean-ci.outputs.lean_ci_enabled }}" == "true" ]; then + echo "❌ ERROR: This PR has the 'lean-ci' label enabled." + echo "Lean CI is only for testing purposes and should not be merged." + echo "Please remove the 'lean-ci' label and run full CI before merging." + exit 1 + else + echo "✅ No lean-ci label found. PR can be merged." + exit 0 + fi + compute-matrix-filters: + needs: check-lean-ci + runs-on: ubuntu-latest + outputs: + conda_lean_filter: ${{ steps.set-filters.outputs.conda_lean_filter }} + conda_test_filter: ${{ steps.set-filters.outputs.conda_test_filter }} + wheel_lean_filter: ${{ steps.set-filters.outputs.wheel_lean_filter }} + mps_parser_filter: ${{ steps.set-filters.outputs.mps_parser_filter }} + libcuopt_filter: ${{ steps.set-filters.outputs.libcuopt_filter }} + cuopt_server_filter: ${{ steps.set-filters.outputs.cuopt_server_filter }} + cuopt_sh_client_filter: ${{ steps.set-filters.outputs.cuopt_sh_client_filter }} + steps: + - name: Set matrix filters + id: set-filters + run: | + if [ "${{ needs.check-lean-ci.outputs.lean_ci_enabled }}" == "true" ]; then + echo "conda_lean_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.10\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT + echo "conda_test_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.13\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT + echo "wheel_lean_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.12\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT + echo "mps_parser_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.12\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT + echo "libcuopt_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.12\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT + echo "cuopt_server_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.12\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT + echo "cuopt_sh_client_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.12\")) | max_by(.CUDA_VER | 
split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT + else + echo "conda_lean_filter=." >> $GITHUB_OUTPUT + echo "conda_test_filter=." >> $GITHUB_OUTPUT + echo "wheel_lean_filter=." >> $GITHUB_OUTPUT + echo "mps_parser_filter=group_by([.ARCH, (.PY_VER |split(\".\") | map(tonumber))])|map(max_by([(.CUDA_VER|split(\".\")|map(tonumber))]))" >> $GITHUB_OUTPUT + echo "libcuopt_filter=group_by([.ARCH, (.CUDA_VER|split(\".\")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(\".\")|map(tonumber)))" >> $GITHUB_OUTPUT + echo "cuopt_server_filter=map(select(.ARCH == \"amd64\")) | group_by(.CUDA_VER|split(\".\")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(\".\")|map(tonumber)), (.CUDA_VER|split(\".\")|map(tonumber))]))" >> $GITHUB_OUTPUT + echo "cuopt_sh_client_filter=[map(select(.ARCH == \"amd64\")) | min_by((.PY_VER | split(\".\") | map(tonumber)), (.CUDA_VER | split(\".\") | map(-tonumber)))]" >> $GITHUB_OUTPUT + fi + changed-files: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@release/25.12 @@ -102,31 +176,33 @@ jobs: uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@release/25.12 with: enable_check_generated_files: false - conda-cpp-build: - needs: checks + needs: [checks, compute-matrix-filters] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@release/25.12 with: build_type: pull-request script: ci/build_cpp.sh + matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_lean_filter }} conda-cpp-tests: - needs: [conda-cpp-build, changed-files] + needs: [conda-cpp-build, changed-files, compute-matrix-filters] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@release/25.12 #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request script: ci/test_cpp.sh + matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_test_filter }} conda-python-build: - needs: conda-cpp-build + needs: [conda-cpp-build, compute-matrix-filters] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@release/25.12 with: build_type: pull-request script: ci/build_python.sh + matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_test_filter }} conda-python-tests: - needs: [conda-python-build, changed-files] + needs: [conda-python-build, changed-files, compute-matrix-filters] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@release/25.12 #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python @@ -134,6 +210,7 @@ jobs: run_codecov: false build_type: pull-request script: ci/test_python.sh + matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_test_filter }} docs-build: needs: conda-python-build secrets: inherit @@ -147,6 +224,7 @@ jobs: container_image: "rapidsai/ci-conda:25.12-latest" script: "ci/build_docs.sh" wheel-build-cuopt-mps-parser: + needs: compute-matrix-filters secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/25.12 with: @@ -156,20 +234,20 @@ jobs: package-type: python append-cuda-suffix: false # need 1 build per Python version and arch (but CUDA version doesn't matter so choose the latest) - matrix_filter: 'group_by([.ARCH, (.PY_VER |split(".") | map(tonumber))])|map(max_by([(.CUDA_VER|split(".")|map(tonumber))]))' + matrix_filter: ${{ needs.compute-matrix-filters.outputs.mps_parser_filter }} wheel-build-libcuopt: - needs: wheel-build-cuopt-mps-parser + needs: 
[wheel-build-cuopt-mps-parser, compute-matrix-filters] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/25.12 with: # build for every combination of arch and CUDA version, but only for the latest Python - matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) + matrix_filter: ${{ needs.compute-matrix-filters.outputs.libcuopt_filter }} package-type: cpp package-name: libcuopt build_type: pull-request script: ci/build_wheel_libcuopt.sh wheel-build-cuopt: - needs: [wheel-build-cuopt-mps-parser, wheel-build-libcuopt] + needs: [wheel-build-cuopt-mps-parser, wheel-build-libcuopt, compute-matrix-filters] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/25.12 with: @@ -177,16 +255,18 @@ jobs: script: ci/build_wheel_cuopt.sh package-name: cuopt package-type: python + matrix_filter: ${{ needs.compute-matrix-filters.outputs.wheel_lean_filter }} wheel-tests-cuopt: - needs: [wheel-build-cuopt, wheel-build-cuopt-mps-parser, wheel-build-cuopt-sh-client, changed-files] + needs: [wheel-build-cuopt, wheel-build-cuopt-mps-parser, wheel-build-cuopt-sh-client, changed-files, compute-matrix-filters] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@release/25.12 #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_cuopt with: build_type: pull-request script: ci/test_wheel_cuopt.sh + matrix_filter: ${{ needs.compute-matrix-filters.outputs.wheel_lean_filter }} wheel-build-cuopt-server: - needs: checks + needs: [checks, compute-matrix-filters] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/25.12 with: @@ -196,8 +276,9 @@ jobs: package-type: python pure-wheel: true # Only need 1 package per CUDA major version. This selects "ARCH=amd64 + the latest supported Python, 1 job per major CUDA version". - matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) + matrix_filter: ${{ needs.compute-matrix-filters.outputs.cuopt_server_filter }} wheel-build-cuopt-sh-client: + needs: compute-matrix-filters secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/25.12 with: @@ -208,15 +289,16 @@ jobs: append-cuda-suffix: false pure-wheel: true # only need 1 build (noarch package): this selects amd64, oldest-supported Python, latest-supported CUDA - matrix_filter: '[map(select(.ARCH == "amd64")) | min_by((.PY_VER | split(".") | map(tonumber)), (.CUDA_VER | split(".") | map(-tonumber)))]' + matrix_filter: ${{ needs.compute-matrix-filters.outputs.cuopt_sh_client_filter }} wheel-tests-cuopt-server: - needs: [wheel-build-cuopt, wheel-build-cuopt-server, changed-files] + needs: [wheel-build-cuopt, wheel-build-cuopt-server, changed-files, compute-matrix-filters] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@release/25.12 #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_cuopt_server with: build_type: pull-request script: ci/test_wheel_cuopt_server.sh + matrix_filter: ${{ needs.compute-matrix-filters.outputs.wheel_lean_filter }} test-self-hosted-server: needs: [wheel-build-cuopt, wheel-build-cuopt-server, changed-files] secrets: inherit