diff --git a/.github/workflows/conventional-commits.yml b/.github/workflows/conventional-commits.yml index 801a12802..5a02cd1e3 100644 --- a/.github/workflows/conventional-commits.yml +++ b/.github/workflows/conventional-commits.yml @@ -66,6 +66,7 @@ jobs: blas math numeric + mixedprecision cmake ci docker diff --git a/CMakeLists.txt b/CMakeLists.txt index ce56924bc..d5718a859 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -165,6 +165,7 @@ option(UNIVERSAL_BUILD_MIXEDPRECISION_INTERPOLATE "Set to ON to build mixed-pr option(UNIVERSAL_BUILD_MIXEDPRECISION_OPTIMIZE "Set to ON to build mixed-precision optimization" OFF) option(UNIVERSAL_BUILD_MIXEDPRECISION_TENSOR "Set to ON to build mixed-precision tensor algebra" OFF) option(UNIVERSAL_BUILD_MIXEDPRECISION_METHODOLOGY "Set to ON to build mixed-precision methodology" OFF) +option(UNIVERSAL_BUILD_MIXEDPRECISION_POP "Set to ON to build POP precision tuning" OFF) # type hierachy, conversions, casting, coercions, and juggling option(UNIVERSAL_BUILD_TYPE_HIERARCHY "Set to ON to build type hierarchy tests" OFF) @@ -894,6 +895,7 @@ if(UNIVERSAL_BUILD_MIXEDPRECISION_SDK) set(UNIVERSAL_BUILD_MIXEDPRECISION_OPTIMIZE ON) set(UNIVERSAL_BUILD_MIXEDPRECISION_TENSOR ON) set(UNIVERSAL_BUILD_MIXEDPRECISION_METHODOLOGY ON) + set(UNIVERSAL_BUILD_MIXEDPRECISION_POP ON) endif(UNIVERSAL_BUILD_MIXEDPRECISION_SDK) ################################################################## @@ -1142,6 +1144,10 @@ if(UNIVERSAL_BUILD_MIXEDPRECISION_METHODOLOGY) add_subdirectory("mixedprecision/methodology") endif(UNIVERSAL_BUILD_MIXEDPRECISION_METHODOLOGY) +if(UNIVERSAL_BUILD_MIXEDPRECISION_POP) +add_subdirectory("mixedprecision/pop") +endif(UNIVERSAL_BUILD_MIXEDPRECISION_POP) + ################################################################## ### benchmark environment diff --git a/docs/design/pop-developer-guide.md b/docs/design/pop-developer-guide.md new file mode 100644 index 000000000..5edc1ff0c --- /dev/null +++ b/docs/design/pop-developer-guide.md @@ -0,0 +1,349 @@ +# POP Precision Tuning — Developer Guide + +This guide shows how to use the POP (Precision-Optimized Programs) facility in the Universal Numbers Library to determine the minimum number of bits each variable and intermediate in a numerical program needs to meet a specified accuracy target. + +## Quick Start + +Include the umbrella header and use the `sw::universal` namespace: + +```cpp +#include +using namespace sw::universal; +``` + +Build with the POP test suite enabled: + +```bash +mkdir build && cd build +cmake -DUNIVERSAL_BUILD_MIXEDPRECISION_POP=ON .. +make -j4 +ctest -R pop +``` + +## Step-by-Step Workflow + +### Step 1: Build an Expression Graph + +An `ExprGraph` is a directed acyclic graph (DAG) representing your computation. Each node is either a leaf (constant or variable with a known value range) or an arithmetic operation. + +```cpp +ExprGraph g; + +// Leaf nodes: variables with observed value ranges [lo, hi] +int a = g.variable("a", 1.0, 10.0); +int b = g.variable("b", 0.5, 5.0); + +// Constants +int k = g.constant(3.14159, "pi"); + +// Arithmetic operations return node IDs +int ab = g.mul(a, b); // a * b +int sum = g.add(ab, k); // a*b + pi +int diff = g.sub(a, b); // a - b +int ratio = g.div(a, b); // a / b + +// Unary operations +int neg_a = g.neg(a); // -a +int abs_b = g.abs(b); // |b| +int root = g.sqrt(ab); // sqrt(a*b) +``` + +Each node automatically computes its value range and UFP (unit in the first place) from its inputs using conservative interval arithmetic. + +### Step 2: Set Accuracy Requirements + +Specify how many significant bits (NSB) the output(s) must have: + +```cpp +g.require_nsb(sum, 24); // require 24 significant bits at output +``` + +You can set requirements on multiple nodes. POP propagates backwards from all requirements simultaneously. + +### Step 3: Analyze — Choose Your Solver + +There are two analysis modes: + +**Option A: Iterative Fixpoint (no LP solver, simpler, always available)** + +```cpp +g.analyze(); +``` + +This runs forward and backward transfer function passes until convergence. Good for straight-line code and single-output programs. + +**Option B: LP-Based Optimal Solver (recommended)** + +```cpp +PopSolver solver; +bool ok = solver.solve(g); +if (ok) { + solver.report(std::cout, g); +} +``` + +This formulates the precision constraints as a Linear Program and finds the globally optimal (minimum total bits) assignment. Better for programs with multiple outputs and competing requirements. + +### Step 4: Refine Carry Bits (Optional, 10-30% Reduction) + +The default carry bit (`carry=1`) is conservative. `CarryAnalyzer` uses policy iteration to identify operations where `carry=0` is safe, reducing total bits: + +```cpp +CarryAnalyzer ca; +int iters = ca.refine(g); // alternates: solve LP -> update carries -> repeat +ca.report(std::cout, g); +``` + +### Step 5: Read Results + +After analysis, query the precision assignment at each node: + +```cpp +int nsb = g.get_nsb(node_id); // significant bits assigned +const ExprNode& node = g.get_node(node_id); // full node details +``` + +Get a type recommendation using `TypeAdvisor`: + +```cpp +TypeAdvisor advisor; +std::string type_name = g.recommended_type(node_id, advisor); +// e.g. "half (cfloat<16,5>)", "single (cfloat<32,8>)" +``` + +Print a full report: + +```cpp +g.report(std::cout); // basic: ID, name, op, ufp, fwd, bwd, final, req +g.report(std::cout, advisor); // with type recommendations +``` + +### Step 6: Generate Code (Optional) + +`PopCodeGenerator` emits a C++ header with type aliases for each variable: + +```cpp +PopCodeGenerator gen(g, advisor); + +// Type alias header +std::string header = gen.generateHeader("MY_PRECISION_CONFIG_HPP"); +// #pragma once +// using type_a = sw::universal::cfloat<16,5>; +// using type_b = sw::universal::cfloat<16,5>; +// ... + +// Analysis report as comment block +std::string report = gen.generateReport(); +// /* POP Precision Analysis Report +// * a nsb=17 -> half (cfloat<16,5>) +// * ... +// * Bit savings: 42.3% +// */ + +// Example function using the type aliases +std::string code = gen.generateExampleCode("compute"); +``` + +## Integration with `range_analyzer` + +If you already use `range_analyzer` to profile your program (Step 2 of the mixed-precision methodology), you can feed its results directly into the expression graph: + +```cpp +range_analyzer ra_x, ra_y; + +// ... observe values during profiling ... +for (double val : x_samples) { + ra_x.observe(val); +} +for (double val : y_samples) { + ra_y.observe(val); +} + +ExprGraph g; +int x = g.variable("x", ra_x); // extracts lo, hi, ufp from analyzer +int y = g.variable("y", ra_y); +``` + +The `range_analyzer` provides the dynamic range data that POP's static analysis needs for the UFP values at each control point. + +## Complete Example: Determinant + +A 2x2 matrix determinant `det = a*d - b*c` with cancellation: + +```cpp +#include +#include + +int main() { + using namespace sw::universal; + + ExprGraph g; + + // Matrix elements with known ranges + int a = g.variable("a", 1.0, 10.0); + int b = g.variable("b", 1.0, 10.0); + int c = g.variable("c", 1.0, 10.0); + int d = g.variable("d", 1.0, 10.0); + + // det = a*d - b*c + int ad = g.mul(a, d); + int bc = g.mul(b, c); + int det = g.sub(ad, bc); + + // Require 20 significant bits at the determinant + g.require_nsb(det, 20); + + // Solve with LP + PopSolver solver; + solver.solve(g); + + // Refine carries + CarryAnalyzer ca; + ca.refine(g); + + // Report with type recommendations + TypeAdvisor advisor; + g.report(std::cout, advisor); + + // Note: a,b,c,d will need MORE than 20 bits because + // subtraction can cancel leading bits (ufp shift). + // POP automatically accounts for this. + + return 0; +} +``` + +## Complete Example: Dot Product with Profiling + +```cpp +#include +#include +#include + +int main() { + using namespace sw::universal; + + // Step 1: Profile the computation + range_analyzer ra_a, ra_b, ra_prod; + + double a[] = { 1.5, -2.3, 0.7, 3.1 }; + double b[] = { 4.2, 1.8, 5.5, -0.9 }; + for (int i = 0; i < 4; ++i) { + ra_a.observe(a[i]); + ra_b.observe(b[i]); + ra_prod.observe(a[i] * b[i]); + } + + // Step 2: Build expression graph from profiled ranges + ExprGraph g; + int va = g.variable("a", ra_a); + int vb = g.variable("b", ra_b); + int accum = g.mul(va, vb); + + for (int i = 1; i < 4; ++i) { + int ai = g.variable("a" + std::to_string(i), ra_a); + int bi = g.variable("b" + std::to_string(i), ra_b); + int pi = g.mul(ai, bi); + accum = g.add(accum, pi); + } + + g.require_nsb(accum, 16); + + // Step 3: Optimize + PopSolver solver; + solver.solve(g); + + CarryAnalyzer ca; + ca.refine(g); + + // Step 4: Generate mixed-precision code + TypeAdvisor advisor; + PopCodeGenerator gen(g, advisor); + std::cout << gen.generateReport(); + std::cout << gen.generateHeader(); + + return 0; +} +``` + +## Architecture Overview + +### Headers + +| Header | Purpose | +|--------|---------| +| `` | Umbrella header — includes everything below | +| `` | Forward and backward transfer functions (`constexpr`) | +| `` | UFP computation, bridge to `range_analyzer` | +| `` | `ExprGraph` DAG builder and iterative fixpoint analysis | +| `` | Embedded header-only Big-M simplex LP solver | +| `` | `PopSolver`: translates graph to LP constraints and solves | +| `` | Optional GLPK ILP binding (when `UNIVERSAL_HAS_GLPK` defined) | +| `` | `CarryAnalyzer`: policy iteration for carry-bit refinement | +| `` | `PopCodeGenerator`: emits C++ type alias headers | + +### Key Types + +| Type | Description | +|------|-------------| +| `precision_info` | `{int ufp, int nsb}` pair with `lsb()` method | +| `ExprNode` | DAG node: op, range, precision fields, carry, consumers | +| `OpKind` | Enum: `Constant`, `Variable`, `Add`, `Sub`, `Mul`, `Div`, `Neg`, `Abs`, `Sqrt` | +| `ExprGraph` | DAG builder with `variable()`, `constant()`, arithmetic ops, `analyze()`, `report()` | +| `PopSolver` | LP-based solver: `solve(graph)`, `total_nsb()`, `report()` | +| `CarryAnalyzer` | Carry refinement: `refine(graph)`, `iterations()`, `report()` | +| `PopCodeGenerator` | Code gen: `generateHeader()`, `generateReport()`, `generateExampleCode()` | +| `SimplexSolver` | Embedded LP solver: `set_num_vars()`, `add_ge_constraint()`, `solve()` | +| `LPStatus` | Enum: `Optimal`, `Infeasible`, `Unbounded`, `MaxIterations` | + +### Transfer Functions + +The `constexpr` transfer functions in `transfer.hpp` can also be used standalone for manual precision reasoning: + +```cpp +// Forward: given input precisions, what is the output precision? +precision_info x{3, 24}; // ufp=3, nsb=24 +precision_info y{1, 16}; // ufp=1, nsb=16 +auto z = forward_add(x, y, /*ufp_z=*/3, /*carry=*/1); +// z.nsb tells you the output precision + +// Backward: given required output precision, what do inputs need? +int nsb_x_needed = backward_add_lhs(/*nsb_z=*/20, /*ufp_z=*/3, /*ufp_x=*/3, /*carry=*/1); +int nsb_y_needed = backward_add_rhs(/*nsb_z=*/20, /*ufp_z=*/3, /*ufp_y=*/1, /*carry=*/1); +``` + +### Using GLPK for Larger Problems + +For problems with more than ~100 variables, the embedded simplex solver may be slow. Link against GLPK for true integer solutions and better performance: + +```bash +cmake -DUNIVERSAL_BUILD_MIXEDPRECISION_POP=ON -DUNIVERSAL_USE_GLPK=ON .. +``` + +The `PopSolver` automatically uses `GlpkSolver` when `UNIVERSAL_HAS_GLPK` is defined; no code changes are needed. + +## Concepts + +### UFP (Unit in the First Place) + +The UFP of a value `x` is `floor(log2(|x|))`, the exponent of the most significant bit. For a range `[lo, hi]`, the UFP is computed from the maximum absolute value. UFP determines how many bits of the representation are used for magnitude vs. precision. + +### NSB (Number of Significant Bits) + +The NSB of a value is `-log2(relative_error)`, measuring how many bits carry useful information. A 32-bit IEEE float has 23-24 significant bits; a 64-bit double has 52-53. + +### Transfer Functions + +Transfer functions describe how precision propagates through arithmetic. For addition `z = x + y`: +- **Forward**: `nsb(z) = ufp(z) - min(lsb(x), lsb(y)) + 1 + carry` +- **Backward**: `nsb(x) >= nsb(z) + ufp(z) - ufp(x) + carry` + +Key insight: subtraction of nearly equal values (cancellation) causes `ufp(z) << ufp(x)`, demanding much higher input precision — POP automatically detects and handles this. + +### Carry Bits + +The carry bit is 0 or 1, accounting for potential rounding error propagation at each operation. The conservative default is `carry=1`. Policy iteration can prove `carry=0` when the operand error cannot affect the result, reducing total bits by 10-30%. + +## Reference + +Dorra Ben Khalifa, "Fast and Efficient Bit-Level Precision Tuning," PhD thesis, Université de Perpignan, 2021. Available at https://theses.hal.science/tel-03509266. diff --git a/docs/design/pop-precision-tuning.md b/docs/design/pop-precision-tuning.md index ae6baeb9e..5e847d1b1 100644 --- a/docs/design/pop-precision-tuning.md +++ b/docs/design/pop-precision-tuning.md @@ -1,10 +1,12 @@ -# POP Precision Tuning Integration Design +# POP Precision Tuning — Design and Implementation ## Background POP (Precision-Optimized Programs) is an automated static analysis tool for bit-level precision tuning of numerical programs, developed at the University of Perpignan (Ben Khalifa, Martel, Adje, 2021). Given a program and user-specified accuracy requirements (e.g., "variable y needs 20 significant bits"), POP determines the minimum number of bits each variable and intermediate result needs, at every control point, to meet that accuracy target. -Reference: Dorra Ben Khalifa, "Fast and Efficient Bit-Level Precision Tuning," PhD thesis, Universite de Perpignan, 2021 (https://theses.hal.science/tel-03509266). +Reference: Dorra Ben Khalifa, "Fast and Efficient Bit-Level Precision Tuning," PhD thesis, Université de Perpignan, 2021 (https://theses.hal.science/tel-03509266). + +See also: [POP Developer Guide](pop-developer-guide.md) for usage examples and API reference. ## Problem Statement @@ -14,79 +16,75 @@ Mixed-precision programming is a manual, error-prone process. Developers must de - Trial-and-error tools (Precimonious, CRAFT) are slow and unsound - Existing static tools (FPTuner, Rosa) handle only straight-line code -Universal provides 30+ configurable number types but no automated guidance on which type to use where. POP's static analysis can fill this gap. +Universal provides 30+ configurable number types but no automated guidance on which type to use where. POP's static analysis fills this gap. ## POP Architecture -POP has two solver backends: +POP has two solver backends in the original thesis: - **POP(SMT)**: Encodes forward + backward error analysis as propositional logic constraints solved by Z3 with binary search on a cost function. - **POP(ILP)**: Reformulates the problem as an Integer Linear Program solved in one pass by GLPK, with optional Policy Iteration for tighter carry-bit propagation. 10-100x faster than SMT. +Universal implements the ILP approach with an embedded simplex solver (header-only, no external dependencies) and an optional GLPK binding for larger problems. + ### Core Quantities At each control point in the program, POP tracks two integer quantities: -- `ufp(x)` -- unit in the first place (weight of the most significant bit) -- `nsb(x)` -- number of significant bits +- `ufp(x)` -- unit in the first place (weight of the most significant bit): `floor(log2(|x|))` +- `nsb(x)` -- number of significant bits: `-log2(relative_error)` These are sufficient to determine the minimum precision needed for any arithmetic representation. ### Analysis Phases -1. **Range determination** (dynamic): Run the program on representative inputs to determine value ranges (ufp) at each control point. -2. **Forward analysis**: Compute how errors grow through each operation. For `z = x + y`, the forward transfer function yields `nsb_F(z)` from `nsb(x)`, `nsb(y)`, and the ufp values. +1. **Range determination** (dynamic): Run the program on representative inputs to determine value ranges (ufp) at each control point. In Universal, this is done via `range_analyzer`. +2. **Forward analysis**: Compute how errors grow through each operation using transfer functions. 3. **Backward analysis**: Starting from the user's accuracy requirement on the output, propagate backwards to determine the minimum precision at each input and intermediate. -4. **Constraint generation**: Express the transfer functions as integer linear constraints. +4. **Constraint generation**: Express the transfer functions as linear constraints. 5. **Solving**: Find the optimal (minimum total bits) assignment satisfying all constraints. -6. **Type mapping**: Convert `{ufp, nsb}` pairs to concrete data types. +6. **Type mapping**: Convert `{ufp, nsb}` pairs to concrete Universal types via `TypeAdvisor`. ### Transfer Functions -The forward transfer functions for the four elementary operations (from Chapter 4 of the thesis): +Implemented in `include/sw/universal/mixedprecision/transfer.hpp` as `constexpr` functions. -**Addition** `z = x + y`: -```text -nsb_F(z) >= min(nsb(x) + ufp(x), nsb(y) + ufp(y)) - ufp(z) - carry(+) -``` +**Forward functions** — given input precisions, compute output precision: -**Subtraction** `z = x - y`: -```text -nsb_F(z) >= min(nsb(x) + ufp(x), nsb(y) + ufp(y)) - ufp(z) - carry(-) -``` +| Operation | Forward Transfer | +|-----------|-----------------| +| `z = x + y` | `nsb(z) = ufp(z) - min(lsb(x), lsb(y)) + 1 + carry` | +| `z = x - y` | Same as addition | +| `z = x * y` | `nsb(z) = nsb(x) + nsb(y) + carry` | +| `z = x / y` | `nsb(z) = nsb(x) + nsb(y) + carry` | +| `z = -x` | `nsb(z) = nsb(x)` | +| `z = abs(x)` | `nsb(z) = nsb(x)` | +| `z = sqrt(x)` | `nsb(z) = nsb(x) + carry` | -**Multiplication** `z = x * y`: -```text -nsb_F(z) >= min(nsb(x), nsb(y)) - carry(*) -``` +**Backward functions** — given required output precision, compute required input precision: -**Division** `z = x / y`: -```text -nsb_F(z) >= min(nsb(x), nsb(y)) - carry(/) -``` +| Operation | Backward Transfer | +|-----------|------------------| +| `z = x + y` | `nsb(x) >= nsb(z) + ufp(z) - ufp(x) + carry` | +| `z = x * y` | `nsb(x) >= nsb(z) + carry` | +| `z = x / y` | `nsb(x) >= nsb(z) + carry` | +| `z = -x` | `nsb(x) = nsb(z)` | +| `z = sqrt(x)` | `nsb(x) >= nsb(z) + carry` | -The carry bit function is a key contribution: previous methods assumed carry = 1 at every operation. POP uses a refined function that adds carry only when the error from the previous computation can actually propagate, reducing over-approximation significantly. +The carry bit function is a key contribution: previous methods assumed `carry = 1` at every operation. POP uses a refined function that adds carry only when the error from the previous computation can actually propagate, reducing over-approximation significantly. -The backward transfer functions propagate accuracy requirements from outputs to inputs: +### LP Formulation -**Addition** backward: Given `nsb(z)` required, compute minimum `nsb(x)` and `nsb(y)`: -```text -nsb_B(x) >= nsb(z) + ufp(z) - ufp(x) + carry(+) -nsb_B(y) >= nsb(z) + ufp(z) - ufp(y) + carry(+) -``` - -### ILP Formulation - -The ILP-based method (POP's most efficient solver) generates a linear number of integer constraints and variables proportional to program size. The objective function minimizes total bits: +The LP-based method generates a linear number of constraints and variables proportional to program size: ```text -minimize: sum over all control points l of nsb(l) +minimize: sum over all nodes i of nsb(i) subject to: transfer function constraints for each operation - nsb(l) >= nsb_B(l) for all l (backward requirements) - nsb(l) >= 0 for all l + nsb(i) >= nsb_required(i) for output nodes + nsb(i) >= 1 for all nodes ``` -The exact ILP is NP-hard, but a relaxed LP (dropping integrality constraints) can be solved in polynomial time by solvers like GLPK. Exact solutions require an ILP/MIP solver and may only be tractable for small instances; larger programs can use LP relaxation with rounding or other heuristic methods. +The LP relaxation (dropping integrality constraints) is solved in polynomial time by the embedded simplex solver. Integer solutions are obtained by ceiling the LP result. For true ILP solutions, use the optional GLPK binding. ### Key Results from the Thesis @@ -95,190 +93,242 @@ The exact ILP is NP-hard, but a relaxed LP (dropping integrality constraints) ca - Sound static guarantees (no trial-and-error) - Benchmarks: rotation matrices, matrix determinants, Simpson integration, PID controllers, N-body problem, pedometer/IoT algorithms, Newton-Raphson, Runge-Kutta, odometry -## Integration with Universal - -### Mapping POP Output to Universal Types - -POP produces bit-level precision recommendations. Universal provides the types to realize them: - -| POP output (nsb) | Universal type | -|-------------------|----------------| -| 3-4 bits | `microfloat<4,*>` (e2m1, e3m0) | -| 5-8 bits | `microfloat<8,*>` (e4m3, e5m2), `cfloat<8,*>` | -| 10-11 bits | `cfloat<16,5>` (IEEE FP16) | -| 7 bits | `bfloat16` | -| 23-24 bits | `cfloat<32,8>` (IEEE FP32) | -| 52-53 bits | `cfloat<64,11>` (IEEE FP64) | -| 31 digits | `dd` (double-double) | -| N-bit fixed | `fixpnt` | -| Decimal fixed | `dfixpnt` | -| Tapered | `posit` | +## Implementation -This mapping is a unique advantage: POP can recommend arbitrary bit widths, and Universal can actually instantiate them as concrete template types. +### Header Structure -### Proposed Components +All POP headers are in `include/sw/universal/mixedprecision/` and use namespace `sw::universal`: -#### 1. Transfer Function Library +| Header | Phase | Purpose | +|--------|-------|---------| +| `ufp.hpp` | 1 | UFP computation, bridge to `range_analyzer` | +| `transfer.hpp` | 1 | Forward and backward transfer functions (`constexpr`) | +| `expression_graph.hpp` | 2 | `ExprGraph` DAG builder and iterative fixpoint analysis | +| `simplex.hpp` | 3 | Embedded header-only Big-M simplex LP solver | +| `pop_solver.hpp` | 3 | `PopSolver`: translates graph to LP constraints and solves | +| `glpk_solver.hpp` | 3 | Optional GLPK ILP binding (`#ifdef UNIVERSAL_HAS_GLPK`) | +| `carry_analysis.hpp` | 4 | `CarryAnalyzer`: policy iteration for carry-bit refinement | +| `codegen.hpp` | 5 | `PopCodeGenerator`: emits C++ type alias headers | +| `pop.hpp` | — | Umbrella header including all of the above | -A header-only library implementing POP's transfer functions as constexpr integer arithmetic: +### Key Types ```cpp -// include/sw/universal/mixedprecision/transfer.hpp - -namespace sw::universal::mixedprecision { +namespace sw { namespace universal { +// Precision descriptor (transfer.hpp) struct precision_info { - int ufp; // unit in the first place (MSB weight) - int nsb; // number of significant bits + int ufp; // floor(log2(|x|)) + int nsb; // number of significant bits + constexpr int lsb() const; // ufp - nsb + 1 }; -// Forward transfer functions -constexpr precision_info forward_add(precision_info x, precision_info y, int carry = 1); -constexpr precision_info forward_sub(precision_info x, precision_info y, int carry = 1); -constexpr precision_info forward_mul(precision_info x, precision_info y, int carry = 1); -constexpr precision_info forward_div(precision_info x, precision_info y, int carry = 1); +// Operation kinds (expression_graph.hpp) +enum class OpKind : uint8_t { + Constant, Variable, Add, Sub, Mul, Div, Neg, Abs, Sqrt +}; -// Backward transfer functions -constexpr int backward_add_lhs(int nsb_z, int ufp_z, int ufp_x, int carry = 1); -constexpr int backward_add_rhs(int nsb_z, int ufp_z, int ufp_y, int carry = 1); -constexpr int backward_mul_lhs(int nsb_z, int carry = 1); -constexpr int backward_mul_rhs(int nsb_z, int carry = 1); +// Expression graph node (expression_graph.hpp) +struct ExprNode { + OpKind op; + int id; + std::string name; + int lhs{-1}, rhs{-1}; // input edges (-1 = none) + double lo{0.0}, hi{0.0}; // value range + int ufp{0}; // floor(log2(max_abs)) + int nsb_forward{0}; // from forward pass + int nsb_backward{0}; // from backward pass + int nsb_final{0}; // result: max(forward, backward) + int carry{1}; // carry bit (1=conservative, 0=refined) + int nsb_required{-1}; // user requirement (-1=none) + std::vector consumers; // downstream nodes +}; -} // namespace sw::universal::mixedprecision +// LP solver status (simplex.hpp) +enum class LPStatus { Optimal, Infeasible, Unbounded, MaxIterations }; + +}} // namespace sw::universal ``` -#### 2. Expression Graph +### `ExprGraph` — DAG Builder and Fixpoint Analysis -A lightweight DAG representation for numerical computations: +The central class. Builds an expression DAG and runs iterative fixpoint analysis: ```cpp -// include/sw/universal/mixedprecision/expression_graph.hpp - -namespace sw::universal::mixedprecision { - -enum class OpKind { Constant, Variable, Add, Sub, Mul, Div, Sqrt, Sin, Cos }; - -struct ExprNode { - OpKind op; - int label; // control point ID - precision_info range; // ufp from range analysis - int nsb_forward; // computed by forward pass - int nsb_backward; // computed by backward pass - int nsb_final; // max(nsb_forward, nsb_backward) -}; - class ExprGraph { - // Build expression trees - int constant(double value, int precision); + // Construction: returns node ID + int constant(double value, const std::string& name = ""); int variable(const std::string& name, double lo, double hi); + template int variable(const std::string& name, const range_analyzer& ra); int add(int lhs, int rhs); + int sub(int lhs, int rhs); int mul(int lhs, int rhs); - // ... + int div(int lhs, int rhs); + int neg(int operand); + int abs(int operand); + int sqrt(int operand); + + // Requirements + void require_nsb(int node_id, int nsb); + + // Analysis (iterative fixpoint — no LP solver needed) + void analyze(int max_iterations = 20); + + // Results + int get_nsb(int node_id) const; + const ExprNode& get_node(int node_id) const; + int size() const; + std::string recommended_type(int node_id, const TypeAdvisor& = TypeAdvisor()) const; + void report(std::ostream&) const; + void report(std::ostream&, const TypeAdvisor&) const; + + // Access for LP solver / carry analysis + std::vector& nodes(); + const std::vector& nodes() const; +}; +``` - // Set accuracy requirement on a node - void require_nsb(int node, int nsb); +Range estimation uses conservative interval arithmetic. The `variable()` overload accepting `range_analyzer` extracts `lo`, `hi`, and `ufp` from the analyzer's observed values. - // Run the analysis - void analyze(); +### `PopSolver` — LP-Based Optimization - // Query results - int get_nsb(int node) const; +Translates an `ExprGraph` into LP constraints and solves for the globally optimal bit assignment: - // Map to Universal types - std::string recommended_type(int node) const; +```cpp +class PopSolver { + bool solve(ExprGraph& graph); // generate constraints + solve + write nsb_final + double total_nsb() const; // total bits across all nodes + LPStatus status() const; // solver status + void report(std::ostream&, const ExprGraph&) const; }; - -} // namespace sw::universal::mixedprecision ``` -#### 3. ILP Solver Interface +The solver automatically selects the backend based on compile-time configuration: +- Default: `SimplexSolver` (embedded, header-only, no dependencies) +- With `-DUNIVERSAL_HAS_GLPK`: `GlpkSolver` (true ILP via GLPK) -A thin binding to GLPK (GNU LGPL) or HiGHS (MIT) for solving the constraint system: +### `CarryAnalyzer` — Policy Iteration + +Refines carry bits via alternating LP solves and carry recomputation: ```cpp -// include/sw/universal/mixedprecision/ilp_solver.hpp +class CarryAnalyzer { + int refine(ExprGraph& graph, int max_iterations = 10); + int iterations() const; + void report(std::ostream&, const ExprGraph&) const; +}; +``` -namespace sw::universal::mixedprecision { +The carry refinement criterion: `carry(z = x op y) = 0` when `lsb(operand_error) > ufp(z)`, meaning the operand's rounding error cannot affect the result. -class ILPSolver { - int add_variable(const std::string& name, int lo, int hi); - void add_constraint(/* linear constraint */); - void set_objective(/* minimize sum of nsb variables */); - bool solve(); - int get_value(int var_id) const; -}; +### `PopCodeGenerator` — Mixed-Precision Code Emission + +Generates C++ code using Universal types based on POP results: -} // namespace sw::universal::mixedprecision +```cpp +class PopCodeGenerator { + explicit PopCodeGenerator(const ExprGraph& graph, const TypeAdvisor& = TypeAdvisor()); + std::string generateHeader(const std::string& guard = "POP_PRECISION_CONFIG_HPP") const; + std::string generateReport() const; + std::string generateExampleCode(const std::string& function_name = "compute") const; +}; ``` -#### 4. Type Recommender +### `SimplexSolver` — Embedded LP Solver -Maps `{ufp, nsb}` pairs to the smallest Universal type: +A header-only Big-M simplex solver for small LPs (< 100 variables): ```cpp -// include/sw/universal/mixedprecision/type_recommender.hpp +class SimplexSolver { + void set_num_vars(int n); + void set_objective(const std::vector& coeffs); // minimize + void add_ge_constraint(const std::vector& coeffs, double rhs); + void add_le_constraint(const std::vector& coeffs, double rhs); + void add_eq_constraint(const std::vector& coeffs, double rhs); + LPStatus solve(int max_iterations = 10000); + double get_value(int var) const; + double objective_value() const; + LPStatus status() const; +}; +``` -namespace sw::universal::mixedprecision { +## Integration with Universal Infrastructure -enum class TypeFamily { cfloat, posit, fixpnt, lns, dd, qd }; +### Mapping POP Output to Universal Types -struct TypeRecommendation { - TypeFamily family; - unsigned nbits; - unsigned param2; // es for posit/cfloat, frac bits for fixpnt - std::string type_name; // e.g. "cfloat<16,5>" -}; +POP produces bit-level precision recommendations. Universal provides the types to realize them: -TypeRecommendation recommend( - precision_info p, - TypeFamily preferred = TypeFamily::cfloat, - bool allow_nonstandard_widths = true -); +| POP output (nsb) | Universal type | +|-------------------|----------------| +| 3-4 bits | `microfloat<4,*>` (e2m1, e3m0) | +| 5-8 bits | `microfloat<8,*>` (e4m3, e5m2), `cfloat<8,*>` | +| 10-11 bits | `cfloat<16,5>` (IEEE FP16) | +| 7 bits | `bfloat16` | +| 23-24 bits | `cfloat<32,8>` (IEEE FP32) | +| 52-53 bits | `cfloat<64,11>` (IEEE FP64) | +| 31 digits | `dd` (double-double) | +| N-bit fixed | `fixpnt` | +| Decimal fixed | `dfixpnt` | +| Tapered | `posit` | -} // namespace sw::universal::mixedprecision -``` +This mapping is a unique advantage: POP can recommend arbitrary bit widths, and Universal can actually instantiate them as concrete template types. The `TypeAdvisor::recommendForNsb()` method performs this mapping automatically. ### Connection to Existing Infrastructure -- **Error tracker** (`docs/design/error-tracker.md`): POP's forward analysis is a formalized version of error tracking. The two can share transfer function implementations, with the error tracker providing runtime validation of POP's static bounds. -- **Block formats** (`mxfloat`, `nvblock`): POP's neural network perspective (thesis Section 10.3.2) maps to block-scaled quantization. The ILP formulation can optimize block scale selection alongside element precision. -- **`fixpnt` / `dfixpnt`**: The thesis identifies fixed-point code synthesis as a near-term goal. Universal's fixed-point types are ready targets for POP's bit-level recommendations. -- **`posit`**: The thesis explicitly mentions posit integration as future work ("we plan to integrate POSIT libraries in the near future"). Universal is the reference posit implementation. +| Component | Integration | +|-----------|-------------| +| `range_analyzer` | `ExprGraph::variable(name, ra)` extracts `lo`, `hi`, `ufp` from profiled ranges. `range_analyzer::ufp()` alias added. | +| `TypeAdvisor` | `TypeAdvisor::recommendForNsb(nsb, lo, hi)` maps NSB to concrete types. Used by `ExprGraph::recommended_type()` and `PopCodeGenerator`. | +| `PrecisionConfigGenerator` | `PopCodeGenerator::generateHeader()` follows the same output format. | +| Error tracker | POP's forward analysis is a formalized version of error tracking. The error tracker can validate POP's static bounds at runtime. | +| Block formats (`mxfloat`, `nvblock`) | POP's neural network perspective (thesis Section 10.3.2) maps to block-scaled quantization. | +| `fixpnt` / `dfixpnt` | The thesis identifies fixed-point code synthesis as a near-term goal. Universal's fixed-point types are ready targets. | +| `posit` | The thesis explicitly mentions posit integration as future work. Universal is the reference posit implementation. | -### Implementation Phases +### Modifications to Existing Headers -#### Phase 1: Transfer Functions (Small) +| File | Change | +|------|--------| +| `include/sw/universal/utility/range_analyzer.hpp` | Added `int ufp() const { return maxScale(); }` convenience alias | +| `include/sw/universal/utility/type_advisor.hpp` | Added `TypeRecommendation recommendForNsb(int nsb, double lo, double hi) const` method | -Implement the 10 forward/backward equations from Chapter 4 as constexpr functions. No external dependencies. Add unit tests validating against the thesis examples (rotation matrix, determinant). +## Build Configuration -#### Phase 2: Expression Graph + Backward Analysis (Medium) +### CMake Options -Build the DAG representation. Implement backward propagation without an LP solver (use a simple iterative fixpoint on the transfer functions). This alone can provide useful precision recommendations for straight-line code. +```bash +# Build POP tests +cmake -DUNIVERSAL_BUILD_MIXEDPRECISION_POP=ON .. -#### Phase 3: ILP Solver Binding (Medium) +# Build with GLPK for ILP solving +cmake -DUNIVERSAL_BUILD_MIXEDPRECISION_POP=ON -DUNIVERSAL_USE_GLPK=ON .. -Interface to GLPK or HiGHS. Generate constraints from the expression graph. Solve for optimal bit assignments. This enables handling of programs with multiple outputs and competing precision requirements. +# POP is also included in the SDK cascade +cmake -DUNIVERSAL_BUILD_SDK=ON .. +``` -#### Phase 4: Policy Iteration for Carry Bits (Medium) +### Test Suite -Implement the refined carry-bit optimization from Chapter 5. Requires iterating between the LP solver and carry-bit updates until convergence. Produces tighter (lower) precision assignments. +8 test executables in `mixedprecision/pop/`: -#### Phase 5: Code Generator (Large) +| Test | Validates | +|------|-----------| +| `test_transfer` | Forward/backward transfer functions (constexpr) | +| `test_ufp` | UFP computation and `range_analyzer` bridge | +| `test_expression_graph` | DAG construction, fixpoint analysis, type recommendations | +| `test_simplex` | Embedded LP solver correctness | +| `test_pop_solver` | End-to-end LP-based optimization | +| `test_carry_analysis` | Carry refinement convergence | +| `test_codegen` | Code generation output | +| `test_complete_workflow` | End-to-end: profile -> graph -> optimize -> codegen | -Emit mixed-precision C++ code using Universal types. Takes the expression graph + precision map and generates template-parameterized code: +Run all tests: -```cpp -// Input: z = x * y + w -// POP output: x needs 24 bits, y needs 16 bits, w needs 11 bits, z needs 24 bits -// Generated: -cfloat<32,8> x = ...; -cfloat<16,5> y = ...; -cfloat<16,5> w = ...; -cfloat<32,8> z = static_cast>(x) * static_cast>(y) - + static_cast>(w); +```bash +ctest -R pop ``` -### Unique Value Proposition +## Unique Value Proposition What makes Universal + POP compelling vs. POP standalone: @@ -288,10 +338,10 @@ What makes Universal + POP compelling vs. POP standalone: 4. **Fixed-point synthesis**: The thesis identifies this as a short-term goal. Universal's `fixpnt` and `dfixpnt` are ready. 5. **Block formats**: `mxfloat`/`nvblock` enable the neural network quantization perspective from the thesis. -### Open Questions +## Future Work -1. **Range analysis**: POP uses dynamic range analysis (running the program on representative inputs). Should we also support static range analysis via abstract interpretation, or is dynamic sufficient for the SDK? -2. **Loop handling**: POP handles loops via policy iteration on the constraint system. For the initial integration, should we restrict to straight-line code or implement the full loop analysis? -3. **LP solver dependency**: GLPK is GNU LGPL which may conflict with Universal's MIT license for a header-only library. HiGHS (MIT license) is an alternative. Or we could implement a simple LP solver for the small constraint systems POP generates. -4. **Granularity**: POP tunes at the variable level. Should we also support expression-level tuning (different precision for each subexpression)? -5. **Integration with LLVM/Clang**: Several tools in the thesis ecosystem (TAFFO, Precimonious) work at the LLVM IR level. Should we target source-level C++ or also support IR-level analysis? +1. **Loop handling**: Extend policy iteration to handle loops via widening/narrowing in the constraint system. +2. **Static range analysis**: Supplement dynamic `range_analyzer` profiling with abstract interpretation for ranges, removing the need for representative inputs. +3. **Expression-level tuning**: Support different precision for each subexpression, not just per variable. +4. **LLVM/Clang integration**: Source-level analysis via Clang AST visitors to automatically extract expression graphs from C++ code. +5. **Weighted objectives**: Support non-uniform cost functions (e.g., energy-weighted optimization using `TypeAdvisor` energy estimates). diff --git a/include/sw/universal/mixedprecision/carry_analysis.hpp b/include/sw/universal/mixedprecision/carry_analysis.hpp new file mode 100644 index 000000000..ea523cd6b --- /dev/null +++ b/include/sw/universal/mixedprecision/carry_analysis.hpp @@ -0,0 +1,159 @@ +#pragma once +// carry_analysis.hpp: carry-bit refinement via policy iteration for POP +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project. +// +// The default carry bit (carry=1) is conservative. For many operations, +// carry=0 is safe when the operand error cannot affect the result: +// +// carry(z = x op y) = 0 when lsb(x_err) > ufp(z) +// +// Policy iteration alternates between: +// 1. Solve LP with current carry values +// 2. Recompute carries from the LP solution +// 3. Repeat until stable +// +// Typically reduces total bits by 10-30%. +// +// Reference: Dorra Ben Khalifa, "Fast and Efficient Bit-Level Precision Tuning," +// PhD thesis, Université de Perpignan, 2021, Section 5.4. + +#include +#include +#include +#include +#include + +namespace sw { namespace universal { + +class CarryAnalyzer { +public: + // Run carry-bit refinement via policy iteration + // Returns the number of iterations to convergence + int refine(ExprGraph& graph, int max_iterations = 10) { + auto& nodes = graph.nodes(); + int n = static_cast(nodes.size()); + + // Initialize all carries to 1 (conservative) + for (auto& node : nodes) { + node.carry = 1; + } + + int iter = 0; + for (; iter < max_iterations; ++iter) { + // Step 1: Solve LP with current carries + PopSolver solver; + if (!solver.solve(graph)) { + // LP failed; keep current carries + break; + } + + // Step 2: Recompute carries from the solution + bool changed = false; + for (int i = 0; i < n; ++i) { + auto& node = nodes[static_cast(i)]; + int old_carry = node.carry; + + int new_carry = compute_carry(node, nodes); + if (new_carry != old_carry) { + node.carry = new_carry; + changed = true; + } + } + + if (!changed) break; // Converged + } + + // Final solve with refined carries + PopSolver final_solver; + final_solver.solve(graph); + iterations_ = iter; + + return iter; + } + + int iterations() const { return iterations_; } + + // Report carry analysis results + void report(std::ostream& ostr, const ExprGraph& graph) const { + ostr << "Carry Analysis Results (converged in " << iterations_ << " iterations)\n"; + ostr << std::string(50, '=') << "\n\n"; + + int carry0_count = 0; + int carry1_count = 0; + + for (const auto& node : graph.nodes()) { + if (node.op == OpKind::Constant || node.op == OpKind::Variable) continue; + + if (node.carry == 0) ++carry0_count; + else ++carry1_count; + + ostr << " " << node.name << " (" << to_string(node.op) << "): carry = " + << node.carry << "\n"; + } + + ostr << "\nRefined carries: " << carry0_count << " of " + << (carry0_count + carry1_count) << " operations have carry=0\n"; + } + +private: + int iterations_{0}; + + // Compute carry for a node based on current nsb assignments + // carry = 0 when lsb(operand_error) > ufp(result) + int compute_carry(const ExprNode& node, const std::vector& nodes) const { + switch (node.op) { + case OpKind::Add: + case OpKind::Sub: { + if (node.lhs < 0 || node.rhs < 0) return 1; + auto& l = nodes[static_cast(node.lhs)]; + auto& r = nodes[static_cast(node.rhs)]; + + // lsb of operand error = ufp - nsb (the position of the last significant bit) + int lsb_l_err = l.ufp - l.nsb_final; + int lsb_r_err = r.ufp - r.nsb_final; + + // If both operand error lsbs are above ufp(z), carry = 0 + if (lsb_l_err > node.ufp && lsb_r_err > node.ufp) { + return 0; + } + return 1; + } + case OpKind::Mul: { + if (node.lhs < 0 || node.rhs < 0) return 1; + auto& l = nodes[static_cast(node.lhs)]; + auto& r = nodes[static_cast(node.rhs)]; + + // For multiplication, the product error depends on both operands + // carry = 0 when nsb_l + nsb_r is small enough + int lsb_l = l.ufp - l.nsb_final + 1; + int lsb_r = r.ufp - r.nsb_final + 1; + int lsb_product = lsb_l + lsb_r; + + // If the product's lsb is above the result's ufp - nsb position, carry = 0 + if (lsb_product > node.ufp - node.nsb_final + 1) { + return 0; + } + return 1; + } + case OpKind::Div: { + // Division carry analysis is more complex; keep conservative + return 1; + } + case OpKind::Sqrt: { + if (node.lhs < 0) return 1; + auto& l = nodes[static_cast(node.lhs)]; + int lsb_l_err = l.ufp - l.nsb_final; + if (lsb_l_err > node.ufp) return 0; + return 1; + } + default: + return 1; + } + } +}; + +}} // namespace sw::universal diff --git a/include/sw/universal/mixedprecision/codegen.hpp b/include/sw/universal/mixedprecision/codegen.hpp new file mode 100644 index 000000000..142a9f636 --- /dev/null +++ b/include/sw/universal/mixedprecision/codegen.hpp @@ -0,0 +1,210 @@ +#pragma once +// codegen.hpp: mixed-precision code generator for POP precision tuning +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project. +// +// Generates mixed-precision C++ code using Universal types based on +// POP analysis results. The output follows the format established by +// PrecisionConfigGenerator::generateConfigHeader(). +// +// Reference: Dorra Ben Khalifa, "Fast and Efficient Bit-Level Precision Tuning," +// PhD thesis, Université de Perpignan, 2021. + +#include +#include +#include +#include +#include +#include + +namespace sw { namespace universal { + +class PopCodeGenerator { +public: + explicit PopCodeGenerator(const ExprGraph& graph, const TypeAdvisor& advisor = TypeAdvisor()) + : graph_(graph), advisor_(advisor) {} + + // Generate a C++ header with type aliases for each variable + std::string generateHeader(const std::string& guard_name = "POP_PRECISION_CONFIG_HPP") const { + std::ostringstream ss; + + ss << "#pragma once\n"; + ss << "// Auto-generated mixed-precision configuration from POP analysis\n"; + ss << "//\n"; + ss << "// Generated by Universal Numbers Library POP Precision Tuning\n"; + ss << "// https://github.com/stillwater-sc/universal\n"; + ss << "//\n"; + ss << "// This header defines type aliases for each variable based on\n"; + ss << "// static precision analysis (minimum bits needed for accuracy).\n\n"; + + ss << "#ifndef " << guard_name << "\n"; + ss << "#define " << guard_name << "\n\n"; + + // Collect variable and intermediate type recommendations + for (const auto& node : graph_.nodes()) { + if (node.op == OpKind::Constant) continue; + + auto rec = advisor_.recommendForNsb(node.nsb_final, node.lo, node.hi); + ss << "// " << node.name << ": " << node.nsb_final + << " significant bits, range [" << node.lo << ", " << node.hi << "]\n"; + ss << "using type_" << sanitize_name(node.name) << " = " + << universal_type_string(rec, node.nsb_final) << ";\n\n"; + } + + ss << "#endif // " << guard_name << "\n"; + return ss.str(); + } + + // Generate an analysis summary as a comment block + std::string generateReport() const { + std::ostringstream ss; + + ss << "/*\n"; + ss << " * POP Precision Analysis Report\n"; + ss << " * " << std::string(40, '=') << "\n"; + ss << " *\n"; + + double total_bits = 0; + double fp64_total = 0; + + for (const auto& node : graph_.nodes()) { + auto rec = advisor_.recommendForNsb(node.nsb_final, node.lo, node.hi); + ss << " * " << std::left << std::setw(12) << node.name + << " nsb=" << std::setw(4) << node.nsb_final + << " -> " << rec.type.name << "\n"; + total_bits += rec.type.total_bits; + fp64_total += 64; + } + + ss << " *\n"; + ss << " * Total bits: " << total_bits << " (vs " << fp64_total << " fp64)\n"; + if (fp64_total > 0.0) { + double savings = (1.0 - total_bits / fp64_total) * 100.0; + ss << " * Bit savings: " << std::fixed << std::setprecision(1) << savings << "%\n"; + } + ss << " */\n"; + + return ss.str(); + } + + // Generate a code template that uses the precision config + std::string generateExampleCode(const std::string& function_name = "compute") const { + std::ostringstream ss; + + ss << "#include \"pop_precision_config.hpp\"\n"; + ss << "#include \n"; + ss << "#include \n"; + ss << "#include \n\n"; + + ss << "using namespace sw::universal;\n\n"; + + // Collect variables (inputs) + std::vector inputs; + for (const auto& node : graph_.nodes()) { + if (node.op == OpKind::Variable) { + inputs.push_back(&node); + } + } + + // Function signature + ss << "auto " << function_name << "("; + for (size_t i = 0; i < inputs.size(); ++i) { + if (i > 0) ss << ", "; + ss << "type_" << sanitize_name(inputs[i]->name) << " " << inputs[i]->name; + } + ss << ") {\n"; + + // Find the output (last node with a requirement, or just the last node) + int output_id = -1; + for (const auto& node : graph_.nodes()) { + if (node.nsb_required >= 0) output_id = node.id; + } + if (output_id < 0 && !graph_.nodes().empty()) { + output_id = graph_.nodes().back().id; + } + + // Generate computation body (pseudo-code for non-input nodes) + for (const auto& node : graph_.nodes()) { + if (node.op == OpKind::Constant || node.op == OpKind::Variable) continue; + + ss << " type_" << sanitize_name(node.name) << " " << node.name << " = "; + + auto lhs_name = [&]() -> std::string { + if (node.lhs >= 0) return graph_.nodes()[static_cast(node.lhs)].name; + return "?"; + }; + auto rhs_name = [&]() -> std::string { + if (node.rhs >= 0) return graph_.nodes()[static_cast(node.rhs)].name; + return "?"; + }; + + switch (node.op) { + case OpKind::Add: ss << lhs_name() << " + " << rhs_name(); break; + case OpKind::Sub: ss << lhs_name() << " - " << rhs_name(); break; + case OpKind::Mul: ss << lhs_name() << " * " << rhs_name(); break; + case OpKind::Div: ss << lhs_name() << " / " << rhs_name(); break; + case OpKind::Neg: ss << "-" << lhs_name(); break; + case OpKind::Abs: ss << "abs(" << lhs_name() << ")"; break; + case OpKind::Sqrt: ss << "sqrt(" << lhs_name() << ")"; break; + default: ss << "/* unknown */"; break; + } + + ss << ";\n"; + } + + if (output_id >= 0) { + ss << " return " << graph_.nodes()[static_cast(output_id)].name << ";\n"; + } + + ss << "}\n"; + return ss.str(); + } + +private: + const ExprGraph& graph_; + TypeAdvisor advisor_; + + static std::string sanitize_name(const std::string& name) { + std::string result; + for (char c : name) { + if (std::isalnum(static_cast(c)) || c == '_') { + result += c; + } else { + result += '_'; + } + } + return result; + } + + // Generate a Universal type string from recommendation + static std::string universal_type_string(const TypeRecommendation& rec, int nsb) { + // Extract the concrete type from the recommendation name + // If it has a parenthetical like "half (cfloat<16,5>)", extract the cfloat part + const std::string& name = rec.type.name; + auto paren = name.find('('); + if (paren != std::string::npos) { + auto end_paren = name.find(')'); + if (end_paren != std::string::npos) { + std::string inner = name.substr(paren + 1, end_paren - paren - 1); + return "sw::universal::" + inner; + } + } + + // If name already starts with a type family, prefix with namespace + if (name.starts_with("posit") || name.starts_with("cfloat") || + name.starts_with("fixpnt") || name.starts_with("lns")) { + return "sw::universal::" + name; + } + + // Fallback: generate cfloat with appropriate bits + int total_bits = std::max(8, ((nsb + 8) / 8) * 8); // round up to byte boundary + int exp_bits = (total_bits <= 16) ? 5 : (total_bits <= 32) ? 8 : 11; + return "sw::universal::cfloat<" + std::to_string(total_bits) + "," + + std::to_string(exp_bits) + ">"; + } +}; + +}} // namespace sw::universal diff --git a/include/sw/universal/mixedprecision/expression_graph.hpp b/include/sw/universal/mixedprecision/expression_graph.hpp new file mode 100644 index 000000000..ede9646ee --- /dev/null +++ b/include/sw/universal/mixedprecision/expression_graph.hpp @@ -0,0 +1,525 @@ +#pragma once +// expression_graph.hpp: DAG-based expression graph for POP precision analysis +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project. +// +// The ExprGraph builds a directed acyclic graph (DAG) of arithmetic operations +// and performs iterative fixpoint analysis to determine minimum bit requirements +// at each node. The analysis proceeds in three phases: +// +// 1. Forward pass: propagate precision from inputs to outputs +// 2. Backward pass: propagate requirements from outputs to inputs +// 3. Finalize: nsb_final = max(nsb_forward, nsb_backward) +// +// The iterative fixpoint repeats forward+backward until convergence, handling +// the case where some nodes have requirements from multiple output consumers. +// +// Reference: Dorra Ben Khalifa, "Fast and Efficient Bit-Level Precision Tuning," +// PhD thesis, Université de Perpignan, 2021, Chapters 4-5. + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace sw { namespace universal { + +enum class OpKind : uint8_t { + Constant, + Variable, + Add, + Sub, + Mul, + Div, + Neg, + Abs, + Sqrt +}; + +inline const char* to_string(OpKind op) { + switch (op) { + case OpKind::Constant: return "const"; + case OpKind::Variable: return "var"; + case OpKind::Add: return "+"; + case OpKind::Sub: return "-"; + case OpKind::Mul: return "*"; + case OpKind::Div: return "/"; + case OpKind::Neg: return "neg"; + case OpKind::Abs: return "abs"; + case OpKind::Sqrt: return "sqrt"; + default: return "?"; + } +} + +struct ExprNode { + OpKind op; + int id; + std::string name; + int lhs{-1}; + int rhs{-1}; + + // Range information (from dynamic analysis or user) + double lo{0.0}; + double hi{0.0}; + int ufp{0}; + + // Precision analysis results + int nsb_forward{0}; + int nsb_backward{0}; + int nsb_final{0}; + + // Carry bit (1 = conservative, 0 = refined via carry analysis) + int carry{1}; + + // User-specified requirement (-1 = none) + int nsb_required{-1}; + + // Consumers: nodes that use this node as input + std::vector consumers; +}; + +class ExprGraph { +public: + // ================================================================ + // Graph construction + // ================================================================ + + int constant(double value, const std::string& name = "") { + ExprNode node; + node.op = OpKind::Constant; + node.id = static_cast(nodes_.size()); + node.name = name.empty() ? ("c" + std::to_string(node.id)) : name; + node.lo = value; + node.hi = value; + node.ufp = (value != 0.0) ? compute_ufp(value) : 0; + nodes_.push_back(node); + return node.id; + } + + int variable(const std::string& name, double lo, double hi) { + ExprNode node; + node.op = OpKind::Variable; + node.id = static_cast(nodes_.size()); + node.name = name; + node.lo = lo; + node.hi = hi; + node.ufp = compute_ufp(lo, hi); + nodes_.push_back(node); + return node.id; + } + + // Integration with range_analyzer: extract range and ufp + template + int variable(const std::string& name, const range_analyzer& ra) { + double lo = static_cast(ra.minValue()); + double hi = static_cast(ra.maxValue()); + return variable(name, lo, hi); + } + + int add(int lhs_id, int rhs_id) { return binary_op(OpKind::Add, lhs_id, rhs_id); } + int sub(int lhs_id, int rhs_id) { return binary_op(OpKind::Sub, lhs_id, rhs_id); } + int mul(int lhs_id, int rhs_id) { return binary_op(OpKind::Mul, lhs_id, rhs_id); } + int div(int lhs_id, int rhs_id) { return binary_op(OpKind::Div, lhs_id, rhs_id); } + + int neg(int operand_id) { return unary_op(OpKind::Neg, operand_id); } + int abs(int operand_id) { return unary_op(OpKind::Abs, operand_id); } + int sqrt(int operand_id) { return unary_op(OpKind::Sqrt, operand_id); } + + // ================================================================ + // Requirements specification + // ================================================================ + + void require_nsb(int node_id, int nsb) { + assert(node_id >= 0 && node_id < static_cast(nodes_.size())); + nodes_[static_cast(node_id)].nsb_required = nsb; + } + + // ================================================================ + // Analysis + // ================================================================ + + // Run complete analysis: forward + backward + finalize (iterative fixpoint) + void analyze(int max_iterations = 20) { + // Initialize forward nsb for leaves + for (auto& node : nodes_) { + if (node.op == OpKind::Constant || node.op == OpKind::Variable) { + // Leaves start with "infinite" precision (we use a large number) + node.nsb_forward = 53; // double precision as default source precision + } else { + node.nsb_forward = 0; + } + node.nsb_backward = 0; + } + + // Iterative fixpoint + for (int iter = 0; iter < max_iterations; ++iter) { + bool changed = false; + + // Forward pass (topological order = node id order for DAG) + for (auto& node : nodes_) { + int old_nsb = node.nsb_forward; + compute_forward(node); + if (node.nsb_forward != old_nsb) changed = true; + } + + // Backward pass (reverse topological order) + for (int i = static_cast(nodes_.size()) - 1; i >= 0; --i) { + int old_nsb = nodes_[static_cast(i)].nsb_backward; + compute_backward(nodes_[static_cast(i)]); + if (nodes_[static_cast(i)].nsb_backward != old_nsb) changed = true; + } + + if (!changed) break; + } + + // Finalize: nsb_final = max(nsb_forward_demand, nsb_backward) + for (auto& node : nodes_) { + node.nsb_final = node.nsb_backward; + if (node.nsb_required > 0) { + node.nsb_final = std::max(node.nsb_final, node.nsb_required); + } + // Clamp: can't exceed the forward-determined upper bound + if (node.nsb_forward > 0 && node.nsb_final > node.nsb_forward) { + node.nsb_final = node.nsb_forward; + } + // Minimum 1 bit for any non-constant + if (node.nsb_final < 1 && node.op != OpKind::Constant) { + node.nsb_final = 1; + } + } + } + + // ================================================================ + // Results + // ================================================================ + + int get_nsb(int node_id) const { + assert(node_id >= 0 && node_id < static_cast(nodes_.size())); + return nodes_[static_cast(node_id)].nsb_final; + } + + const ExprNode& get_node(int node_id) const { + assert(node_id >= 0 && node_id < static_cast(nodes_.size())); + return nodes_[static_cast(node_id)]; + } + + int size() const { return static_cast(nodes_.size()); } + + // Recommend a Universal type for a node based on its nsb and range + std::string recommended_type(int node_id, const TypeAdvisor& advisor = TypeAdvisor()) const { + const auto& node = get_node(node_id); + auto rec = advisor.recommendForNsb(node.nsb_final, node.lo, node.hi); + return rec.type.name; + } + + // Print analysis report + void report(std::ostream& ostr) const { + ostr << "POP Expression Graph Analysis\n"; + ostr << std::string(70, '=') << "\n\n"; + ostr << std::left + << std::setw(4) << "ID" + << std::setw(12) << "Name" + << std::setw(6) << "Op" + << std::setw(6) << "UFP" + << std::setw(6) << "Fwd" + << std::setw(6) << "Bwd" + << std::setw(6) << "Final" + << std::setw(6) << "Req" + << "\n"; + ostr << std::string(52, '-') << "\n"; + + for (const auto& node : nodes_) { + ostr << std::left + << std::setw(4) << node.id + << std::setw(12) << node.name + << std::setw(6) << to_string(node.op) + << std::setw(6) << node.ufp + << std::setw(6) << node.nsb_forward + << std::setw(6) << node.nsb_backward + << std::setw(6) << node.nsb_final + << std::setw(6) << (node.nsb_required >= 0 ? std::to_string(node.nsb_required) : "-") + << "\n"; + } + ostr << "\n"; + } + + // Print report with type recommendations + void report(std::ostream& ostr, const TypeAdvisor& advisor) const { + ostr << "POP Expression Graph Analysis with Type Recommendations\n"; + ostr << std::string(80, '=') << "\n\n"; + ostr << std::left + << std::setw(4) << "ID" + << std::setw(12) << "Name" + << std::setw(6) << "Op" + << std::setw(6) << "UFP" + << std::setw(6) << "NSB" + << std::setw(30) << "Recommended Type" + << "\n"; + ostr << std::string(64, '-') << "\n"; + + for (const auto& node : nodes_) { + auto rec = advisor.recommendForNsb(node.nsb_final, node.lo, node.hi); + ostr << std::left + << std::setw(4) << node.id + << std::setw(12) << node.name + << std::setw(6) << to_string(node.op) + << std::setw(6) << node.ufp + << std::setw(6) << node.nsb_final + << std::setw(30) << rec.type.name + << "\n"; + } + ostr << "\n"; + } + + // Access to nodes for LP solver and carry analysis + std::vector& nodes() { return nodes_; } + const std::vector& nodes() const { return nodes_; } + +private: + std::vector nodes_; + + int binary_op(OpKind op, int lhs_id, int rhs_id) { + assert(lhs_id >= 0 && lhs_id < static_cast(nodes_.size())); + assert(rhs_id >= 0 && rhs_id < static_cast(nodes_.size())); + + ExprNode node; + node.op = op; + node.id = static_cast(nodes_.size()); + node.name = std::string(to_string(op)) + std::to_string(node.id); + node.lhs = lhs_id; + node.rhs = rhs_id; + + // Estimate range for binary ops + auto& l = nodes_[static_cast(lhs_id)]; + auto& r = nodes_[static_cast(rhs_id)]; + estimate_range(node, l, r); + + // Register as consumer + l.consumers.push_back(node.id); + r.consumers.push_back(node.id); + + nodes_.push_back(node); + return node.id; + } + + int unary_op(OpKind op, int operand_id) { + assert(operand_id >= 0 && operand_id < static_cast(nodes_.size())); + + ExprNode node; + node.op = op; + node.id = static_cast(nodes_.size()); + node.name = std::string(to_string(op)) + std::to_string(node.id); + node.lhs = operand_id; + node.rhs = -1; + + auto& input = nodes_[static_cast(operand_id)]; + estimate_unary_range(node, input); + + input.consumers.push_back(node.id); + + nodes_.push_back(node); + return node.id; + } + + void estimate_range(ExprNode& z, const ExprNode& x, const ExprNode& y) { + // Conservative interval arithmetic for range estimation + double xlo = x.lo, xhi = x.hi; + double ylo = y.lo, yhi = y.hi; + + switch (z.op) { + case OpKind::Add: + z.lo = xlo + ylo; + z.hi = xhi + yhi; + break; + case OpKind::Sub: + z.lo = xlo - yhi; + z.hi = xhi - ylo; + break; + case OpKind::Mul: { + double a = xlo * ylo, b = xlo * yhi, c = xhi * ylo, d = xhi * yhi; + z.lo = std::min({a, b, c, d}); + z.hi = std::max({a, b, c, d}); + break; + } + case OpKind::Div: { + // Skip division by zero range + if (ylo <= 0.0 && yhi >= 0.0) { + z.lo = -1e100; + z.hi = 1e100; + } else { + double a = xlo / ylo, b = xlo / yhi, c = xhi / ylo, d = xhi / yhi; + z.lo = std::min({a, b, c, d}); + z.hi = std::max({a, b, c, d}); + } + break; + } + default: + z.lo = xlo; + z.hi = xhi; + break; + } + z.ufp = compute_ufp(z.lo, z.hi); + } + + void estimate_unary_range(ExprNode& z, const ExprNode& x) { + switch (z.op) { + case OpKind::Neg: + z.lo = -x.hi; + z.hi = -x.lo; + break; + case OpKind::Abs: + if (x.lo >= 0) { + z.lo = x.lo; + z.hi = x.hi; + } else if (x.hi <= 0) { + z.lo = -x.hi; + z.hi = -x.lo; + } else { + z.lo = 0.0; + z.hi = std::max(-x.lo, x.hi); + } + break; + case OpKind::Sqrt: + z.lo = (x.lo >= 0) ? std::sqrt(x.lo) : 0.0; + z.hi = (x.hi >= 0) ? std::sqrt(x.hi) : 0.0; + break; + default: + z.lo = x.lo; + z.hi = x.hi; + break; + } + z.ufp = compute_ufp(z.lo, z.hi); + } + + void compute_forward(ExprNode& node) { + if (node.op == OpKind::Constant || node.op == OpKind::Variable) { + return; // Leaves keep their initial nsb_forward + } + + if (node.lhs < 0) return; + + auto& l = nodes_[static_cast(node.lhs)]; + + if (node.rhs < 0) { + // Unary operation + switch (node.op) { + case OpKind::Neg: + case OpKind::Abs: + node.nsb_forward = l.nsb_forward; + break; + case OpKind::Sqrt: { + auto pi = forward_sqrt({l.ufp, l.nsb_forward}, node.ufp, node.carry); + node.nsb_forward = pi.nsb; + break; + } + default: + node.nsb_forward = l.nsb_forward; + break; + } + return; + } + + auto& r = nodes_[static_cast(node.rhs)]; + + precision_info lp{l.ufp, l.nsb_forward}; + precision_info rp{r.ufp, r.nsb_forward}; + + switch (node.op) { + case OpKind::Add: { + auto pi = forward_add(lp, rp, node.ufp, node.carry); + node.nsb_forward = pi.nsb; + break; + } + case OpKind::Sub: { + auto pi = forward_sub(lp, rp, node.ufp, node.carry); + node.nsb_forward = pi.nsb; + break; + } + case OpKind::Mul: { + auto pi = forward_mul(lp, rp, node.ufp, node.carry); + node.nsb_forward = pi.nsb; + break; + } + case OpKind::Div: { + auto pi = forward_div(lp, rp, node.ufp, node.carry); + node.nsb_forward = pi.nsb; + break; + } + default: + break; + } + } + + void compute_backward(ExprNode& node) { + // Seed backward nsb from requirements + if (node.nsb_required > 0) { + node.nsb_backward = std::max(node.nsb_backward, node.nsb_required); + } + + // Propagate from all consumers + for (int consumer_id : node.consumers) { + auto& consumer = nodes_[static_cast(consumer_id)]; + int consumer_nsb = consumer.nsb_backward; + if (consumer_nsb <= 0) continue; + + int demanded = compute_backward_demand(consumer, node.id); + node.nsb_backward = std::max(node.nsb_backward, demanded); + } + } + + int compute_backward_demand(const ExprNode& consumer, int input_id) const { + int nsb_z = consumer.nsb_backward; + if (nsb_z <= 0) return 0; + + bool is_lhs = (consumer.lhs == input_id); + + switch (consumer.op) { + case OpKind::Add: + if (is_lhs) { + return backward_add_lhs(nsb_z, consumer.ufp, nodes_[static_cast(input_id)].ufp, consumer.carry); + } else { + return backward_add_rhs(nsb_z, consumer.ufp, nodes_[static_cast(input_id)].ufp, consumer.carry); + } + case OpKind::Sub: + if (is_lhs) { + return backward_sub_lhs(nsb_z, consumer.ufp, nodes_[static_cast(input_id)].ufp, consumer.carry); + } else { + return backward_sub_rhs(nsb_z, consumer.ufp, nodes_[static_cast(input_id)].ufp, consumer.carry); + } + case OpKind::Mul: + if (is_lhs) { + return backward_mul_lhs(nsb_z, consumer.carry); + } else { + return backward_mul_rhs(nsb_z, consumer.carry); + } + case OpKind::Div: + if (is_lhs) { + return backward_div_lhs(nsb_z, consumer.carry); + } else { + return backward_div_rhs(nsb_z, consumer.carry); + } + case OpKind::Neg: + return backward_neg(nsb_z); + case OpKind::Abs: + return backward_abs(nsb_z); + case OpKind::Sqrt: + return backward_sqrt(nsb_z, consumer.carry); + default: + return nsb_z; + } + } +}; + +}} // namespace sw::universal diff --git a/include/sw/universal/mixedprecision/glpk_solver.hpp b/include/sw/universal/mixedprecision/glpk_solver.hpp new file mode 100644 index 000000000..c9e2604e1 --- /dev/null +++ b/include/sw/universal/mixedprecision/glpk_solver.hpp @@ -0,0 +1,159 @@ +#pragma once +// glpk_solver.hpp: optional GLPK binding for POP precision tuning +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project. +// +// Provides a GLPK-backed LP/ILP solver as an alternative to the +// embedded simplex solver. Enable with -DUNIVERSAL_HAS_GLPK. +// +// This header is only included when UNIVERSAL_HAS_GLPK is defined. + +#ifdef UNIVERSAL_HAS_GLPK + +#include +#include +#include +#include + +namespace sw { namespace universal { + +// GLPK-backed solver with ILP capability +// Provides the same interface as SimplexSolver but uses GLPK +// for better performance on larger problems and true integer solutions. +class GlpkSolver { +public: + GlpkSolver() : nvars_(0), prob_(nullptr) {} + + // Rule of 5: raw pointer ownership + GlpkSolver(const GlpkSolver&) = delete; + GlpkSolver& operator=(const GlpkSolver&) = delete; + GlpkSolver(GlpkSolver&& other) noexcept + : nvars_(other.nvars_), objective_(std::move(other.objective_)), + constraints_(std::move(other.constraints_)), + solution_(std::move(other.solution_)), prob_(other.prob_) { + other.prob_ = nullptr; + } + GlpkSolver& operator=(GlpkSolver&& other) noexcept { + if (this != &other) { + if (prob_) glp_delete_prob(prob_); + nvars_ = other.nvars_; + objective_ = std::move(other.objective_); + constraints_ = std::move(other.constraints_); + solution_ = std::move(other.solution_); + prob_ = other.prob_; + other.prob_ = nullptr; + } + return *this; + } + + ~GlpkSolver() { + if (prob_) glp_delete_prob(prob_); + } + + void set_num_vars(int n) { + nvars_ = n; + objective_.assign(static_cast(n), 0.0); + } + + void set_objective(const std::vector& coeffs) { + assert(static_cast(coeffs.size()) == nvars_); + objective_ = coeffs; + } + + void add_ge_constraint(const std::vector& coeffs, double rhs) { + assert(static_cast(coeffs.size()) == nvars_); + Constraint c; + c.coeffs = coeffs; + c.rhs = rhs; + c.type = GLP_LO; + constraints_.push_back(c); + } + + LPStatus solve(int /*max_iterations*/ = 10000) { + if (prob_) glp_delete_prob(prob_); + prob_ = glp_create_prob(); + glp_set_obj_dir(prob_, GLP_MIN); + + int m = static_cast(constraints_.size()); + + // Add columns (decision variables) + glp_add_cols(prob_, nvars_); + for (int j = 1; j <= nvars_; ++j) { + glp_set_col_bnds(prob_, j, GLP_LO, 0.0, 0.0); + glp_set_obj_coef(prob_, j, objective_[static_cast(j - 1)]); + glp_set_col_kind(prob_, j, GLP_IV); // Integer variable + } + + // Add rows (constraints) + glp_add_rows(prob_, m); + for (int i = 0; i < m; ++i) { + auto& con = constraints_[static_cast(i)]; + glp_set_row_bnds(prob_, i + 1, con.type, con.rhs, 0.0); + } + + // Load constraint matrix in sparse format + int nz = m * nvars_; + std::vector ia(static_cast(nz + 1)); + std::vector ja(static_cast(nz + 1)); + std::vector ar(static_cast(nz + 1)); + + int k = 1; + for (int i = 0; i < m; ++i) { + for (int j = 0; j < nvars_; ++j) { + ia[static_cast(k)] = i + 1; + ja[static_cast(k)] = j + 1; + ar[static_cast(k)] = constraints_[static_cast(i)].coeffs[static_cast(j)]; + ++k; + } + } + glp_load_matrix(prob_, nz, ia.data(), ja.data(), ar.data()); + + // Solve LP relaxation first, then ILP + glp_smcp parm; + glp_init_smcp(&parm); + parm.msg_lev = GLP_MSG_OFF; + glp_simplex(prob_, &parm); + + glp_iocp iocp; + glp_init_iocp(&iocp); + iocp.msg_lev = GLP_MSG_OFF; + int ret = glp_intopt(prob_, &iocp); + + if (ret != 0 || glp_mip_status(prob_) != GLP_OPT) { + return LPStatus::Infeasible; + } + + // Extract solution + solution_.assign(static_cast(nvars_), 0.0); + for (int j = 1; j <= nvars_; ++j) { + solution_[static_cast(j - 1)] = glp_mip_col_val(prob_, j); + } + + return LPStatus::Optimal; + } + + double get_value(int var) const { + assert(var >= 0 && var < static_cast(solution_.size())); + return solution_[static_cast(var)]; + } + +private: + struct Constraint { + std::vector coeffs; + double rhs; + int type; + }; + + int nvars_; + std::vector objective_; + std::vector constraints_; + std::vector solution_; + glp_prob* prob_; +}; + +}} // namespace sw::universal + +#endif // UNIVERSAL_HAS_GLPK diff --git a/include/sw/universal/mixedprecision/pop.hpp b/include/sw/universal/mixedprecision/pop.hpp new file mode 100644 index 000000000..bd8b17031 --- /dev/null +++ b/include/sw/universal/mixedprecision/pop.hpp @@ -0,0 +1,63 @@ +#pragma once +// pop.hpp: umbrella header for POP (Precision-Optimized Programs) precision tuning +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project. +// +// POP is a static analysis method for bit-level precision tuning. Given accuracy +// requirements on program outputs, it determines the minimum bits needed at each +// variable/intermediate using forward and backward error transfer functions, +// solved as a Linear Program (LP). +// +// Usage: +// #include +// +// using namespace sw::universal; +// +// // Step 1: Build expression graph +// ExprGraph g; +// int a = g.variable("a", 1.0, 10.0); +// int b = g.variable("b", 1.0, 10.0); +// int c = g.mul(a, b); +// g.require_nsb(c, 16); // require 16 significant bits at output +// +// // Step 2a: Iterative fixpoint analysis (no LP solver) +// g.analyze(); +// +// // Step 2b: OR use LP solver for optimal assignment +// PopSolver solver; +// solver.solve(g); +// +// // Step 3: Optional carry refinement (10-30% reduction) +// CarryAnalyzer ca; +// ca.refine(g); +// +// // Step 4: Generate code +// PopCodeGenerator gen(g); +// std::cout << gen.generateHeader(); +// +// // Step 5: Get type recommendations +// TypeAdvisor advisor; +// g.report(std::cout, advisor); +// +// Reference: Dorra Ben Khalifa, "Fast and Efficient Bit-Level Precision Tuning," +// PhD thesis, Université de Perpignan, 2021. + +// Phase 1: Transfer functions and UFP +#include +#include + +// Phase 2: Expression graph and iterative fixpoint analysis +#include + +// Phase 3: LP solver and optimal bit assignment +#include +#include + +// Phase 4: Carry-bit refinement +#include + +// Phase 5: Code generation +#include diff --git a/include/sw/universal/mixedprecision/pop_solver.hpp b/include/sw/universal/mixedprecision/pop_solver.hpp new file mode 100644 index 000000000..164a6f777 --- /dev/null +++ b/include/sw/universal/mixedprecision/pop_solver.hpp @@ -0,0 +1,213 @@ +#pragma once +// pop_solver.hpp: LP-based optimal bit assignment for POP precision tuning +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project. +// +// The PopSolver translates an ExprGraph into an LP problem: +// +// minimize sum(nsb_i) // minimize total bits +// subject to transfer function constraints at each node +// nsb_i >= nsb_required_i for output nodes +// nsb_i >= 1 for all nodes +// +// This finds the globally optimal bit assignment that minimizes +// total cost while meeting all accuracy requirements. +// +// Reference: Dorra Ben Khalifa, "Fast and Efficient Bit-Level Precision Tuning," +// PhD thesis, Université de Perpignan, 2021, Chapter 5. + +#include + +// Use embedded simplex by default; GLPK if available +#ifdef UNIVERSAL_HAS_GLPK +#include +#else +#include +#endif + +#include +#include +#include +#include + +namespace sw { namespace universal { + +// Unified solver type alias +#ifdef UNIVERSAL_HAS_GLPK +using PopLPSolver = GlpkSolver; +#else +using PopLPSolver = SimplexSolver; +#endif + +class PopSolver { +public: + // Solve the LP and write optimal nsb values back to the graph + bool solve(ExprGraph& graph) { + auto& nodes = graph.nodes(); + int n = static_cast(nodes.size()); + if (n == 0) return false; + + PopLPSolver lp; + lp.set_num_vars(n); + + // Objective: minimize sum of nsb values (uniform weight) + std::vector obj(static_cast(n), 1.0); + lp.set_objective(obj); + + // Constraints from transfer functions + for (int i = 0; i < n; ++i) { + auto& node = nodes[static_cast(i)]; + + // All variables >= 1 (minimum 1 significant bit) + { + std::vector c(static_cast(n), 0.0); + c[static_cast(i)] = 1.0; + lp.add_ge_constraint(c, 1.0); + } + + // User-specified requirements + if (node.nsb_required > 0) { + std::vector c(static_cast(n), 0.0); + c[static_cast(i)] = 1.0; + lp.add_ge_constraint(c, static_cast(node.nsb_required)); + } + + // Transfer function constraints (backward direction) + switch (node.op) { + case OpKind::Add: + case OpKind::Sub: { + // nsb(lhs) >= nsb(z) + ufp(z) - ufp(lhs) + carry + // => nsb(lhs) - nsb(z) >= ufp(z) - ufp(lhs) + carry + int lhs = node.lhs; + int rhs_id = node.rhs; + if (lhs >= 0) { + int ufp_shift = node.ufp - nodes[static_cast(lhs)].ufp + node.carry; + std::vector c(static_cast(n), 0.0); + c[static_cast(lhs)] = 1.0; + c[static_cast(i)] = -1.0; + lp.add_ge_constraint(c, static_cast(ufp_shift)); + } + if (rhs_id >= 0) { + int ufp_shift = node.ufp - nodes[static_cast(rhs_id)].ufp + node.carry; + std::vector c(static_cast(n), 0.0); + c[static_cast(rhs_id)] = 1.0; + c[static_cast(i)] = -1.0; + lp.add_ge_constraint(c, static_cast(ufp_shift)); + } + break; + } + case OpKind::Mul: + case OpKind::Div: { + // nsb(lhs) >= nsb(z) + carry + // => nsb(lhs) - nsb(z) >= carry + int lhs = node.lhs; + int rhs_id = node.rhs; + if (lhs >= 0) { + std::vector c(static_cast(n), 0.0); + c[static_cast(lhs)] = 1.0; + c[static_cast(i)] = -1.0; + lp.add_ge_constraint(c, static_cast(node.carry)); + } + if (rhs_id >= 0) { + std::vector c(static_cast(n), 0.0); + c[static_cast(rhs_id)] = 1.0; + c[static_cast(i)] = -1.0; + lp.add_ge_constraint(c, static_cast(node.carry)); + } + break; + } + case OpKind::Neg: + case OpKind::Abs: { + // nsb(input) >= nsb(z) + int lhs = node.lhs; + if (lhs >= 0) { + std::vector c(static_cast(n), 0.0); + c[static_cast(lhs)] = 1.0; + c[static_cast(i)] = -1.0; + lp.add_ge_constraint(c, 0.0); + } + break; + } + case OpKind::Sqrt: { + // nsb(input) >= nsb(z) + carry + int lhs = node.lhs; + if (lhs >= 0) { + std::vector c(static_cast(n), 0.0); + c[static_cast(lhs)] = 1.0; + c[static_cast(i)] = -1.0; + lp.add_ge_constraint(c, static_cast(node.carry)); + } + break; + } + case OpKind::Constant: + case OpKind::Variable: + // No transfer constraints for leaves + break; + } + } + + // Solve + LPStatus status = lp.solve(); + + if (status != LPStatus::Optimal) { + status_ = status; + return false; + } + + status_ = status; + + // Write back solution (ceil to integer bits) + total_nsb_ = 0.0; + for (int i = 0; i < n; ++i) { + double val = lp.get_value(i); + int nsb = static_cast(std::ceil(val - 1e-9)); // round up, tolerant of floating-point + if (nsb < 1) nsb = 1; + nodes[static_cast(i)].nsb_final = nsb; + total_nsb_ += nsb; + } + + return true; + } + + // Total nsb across all nodes (cost) + double total_nsb() const { return total_nsb_; } + + // LP solver status + LPStatus status() const { return status_; } + + // Print solution report + void report(std::ostream& ostr, const ExprGraph& graph) const { + ostr << "POP LP Solver Results\n"; + ostr << std::string(50, '=') << "\n"; + ostr << "Status: " << to_string(status_) << "\n"; + ostr << "Total NSB: " << total_nsb_ << "\n\n"; + + ostr << std::left + << std::setw(4) << "ID" + << std::setw(12) << "Name" + << std::setw(6) << "Op" + << std::setw(6) << "NSB" + << std::setw(6) << "Req" + << "\n"; + ostr << std::string(34, '-') << "\n"; + + for (const auto& node : graph.nodes()) { + ostr << std::left + << std::setw(4) << node.id + << std::setw(12) << node.name + << std::setw(6) << to_string(node.op) + << std::setw(6) << node.nsb_final + << std::setw(6) << (node.nsb_required >= 0 ? std::to_string(node.nsb_required) : "-") + << "\n"; + } + } + +private: + LPStatus status_{LPStatus::Infeasible}; + double total_nsb_{0.0}; +}; + +}} // namespace sw::universal diff --git a/include/sw/universal/mixedprecision/simplex.hpp b/include/sw/universal/mixedprecision/simplex.hpp new file mode 100644 index 000000000..ec4c6c0fe --- /dev/null +++ b/include/sw/universal/mixedprecision/simplex.hpp @@ -0,0 +1,289 @@ +#pragma once +// simplex.hpp: embedded header-only LP solver for POP precision tuning +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project. +// +// A minimal simplex solver for small linear programs arising from +// POP precision tuning. Solves: +// +// minimize c^T x +// subject to A x >= b +// x >= 0 +// +// Implementation uses the revised simplex method with a dense tableau. +// For small problems (< 100 variables) this is more than adequate. +// For larger problems, link against GLPK or HiGHS. +// +// Reference: Dorra Ben Khalifa, "Fast and Efficient Bit-Level Precision Tuning," +// PhD thesis, Université de Perpignan, 2021, Chapter 5. + +#include +#include +#include +#include +#include +#include + +namespace sw { namespace universal { + +enum class LPStatus { + Optimal, + Infeasible, + Unbounded, + MaxIterations +}; + +inline const char* to_string(LPStatus s) { + switch (s) { + case LPStatus::Optimal: return "Optimal"; + case LPStatus::Infeasible: return "Infeasible"; + case LPStatus::Unbounded: return "Unbounded"; + case LPStatus::MaxIterations: return "MaxIterations"; + default: return "Unknown"; + } +} + +// Minimal LP solver using the two-phase simplex method +// Solves: minimize c^T x subject to A x >= b, x >= 0 +class SimplexSolver { +public: + SimplexSolver() : nvars_(0), status_(LPStatus::Infeasible), obj_value_(0.0) {} + + // Set number of decision variables + void set_num_vars(int n) { + nvars_ = n; + objective_.assign(static_cast(n), 0.0); + } + + // Set the objective function coefficients (minimize) + void set_objective(const std::vector& coeffs) { + assert(static_cast(coeffs.size()) == nvars_); + objective_ = coeffs; + } + + // Add a >= constraint: sum(coeffs[i] * x[i]) >= rhs + void add_ge_constraint(const std::vector& coeffs, double rhs) { + assert(static_cast(coeffs.size()) == nvars_); + Constraint c; + c.coeffs = coeffs; + c.rhs = rhs; + c.kind = ConstraintKind::GE; + constraints_.push_back(c); + } + + // Add a <= constraint: sum(coeffs[i] * x[i]) <= rhs + void add_le_constraint(const std::vector& coeffs, double rhs) { + assert(static_cast(coeffs.size()) == nvars_); + Constraint c; + c.coeffs = coeffs; + c.rhs = rhs; + c.kind = ConstraintKind::LE; + constraints_.push_back(c); + } + + // Add an equality constraint: sum(coeffs[i] * x[i]) == rhs + void add_eq_constraint(const std::vector& coeffs, double rhs) { + add_ge_constraint(coeffs, rhs); + add_le_constraint(coeffs, rhs); + } + + // Solve the LP using the Big-M simplex method + LPStatus solve(int max_iterations = 10000) { + obj_value_ = std::numeric_limits::quiet_NaN(); + status_ = LPStatus::Infeasible; + + int m = static_cast(constraints_.size()); // number of constraints + int n = nvars_; // decision variables + + if (m == 0 || n == 0) { + return status_; + } + + // Build the tableau + // Variables: x[0..n-1] (decision), s[0..m-1] (surplus), a[0..m-1] (artificial) + // Total columns: n + m (surplus) + m (artificial) + 1 (rhs) + int total_vars = n + 2 * m; + int cols = total_vars + 1; // +1 for RHS + int rows = m + 1; // +1 for objective + + // Allocate tableau + std::vector> T(static_cast(rows), + std::vector(static_cast(cols), 0.0)); + + // Big-M penalty + double M = 1e6; + + // Fill constraint rows + for (int i = 0; i < m; ++i) { + auto& con = constraints_[static_cast(i)]; + std::vector coeffs = con.coeffs; + double rhs = con.rhs; + bool is_ge = (con.kind == ConstraintKind::GE); + + // Normalize: ensure RHS >= 0 for a valid initial artificial basis + if (rhs < 0.0) { + for (double& v : coeffs) v = -v; + rhs = -rhs; + is_ge = !is_ge; + } + + // Decision variables + for (int j = 0; j < n; ++j) { + T[static_cast(i)][static_cast(j)] = coeffs[static_cast(j)]; + } + // >= uses surplus (-1), <= uses slack (+1) + T[static_cast(i)][static_cast(n + i)] = is_ge ? -1.0 : 1.0; + // Artificial variable + T[static_cast(i)][static_cast(n + m + i)] = 1.0; + // RHS + T[static_cast(i)][static_cast(total_vars)] = rhs; + } + + // Objective row: minimize c^T x + M * sum(artificial) + for (int j = 0; j < n; ++j) { + T[static_cast(m)][static_cast(j)] = objective_[static_cast(j)]; + } + for (int j = 0; j < m; ++j) { + T[static_cast(m)][static_cast(n + m + j)] = M; + } + + // Subtract M * (constraint rows) from objective to make artificial basis feasible + for (int i = 0; i < m; ++i) { + for (int j = 0; j < cols; ++j) { + T[static_cast(m)][static_cast(j)] -= M * T[static_cast(i)][static_cast(j)]; + } + } + + // Basis tracking: basis[i] = column index of basic variable in row i + std::vector basis(static_cast(m)); + for (int i = 0; i < m; ++i) { + basis[static_cast(i)] = n + m + i; // artificial variables + } + + // Simplex iterations + double eps = 1e-10; + for (int iter = 0; iter < max_iterations; ++iter) { + // Find pivot column: most negative coefficient in objective row + int pivot_col = -1; + double min_val = -eps; + for (int j = 0; j < total_vars; ++j) { + if (T[static_cast(m)][static_cast(j)] < min_val) { + min_val = T[static_cast(m)][static_cast(j)]; + pivot_col = j; + } + } + + if (pivot_col == -1) { + // Optimal - check if artificial variables are zero + bool feasible = true; + for (int i = 0; i < m; ++i) { + if (basis[static_cast(i)] >= n + m) { + if (std::abs(T[static_cast(i)][static_cast(total_vars)]) > eps) { + feasible = false; + break; + } + } + } + if (feasible) { + status_ = LPStatus::Optimal; + } else { + status_ = LPStatus::Infeasible; + } + break; + } + + // Find pivot row: minimum ratio test + int pivot_row = -1; + double min_ratio = std::numeric_limits::max(); + for (int i = 0; i < m; ++i) { + double aij = T[static_cast(i)][static_cast(pivot_col)]; + if (aij > eps) { + double ratio = T[static_cast(i)][static_cast(total_vars)] / aij; + if (ratio < min_ratio) { + min_ratio = ratio; + pivot_row = i; + } + } + } + + if (pivot_row == -1) { + status_ = LPStatus::Unbounded; + break; + } + + // Pivot + double pivot_elem = T[static_cast(pivot_row)][static_cast(pivot_col)]; + for (int j = 0; j < cols; ++j) { + T[static_cast(pivot_row)][static_cast(j)] /= pivot_elem; + } + for (int i = 0; i <= m; ++i) { + if (i == pivot_row) continue; + double factor = T[static_cast(i)][static_cast(pivot_col)]; + if (std::abs(factor) > eps) { + for (int j = 0; j < cols; ++j) { + T[static_cast(i)][static_cast(j)] -= factor * T[static_cast(pivot_row)][static_cast(j)]; + } + } + } + + basis[static_cast(pivot_row)] = pivot_col; + + if (iter == max_iterations - 1) { + status_ = LPStatus::MaxIterations; + } + } + + // Extract solution + solution_.assign(static_cast(n), 0.0); + if (status_ == LPStatus::Optimal) { + for (int i = 0; i < m; ++i) { + int b = basis[static_cast(i)]; + if (b < n) { + solution_[static_cast(b)] = T[static_cast(i)][static_cast(total_vars)]; + } + } + // Compute objective value + obj_value_ = 0.0; + for (int j = 0; j < n; ++j) { + obj_value_ += objective_[static_cast(j)] * solution_[static_cast(j)]; + } + } else { + obj_value_ = std::numeric_limits::quiet_NaN(); + } + + return status_; + } + + // Get solution value for variable i + double get_value(int var) const { + assert(var >= 0 && var < static_cast(solution_.size())); + return solution_[static_cast(var)]; + } + + // Get optimal objective value + double objective_value() const { return obj_value_; } + + // Get solver status + LPStatus status() const { return status_; } + +private: + enum class ConstraintKind { GE, LE }; + + struct Constraint { + std::vector coeffs; + double rhs; + ConstraintKind kind; + }; + + int nvars_; + std::vector objective_; + std::vector constraints_; + std::vector solution_; + LPStatus status_; + double obj_value_; +}; + +}} // namespace sw::universal diff --git a/include/sw/universal/mixedprecision/transfer.hpp b/include/sw/universal/mixedprecision/transfer.hpp new file mode 100644 index 000000000..f127212d9 --- /dev/null +++ b/include/sw/universal/mixedprecision/transfer.hpp @@ -0,0 +1,166 @@ +#pragma once +// transfer.hpp: forward and backward error transfer functions for POP precision tuning +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project. +// +// POP (Precision-Optimized Programs) uses transfer functions to propagate +// precision information through arithmetic expressions. Each operation has: +// +// Forward: Given input precisions, compute output precision +// Backward: Given required output precision, compute required input precisions +// +// Precision is expressed as (ufp, nsb) pairs: +// ufp = unit in the first place = floor(log2(|x|)) +// nsb = number of significant bits = -log2(relative_error) +// +// The lsb (least significant bit) position is: lsb = ufp - nsb + 1 +// +// Reference: Dorra Ben Khalifa, "Fast and Efficient Bit-Level Precision Tuning," +// PhD thesis, Université de Perpignan, 2021, Chapter 4. + +#include + +namespace sw { namespace universal { + +// Precision descriptor for a value +struct precision_info { + int ufp; // unit in the first place: floor(log2(|x|)) + int nsb; // number of significant bits + + constexpr int lsb() const { return ufp - nsb + 1; } +}; + +// ============================================================================ +// Forward transfer functions +// Given input precisions, compute output precision +// ============================================================================ + +// Forward transfer for addition: z = x + y +// The result ufp is provided externally (from range analysis) +// carry: 1 (conservative default) or 0 (refined via carry analysis) +constexpr precision_info forward_add(precision_info x, precision_info y, int ufp_z, int carry = 1) { + // lsb(z) = min(lsb(x), lsb(y)) (the finest granularity of either operand) + int lsb_z = std::min(x.lsb(), y.lsb()); + // nsb(z) = ufp(z) - lsb(z) + 1 + carry + int nsb_z = ufp_z - lsb_z + 1 + carry; + return { ufp_z, nsb_z }; +} + +// Forward transfer for subtraction: z = x - y +// Identical to addition (subtraction has same error propagation) +constexpr precision_info forward_sub(precision_info x, precision_info y, int ufp_z, int carry = 1) { + return forward_add(x, y, ufp_z, carry); +} + +// Forward transfer for multiplication: z = x * y +// carry: 1 (conservative default) or 0 (refined via carry analysis) +constexpr precision_info forward_mul(precision_info x, precision_info y, int carry = 1) { + // ufp(z) = ufp(x) + ufp(y) (may differ slightly; use range-based ufp if available) + int ufp_z = x.ufp + y.ufp; + // nsb(z) = nsb(x) + nsb(y) + carry + int nsb_z = x.nsb + y.nsb + carry; + return { ufp_z, nsb_z }; +} + +// Forward transfer for multiplication with explicit ufp_z (from range analysis) +constexpr precision_info forward_mul(precision_info x, precision_info y, int ufp_z, int carry) { + int nsb_z = x.nsb + y.nsb + carry; + return { ufp_z, nsb_z }; +} + +// Forward transfer for division: z = x / y +// carry: 1 (conservative default) or 0 (refined via carry analysis) +constexpr precision_info forward_div(precision_info x, precision_info y, int carry = 1) { + int ufp_z = x.ufp - y.ufp; + int nsb_z = x.nsb + y.nsb + carry; + return { ufp_z, nsb_z }; +} + +// Forward transfer for division with explicit ufp_z +constexpr precision_info forward_div(precision_info x, precision_info y, int ufp_z, int carry) { + int nsb_z = x.nsb + y.nsb + carry; + return { ufp_z, nsb_z }; +} + +// Forward transfer for negation: z = -x (precision unchanged) +constexpr precision_info forward_neg(precision_info x) { + return x; +} + +// Forward transfer for absolute value: z = |x| (precision unchanged) +constexpr precision_info forward_abs(precision_info x) { + return x; +} + +// Forward transfer for square root: z = sqrt(x) +// nsb(z) = nsb(x) + carry, ufp(z) ~= ufp(x)/2 +constexpr precision_info forward_sqrt(precision_info x, int ufp_z, int carry = 1) { + int nsb_z = x.nsb + carry; + return { ufp_z, nsb_z }; +} + +// ============================================================================ +// Backward transfer functions +// Given required output precision, compute required input precisions +// ============================================================================ + +// Backward for addition z = x + y: required nsb for x given nsb(z) +// nsb(x) >= nsb(z) + ufp(z) - ufp(x) + carry +constexpr int backward_add_lhs(int nsb_z, int ufp_z, int ufp_x, int carry = 1) { + return nsb_z + ufp_z - ufp_x + carry; +} + +// Backward for addition z = x + y: required nsb for y +constexpr int backward_add_rhs(int nsb_z, int ufp_z, int ufp_y, int carry = 1) { + return nsb_z + ufp_z - ufp_y + carry; +} + +// Backward for subtraction z = x - y: identical to addition +constexpr int backward_sub_lhs(int nsb_z, int ufp_z, int ufp_x, int carry = 1) { + return backward_add_lhs(nsb_z, ufp_z, ufp_x, carry); +} + +constexpr int backward_sub_rhs(int nsb_z, int ufp_z, int ufp_y, int carry = 1) { + return backward_add_rhs(nsb_z, ufp_z, ufp_y, carry); +} + +// Backward for multiplication z = x * y: required nsb for x +// nsb(x) >= nsb(z) + carry (independent of ufp values for multiplication) +constexpr int backward_mul_lhs(int nsb_z, int carry = 1) { + return nsb_z + carry; +} + +// Backward for multiplication z = x * y: required nsb for y +constexpr int backward_mul_rhs(int nsb_z, int carry = 1) { + return nsb_z + carry; +} + +// Backward for division z = x / y: required nsb for x +constexpr int backward_div_lhs(int nsb_z, int carry = 1) { + return nsb_z + carry; +} + +// Backward for division z = x / y: required nsb for y +constexpr int backward_div_rhs(int nsb_z, int carry = 1) { + return nsb_z + carry; +} + +// Backward for negation z = -x: nsb(x) = nsb(z) +constexpr int backward_neg(int nsb_z) { + return nsb_z; +} + +// Backward for absolute value z = |x|: nsb(x) = nsb(z) +constexpr int backward_abs(int nsb_z) { + return nsb_z; +} + +// Backward for square root z = sqrt(x): nsb(x) >= nsb(z) + carry +constexpr int backward_sqrt(int nsb_z, int carry = 1) { + return nsb_z + carry; +} + +}} // namespace sw::universal diff --git a/include/sw/universal/mixedprecision/ufp.hpp b/include/sw/universal/mixedprecision/ufp.hpp new file mode 100644 index 000000000..c9a7be5bb --- /dev/null +++ b/include/sw/universal/mixedprecision/ufp.hpp @@ -0,0 +1,70 @@ +#pragma once +// ufp.hpp: unit in the first place (UFP) computation for POP precision tuning +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project. +// +// The UFP (unit in the first place) of a value x is defined as: +// ufp(x) = 2^floor(log2(|x|)) for x != 0 +// ufp(0) = 0 +// +// In integer form, compute_ufp(x) returns floor(log2(|x|)), +// which is the exponent of the most significant bit. +// +// Reference: Dorra Ben Khalifa, "Fast and Efficient Bit-Level Precision Tuning," +// PhD thesis, Université de Perpignan, 2021. + +#include +#include +#include +#include +#include + +namespace sw { namespace universal { + +// Forward declaration of range_analyzer (include range_analyzer.hpp for full definition) +template class range_analyzer; + +// compute_ufp returns floor(log2(|x|)) for x != 0 +// For x == 0 returns a sentinel value (INT_MIN) +inline int compute_ufp(double x) { + if (x == 0.0) return std::numeric_limits::min(); + double ax = std::abs(x); + // Extract the IEEE 754 exponent via bit manipulation + uint64_t bits; + std::memcpy(&bits, &ax, sizeof(bits)); + int biased_exp = static_cast((bits >> 52) & 0x7FF); + // Normal number: exponent = biased_exp - 1023 + // Subnormal: use frexp-style fallback + if (biased_exp == 0) { + // subnormal + int exp; + std::frexp(ax, &exp); + return exp - 1; + } + return biased_exp - 1023; +} + +// compute_ufp for float +inline int compute_ufp(float x) { + return compute_ufp(static_cast(x)); +} + +// compute_ufp from a range: returns ufp of the maximum absolute value +inline int compute_ufp(double lo, double hi) { + double abs_max = std::max(std::abs(lo), std::abs(hi)); + if (abs_max == 0.0) return std::numeric_limits::min(); + return compute_ufp(abs_max); +} + +// Bridge to range_analyzer: extract ufp from a range_analyzer's maxScale +// (maxScale already returns the same value as compute_ufp for the largest observed value) +// Usage: int ufp = ufp_from_analyzer(analyzer); +template +int ufp_from_analyzer(const range_analyzer& analyzer) { + return analyzer.maxScale(); +} + +}} // namespace sw::universal diff --git a/include/sw/universal/utility/range_analyzer.hpp b/include/sw/universal/utility/range_analyzer.hpp index d3374e201..fbce58845 100644 --- a/include/sw/universal/utility/range_analyzer.hpp +++ b/include/sw/universal/utility/range_analyzer.hpp @@ -175,6 +175,8 @@ class range_analyzer { /// Get observed scale (exponent) range int minScale() const { return min_scale_; } int maxScale() const { return max_scale_; } + /// Convenience alias for POP integration: ufp of the largest observed value + int ufp() const { return maxScale(); } int scaleRange() const { if (min_scale_ == std::numeric_limits::max()) return 0; return max_scale_ - min_scale_ + 1; diff --git a/include/sw/universal/utility/type_advisor.hpp b/include/sw/universal/utility/type_advisor.hpp index 206d79b9a..9ec70fb00 100644 --- a/include/sw/universal/utility/type_advisor.hpp +++ b/include/sw/universal/utility/type_advisor.hpp @@ -168,6 +168,60 @@ class TypeAdvisor { return recs[0]; } + /// Recommend a type based on required number of significant bits and value range + /// This is the bridge from POP analysis to type selection + TypeRecommendation recommendForNsb(int nsb, double lo, double hi) const { + double rel_err = std::pow(2.0, -nsb); + double abs_max = std::max(std::abs(lo), std::abs(hi)); + double abs_min = (lo <= 0.0 && hi >= 0.0) ? 0.0 : std::min(std::abs(lo), std::abs(hi)); + + AccuracyRequirement acc(rel_err); + + TypeRecommendation best; + best.suitability_score = -1.0; + + for (const auto& type : types_) { + TypeRecommendation rec; + rec.type = type; + rec.meets_range = evaluateRange(type, abs_max, abs_min, 0); + rec.meets_accuracy = (type.fraction_bits >= nsb); + rec.estimated_energy = type.energy_per_fma / 1.5; + + // Score: prefer smallest type that meets requirements + double score = 0.0; + if (rec.meets_range) score += 40.0; + if (rec.meets_accuracy) score += 40.0; + // Prefer smaller bit widths + score += std::max(0.0, 20.0 - type.total_bits * 0.25); + rec.suitability_score = score; + rec.rationale = rec.meets_accuracy && rec.meets_range + ? "Meets " + std::to_string(nsb) + "-bit requirement" + : "Insufficient precision or range"; + + if (rec.meets_accuracy && rec.meets_range && rec.suitability_score > best.suitability_score) { + best = rec; + } + } + + // If nothing meets requirements, return the largest type available + if (best.suitability_score < 0 && !types_.empty()) { + auto it = std::max_element( + types_.begin(), types_.end(), + [](const TypeCharacteristics& a, const TypeCharacteristics& b) { + if (a.total_bits != b.total_bits) return a.total_bits < b.total_bits; + return a.max_value < b.max_value; + }); + best.type = *it; + best.suitability_score = 0; + best.meets_accuracy = false; + best.meets_range = false; + best.rationale = "No type satisfies both range and " + std::to_string(nsb) + "-bit requirement"; + best.estimated_energy = best.type.energy_per_fma / 1.5; + } + + return best; + } + /// Print recommendations report template void report(std::ostream& ostr, diff --git a/mixedprecision/pop/CMakeLists.txt b/mixedprecision/pop/CMakeLists.txt new file mode 100644 index 000000000..79c610a04 --- /dev/null +++ b/mixedprecision/pop/CMakeLists.txt @@ -0,0 +1,3 @@ +file (GLOB SOURCES "./*.cpp") + +compile_all("true" "pop" "Mixed-Precision/POP" "${SOURCES}") diff --git a/mixedprecision/pop/test_carry_analysis.cpp b/mixedprecision/pop/test_carry_analysis.cpp new file mode 100644 index 000000000..a03888d82 --- /dev/null +++ b/mixedprecision/pop/test_carry_analysis.cpp @@ -0,0 +1,178 @@ +// test_carry_analysis.cpp: validate carry-bit refinement via policy iteration +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project. + +#include +#include +#include +#include +#include + +namespace sw { namespace universal { + +// Test that carry analysis converges +int TestConvergence() { + int nrOfFailedTestCases = 0; + + ExprGraph g; + int x = g.variable("x", 1.0, 8.0); + int y = g.variable("y", 1.0, 8.0); + int z = g.mul(x, y); + + g.require_nsb(z, 10); + + CarryAnalyzer ca; + int iters = ca.refine(g); + + std::cout << "Simple mul converged in " << iters << " iterations\n"; + ca.report(std::cout, g); + + // Should converge (not exceed max iterations) + if (iters >= 10) { + std::cerr << "FAIL: carry analysis did not converge" << std::endl; + ++nrOfFailedTestCases; + } + + // z should still meet its requirement + if (g.get_nsb(z) < 10) { + std::cerr << "FAIL: z requirement not met after carry analysis" << std::endl; + ++nrOfFailedTestCases; + } + + return nrOfFailedTestCases; +} + +// Test that carry refinement can reduce total bits +int TestBitReduction() { + int nrOfFailedTestCases = 0; + + // Build graph: det = a*d - b*c + ExprGraph g; + int a = g.variable("a", 8.0, 12.0); + int b = g.variable("b", 8.0, 12.0); + int c = g.variable("c", 8.0, 12.0); + int d = g.variable("d", 8.0, 12.0); + int ad = g.mul(a, d); + int bc = g.mul(b, c); + int det = g.sub(ad, bc); + + g.require_nsb(det, 20); + + // Solve with conservative carries (all 1) + PopSolver conservative; + { + ExprGraph g2; + int a2 = g2.variable("a", 8.0, 12.0); + int b2 = g2.variable("b", 8.0, 12.0); + int c2 = g2.variable("c", 8.0, 12.0); + int d2 = g2.variable("d", 8.0, 12.0); + int ad2 = g2.mul(a2, d2); + int bc2 = g2.mul(b2, c2); + int det2 = g2.sub(ad2, bc2); + g2.require_nsb(det2, 20); + conservative.solve(g2); + std::cout << "Conservative total: " << conservative.total_nsb() << "\n"; + } + + // Solve with carry refinement + CarryAnalyzer ca; + ca.refine(g); + + double refined_total = 0; + for (int i = 0; i < g.size(); ++i) { + refined_total += g.get_nsb(i); + } + + std::cout << "Refined total: " << refined_total << "\n"; + ca.report(std::cout, g); + + // Refined should be <= conservative + if (refined_total > conservative.total_nsb() + 1) { + std::cerr << "FAIL: refined total exceeds conservative" << std::endl; + ++nrOfFailedTestCases; + } + + // Output should still meet requirement + if (g.get_nsb(det) < 20) { + std::cerr << "FAIL: det requirement not met after carry refinement" << std::endl; + ++nrOfFailedTestCases; + } + + return nrOfFailedTestCases; +} + +// Test chain with addition (where carry analysis is most effective) +int TestAdditionChain() { + int nrOfFailedTestCases = 0; + + // z = (a + b) + c with values of very different magnitudes + ExprGraph g; + int a = g.variable("a", 1000.0, 2000.0); // ufp ~= 10 + int b = g.variable("b", 0.001, 0.002); // ufp ~= -10 + int c = g.variable("c", 1000.0, 2000.0); // ufp ~= 10 + + int ab = g.add(a, b); + int z = g.add(ab, c); + + g.require_nsb(z, 12); + + CarryAnalyzer ca; + int iters = ca.refine(g); + + std::cout << "Addition chain converged in " << iters << " iterations\n"; + ca.report(std::cout, g); + + if (g.get_nsb(z) < 12) { + std::cerr << "FAIL: addition chain z requirement not met" << std::endl; + ++nrOfFailedTestCases; + } + + return nrOfFailedTestCases; +} + +}} // namespace sw::universal + +#define TEST_CASE(name, func) \ + do { \ + int fails = func; \ + if (fails) { \ + std::cout << name << ": FAIL (" << fails << " errors)" << std::endl; \ + nrOfFailedTestCases += fails; \ + } else { \ + std::cout << name << ": PASS" << std::endl; \ + } \ + } while(0) + +int main() +try { + using namespace sw::universal; + + int nrOfFailedTestCases = 0; + + std::cout << "POP Carry Analysis Tests\n"; + std::cout << std::string(40, '=') << "\n\n"; + + TEST_CASE("Convergence", TestConvergence()); + TEST_CASE("Bit reduction", TestBitReduction()); + TEST_CASE("Addition chain", TestAdditionChain()); + + std::cout << "\n"; + if (nrOfFailedTestCases == 0) { + std::cout << "All carry analysis tests PASSED\n"; + } else { + std::cout << nrOfFailedTestCases << " test(s) FAILED\n"; + } + + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); +} +catch (const char* msg) { + std::cerr << "Caught exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (...) { + std::cerr << "Caught unknown exception" << std::endl; + return EXIT_FAILURE; +} diff --git a/mixedprecision/pop/test_codegen.cpp b/mixedprecision/pop/test_codegen.cpp new file mode 100644 index 000000000..240390d32 --- /dev/null +++ b/mixedprecision/pop/test_codegen.cpp @@ -0,0 +1,166 @@ +// test_codegen.cpp: validate POP code generation +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project. + +#include +#include +#include +#include + +namespace sw { namespace universal { + +// Test header generation +int TestHeaderGeneration() { + int nrOfFailedTestCases = 0; + + ExprGraph g; + int a = g.variable("a", 1.0, 10.0); + int b = g.variable("b", 1.0, 10.0); + int c = g.mul(a, b); + (void)a; (void)b; + + g.require_nsb(c, 10); + + PopSolver solver; + solver.solve(g); + + PopCodeGenerator gen(g); + std::string header = gen.generateHeader(); + + // Should contain #pragma once and type aliases + if (header.find("#pragma once") == std::string::npos) { + std::cerr << "FAIL: header missing #pragma once" << std::endl; + ++nrOfFailedTestCases; + } + if (header.find("type_a") == std::string::npos) { + std::cerr << "FAIL: header missing type_a alias" << std::endl; + ++nrOfFailedTestCases; + } + if (header.find("type_b") == std::string::npos) { + std::cerr << "FAIL: header missing type_b alias" << std::endl; + ++nrOfFailedTestCases; + } + if (header.find("sw::universal::") == std::string::npos) { + std::cerr << "FAIL: header missing sw::universal:: prefix" << std::endl; + ++nrOfFailedTestCases; + } + + std::cout << "Generated header:\n" << header << "\n"; + + return nrOfFailedTestCases; +} + +// Test report generation +int TestReportGeneration() { + int nrOfFailedTestCases = 0; + + ExprGraph g; + int a = g.variable("a", 1.0, 10.0); + int b = g.variable("b", 1.0, 10.0); + int c = g.mul(a, b); + (void)a; (void)b; + + g.require_nsb(c, 10); + + PopSolver solver; + solver.solve(g); + + PopCodeGenerator gen(g); + std::string report = gen.generateReport(); + + if (report.find("POP Precision Analysis Report") == std::string::npos) { + std::cerr << "FAIL: report missing title" << std::endl; + ++nrOfFailedTestCases; + } + if (report.find("Bit savings") == std::string::npos) { + std::cerr << "FAIL: report missing savings calculation" << std::endl; + ++nrOfFailedTestCases; + } + + std::cout << report << "\n"; + + return nrOfFailedTestCases; +} + +// Test example code generation +int TestExampleCodeGeneration() { + int nrOfFailedTestCases = 0; + + ExprGraph g; + int a = g.variable("a", 1.0, 10.0); + int b = g.variable("b", 1.0, 10.0); + int c = g.mul(a, b); + (void)a; (void)b; + + g.require_nsb(c, 10); + + PopSolver solver; + solver.solve(g); + + PopCodeGenerator gen(g); + std::string code = gen.generateExampleCode("multiply"); + + if (code.find("auto multiply(") == std::string::npos) { + std::cerr << "FAIL: code missing function signature" << std::endl; + ++nrOfFailedTestCases; + } + if (code.find("a * b") == std::string::npos) { + std::cerr << "FAIL: code missing multiplication" << std::endl; + ++nrOfFailedTestCases; + } + if (code.find("return") == std::string::npos) { + std::cerr << "FAIL: code missing return statement" << std::endl; + ++nrOfFailedTestCases; + } + + std::cout << "Generated code:\n" << code << "\n"; + + return nrOfFailedTestCases; +} + +}} // namespace sw::universal + +#define TEST_CASE(name, func) \ + do { \ + int fails = func; \ + if (fails) { \ + std::cout << name << ": FAIL (" << fails << " errors)" << std::endl; \ + nrOfFailedTestCases += fails; \ + } else { \ + std::cout << name << ": PASS" << std::endl; \ + } \ + } while(0) + +int main() +try { + using namespace sw::universal; + + int nrOfFailedTestCases = 0; + + std::cout << "POP Code Generation Tests\n"; + std::cout << std::string(40, '=') << "\n\n"; + + TEST_CASE("Header generation", TestHeaderGeneration()); + TEST_CASE("Report generation", TestReportGeneration()); + TEST_CASE("Example code generation", TestExampleCodeGeneration()); + + std::cout << "\n"; + if (nrOfFailedTestCases == 0) { + std::cout << "All code generation tests PASSED\n"; + } else { + std::cout << nrOfFailedTestCases << " test(s) FAILED\n"; + } + + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); +} +catch (const char* msg) { + std::cerr << "Caught exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (...) { + std::cerr << "Caught unknown exception" << std::endl; + return EXIT_FAILURE; +} diff --git a/mixedprecision/pop/test_complete_workflow.cpp b/mixedprecision/pop/test_complete_workflow.cpp new file mode 100644 index 000000000..743bcfe60 --- /dev/null +++ b/mixedprecision/pop/test_complete_workflow.cpp @@ -0,0 +1,262 @@ +// test_complete_workflow.cpp: end-to-end POP precision tuning workflow +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project. +// +// Demonstrates the complete POP workflow: +// 1. Profile with range_analyzer (dynamic analysis) +// 2. Build expression graph +// 3. Run LP-based optimal bit assignment +// 4. Refine carries +// 5. Generate mixed-precision code +// +// Uses the umbrella header . + +#include +#include +#include +#include +#include +#include + +namespace sw { namespace universal { + +// Simulate profiling a dot product: result = sum(a[i] * b[i]) +// This models a real workflow where range_analyzer tracks observed values +int TestDotProductWorkflow() { + int nrOfFailedTestCases = 0; + + std::cout << "=== Dot Product Workflow ===\n\n"; + + // Step 1: Profile with range_analyzer + std::cout << "Step 1: Dynamic profiling with range_analyzer\n"; + + range_analyzer ra_a, ra_b, ra_product, ra_sum; + + // Simulate a dot product of 4 elements + double a[] = { 1.5, -2.3, 0.7, 3.1 }; + double b[] = { 4.2, 1.8, 5.5, -0.9 }; + + double sum = 0.0; + for (int i = 0; i < 4; ++i) { + ra_a.observe(a[i]); + ra_b.observe(b[i]); + double prod = a[i] * b[i]; + ra_product.observe(prod); + sum += prod; + ra_sum.observe(sum); + } + + std::cout << " a range: [" << static_cast(ra_a.minValue()) << ", " + << static_cast(ra_a.maxValue()) << "], ufp=" << ra_a.ufp() << "\n"; + std::cout << " b range: [" << static_cast(ra_b.minValue()) << ", " + << static_cast(ra_b.maxValue()) << "], ufp=" << ra_b.ufp() << "\n"; + std::cout << " product range: [" << static_cast(ra_product.minValue()) << ", " + << static_cast(ra_product.maxValue()) << "], ufp=" << ra_product.ufp() << "\n"; + std::cout << " result = " << sum << "\n\n"; + + // Step 2: Build expression graph + std::cout << "Step 2: Build expression graph\n"; + + ExprGraph g; + + // Model: result = a0*b0 + a1*b1 + a2*b2 + a3*b3 + int va = g.variable("a", ra_a); + int vb = g.variable("b", ra_b); + int prod = g.mul(va, vb); + int accum = prod; // start accumulation + + // Chain addition (simplified model for dot product) + for (int i = 1; i < 4; ++i) { + std::string ai = "a" + std::to_string(i); + std::string bi = "b" + std::to_string(i); + int a_i = g.variable(ai, ra_a); + int b_i = g.variable(bi, ra_b); + int p_i = g.mul(a_i, b_i); + accum = g.add(accum, p_i); + } + + // Require 16 significant bits at output + g.require_nsb(accum, 16); + + std::cout << " Graph has " << g.size() << " nodes\n\n"; + + // Step 3: LP-based optimal bit assignment + std::cout << "Step 3: LP-based optimal bit assignment\n"; + + PopSolver solver; + bool ok = solver.solve(g); + if (!ok) { + std::cerr << "FAIL: LP solver failed" << std::endl; + ++nrOfFailedTestCases; + return nrOfFailedTestCases; + } + + solver.report(std::cout, g); + std::cout << "\n"; + + // Step 4: Carry refinement + std::cout << "Step 4: Carry-bit refinement\n"; + + double pre_total = solver.total_nsb(); + CarryAnalyzer ca; + ca.refine(g); + + double post_total = 0; + for (int i = 0; i < g.size(); ++i) post_total += g.get_nsb(i); + + ca.report(std::cout, g); + std::cout << " Pre-refinement total: " << pre_total << "\n"; + std::cout << " Post-refinement total: " << post_total << "\n\n"; + + // Step 5: Code generation + std::cout << "Step 5: Code generation\n"; + + TypeAdvisor advisor; + PopCodeGenerator gen(g, advisor); + + std::cout << gen.generateReport() << "\n"; + std::cout << gen.generateHeader("DOT_PRODUCT_PRECISION_HPP") << "\n"; + + // Verify output meets requirement + if (g.get_nsb(accum) < 16) { + std::cerr << "FAIL: output does not meet 16-bit requirement" << std::endl; + ++nrOfFailedTestCases; + } + + return nrOfFailedTestCases; +} + +// Simpson integration workflow: model f(x) = x^2 integrated on [0,1] +// Simpson: (h/3) * (f(a) + 4*f(m) + f(b)) +int TestSimpsonWorkflow() { + int nrOfFailedTestCases = 0; + + std::cout << "=== Simpson Integration Workflow ===\n\n"; + + ExprGraph g; + + // h = (b-a)/2 = 0.5 + int h = g.constant(0.5, "h"); + + // f(a) = 0^2 = 0, f(m) = 0.5^2 = 0.25, f(b) = 1^2 = 1 + int fa = g.constant(0.0, "fa"); + int fm = g.variable("fm", 0.2, 0.3); // f at midpoint + int fb = g.variable("fb", 0.9, 1.1); // f at endpoint + + // Simpson formula: (h/3) * (fa + 4*fm + fb) + int four = g.constant(4.0, "four"); + int three = g.constant(3.0, "three"); + + int fm4 = g.mul(four, fm); // 4 * f(m) + int sum1 = g.add(fa, fm4); // f(a) + 4*f(m) + int sum2 = g.add(sum1, fb); // f(a) + 4*f(m) + f(b) + int h_div_3 = g.div(h, three); // h/3 + int result = g.mul(h_div_3, sum2); // (h/3) * (...) + + // Require 24 bits at the integration result + g.require_nsb(result, 24); + + // Run full POP analysis + PopSolver solver; + bool ok = solver.solve(g); + if (!ok) { + std::cerr << "FAIL: Simpson LP solver failed" << std::endl; + ++nrOfFailedTestCases; + return nrOfFailedTestCases; + } + + // Carry refinement + CarryAnalyzer ca; + ca.refine(g); + + // Report + TypeAdvisor advisor; + g.report(std::cout, advisor); + + PopCodeGenerator gen(g, advisor); + std::cout << gen.generateReport() << "\n"; + + if (g.get_nsb(result) < 24) { + std::cerr << "FAIL: Simpson result does not meet 24-bit requirement" << std::endl; + ++nrOfFailedTestCases; + } + + std::cout << "Simpson integration: PASS\n\n"; + + return nrOfFailedTestCases; +} + +// Test iterative fixpoint (no LP) workflow using pop.hpp umbrella +int TestFixpointOnlyWorkflow() { + int nrOfFailedTestCases = 0; + + std::cout << "=== Fixpoint-Only Workflow (no LP) ===\n\n"; + + ExprGraph g; + int x = g.variable("x", 1.0, 100.0); + int y = g.variable("y", 1.0, 100.0); + int z = g.mul(x, y); + int w = g.add(z, x); + + g.require_nsb(w, 20); + + // Use iterative fixpoint analysis (Phase 2 only, no LP) + g.analyze(); + + std::cout << "Fixpoint analysis:\n"; + g.report(std::cout); + + if (g.get_nsb(w) < 20) { + std::cerr << "FAIL: fixpoint output does not meet requirement" << std::endl; + ++nrOfFailedTestCases; + } + + return nrOfFailedTestCases; +} + +}} // namespace sw::universal + +#define TEST_CASE(name, func) \ + do { \ + int fails = func; \ + if (fails) { \ + std::cout << name << ": FAIL (" << fails << " errors)" << std::endl; \ + nrOfFailedTestCases += fails; \ + } else { \ + std::cout << name << ": PASS" << std::endl; \ + } \ + } while(0) + +int main() +try { + using namespace sw::universal; + + int nrOfFailedTestCases = 0; + + std::cout << "POP Complete Workflow Tests\n"; + std::cout << std::string(50, '=') << "\n\n"; + + TEST_CASE("Dot product workflow", TestDotProductWorkflow()); + TEST_CASE("Simpson integration workflow", TestSimpsonWorkflow()); + TEST_CASE("Fixpoint-only workflow", TestFixpointOnlyWorkflow()); + + std::cout << "\n"; + if (nrOfFailedTestCases == 0) { + std::cout << "All complete workflow tests PASSED\n"; + } else { + std::cout << nrOfFailedTestCases << " test(s) FAILED\n"; + } + + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); +} +catch (const char* msg) { + std::cerr << "Caught exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (...) { + std::cerr << "Caught unknown exception" << std::endl; + return EXIT_FAILURE; +} diff --git a/mixedprecision/pop/test_expression_graph.cpp b/mixedprecision/pop/test_expression_graph.cpp new file mode 100644 index 000000000..8c89ecad7 --- /dev/null +++ b/mixedprecision/pop/test_expression_graph.cpp @@ -0,0 +1,255 @@ +// test_expression_graph.cpp: validate ExprGraph construction and analysis +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project. +// +// Tests the expression graph DAG builder, forward/backward analysis, +// and integration with TypeAdvisor for type recommendations. + +#include +#include +#include +#include +#include + +namespace sw { namespace universal { + +// Test basic graph construction +int TestGraphConstruction() { + int nrOfFailedTestCases = 0; + + ExprGraph g; + int a = g.variable("a", 1.0, 10.0); + int b = g.variable("b", 1.0, 10.0); + int c = g.add(a, b); + + if (g.size() != 3) { + std::cerr << "FAIL: expected 3 nodes, got " << g.size() << std::endl; + ++nrOfFailedTestCases; + } + + auto& node_c = g.get_node(c); + if (node_c.op != OpKind::Add) { + std::cerr << "FAIL: expected Add op" << std::endl; + ++nrOfFailedTestCases; + } + if (node_c.lhs != a || node_c.rhs != b) { + std::cerr << "FAIL: wrong input edges" << std::endl; + ++nrOfFailedTestCases; + } + + return nrOfFailedTestCases; +} + +// Test the determinant example: det = a*d - b*c +// With high accuracy requirement on det, backward analysis should +// propagate higher precision requirements to inputs +int TestDeterminantAnalysis() { + int nrOfFailedTestCases = 0; + + ExprGraph g; + + // Matrix entries: all in [8, 12] range (nearly singular) + int a = g.variable("a", 8.0, 12.0); // ufp = 3 + int b = g.variable("b", 8.0, 12.0); // ufp = 3 + int c = g.variable("c", 8.0, 12.0); // ufp = 3 + int d = g.variable("d", 8.0, 12.0); // ufp = 3 + + // Products: a*d in [64, 144], b*c in [64, 144] + int ad = g.mul(a, d); // ufp ~ 7 + int bc = g.mul(b, c); // ufp ~ 7 + + // Determinant: det = a*d - b*c, range [-80, 80], ufp ~ 6 + int det = g.sub(ad, bc); + + // Require 20 significant bits at the output + g.require_nsb(det, 20); + + // Run analysis + g.analyze(); + + // The determinant should get at least 20 bits + if (g.get_nsb(det) < 20) { + std::cerr << "FAIL: det nsb should be >= 20, got " << g.get_nsb(det) << std::endl; + ++nrOfFailedTestCases; + } + + // The products should need more bits than the output (due to subtraction) + if (g.get_nsb(ad) < g.get_nsb(det)) { + std::cerr << "FAIL: ad should need >= det bits due to cancellation" << std::endl; + ++nrOfFailedTestCases; + } + + // The input variables should need even more (mul adds carry) + if (g.get_nsb(a) < g.get_nsb(ad)) { + std::cerr << "FAIL: input a should need >= ad bits" << std::endl; + ++nrOfFailedTestCases; + } + + std::cout << "Determinant example analysis:\n"; + g.report(std::cout); + + return nrOfFailedTestCases; +} + +// Test simple multiplication chain: z = x * y, require 10 bits at z +int TestSimpleMulBackward() { + int nrOfFailedTestCases = 0; + + ExprGraph g; + int x = g.variable("x", 1.0, 8.0); + int y = g.variable("y", 1.0, 8.0); + int z = g.mul(x, y); + + g.require_nsb(z, 10); + g.analyze(); + + // Backward through mul: nsb(x) >= nsb(z) + carry = 11 + if (g.get_nsb(x) < 11) { + std::cerr << "FAIL: mul backward x expected >= 11, got " << g.get_nsb(x) << std::endl; + ++nrOfFailedTestCases; + } + if (g.get_nsb(y) < 11) { + std::cerr << "FAIL: mul backward y expected >= 11, got " << g.get_nsb(y) << std::endl; + ++nrOfFailedTestCases; + } + + return nrOfFailedTestCases; +} + +// Test with range_analyzer integration +int TestRangeAnalyzerIntegration() { + int nrOfFailedTestCases = 0; + + // Simulate: we observed values in [0.5, 100.0] + range_analyzer ra; + ra.observe(0.5); + ra.observe(100.0); + ra.observe(50.0); + ra.observe(75.0); + + ExprGraph g; + int x = g.variable("x", ra); + + auto& node = g.get_node(x); + // lo should be 0.5, hi should be 100.0 + if (node.lo != 0.5 || node.hi != 100.0) { + std::cerr << "FAIL: range_analyzer bridge lo/hi mismatch" << std::endl; + ++nrOfFailedTestCases; + } + + // ufp should match range_analyzer + if (node.ufp != ra.ufp()) { + std::cerr << "FAIL: ufp mismatch: node=" << node.ufp << ", analyzer=" << ra.ufp() << std::endl; + ++nrOfFailedTestCases; + } + + return nrOfFailedTestCases; +} + +// Test TypeAdvisor integration +int TestTypeRecommendation() { + int nrOfFailedTestCases = 0; + + ExprGraph g; + int x = g.variable("x", 0.1, 100.0); + int y = g.variable("y", 0.1, 100.0); + int z = g.mul(x, y); + + g.require_nsb(z, 10); + g.analyze(); + + TypeAdvisor advisor; + std::string rec = g.recommended_type(z, advisor); + + // With 10 nsb required, posit<16,1> (12 fraction bits) should suffice + std::cout << "Type recommendation for z (nsb=" << g.get_nsb(z) << "): " << rec << std::endl; + + // The recommendation should not be empty + if (rec.empty()) { + std::cerr << "FAIL: empty type recommendation" << std::endl; + ++nrOfFailedTestCases; + } + + // Print full report with types + g.report(std::cout, advisor); + + return nrOfFailedTestCases; +} + +// Test chain of operations: y = sqrt(a*a + b*b) +int TestPythagoreanAnalysis() { + int nrOfFailedTestCases = 0; + + ExprGraph g; + int a = g.variable("a", 1.0, 10.0); + int b = g.variable("b", 1.0, 10.0); + + int a2 = g.mul(a, a); + int b2 = g.mul(b, b); + int sum = g.add(a2, b2); + int result = g.sqrt(sum); + + g.require_nsb(result, 16); + g.analyze(); + + std::cout << "Pythagorean analysis (require 16 bits at sqrt):\n"; + g.report(std::cout); + + // result should have at least 16 bits + if (g.get_nsb(result) < 16) { + std::cerr << "FAIL: pythagorean result should have >= 16 bits" << std::endl; + ++nrOfFailedTestCases; + } + + return nrOfFailedTestCases; +} + +}} // namespace sw::universal + +#define TEST_CASE(name, func) \ + do { \ + int fails = func; \ + if (fails) { \ + std::cout << name << ": FAIL (" << fails << " errors)" << std::endl; \ + nrOfFailedTestCases += fails; \ + } else { \ + std::cout << name << ": PASS" << std::endl; \ + } \ + } while(0) + +int main() +try { + using namespace sw::universal; + + int nrOfFailedTestCases = 0; + + std::cout << "POP Expression Graph Tests\n"; + std::cout << std::string(40, '=') << "\n\n"; + + TEST_CASE("Graph construction", TestGraphConstruction()); + TEST_CASE("Simple mul backward", TestSimpleMulBackward()); + TEST_CASE("Determinant analysis", TestDeterminantAnalysis()); + TEST_CASE("Range analyzer integration", TestRangeAnalyzerIntegration()); + TEST_CASE("Type recommendation", TestTypeRecommendation()); + TEST_CASE("Pythagorean analysis", TestPythagoreanAnalysis()); + + std::cout << "\n"; + if (nrOfFailedTestCases == 0) { + std::cout << "All expression graph tests PASSED\n"; + } else { + std::cout << nrOfFailedTestCases << " test(s) FAILED\n"; + } + + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); +} +catch (const char* msg) { + std::cerr << "Caught exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (...) { + std::cerr << "Caught unknown exception" << std::endl; + return EXIT_FAILURE; +} diff --git a/mixedprecision/pop/test_pop_solver.cpp b/mixedprecision/pop/test_pop_solver.cpp new file mode 100644 index 000000000..cd4d18e31 --- /dev/null +++ b/mixedprecision/pop/test_pop_solver.cpp @@ -0,0 +1,202 @@ +// test_pop_solver.cpp: end-to-end LP-based optimal bit assignment +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project. +// +// Tests the PopSolver which translates an ExprGraph into an LP, +// solves it, and writes optimal nsb values back to the graph. + +#include +#include +#include +#include +#include + +namespace sw { namespace universal { + +// Test simple multiplication: z = x * y, require 10 bits at z +int TestSimpleMul() { + int nrOfFailedTestCases = 0; + + ExprGraph g; + int x = g.variable("x", 1.0, 8.0); + int y = g.variable("y", 1.0, 8.0); + int z = g.mul(x, y); + + g.require_nsb(z, 10); + + PopSolver solver; + bool ok = solver.solve(g); + + if (!ok) { + std::cerr << "FAIL: LP solver returned " << to_string(solver.status()) << std::endl; + ++nrOfFailedTestCases; + return nrOfFailedTestCases; + } + + // z should be exactly 10 + if (g.get_nsb(z) != 10) { + std::cerr << "FAIL: z nsb expected 10, got " << g.get_nsb(z) << std::endl; + ++nrOfFailedTestCases; + } + + // x, y should be 10 + carry = 11 + if (g.get_nsb(x) < 11) { + std::cerr << "FAIL: x nsb expected >= 11, got " << g.get_nsb(x) << std::endl; + ++nrOfFailedTestCases; + } + if (g.get_nsb(y) < 11) { + std::cerr << "FAIL: y nsb expected >= 11, got " << g.get_nsb(y) << std::endl; + ++nrOfFailedTestCases; + } + + std::cout << "Simple mul LP solution:\n"; + solver.report(std::cout, g); + + return nrOfFailedTestCases; +} + +// Test determinant: det = a*d - b*c +int TestDeterminant() { + int nrOfFailedTestCases = 0; + + ExprGraph g; + + int a = g.variable("a", 8.0, 12.0); + int b = g.variable("b", 8.0, 12.0); + int c = g.variable("c", 8.0, 12.0); + int d = g.variable("d", 8.0, 12.0); + + int ad = g.mul(a, d); + int bc = g.mul(b, c); + int det = g.sub(ad, bc); + + g.require_nsb(det, 20); + + PopSolver solver; + bool ok = solver.solve(g); + + if (!ok) { + std::cerr << "FAIL: LP solver returned " << to_string(solver.status()) << std::endl; + ++nrOfFailedTestCases; + return nrOfFailedTestCases; + } + + // det should be at least 20 + if (g.get_nsb(det) < 20) { + std::cerr << "FAIL: det nsb expected >= 20, got " << g.get_nsb(det) << std::endl; + ++nrOfFailedTestCases; + } + + // LP should find optimal (minimum total bits) + std::cout << "Determinant LP solution (total=" << solver.total_nsb() << "):\n"; + solver.report(std::cout, g); + + // Verify the LP solution is at least as good as or equal to fixpoint + ExprGraph g2; + int a2 = g2.variable("a", 8.0, 12.0); + int b2 = g2.variable("b", 8.0, 12.0); + int c2 = g2.variable("c", 8.0, 12.0); + int d2 = g2.variable("d", 8.0, 12.0); + int ad2 = g2.mul(a2, d2); + int bc2 = g2.mul(b2, c2); + int det2 = g2.sub(ad2, bc2); + g2.require_nsb(det2, 20); + g2.analyze(); + + double fixpoint_total = 0; + double lp_total = solver.total_nsb(); + for (int i = 0; i < g2.size(); ++i) { + fixpoint_total += g2.get_nsb(i); + } + + std::cout << "Fixpoint total: " << fixpoint_total << ", LP total: " << lp_total << "\n"; + + // LP should be <= fixpoint (optimal is at most as good as conservative) + if (lp_total > fixpoint_total + 1) { + std::cerr << "WARNING: LP total exceeds fixpoint total (may be due to rounding)\n"; + // Not a hard failure since rounding can differ + } + + return nrOfFailedTestCases; +} + +// Test chain: z = (a + b) * c, require 12 bits at z +int TestChain() { + int nrOfFailedTestCases = 0; + + ExprGraph g; + int a = g.variable("a", 1.0, 10.0); + int b = g.variable("b", 1.0, 10.0); + int c = g.variable("c", 1.0, 10.0); + + int sum = g.add(a, b); + int z = g.mul(sum, c); + + g.require_nsb(z, 12); + + PopSolver solver; + bool ok = solver.solve(g); + + if (!ok) { + std::cerr << "FAIL: chain LP solver failed" << std::endl; + ++nrOfFailedTestCases; + return nrOfFailedTestCases; + } + + if (g.get_nsb(z) < 12) { + std::cerr << "FAIL: chain z expected >= 12, got " << g.get_nsb(z) << std::endl; + ++nrOfFailedTestCases; + } + + std::cout << "Chain LP solution:\n"; + solver.report(std::cout, g); + + return nrOfFailedTestCases; +} + +}} // namespace sw::universal + +#define TEST_CASE(name, func) \ + do { \ + int fails = func; \ + if (fails) { \ + std::cout << name << ": FAIL (" << fails << " errors)" << std::endl; \ + nrOfFailedTestCases += fails; \ + } else { \ + std::cout << name << ": PASS" << std::endl; \ + } \ + } while(0) + +int main() +try { + using namespace sw::universal; + + int nrOfFailedTestCases = 0; + + std::cout << "POP LP Solver Tests\n"; + std::cout << std::string(40, '=') << "\n\n"; + + TEST_CASE("Simple multiplication LP", TestSimpleMul()); + TEST_CASE("Determinant LP", TestDeterminant()); + TEST_CASE("Chain LP", TestChain()); + + std::cout << "\n"; + if (nrOfFailedTestCases == 0) { + std::cout << "All POP solver tests PASSED\n"; + } else { + std::cout << nrOfFailedTestCases << " test(s) FAILED\n"; + } + + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); +} +catch (const char* msg) { + std::cerr << "Caught exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (...) { + std::cerr << "Caught unknown exception" << std::endl; + return EXIT_FAILURE; +} diff --git a/mixedprecision/pop/test_simplex.cpp b/mixedprecision/pop/test_simplex.cpp new file mode 100644 index 000000000..0599350f2 --- /dev/null +++ b/mixedprecision/pop/test_simplex.cpp @@ -0,0 +1,230 @@ +// test_simplex.cpp: validate the embedded simplex LP solver +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project. + +#include +#include +#include +#include +#include + +namespace sw { namespace universal { + +// Test 1: simple 2-variable LP +// minimize x + y +// subject to x >= 3 +// y >= 5 +// Solution: x=3, y=5, objective=8 +int TestSimple2Var() { + int nrOfFailedTestCases = 0; + + SimplexSolver lp; + lp.set_num_vars(2); + lp.set_objective({1.0, 1.0}); + + lp.add_ge_constraint({1.0, 0.0}, 3.0); + lp.add_ge_constraint({0.0, 1.0}, 5.0); + + LPStatus status = lp.solve(); + if (status != LPStatus::Optimal) { + std::cerr << "FAIL: expected Optimal, got " << to_string(status) << std::endl; + ++nrOfFailedTestCases; + return nrOfFailedTestCases; + } + + double x = lp.get_value(0); + double y = lp.get_value(1); + + if (std::abs(x - 3.0) > 0.01) { + std::cerr << "FAIL: expected x=3, got " << x << std::endl; + ++nrOfFailedTestCases; + } + if (std::abs(y - 5.0) > 0.01) { + std::cerr << "FAIL: expected y=5, got " << y << std::endl; + ++nrOfFailedTestCases; + } + if (std::abs(lp.objective_value() - 8.0) > 0.01) { + std::cerr << "FAIL: expected obj=8, got " << lp.objective_value() << std::endl; + ++nrOfFailedTestCases; + } + + return nrOfFailedTestCases; +} + +// Test 2: LP with relational constraints +// minimize 2x + 3y +// subject to x + y >= 10 +// x >= 2 +// y >= 3 +// Solution: x=7, y=3, objective=23 +int TestRelational() { + int nrOfFailedTestCases = 0; + + SimplexSolver lp; + lp.set_num_vars(2); + lp.set_objective({2.0, 3.0}); + + lp.add_ge_constraint({1.0, 1.0}, 10.0); + lp.add_ge_constraint({1.0, 0.0}, 2.0); + lp.add_ge_constraint({0.0, 1.0}, 3.0); + + LPStatus status = lp.solve(); + if (status != LPStatus::Optimal) { + std::cerr << "FAIL: expected Optimal, got " << to_string(status) << std::endl; + ++nrOfFailedTestCases; + return nrOfFailedTestCases; + } + + double x = lp.get_value(0); + double y = lp.get_value(1); + + if (std::abs(x - 7.0) > 0.01) { + std::cerr << "FAIL: expected x=7, got " << x << std::endl; + ++nrOfFailedTestCases; + } + if (std::abs(y - 3.0) > 0.01) { + std::cerr << "FAIL: expected y=3, got " << y << std::endl; + ++nrOfFailedTestCases; + } + + return nrOfFailedTestCases; +} + +// Test 3: constraints that mimic POP transfer functions +// minimize a + b + z +// subject to a - z >= 1 (backward mul: nsb(a) >= nsb(z) + 1) +// b - z >= 1 (backward mul: nsb(b) >= nsb(z) + 1) +// z >= 10 (output requirement) +// Solution: a=11, b=11, z=10, objective=32 +int TestPopLikeConstraints() { + int nrOfFailedTestCases = 0; + + SimplexSolver lp; + lp.set_num_vars(3); + lp.set_objective({1.0, 1.0, 1.0}); + + // a >= z + 1 => a - z >= 1 + lp.add_ge_constraint({1.0, 0.0, -1.0}, 1.0); + // b >= z + 1 => b - z >= 1 + lp.add_ge_constraint({0.0, 1.0, -1.0}, 1.0); + // z >= 10 + lp.add_ge_constraint({0.0, 0.0, 1.0}, 10.0); + // a,b,z >= 1 + lp.add_ge_constraint({1.0, 0.0, 0.0}, 1.0); + lp.add_ge_constraint({0.0, 1.0, 0.0}, 1.0); + + LPStatus status = lp.solve(); + if (status != LPStatus::Optimal) { + std::cerr << "FAIL: expected Optimal, got " << to_string(status) << std::endl; + ++nrOfFailedTestCases; + return nrOfFailedTestCases; + } + + double a = lp.get_value(0); + double b = lp.get_value(1); + double z = lp.get_value(2); + + if (std::abs(a - 11.0) > 0.01) { + std::cerr << "FAIL: expected a=11, got " << a << std::endl; + ++nrOfFailedTestCases; + } + if (std::abs(b - 11.0) > 0.01) { + std::cerr << "FAIL: expected b=11, got " << b << std::endl; + ++nrOfFailedTestCases; + } + if (std::abs(z - 10.0) > 0.01) { + std::cerr << "FAIL: expected z=10, got " << z << std::endl; + ++nrOfFailedTestCases; + } + + return nrOfFailedTestCases; +} + +// Test 4: 3-variable LP with mixed constraints +// minimize x + y + z +// subject to x + y >= 5 +// y + z >= 7 +// x >= 1 +// z >= 1 +// Solution: x=1, y=4, z=3, objective=8 +int TestThreeVar() { + int nrOfFailedTestCases = 0; + + SimplexSolver lp; + lp.set_num_vars(3); + lp.set_objective({1.0, 1.0, 1.0}); + + lp.add_ge_constraint({1.0, 1.0, 0.0}, 5.0); + lp.add_ge_constraint({0.0, 1.0, 1.0}, 7.0); + lp.add_ge_constraint({1.0, 0.0, 0.0}, 1.0); + lp.add_ge_constraint({0.0, 0.0, 1.0}, 1.0); + + LPStatus status = lp.solve(); + if (status != LPStatus::Optimal) { + std::cerr << "FAIL: expected Optimal, got " << to_string(status) << std::endl; + ++nrOfFailedTestCases; + return nrOfFailedTestCases; + } + + double x = lp.get_value(0); + double y = lp.get_value(1); + double z = lp.get_value(2); + double obj = lp.objective_value(); + + // x=1, y=4, z=3 + if (std::abs(obj - 8.0) > 0.01) { + std::cerr << "FAIL: expected obj=8, got " << obj + << " (x=" << x << ", y=" << y << ", z=" << z << ")" << std::endl; + ++nrOfFailedTestCases; + } + + return nrOfFailedTestCases; +} + +}} // namespace sw::universal + +#define TEST_CASE(name, func) \ + do { \ + int fails = func; \ + if (fails) { \ + std::cout << name << ": FAIL (" << fails << " errors)" << std::endl; \ + nrOfFailedTestCases += fails; \ + } else { \ + std::cout << name << ": PASS" << std::endl; \ + } \ + } while(0) + +int main() +try { + using namespace sw::universal; + + int nrOfFailedTestCases = 0; + + std::cout << "POP Simplex Solver Tests\n"; + std::cout << std::string(40, '=') << "\n\n"; + + TEST_CASE("Simple 2-var LP", TestSimple2Var()); + TEST_CASE("Relational constraints", TestRelational()); + TEST_CASE("POP-like constraints", TestPopLikeConstraints()); + TEST_CASE("Three-variable LP", TestThreeVar()); + + std::cout << "\n"; + if (nrOfFailedTestCases == 0) { + std::cout << "All simplex solver tests PASSED\n"; + } else { + std::cout << nrOfFailedTestCases << " test(s) FAILED\n"; + } + + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); +} +catch (const char* msg) { + std::cerr << "Caught exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (...) { + std::cerr << "Caught unknown exception" << std::endl; + return EXIT_FAILURE; +} diff --git a/mixedprecision/pop/test_transfer.cpp b/mixedprecision/pop/test_transfer.cpp new file mode 100644 index 000000000..8054a41e9 --- /dev/null +++ b/mixedprecision/pop/test_transfer.cpp @@ -0,0 +1,371 @@ +// test_transfer.cpp: validate POP forward and backward transfer functions +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project. +// +// Validates transfer functions against known precision propagation results. +// Reference: Dorra Ben Khalifa, "Fast and Efficient Bit-Level Precision Tuning," +// PhD thesis, Université de Perpignan, 2021, Chapter 4. + +#include +#include +#include +#include +#include + +namespace sw { namespace universal { + +int TestForwardTransfer() { + int nrOfFailedTestCases = 0; + + // Test forward_add: z = x + y where x ~ 2^3 (8-ish), y ~ 2^1 (2-ish) + // x: ufp=3, nsb=10 -> lsb = 3-10+1 = -6 + // y: ufp=1, nsb=8 -> lsb = 1-8+1 = -6 + // z: ufp_z=3 (from range), carry=1 + // nsb_z = 3 - (-6) + 1 + 1 = 11 + { + precision_info x{3, 10}; + precision_info y{1, 8}; + auto z = forward_add(x, y, 3, 1); + if (z.nsb != 11) { + std::cerr << "FAIL: forward_add expected nsb=11, got " << z.nsb << std::endl; + ++nrOfFailedTestCases; + } + if (z.ufp != 3) { + std::cerr << "FAIL: forward_add expected ufp=3, got " << z.ufp << std::endl; + ++nrOfFailedTestCases; + } + } + + // Test forward_add with different lsb values + // x: ufp=5, nsb=4 -> lsb = 5-4+1 = 2 + // y: ufp=0, nsb=8 -> lsb = 0-8+1 = -7 + // z: ufp_z=5, carry=1 + // nsb_z = 5 - (-7) + 1 + 1 = 14 + { + precision_info x{5, 4}; + precision_info y{0, 8}; + auto z = forward_add(x, y, 5, 1); + if (z.nsb != 14) { + std::cerr << "FAIL: forward_add (mixed lsb) expected nsb=14, got " << z.nsb << std::endl; + ++nrOfFailedTestCases; + } + } + + // Test forward_mul: z = x * y + // nsb(z) = nsb(x) + nsb(y) + carry + { + precision_info x{3, 10}; + precision_info y{2, 8}; + auto z = forward_mul(x, y, 1); + if (z.nsb != 19) { + std::cerr << "FAIL: forward_mul expected nsb=19, got " << z.nsb << std::endl; + ++nrOfFailedTestCases; + } + if (z.ufp != 5) { + std::cerr << "FAIL: forward_mul expected ufp=5, got " << z.ufp << std::endl; + ++nrOfFailedTestCases; + } + } + + // Test forward_mul with carry=0 + { + precision_info x{3, 10}; + precision_info y{2, 8}; + auto z = forward_mul(x, y, 0); + if (z.nsb != 18) { + std::cerr << "FAIL: forward_mul (carry=0) expected nsb=18, got " << z.nsb << std::endl; + ++nrOfFailedTestCases; + } + } + + // Test forward_div: z = x / y + { + precision_info x{5, 12}; + precision_info y{2, 8}; + auto z = forward_div(x, y, 1); + if (z.nsb != 21) { + std::cerr << "FAIL: forward_div expected nsb=21, got " << z.nsb << std::endl; + ++nrOfFailedTestCases; + } + if (z.ufp != 3) { + std::cerr << "FAIL: forward_div expected ufp=3, got " << z.ufp << std::endl; + ++nrOfFailedTestCases; + } + } + + // Test forward_neg and forward_abs: precision unchanged + { + precision_info x{3, 10}; + auto z_neg = forward_neg(x); + auto z_abs = forward_abs(x); + if (z_neg.nsb != 10 || z_neg.ufp != 3) { + std::cerr << "FAIL: forward_neg should preserve precision" << std::endl; + ++nrOfFailedTestCases; + } + if (z_abs.nsb != 10 || z_abs.ufp != 3) { + std::cerr << "FAIL: forward_abs should preserve precision" << std::endl; + ++nrOfFailedTestCases; + } + } + + // Test forward_sqrt: nsb(z) = nsb(x) + carry + { + precision_info x{6, 12}; + auto z = forward_sqrt(x, 3, 1); + if (z.nsb != 13) { + std::cerr << "FAIL: forward_sqrt expected nsb=13, got " << z.nsb << std::endl; + ++nrOfFailedTestCases; + } + if (z.ufp != 3) { + std::cerr << "FAIL: forward_sqrt expected ufp=3, got " << z.ufp << std::endl; + ++nrOfFailedTestCases; + } + } + + return nrOfFailedTestCases; +} + +int TestBackwardTransfer() { + int nrOfFailedTestCases = 0; + + // Backward add: z = x + y, require nsb(z) = 10 + // nsb(x) >= nsb(z) + ufp(z) - ufp(x) + carry + { + int nsb_z = 10, ufp_z = 3, ufp_x = 3, ufp_y = 1; + int nsb_x = backward_add_lhs(nsb_z, ufp_z, ufp_x, 1); + int nsb_y = backward_add_rhs(nsb_z, ufp_z, ufp_y, 1); + // nsb_x = 10 + 3 - 3 + 1 = 11 + if (nsb_x != 11) { + std::cerr << "FAIL: backward_add_lhs expected 11, got " << nsb_x << std::endl; + ++nrOfFailedTestCases; + } + // nsb_y = 10 + 3 - 1 + 1 = 13 + if (nsb_y != 13) { + std::cerr << "FAIL: backward_add_rhs expected 13, got " << nsb_y << std::endl; + ++nrOfFailedTestCases; + } + } + + // Backward add: cancellation scenario (z = x - y where x ~ y) + // x ~ 1000 (ufp=9), y ~ 999 (ufp=9), z ~ 1 (ufp=0) + // To get 10 bits in z, we need: + // nsb(x) >= 10 + 0 - 9 + 1 = 2 ... but that's wrong for cancellation + // Actually the formula correctly handles this: + // When ufp_z << ufp_x, the required nsb_x increases proportionally + { + int nsb_z = 10, ufp_z = 0, ufp_x = 9; + int nsb_x = backward_add_lhs(nsb_z, ufp_z, ufp_x, 1); + // nsb_x = 10 + 0 - 9 + 1 = 2 + // This is correct: the output only needs 2 significant bits from x + // because x and y mostly cancel. But those 2 bits must be correct! + if (nsb_x != 2) { + std::cerr << "FAIL: backward_add_lhs (cancellation) expected 2, got " << nsb_x << std::endl; + ++nrOfFailedTestCases; + } + } + + // Backward sub: same as add + { + int nsb_z = 10, ufp_z = 3, ufp_x = 3; + int nsb_x_add = backward_add_lhs(nsb_z, ufp_z, ufp_x, 1); + int nsb_x_sub = backward_sub_lhs(nsb_z, ufp_z, ufp_x, 1); + if (nsb_x_add != nsb_x_sub) { + std::cerr << "FAIL: backward_sub should match backward_add" << std::endl; + ++nrOfFailedTestCases; + } + } + + // Backward mul: nsb(x) >= nsb(z) + carry + { + int nsb_z = 10; + int nsb_x = backward_mul_lhs(nsb_z, 1); + int nsb_y = backward_mul_rhs(nsb_z, 1); + if (nsb_x != 11) { + std::cerr << "FAIL: backward_mul_lhs expected 11, got " << nsb_x << std::endl; + ++nrOfFailedTestCases; + } + if (nsb_y != 11) { + std::cerr << "FAIL: backward_mul_rhs expected 11, got " << nsb_y << std::endl; + ++nrOfFailedTestCases; + } + } + + // Backward div + { + int nsb_z = 10; + int nsb_x = backward_div_lhs(nsb_z, 1); + int nsb_y = backward_div_rhs(nsb_z, 1); + if (nsb_x != 11) { + std::cerr << "FAIL: backward_div_lhs expected 11, got " << nsb_x << std::endl; + ++nrOfFailedTestCases; + } + if (nsb_y != 11) { + std::cerr << "FAIL: backward_div_rhs expected 11, got " << nsb_y << std::endl; + ++nrOfFailedTestCases; + } + } + + // Backward neg and abs: passthrough + { + if (backward_neg(10) != 10) { + std::cerr << "FAIL: backward_neg should passthrough" << std::endl; + ++nrOfFailedTestCases; + } + if (backward_abs(10) != 10) { + std::cerr << "FAIL: backward_abs should passthrough" << std::endl; + ++nrOfFailedTestCases; + } + } + + // Backward sqrt: nsb(x) >= nsb(z) + carry + { + if (backward_sqrt(10, 1) != 11) { + std::cerr << "FAIL: backward_sqrt expected 11, got " << backward_sqrt(10, 1) << std::endl; + ++nrOfFailedTestCases; + } + } + + return nrOfFailedTestCases; +} + +int TestConstexprTransfer() { + int nrOfFailedTestCases = 0; + + // Verify constexpr evaluation + constexpr precision_info x{3, 10}; + constexpr precision_info y{1, 8}; + constexpr auto z_add = forward_add(x, y, 3, 1); + static_assert(z_add.nsb == 11, "forward_add constexpr failed"); + static_assert(z_add.ufp == 3, "forward_add constexpr ufp failed"); + + constexpr auto z_mul = forward_mul(x, y, 1); + static_assert(z_mul.nsb == 19, "forward_mul constexpr failed"); + + constexpr int bk_add = backward_add_lhs(10, 3, 3, 1); + static_assert(bk_add == 11, "backward_add_lhs constexpr failed"); + + constexpr int bk_mul = backward_mul_lhs(10, 1); + static_assert(bk_mul == 11, "backward_mul_lhs constexpr failed"); + + std::cout << "constexpr transfer function evaluation: PASS" << std::endl; + + return nrOfFailedTestCases; +} + +// Determinant example from thesis: det = a*d - b*c +// With 20-bit output requirement, backward analysis should require +// more precision at inputs due to subtraction cancellation +int TestDeterminantExample() { + int nrOfFailedTestCases = 0; + + // Setup: det(M) = a*d - b*c + // Let a~10, b~9, c~9, d~10 (nearly singular matrix -> cancellation) + // ufp(a)=3, ufp(b)=3, ufp(c)=3, ufp(d)=3 + // ufp(a*d) ~ 6, ufp(b*c) ~ 6 + // det ~ 10*10 - 9*9 = 19, ufp(det) ~ 4 + + // ufp values for the 2x2 matrix entries (all ~10, so ufp=3) + // and their products (all ~100, so ufp=6) + int ufp_ad = 6, ufp_bc = 6; + int ufp_det = 4; + int nsb_det_required = 20; + + // Backward through subtraction: det = ad - bc + int nsb_ad = backward_sub_lhs(nsb_det_required, ufp_det, ufp_ad, 1); + int nsb_bc = backward_sub_rhs(nsb_det_required, ufp_det, ufp_bc, 1); + // nsb_ad = 20 + 4 - 6 + 1 = 19 + // nsb_bc = 20 + 4 - 6 + 1 = 19 + + if (nsb_ad != 19) { + std::cerr << "FAIL: det backward sub lhs expected 19, got " << nsb_ad << std::endl; + ++nrOfFailedTestCases; + } + if (nsb_bc != 19) { + std::cerr << "FAIL: det backward sub rhs expected 19, got " << nsb_bc << std::endl; + ++nrOfFailedTestCases; + } + + // Backward through multiplication: ad = a * d + int nsb_a = backward_mul_lhs(nsb_ad, 1); + int nsb_d = backward_mul_rhs(nsb_ad, 1); + // nsb_a = 19 + 1 = 20, nsb_d = 19 + 1 = 20 + + if (nsb_a != 20) { + std::cerr << "FAIL: det backward mul a expected 20, got " << nsb_a << std::endl; + ++nrOfFailedTestCases; + } + if (nsb_d != 20) { + std::cerr << "FAIL: det backward mul d expected 20, got " << nsb_d << std::endl; + ++nrOfFailedTestCases; + } + + // Backward through multiplication: bc = b * c + int nsb_b = backward_mul_lhs(nsb_bc, 1); + int nsb_c = backward_mul_rhs(nsb_bc, 1); + + if (nsb_b != 20) { + std::cerr << "FAIL: det backward mul b expected 20, got " << nsb_b << std::endl; + ++nrOfFailedTestCases; + } + if (nsb_c != 20) { + std::cerr << "FAIL: det backward mul c expected 20, got " << nsb_c << std::endl; + ++nrOfFailedTestCases; + } + + std::cout << "Determinant example: requiring " << nsb_det_required << " bits at output\n"; + std::cout << " a needs " << nsb_a << " bits, b needs " << nsb_b << " bits\n"; + std::cout << " c needs " << nsb_c << " bits, d needs " << nsb_d << " bits\n"; + std::cout << " a*d intermediate needs " << nsb_ad << " bits\n"; + std::cout << " b*c intermediate needs " << nsb_bc << " bits\n"; + + return nrOfFailedTestCases; +} + +}} // namespace sw::universal + +// Receive a test name and report pass/fail +#define TEST_CASE(name, func) \ + do { \ + int fails = func; \ + if (fails) { \ + std::cout << name << ": FAIL (" << fails << " errors)" << std::endl; \ + nrOfFailedTestCases += fails; \ + } else { \ + std::cout << name << ": PASS" << std::endl; \ + } \ + } while(0) + +int main() +try { + using namespace sw::universal; + + int nrOfFailedTestCases = 0; + + std::cout << "POP Transfer Function Tests\n"; + std::cout << std::string(40, '=') << "\n\n"; + + TEST_CASE("Forward transfer", TestForwardTransfer()); + TEST_CASE("Backward transfer", TestBackwardTransfer()); + TEST_CASE("Constexpr transfer", TestConstexprTransfer()); + TEST_CASE("Determinant example", TestDeterminantExample()); + + std::cout << "\n"; + if (nrOfFailedTestCases == 0) { + std::cout << "All transfer function tests PASSED\n"; + } else { + std::cout << nrOfFailedTestCases << " test(s) FAILED\n"; + } + + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); +} +catch (const char* msg) { + std::cerr << "Caught exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (...) { + std::cerr << "Caught unknown exception" << std::endl; + return EXIT_FAILURE; +} diff --git a/mixedprecision/pop/test_ufp.cpp b/mixedprecision/pop/test_ufp.cpp new file mode 100644 index 000000000..c99e3df38 --- /dev/null +++ b/mixedprecision/pop/test_ufp.cpp @@ -0,0 +1,195 @@ +// test_ufp.cpp: validate UFP (unit in the first place) computation +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project. +// +// Tests compute_ufp against known values and validates integration +// with range_analyzer. + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace sw { namespace universal { + +int TestUfpBasic() { + int nrOfFailedTestCases = 0; + + // Powers of 2 + struct test_case { double value; int expected_ufp; }; + std::vector cases = { + { 1.0, 0 }, + { 2.0, 1 }, + { 4.0, 2 }, + { 8.0, 3 }, + { 16.0, 4 }, + { 0.5, -1 }, + { 0.25, -2 }, + { 0.125, -3 }, + { 1024.0, 10 }, + }; + + for (const auto& tc : cases) { + int result = compute_ufp(tc.value); + if (result != tc.expected_ufp) { + std::cerr << "FAIL: compute_ufp(" << tc.value << ") = " << result + << ", expected " << tc.expected_ufp << std::endl; + ++nrOfFailedTestCases; + } + } + + // Non-powers of 2 + // ufp(3.0) = floor(log2(3)) = 1 + if (compute_ufp(3.0) != 1) { + std::cerr << "FAIL: compute_ufp(3.0) = " << compute_ufp(3.0) << ", expected 1" << std::endl; + ++nrOfFailedTestCases; + } + + // ufp(7.0) = floor(log2(7)) = 2 + if (compute_ufp(7.0) != 2) { + std::cerr << "FAIL: compute_ufp(7.0) = " << compute_ufp(7.0) << ", expected 2" << std::endl; + ++nrOfFailedTestCases; + } + + // ufp(0.3) = floor(log2(0.3)) = -2 + if (compute_ufp(0.3) != -2) { + std::cerr << "FAIL: compute_ufp(0.3) = " << compute_ufp(0.3) << ", expected -2" << std::endl; + ++nrOfFailedTestCases; + } + + return nrOfFailedTestCases; +} + +int TestUfpNegative() { + int nrOfFailedTestCases = 0; + + // Negative values should give same ufp as positive + if (compute_ufp(-1.0) != 0) { + std::cerr << "FAIL: compute_ufp(-1.0) expected 0, got " << compute_ufp(-1.0) << std::endl; + ++nrOfFailedTestCases; + } + if (compute_ufp(-8.0) != 3) { + std::cerr << "FAIL: compute_ufp(-8.0) expected 3, got " << compute_ufp(-8.0) << std::endl; + ++nrOfFailedTestCases; + } + + return nrOfFailedTestCases; +} + +int TestUfpSpecialValues() { + int nrOfFailedTestCases = 0; + + // Zero should return sentinel + int ufp_zero = compute_ufp(0.0); + if (ufp_zero != std::numeric_limits::min()) { + std::cerr << "FAIL: compute_ufp(0.0) should return INT_MIN, got " << ufp_zero << std::endl; + ++nrOfFailedTestCases; + } + + return nrOfFailedTestCases; +} + +int TestUfpRange() { + int nrOfFailedTestCases = 0; + + // Test ufp from range [lo, hi] + int ufp = compute_ufp(-3.0, 10.0); + // max(|-3|, |10|) = 10, ufp(10) = 3 + if (ufp != 3) { + std::cerr << "FAIL: compute_ufp(-3, 10) expected 3, got " << ufp << std::endl; + ++nrOfFailedTestCases; + } + + ufp = compute_ufp(-100.0, 50.0); + // max(100, 50) = 100, ufp(100) = 6 + if (ufp != 6) { + std::cerr << "FAIL: compute_ufp(-100, 50) expected 6, got " << ufp << std::endl; + ++nrOfFailedTestCases; + } + + return nrOfFailedTestCases; +} + +int TestUfpRangeAnalyzerIntegration() { + int nrOfFailedTestCases = 0; + + // Create a range_analyzer, observe some values, check ufp + range_analyzer ra; + ra.observe(1.0); + ra.observe(8.5); + ra.observe(0.5); + ra.observe(-3.0); + + int ufp = ufp_from_analyzer(ra); + // maxScale should be floor(log2(8.5)) = 3 + if (ufp != 3) { + std::cerr << "FAIL: ufp_from_analyzer expected 3, got " << ufp << std::endl; + ++nrOfFailedTestCases; + } + + // Larger values + range_analyzer ra2; + ra2.observe(1024.0); + ra2.observe(0.001); + int ufp2 = ufp_from_analyzer(ra2); + // maxScale = floor(log2(1024)) = 10 + if (ufp2 != 10) { + std::cerr << "FAIL: ufp_from_analyzer (large) expected 10, got " << ufp2 << std::endl; + ++nrOfFailedTestCases; + } + + return nrOfFailedTestCases; +} + +}} // namespace sw::universal + +#define TEST_CASE(name, func) \ + do { \ + int fails = func; \ + if (fails) { \ + std::cout << name << ": FAIL (" << fails << " errors)" << std::endl; \ + nrOfFailedTestCases += fails; \ + } else { \ + std::cout << name << ": PASS" << std::endl; \ + } \ + } while(0) + +int main() +try { + using namespace sw::universal; + + int nrOfFailedTestCases = 0; + + std::cout << "POP UFP Computation Tests\n"; + std::cout << std::string(40, '=') << "\n\n"; + + TEST_CASE("UFP basic values", TestUfpBasic()); + TEST_CASE("UFP negative values", TestUfpNegative()); + TEST_CASE("UFP special values", TestUfpSpecialValues()); + TEST_CASE("UFP from range", TestUfpRange()); + TEST_CASE("UFP range_analyzer integration", TestUfpRangeAnalyzerIntegration()); + + std::cout << "\n"; + if (nrOfFailedTestCases == 0) { + std::cout << "All UFP tests PASSED\n"; + } else { + std::cout << nrOfFailedTestCases << " test(s) FAILED\n"; + } + + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); +} +catch (const char* msg) { + std::cerr << "Caught exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (...) { + std::cerr << "Caught unknown exception" << std::endl; + return EXIT_FAILURE; +}