stillwater-sc
diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
Lines changed: 32 additions & 1 deletion
diff --git a/CMakeLists.txt b/CMakeLists.txt
Lines changed: 5 additions & 5 deletions
diff --git a/cmake/toolchains/ppc64le-linux-gnu.cmake b/cmake/toolchains/ppc64le-linux-gnu.cmake
Lines changed: 23 additions & 0 deletions
diff --git a/cmake/toolchains/riscv64-linux-gnu.cmake b/cmake/toolchains/riscv64-linux-gnu.cmake
Lines changed: 23 additions & 0 deletions
diff --git a/include/sw/universal/native/nonconstexpr/gcc_long_double.hpp b/include/sw/universal/native/nonconstexpr/gcc_long_double.hpp
Lines changed: 61 additions & 2 deletions
diff --git a/include/sw/universal/number/posit/specialized/posit_32_2.hpp b/include/sw/universal/number/posit/specialized/posit_32_2.hpp
Lines changed: 19 additions & 8 deletions
@@ -2,7 +2,7 @@ name: CMake
 
 on:
   push:
-    branches: [ v3.96, main ]
+    branches: [ v3.97, main ]
   pull_request:
     branches: [ main ]
 
@@ -49,6 +49,20 @@ jobs:
             name: macOS x64 (Apple Clang)
             artifact: macos-x64
             cmake_flags: -DUNIVERSAL_BUILD_CI_LITE=ON
+          # RISC-V cross-compilation with QEMU emulation
+          - os: ubuntu-latest
+            name: Linux RISC-V 64 (GCC cross)
+            artifact: linux-riscv64-gcc
+            cross: riscv64
+            toolchain: cmake/toolchains/riscv64-linux-gnu.cmake
+            cmake_flags: -DUNIVERSAL_BUILD_CI_LITE=ON
+          # IBM POWER cross-compilation with QEMU emulation
+          - os: ubuntu-latest
+            name: Linux POWER 64 LE (GCC cross)
+            artifact: linux-ppc64le-gcc
+            cross: ppc64le
+            toolchain: cmake/toolchains/ppc64le-linux-gnu.cmake
+            cmake_flags: -DUNIVERSAL_BUILD_CI_LITE=ON
 
     steps:
       - name: Checkout
@@ -60,6 +74,22 @@ jobs:
           sudo apt-get update
           sudo apt-get install -y clang
 
+      - name: Install RISC-V cross-compiler and QEMU
+        if: matrix.cross == 'riscv64'
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y g++-riscv64-linux-gnu qemu-user-static
+          # Set sysroot so QEMU can find the RISC-V dynamic linker and libs,
+          # whether invoked via CMAKE_CROSSCOMPILING_EMULATOR or binfmt_misc
+          echo "QEMU_LD_PREFIX=/usr/riscv64-linux-gnu" >> $GITHUB_ENV
+
+      - name: Install POWER cross-compiler and QEMU
+        if: matrix.cross == 'ppc64le'
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y g++-powerpc64le-linux-gnu qemu-user-static
+          echo "QEMU_LD_PREFIX=/usr/powerpc64le-linux-gnu" >> $GITHUB_ENV
+
       # ccache for Linux and macOS
       - name: Install and configure ccache
         if: runner.os != 'Windows'
@@ -98,6 +128,7 @@ jobs:
           -DCMAKE_C_COMPILER_LAUNCHER=${{ env.CMAKE_C_COMPILER_LAUNCHER || '' }}
           -DCMAKE_CXX_COMPILER_LAUNCHER=${{ env.CMAKE_CXX_COMPILER_LAUNCHER || '' }}
           ${{ matrix.cmake_flags }}
+          ${{ matrix.toolchain && format('-DCMAKE_TOOLCHAIN_FILE={0}/{1}', github.workspace, matrix.toolchain) || '' }}
           ${{ matrix.cc && format('-DCMAKE_C_COMPILER={0}', matrix.cc) || '' }}
           ${{ matrix.cxx && format('-DCMAKE_CXX_COMPILER={0}', matrix.cxx) || '' }}
 
 
@@ -20,7 +20,7 @@ if(NOT DEFINED UNIVERSAL_VERSION_MAJOR)
   set(UNIVERSAL_VERSION_MAJOR 3)
 endif()
 if(NOT DEFINED UNIVERSAL_VERSION_MINOR)
-  set(UNIVERSAL_VERSION_MINOR 96)
+  set(UNIVERSAL_VERSION_MINOR 97)
 endif()
 if(NOT DEFINED UNIVERSAL_VERSION_PATCH)
   set(UNIVERSAL_VERSION_PATCH 1)
@@ -565,8 +565,8 @@ if(WIN32)
     set(config_install_dir CMake)
 elseif(UNIX)
     set(include_install_dir include)
-    set(include_install_dir_postfix "${project_library_target_name}")
-    set(include_install_dir_full    "${include_install_dir}/${include_install_dir_postfix}")
+    set(include_install_dir_postfix "")
+    set(include_install_dir_full    "${include_install_dir}")
 
     set(config_install_dir share/${PACKAGE_NAME})
 else()
@@ -614,7 +614,7 @@ message(STATUS "include_install_dir_postfix = ${include_install_dir_postfix}")
 
 # configure the library target
 target_include_directories(${project_library_target_name} 
-    INTERFACE $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>
+    INTERFACE $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include/sw>
     	      $<INSTALL_INTERFACE:${include_install_dir_full}>)
 
 # uninstall target
@@ -658,7 +658,7 @@ install(FILES
     DESTINATION ${config_install_dir} COMPONENT cmake)
 
 # Install headers
-install(DIRECTORY   ${PROJECT_SOURCE_DIR}/include/${project_library_target_name}
+install(DIRECTORY   ${PROJECT_SOURCE_DIR}/include/sw/
         DESTINATION ${include_install_dir})
 
 if(UNIVERSAL_BUILD_ALL)
 
@@ -0,0 +1,23 @@
+# CMake toolchain file for cross-compiling to 64-bit little-endian IBM POWER (ppc64le) Linux.
+# Builds with the powerpc64le-linux-gnu GCC cross-compiler; binaries run under QEMU user-mode emulation.
+#
+# Usage (from a build directory whose parent is the source tree):
+#   cmake -DCMAKE_TOOLCHAIN_FILE=cmake/toolchains/ppc64le-linux-gnu.cmake ..
+#
+# Prerequisites (Ubuntu/Debian):
+#   sudo apt-get install g++-powerpc64le-linux-gnu qemu-user-static
+
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR ppc64le)
+
+set(CMAKE_C_COMPILER powerpc64le-linux-gnu-gcc)
+set(CMAKE_CXX_COMPILER powerpc64le-linux-gnu-g++)
+
+# Run cross-compiled binaries through QEMU user-mode emulation; -L points QEMU at the target sysroot
+set(CMAKE_CROSSCOMPILING_EMULATOR "qemu-ppc64le-static;-L;/usr/powerpc64le-linux-gnu")
+
+# Sysroot for find_*() lookups: libraries and headers only under the sysroot, programs only on the host
+set(CMAKE_FIND_ROOT_PATH /usr/powerpc64le-linux-gnu)
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
@@ -0,0 +1,23 @@
+# CMake toolchain file for cross-compiling to 64-bit RISC-V (riscv64) Linux.
+# Builds with the riscv64-linux-gnu GCC cross-compiler; binaries run under QEMU user-mode emulation.
+#
+# Usage (from a build directory whose parent is the source tree):
+#   cmake -DCMAKE_TOOLCHAIN_FILE=cmake/toolchains/riscv64-linux-gnu.cmake ..
+#
+# Prerequisites (Ubuntu/Debian):
+#   sudo apt-get install g++-riscv64-linux-gnu qemu-user-static
+
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR riscv64)
+
+set(CMAKE_C_COMPILER riscv64-linux-gnu-gcc)
+set(CMAKE_CXX_COMPILER riscv64-linux-gnu-g++)
+
+# Run cross-compiled binaries through QEMU user-mode emulation; -L points QEMU at the target sysroot
+set(CMAKE_CROSSCOMPILING_EMULATOR "qemu-riscv64-static;-L;/usr/riscv64-linux-gnu")
+
+# Sysroot for find_*() lookups: libraries and headers only under the sysroot, programs only on the host
+set(CMAKE_FIND_ROOT_PATH /usr/riscv64-linux-gnu)
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
@@ -15,7 +15,7 @@ namespace sw { namespace universal {
 inline std::tuple<bool, int, std::uint64_t> ieee_components(long double fp) {
 	static_assert(std::numeric_limits<double>::is_iec559,
 		"This function only works when double complies with IEC 559 (IEEE 754)");
-	static_assert(sizeof(long double) == 16, "This function only works when double is 80 bit.");
+	static_assert(sizeof(long double) == 16, "This function only works when long double is 16 bytes.");
 
 	long_double_decoder dd{ fp }; // initializes the first member of the union
 	// Reading inactive union parts is forbidden in constexpr :-(
@@ -115,14 +115,35 @@ inline std::string to_binary(long double number, bool bNibbleMarker = false) {
 
 	s << '.';
 
-	// print fraction bits
+#if defined(UNIVERSAL_ARCH_POWER)
+	// POWER: IEEE 754 binary128 — 112 fraction bits (48 upper + 64 lower)
+	// No explicit integer bit (implicit leading 1 for normals)
+	{
+		uint64_t mask = (uint64_t(1) << 47);
+		for (int i = 47; i >= 0; --i) {
+			s << ((decoder.parts.upper & mask) ? '1' : '0');
+			if (bNibbleMarker && i != 0 && (i % 4) == 0) s << '\'';
+			mask >>= 1;
+		}
+	}
+	{
+		uint64_t mask = (uint64_t(1) << 63);
+		for (int i = 63; i >= 0; --i) {
+			s << ((decoder.parts.fraction & mask) ? '1' : '0');
+			if (bNibbleMarker && i != 0 && (i % 4) == 0) s << '\'';
+			mask >>= 1;
+		}
+	}
+#else
+	// x86: 80-bit extended — bit63 is the explicit integer bit, then 63 fraction bits
 	uint64_t mask = (uint64_t(1) << 62);
 	s << (decoder.parts.bit63 ? '1' : '0');
 	for (int i = 62; i >= 0; --i) {
 		s << ((decoder.parts.fraction & mask) ? '1' : '0');
 		if (bNibbleMarker && i != 0 && (i % 4) == 0) s << '\'';
 		mask >>= 1;
 	}
+#endif
 
 	return s.str();
 }
@@ -151,12 +172,30 @@ inline std::string to_triple(long double number) {
 	s << scale << ',';
 
 	// print fraction bits
+#if defined(UNIVERSAL_ARCH_POWER)
+	// POWER: 112 fraction bits (48 upper + 64 lower), implicit leading 1
+	{
+		uint64_t mask = (uint64_t(1) << 47);
+		for (int i = 47; i >= 0; --i) {
+			s << ((decoder.parts.upper & mask) ? '1' : '0');
+			mask >>= 1;
+		}
+	}
+	{
+		uint64_t mask = (uint64_t(1) << 63);
+		for (int i = 63; i >= 0; --i) {
+			s << ((decoder.parts.fraction & mask) ? '1' : '0');
+			mask >>= 1;
+		}
+	}
+#else
 	s << (decoder.parts.bit63 ? '1' : '0');
 	uint64_t mask = (uint64_t(1) << 61);
 	for (int i = 61; i >= 0; --i) {
 		s << ((decoder.parts.fraction & mask) ? '1' : '0');
 		mask >>= 1;
 	}
+#endif
 
 	s << ')';
 	return s.str();
@@ -195,13 +234,33 @@ inline std::string color_print(long double number) {
 	s << '.';
 
 	// print fraction bits
+#if defined(UNIVERSAL_ARCH_POWER)
+	// POWER: 112 fraction bits (48 upper + 64 lower), implicit leading 1
+	{
+		uint64_t mask = (uint64_t(1) << 47);
+		for (int i = 47; i >= 0; --i) {
+			s << magenta << ((decoder.parts.upper & mask) ? '1' : '0');
+			if (i > 0 && i % 4 == 0) s << magenta << '\'';
+			mask >>= 1;
+		}
+	}
+	{
+		uint64_t mask = (uint64_t(1) << 63);
+		for (int i = 63; i >= 0; --i) {
+			s << magenta << ((decoder.parts.fraction & mask) ? '1' : '0');
+			if (i > 0 && i % 4 == 0) s << magenta << '\'';
+			mask >>= 1;
+		}
+	}
+#else
 	s << magenta << (decoder.parts.bit63 ? '1' : '0');
 	uint64_t mask = (uint64_t(1) << 61);
 	for (int i = 61; i >= 0; --i) {
 		s << magenta << ((decoder.parts.fraction & mask) ? '1' : '0');
 		if (i > 0 && i % 4 == 0) s << magenta << '\'';
 		mask >>= 1;
 	}
+#endif
 
 	s << def;
 	return s.str();
 
@@ -90,15 +90,15 @@ class posit<NBITS_IS_32, ES_IS_2> {
 	constexpr posit& operator=(short rhs) { return integer_assign((long)(rhs)); }
 	constexpr posit& operator=(int rhs) { return integer_assign((long)(rhs)); }
 	constexpr posit& operator=(long rhs) { return integer_assign(rhs); }
-	posit& operator=(long long rhs) { return float_assign((long double)(rhs)); }
+	posit& operator=(long long rhs) { return float_assign((double)(rhs)); }
 	constexpr posit& operator=(char rhs) { return integer_assign((long)(rhs)); }
 	constexpr posit& operator=(unsigned short rhs) { return integer_assign((long)(rhs)); }
 	constexpr posit& operator=(unsigned int rhs) { return integer_assign((long)(rhs)); }
-	          posit& operator=(unsigned long rhs) { return float_assign((long double)(rhs)); }
-	          posit& operator=(unsigned long long rhs) { return float_assign((long double)(rhs)); }
-	          posit& operator=(float rhs) { return float_assign((long double)rhs); }
-	          posit& operator=(double rhs) { return float_assign((long double)rhs); }
-	          posit& operator=(long double rhs) { return float_assign(rhs); }
+	          posit& operator=(unsigned long rhs) { return float_assign((double)(rhs)); }
+	          posit& operator=(unsigned long long rhs) { return float_assign((double)(rhs)); }
+	          posit& operator=(float rhs) { return float_assign((double)rhs); }
+	          posit& operator=(double rhs) { return float_assign(rhs); }
+	          posit& operator=(long double rhs) { return float_assign((double)rhs); }
 
 	explicit operator long double() const { return to_long_double(); }
 	explicit operator double() const { return to_double(); }
@@ -434,6 +434,11 @@ class posit<NBITS_IS_32, ES_IS_2> {
 		return tmp;
 	}
 	posit reciprocal() const {
+		if (isnar()) { // NaR in, NaR out: hand back NaR directly without performing the division below
+			posit p;
+			p.setnar();
+			return p;
+		}
 		posit p = 1.0 / *this;
 		return p;
 	}
@@ -663,8 +668,14 @@ class posit<NBITS_IS_32, ES_IS_2> {
 		_bits = sign ? -raw : raw;
 		return *this;
 	}
-	posit& float_assign(long double rhs) {
-		constexpr int dfbits = std::numeric_limits<long double>::digits - 1;
+	// Convert a double-precision IEEE-754 floating-point value to a posit<32,2>.
+	// Takes double (not long double) so that dfbits is the same on every
+	// architecture: long double is 80-bit on x86 but 128-bit on RISC-V, and
+	// that difference makes convert_to_bb instantiate with different bitblock
+	// sizes and produce wrong results. The 52 fraction bits of a double are
+	// more than sufficient for a 32-bit posit (at most 27 fraction bits).
+	posit& float_assign(double rhs) {
+		constexpr int dfbits = std::numeric_limits<double>::digits - 1;
 		internal::value<dfbits> v(rhs);
 		// special case processing
 		if (v.iszero()) {