Add C++ agg_min_*_shared implementations

kurapov-peter · kurapov-peter · commit f01cc5664f10 · 2023-08-21T11:26:26.000+02:00
diff --git a/omniscidb/QueryEngine/Compiler/genx.cpp b/omniscidb/QueryEngine/Compiler/genx.cpp
@@ -6,9 +6,34 @@
 
 #include <algorithm>
 #include <cstdint>
+#include <limits>
 
 #include "Shared/funcannotations.h"
 
+namespace {  // File-local helpers: float/double limits, bit-pattern reinterpretation, generic min.
+constexpr float HDK_FLT_MIN = std::numeric_limits<float>::min();  // smallest positive normalized float
+constexpr float HDK_FLT_MAX = std::numeric_limits<float>::max();  // largest finite float
+constexpr double HDK_DBL_MIN = std::numeric_limits<double>::min();  // smallest positive normalized double
+constexpr double HDK_DBL_MAX = std::numeric_limits<double>::max();  // largest finite double
+inline int32_t hdk_float_as_int32_t(const float x) {  // Returns the bits of x read as int32_t (no numeric conversion).
+  return *reinterpret_cast<const int32_t*>(&x);  // NOTE(review): pointer type punning; relies on the compiler tolerating this aliasing.
+}
+inline float hdk_int32_t_as_float(const int32_t x) {  // Inverse of hdk_float_as_int32_t: reads the bits of x as a float.
+  return *reinterpret_cast<const float*>(&x);  // NOTE(review): same type-punning caveat as above.
+}
+inline int64_t hdk_double_as_int64_t(const double x) {  // Returns the bits of x read as int64_t (no numeric conversion).
+  return *reinterpret_cast<const int64_t*>(&x);  // NOTE(review): same type-punning caveat as above.
+}
+inline double hdk_int64_t_as_double(const int64_t x) {  // Inverse of hdk_double_as_int64_t: reads the bits of x as a double.
+  return *reinterpret_cast<const double*>(&x);  // NOTE(review): same type-punning caveat as above.
+}
+
+template <class T>
+inline constexpr T hdk_min(const T lhs, const T rhs) {  // Returns lhs only when lhs < rhs holds, otherwise rhs.
+  return lhs < rhs ? lhs : rhs;  // For floating point, a NaN operand makes the comparison false, so rhs is returned.
+}
+}  // namespace
+
 extern "C" {
 int64_t atomic_cas_int_64(GENERIC_ADDR_SPACE int64_t*, int64_t, int64_t);
 int32_t atomic_cas_int_32(GENERIC_ADDR_SPACE int32_t*, int32_t, int32_t);
@@ -58,19 +83,73 @@ void agg_min_double_shared(GENERIC_ADDR_SPACE int64_t* agg, const double val) {
   atomic_min_double(reinterpret_cast<GENERIC_ADDR_SPACE double*>(agg), val);
 }
 
+double atomic_min_float(GENERIC_ADDR_SPACE float* addr, const float val) {  // Min-updates *addr with val through a compare-and-swap retry loop; returns the last value reported by atomic_cas_int_32, as a double.
+  GENERIC_ADDR_SPACE int32_t* address_as_ull =  // Despite the "ull" in the name, this is an int32_t view of addr.
+      reinterpret_cast<GENERIC_ADDR_SPACE int32_t*>(addr);
+  int32_t old = *address_as_ull, assumed;  // Initial snapshot is a plain load, not an atomic call.
+
+  do {
+    assumed = old;  // The bit pattern we expect to still be stored at addr.
+    old = atomic_cas_int_32(  // NOTE(review): presumably returns the value found at the address (CUDA atomicCAS-like) — confirm against its definition.
+        address_as_ull,
+        assumed,
+        hdk_float_as_int32_t(hdk_min(val, hdk_int32_t_as_float(assumed))));  // Proposed new contents: bits of min(val, assumed).
+  } while (assumed != old);  // Retry until the CAS reports the same bits we assumed.
+
+  return hdk_int32_t_as_float(old);  // The float is implicitly widened to the double return type.
+}
+
+double atomic_min_double(GENERIC_ADDR_SPACE double* addr, const double val) {  // Min-updates *addr with val through a compare-and-swap retry loop; returns the last value reported by atomic_cas_int_64.
+  GENERIC_ADDR_SPACE int64_t* address_as_ull =  // Despite the "ull" in the name, this is a signed int64_t view of addr.
+      reinterpret_cast<GENERIC_ADDR_SPACE int64_t*>(addr);
+  int64_t old = *address_as_ull, assumed;  // Initial snapshot is a plain load, not an atomic call.
+
+  do {
+    assumed = old;  // The bit pattern we expect to still be stored at addr.
+    old = atomic_cas_int_64(  // NOTE(review): presumably returns the value found at the address (CUDA atomicCAS-like) — confirm against its definition.
+        address_as_ull,
+        assumed,
+        hdk_double_as_int64_t(hdk_min(val, hdk_int64_t_as_double(assumed))));  // Proposed new contents: bits of min(val, assumed).
+  } while (assumed != old);  // Retry until the CAS reports the same bits we assumed.
+
+  return hdk_int64_t_as_double(old);  // Reinterpret the returned bits as a double.
+}
+
+void atomicMinFltSkipVal(GENERIC_ADDR_SPACE int32_t* addr,  // Min-aggregates val into *addr; previous contents that equal skip_val bitwise are not folded into the result.
+                         const float val,
+                         const float skip_val) {
+  const int32_t flt_max = hdk_float_as_int32_t(HDK_FLT_MAX);  // Bits of the largest finite float, written as a temporary placeholder.
+  int32_t old = atomic_xchg_int_32(addr, flt_max);  // NOTE(review): presumably returns the previous contents of *addr — confirm against its definition.
+  agg_min_float_shared(addr,  // Fold val (and old, unless it was skip_val) back into *addr.
+                       old == hdk_float_as_int32_t(skip_val)  // Bit-pattern comparison, not a floating-point ==.
+                           ? val
+                           : hdk_min(hdk_int32_t_as_float(old), val));
+}
+
+void atomicMinDblSkipVal(GENERIC_ADDR_SPACE int64_t* addr,  // Min-aggregates val into *addr; previous contents that equal skip_val bitwise are not folded into the result.
+                         const double val,
+                         const double skip_val) {
+  const int64_t dbl_max = hdk_double_as_int64_t(HDK_DBL_MAX);  // Bits of the largest finite double, written as a temporary placeholder.
+  int64_t old = atomic_xchg_int_64(addr, dbl_max);  // NOTE(review): presumably returns the previous contents of *addr — confirm against its definition.
+  agg_min_double_shared(addr,  // Fold val (and old, unless it was skip_val) back into *addr.
+                        old == hdk_double_as_int64_t(skip_val)  // Bit-pattern comparison, not a floating-point ==.
+                            ? val
+                            : hdk_min(hdk_int64_t_as_double(old), val));
+}
+
 void agg_min_float_skip_val_shared(GENERIC_ADDR_SPACE int32_t* agg,
                                    const float val,
                                    const float skip_val) {
-  if (val != skip_val) {
-    agg_min_float_shared(agg, val);
+  if (hdk_float_as_int32_t(val) != hdk_float_as_int32_t(skip_val)) {  // Bitwise test: val is ignored only when bit-identical to skip_val.
+    atomicMinFltSkipVal(agg, val, skip_val);  // Also covers the case where *agg itself currently holds skip_val.
   }
 }
 
 void agg_min_double_skip_val_shared(GENERIC_ADDR_SPACE int64_t* agg,
                                     const double val,
                                     const double skip_val) {
-  if (val != skip_val) {
-    agg_min_double_shared(agg, val);
+  if (hdk_double_as_int64_t(val) != hdk_double_as_int64_t(skip_val)) {  // Bitwise test: val is ignored only when bit-identical to skip_val.
+    atomicMinDblSkipVal(agg, val, skip_val);  // Also covers the case where *agg itself currently holds skip_val.
   }
 }
 
diff --git a/omniscidb/QueryEngine/Compiler/genx.ll b/omniscidb/QueryEngine/Compiler/genx.ll
@@ -128,6 +128,7 @@ define void @agg_sum_float_shared(i32 addrspace(4)* %agg, float noundef %val) {
     ret void
 }
 
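+; FIXME(review): unexplained marker — presumably this function needs the same bitwise skip_val handling as the C++ agg_min_*_skip_val_shared; confirm.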
 define void @agg_sum_float_skip_val_shared(i32 addrspace(4)* %agg, float noundef %val, float noundef %skip_val) {
     %no_skip = fcmp one float %val, %skip_val
     br i1 %no_skip, label %.noskip, label %.skip
@@ -195,45 +196,6 @@ define void @atomic_or(i32 addrspace(4)* %addr, i32 noundef %val) {
     ret void
 }
 
-define double @atomic_min_float(float addrspace(4)* %addr, float noundef %val) {
-.entry:
-    %orig = load float, float addrspace(4)* %addr, align 8
-    br label %.loop
-.loop:
-    %loaded = phi float [ %orig, %.entry], [ %old.cst, %.loop ]
-    %isless = fcmp olt float %val, %loaded
-    %min = select i1 %isless, float %val, float %loaded
-    %min.cst = bitcast float %min to i32
-    %loaded.cst = bitcast float %loaded to i32
-    %addr.cst = bitcast float addrspace(4)* %addr to i32 addrspace(4)*
-    %old = call i32 @atomic_cas_int_32(i32 addrspace(4)* %addr.cst, i32 %loaded.cst, i32 %min.cst)
-    %old.cst = bitcast i32 %old to float
-    %success = icmp eq i32 %old, %loaded.cst
-    br i1 %success, label %.exit, label %.loop
-.exit:
-    %res = fpext float %old.cst to double
-    ret double %res
-}
-
-define double @atomic_min_double(double addrspace(4)* %addr, double noundef %val) {
-.entry:
-    %orig = load double, double addrspace(4)* %addr, align 8
-    br label %.loop
-.loop:
-    %loaded = phi double [ %orig, %.entry], [ %old.cst, %.loop ]
-    %isless = fcmp olt double %val, %loaded
-    %min = select i1 %isless, double %val, double %loaded
-    %min.cst = bitcast double %min to i64
-    %loaded.cst = bitcast double %loaded to i64
-    %addr.cst = bitcast double addrspace(4)* %addr to i64 addrspace(4)*
-    %old = call i64 @atomic_cas_int_64(i64 addrspace(4)* %addr.cst, i64 %loaded.cst, i64 %min.cst)
-    %old.cst = bitcast i64 %old to double
-    %success = icmp eq i64 %old, %loaded.cst
-    br i1 %success, label %.exit, label %.loop
-.exit:
-    ret double %old.cst
-}
-
 define double @atomic_max_float(float addrspace(4)* %addr, float noundef %val) {
 .entry:
     %orig = load float, float addrspace(4)* %addr, align 8