Skip to content

Commit 1f52533

Browse files
authored
[SYCLomatic oneapi-src#1351] Disable double test cases on GPUs without double-precision support. (oneapi-src#528)
Signed-off-by: Tang, Jiajun [email protected]
1 parent f00e6e0 commit 1f52533

File tree

6 files changed

+175
-101
lines changed

6 files changed

+175
-101
lines changed
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
// ====---------- math-emu-bf16-conv-double.cu---------- *- CUDA -* -------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//
8+
// ===---------------------------------------------------------------------===//
9+
10+
#include <iomanip>
11+
#include <iostream>
12+
#include <vector>
13+
14+
#include "cuda_bf16.h"
15+
16+
using namespace std;
17+
18+
typedef pair<__nv_bfloat16, int> bf16i_pair;
19+
20+
// Running tallies of test outcomes for this executable.
int passed = 0;
int failed = 0;

// Record one test outcome: print a pass/fail marker and bump the
// matching global counter.
void check(bool IsPassed) {
  if (!IsPassed) {
    std::cout << " ---- failed" << std::endl;
    ++failed;
    return;
  }
  std::cout << " ---- passed" << std::endl;
  ++passed;
}
32+
33+
// Print a call trace "Func(in0, in1, ...) = result (expect lo ~ hi)" and
// record pass/fail via check(). A result passes when
// |Result - Expect| < 10^-precision.
void checkResult(const string &FuncName, const vector<float> &Inputs,
                 const float &Expect, const float &Result,
                 const int precision) {
  cout << FuncName << "(" << Inputs[0] << "";
  for (size_t i = 1; i < Inputs.size(); ++i)
    cout << ", " << Inputs[i];
  // One-sided tolerance; computed once instead of three times.
  const double Tolerance = pow(10, -precision);
  // Negative precision would be rejected by setprecision, so clamp to 0.
  cout << ") = " << fixed << setprecision(precision < 0 ? 0 : precision)
       << Result << " (expect " << Expect - Tolerance << " ~ "
       << Expect + Tolerance << ")";
  cout.unsetf(ios::fixed);
  check(abs(Result - Expect) < Tolerance);
}
46+
47+
// Overload for a bfloat16 expectation: widen the expected value to float
// and defer to the float overload above.
void checkResult(const string &FuncName, const vector<float> &Inputs,
                 const __nv_bfloat16 &Expect, const float &Result,
                 const int precision) {
  const float WidenedExpect = __bfloat162float(Expect);
  checkResult(FuncName, Inputs, WidenedExpect, Result, precision);
}
53+
54+
// Bfloat16 Precision Conversion and Data Movement

// Device-side wrapper around __double2bfloat16: convert a double to
// bfloat16, then store it widened back to float so the host can inspect
// the rounded value. Intended to be launched as a single thread.
__global__ void double2bfloat16(float *const Result, double Input1) {
  Result[0] = __double2bfloat16(Input1);
}
59+
60+
// Run __double2bfloat16 over every (input, (expected bf16, precision))
// pair. Each case launches the conversion kernel on one thread,
// synchronizes so the managed result is visible on the host, and compares
// the round-tripped value against the expected bfloat16.
void testDouble2bfloat16Cases(
    const vector<pair<double, bf16i_pair>> &TestCases) {
  float *Result;
  cudaMallocManaged(&Result, sizeof(*Result));
  for (const auto &TestCase : TestCases) {
    double2bfloat16<<<1, 1>>>(Result, TestCase.first);
    cudaDeviceSynchronize();
    checkResult("__double2bfloat16", {(float)TestCase.first},
                TestCase.second.first, *Result, TestCase.second.second);
  }
  // Fix: release the managed allocation — it was leaked on every call.
  cudaFree(Result);
}
71+
72+
// Drive the __double2bfloat16 cases, print a summary, and return the
// number of failures (0 == success) as the process exit code.
int main() {
  testDouble2bfloat16Cases({
      {-0.3, {-0.30078125, 16}},
      {0.3, {0.30078125, 16}},
      {30, {30, 14}},
      {0.432643, {0.43359375, 16}},
      {1, {1, 15}},
      {10.7, {10.6875, 15}},
  });
  const int total = passed + failed;
  cout << "passed " << passed << "/" << total << " cases!" << endl;
  if (failed != 0) {
    cout << "failed!" << endl;
  }
  return failed;
}

features/feature_case/math/math-emu-double.cu

Lines changed: 0 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,8 @@
1111
#include <iostream>
1212
#include <vector>
1313

14-
#include "cuda_bf16.h"
15-
1614
using namespace std;
1715

18-
typedef pair<__nv_bfloat16, int> bf16i_pair;
1916
typedef vector<double> d_vector;
2017
typedef tuple<double, double, double> d_tuple3;
2118
typedef tuple<double, double, double, double> d_tuple4;
@@ -34,27 +31,6 @@ void check(bool IsPassed) {
3431
}
3532
}
3633

37-
void checkResult(const string &FuncName, const vector<float> &Inputs,
38-
const float &Expect, const float &Result,
39-
const int precision) {
40-
cout << FuncName << "(" << Inputs[0] << "";
41-
for (size_t i = 1; i < Inputs.size(); ++i) {
42-
cout << ", " << Inputs[i];
43-
}
44-
cout << ") = " << fixed << setprecision(precision) << Result << " (expect "
45-
<< Expect - pow(10, -precision) << " ~ " << Expect + pow(10, -precision)
46-
<< ")";
47-
cout.unsetf(ios::fixed);
48-
check(abs(Result - Expect) < pow(10, -precision));
49-
}
50-
51-
void checkResult(const string &FuncName, const vector<float> &Inputs,
52-
const __nv_bfloat16 &Expect, const float &Result,
53-
const int precision) {
54-
float FExpect = __bfloat162float(Expect);
55-
checkResult(FuncName, Inputs, FExpect, Result, precision);
56-
}
57-
5834
template <typename T = double>
5935
void checkResult(const string &FuncName, const vector<T> &Inputs,
6036
const double &Expect, const double &DeviceResult,
@@ -74,24 +50,6 @@ __global__ void setVecValue(double *Input1, const double Input2) {
7450
*Input1 = Input2;
7551
}
7652

77-
// Bfloat16 Precision Conversion and Data Movement
78-
79-
__global__ void double2bfloat16(float *const Result, double Input1) {
80-
*Result = __double2bfloat16(Input1);
81-
}
82-
83-
void testDouble2bfloat16Cases(
84-
const vector<pair<double, bf16i_pair>> &TestCases) {
85-
float *Result;
86-
cudaMallocManaged(&Result, sizeof(*Result));
87-
for (const auto &TestCase : TestCases) {
88-
double2bfloat16<<<1, 1>>>(Result, TestCase.first);
89-
cudaDeviceSynchronize();
90-
checkResult("__double2bfloat16", {(float)TestCase.first},
91-
TestCase.second.first, *Result, TestCase.second.second);
92-
}
93-
}
94-
9553
// Double Precision Mathematical Functions
9654

9755
__global__ void _norm(double *const DeviceResult, int Input1,
@@ -495,14 +453,6 @@ void testDsub_rzCases(
495453
}
496454

497455
int main() {
498-
testDouble2bfloat16Cases({
499-
{-0.3, {-0.30078125, 16}},
500-
{0.3, {0.30078125, 16}},
501-
{30, {30, 14}},
502-
{0.432643, {0.43359375, 16}},
503-
{1, {1, 15}},
504-
{10.7, {10.6875, 15}},
505-
});
506456
testNormCases({
507457
{{-0.3, -0.34, -0.98}, {1.079814798935447, 15}},
508458
{{0.3, 0.34, 0.98}, {1.079814798935447, 15}},
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
// ====---------- math-ext-bf16-conv-double.cu---------- *- CUDA -* -------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//
8+
// ===---------------------------------------------------------------------===//
9+
10+
#include <iomanip>
11+
#include <iostream>
12+
#include <vector>
13+
14+
#include "cuda_bf16.h"
15+
16+
using namespace std;
17+
18+
typedef pair<__nv_bfloat16, int> bf16i_pair;
19+
20+
// Running tallies of test outcomes for this executable.
int passed = 0;
int failed = 0;

// Record one test outcome: print a pass/fail marker and bump the
// matching global counter.
void check(bool IsPassed) {
  if (!IsPassed) {
    std::cout << " ---- failed" << std::endl;
    ++failed;
    return;
  }
  std::cout << " ---- passed" << std::endl;
  ++passed;
}
32+
33+
// Print a call trace "Func(in0, in1, ...) = result (expect lo ~ hi)" and
// record pass/fail via check(). A result passes when
// |Result - Expect| < 10^-precision.
void checkResult(const string &FuncName, const vector<float> &Inputs,
                 const float &Expect, const float &Result,
                 const int precision) {
  cout << FuncName << "(" << Inputs[0] << "";
  for (size_t i = 1; i < Inputs.size(); ++i)
    cout << ", " << Inputs[i];
  // One-sided tolerance; computed once instead of three times.
  const double Tolerance = pow(10, -precision);
  // Negative precision would be rejected by setprecision, so clamp to 0.
  cout << ") = " << fixed << setprecision(precision < 0 ? 0 : precision)
       << Result << " (expect " << Expect - Tolerance << " ~ "
       << Expect + Tolerance << ")";
  cout.unsetf(ios::fixed);
  check(abs(Result - Expect) < Tolerance);
}
46+
47+
// Overload for a bfloat16 expectation: widen the expected value to float
// and defer to the float overload above.
void checkResult(const string &FuncName, const vector<float> &Inputs,
                 const __nv_bfloat16 &Expect, const float &Result,
                 const int precision) {
  const float WidenedExpect = __bfloat162float(Expect);
  checkResult(FuncName, Inputs, WidenedExpect, Result, precision);
}
53+
54+
// Bfloat16 Precision Conversion and Data Movement

// Device-side wrapper around __double2bfloat16: convert a double to
// bfloat16, then store it widened back to float so the host can inspect
// the rounded value. Intended to be launched as a single thread.
__global__ void double2bfloat16(float *const Result, double Input1) {
  Result[0] = __double2bfloat16(Input1);
}
59+
60+
// Run __double2bfloat16 over every (input, (expected bf16, precision))
// pair. Each case launches the conversion kernel on one thread,
// synchronizes so the managed result is visible on the host, and compares
// the round-tripped value against the expected bfloat16.
void testDouble2bfloat16Cases(
    const vector<pair<double, bf16i_pair>> &TestCases) {
  float *Result;
  cudaMallocManaged(&Result, sizeof(*Result));
  for (const auto &TestCase : TestCases) {
    double2bfloat16<<<1, 1>>>(Result, TestCase.first);
    cudaDeviceSynchronize();
    checkResult("__double2bfloat16", {(float)TestCase.first},
                TestCase.second.first, *Result, TestCase.second.second);
  }
  // Fix: release the managed allocation — it was leaked on every call.
  cudaFree(Result);
}
71+
72+
// Drive the __double2bfloat16 cases, print a summary, and return the
// number of failures (0 == success) as the process exit code.
int main() {
  testDouble2bfloat16Cases({
      {-0.3, {-0.30078125, 16}},
      {0.3, {0.30078125, 16}},
      {30, {30, 14}},
      {0.432643, {0.43359375, 16}},
      {1, {1, 15}},
      {10.7, {10.6875, 15}},
  });
  const int total = passed + failed;
  cout << "passed " << passed << "/" << total << " cases!" << endl;
  if (failed != 0) {
    cout << "failed!" << endl;
  }
  return failed;
}

features/feature_case/math/math-ext-double.cu

Lines changed: 0 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,8 @@
1111
#include <iostream>
1212
#include <vector>
1313

14-
#include "cuda_bf16.h"
15-
1614
using namespace std;
1715

18-
typedef pair<__nv_bfloat16, int> bf16i_pair;
1916
typedef vector<double> d_vector;
2017
typedef pair<double, int> di_pair;
2118

@@ -32,27 +29,6 @@ void check(bool IsPassed) {
3229
}
3330
}
3431

35-
void checkResult(const string &FuncName, const vector<float> &Inputs,
36-
const float &Expect, const float &Result,
37-
const int precision) {
38-
cout << FuncName << "(" << Inputs[0] << "";
39-
for (size_t i = 1; i < Inputs.size(); ++i) {
40-
cout << ", " << Inputs[i];
41-
}
42-
cout << ") = " << fixed << setprecision(precision < 0 ? 0 : precision)
43-
<< Result << " (expect " << Expect - pow(10, -precision) << " ~ "
44-
<< Expect + pow(10, -precision) << ")";
45-
cout.unsetf(ios::fixed);
46-
check(abs(Result - Expect) < pow(10, -precision));
47-
}
48-
49-
void checkResult(const string &FuncName, const vector<float> &Inputs,
50-
const __nv_bfloat16 &Expect, const float &Result,
51-
const int precision) {
52-
float FExpect = __bfloat162float(Expect);
53-
checkResult(FuncName, Inputs, FExpect, Result, precision);
54-
}
55-
5632
template <typename T = double>
5733
void checkResult(const string &FuncName, const vector<T> &Inputs,
5834
const double &Expect, const double &DeviceResult,
@@ -68,24 +44,6 @@ void checkResult(const string &FuncName, const vector<T> &Inputs,
6844
check(abs(DeviceResult - Expect) < pow(10, -precision));
6945
}
7046

71-
// Bfloat16 Precision Conversion and Data Movement
72-
73-
__global__ void double2bfloat16(float *const Result, double Input1) {
74-
*Result = __double2bfloat16(Input1);
75-
}
76-
77-
void testDouble2bfloat16Cases(
78-
const vector<pair<double, bf16i_pair>> &TestCases) {
79-
float *Result;
80-
cudaMallocManaged(&Result, sizeof(*Result));
81-
for (const auto &TestCase : TestCases) {
82-
double2bfloat16<<<1, 1>>>(Result, TestCase.first);
83-
cudaDeviceSynchronize();
84-
checkResult("__double2bfloat16", {(float)TestCase.first},
85-
TestCase.second.first, *Result, TestCase.second.second);
86-
}
87-
}
88-
8947
// Double Precision Mathematical Functions
9048

9149
__global__ void cylBesselI0(double *const Result, double Input1) {
@@ -549,14 +507,6 @@ void testDsub_rzCases(
549507
}
550508

551509
int main() {
552-
testDouble2bfloat16Cases({
553-
{-0.3, {-0.30078125, 16}},
554-
{0.3, {0.30078125, 16}},
555-
{30, {30, 14}},
556-
{0.432643, {0.43359375, 16}},
557-
{1, {1, 15}},
558-
{10.7, {10.6875, 15}},
559-
});
560510
testCylBesselI0Cases({
561511
{0.3, {1.022626879351597, 15}},
562512
{0.5, {1.063483370741324, 15}},

features/features.xml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@
107107
<test testName="math-drcp" configFile="config/TEMPLATE_math_drcp.xml" splitGroup="double" />
108108
<test testName="math-emu" configFile="config/TEMPLATE_math.xml" />
109109
<test testName="math-emu-bf16" configFile="config/TEMPLATE_math_after_11.xml" />
110+
<test testName="math-emu-bf16-conv-double" configFile="config/TEMPLATE_math_after_11_skip_double.xml" splitGroup="double"/>
110111
<test testName="math-emu-bf162-after12" configFile="config/TEMPLATE_math_after_12.xml" />
111112
<test testName="math-emu-bf162" configFile="config/TEMPLATE_math_after_11.xml" />
112113
<test testName="math-emu-double" configFile="config/TEMPLATE_math_skip_double.xml" splitGroup="double"/>
@@ -124,6 +125,7 @@
124125
<test testName="math-experimental-bf16" configFile="config/TEMPLATE_math_after_11.xml" />
125126
<test testName="math-experimental-bf162" configFile="config/TEMPLATE_math_after_11.xml" />
126127
<test testName="math-ext-bf16-conv" configFile="config/TEMPLATE_math_after_11.xml" />
128+
<test testName="math-ext-bf16-conv-double" configFile="config/TEMPLATE_math_after_11_skip_double.xml" splitGroup="double"/>
127129
<test testName="math-ext-double" configFile="config/TEMPLATE_math_skip_double.xml" splitGroup="double"/>
128130
<test testName="math-ext-float" configFile="config/TEMPLATE_math_skip_double.xml" splitGroup="double"/>
129131
<test testName="math-ext-half" configFile="config/TEMPLATE_math_after_9.xml" />

features/test_feature.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
'math-ext-bf16-conv', 'math-ext-double', 'math-ext-float', 'math-ext-half', 'math-ext-half-after11', 'math-ext-half-conv', 'math-ext-half2', 'math-ext-half2-after11', 'math-ext-simd', 'cudnn-activation',
3636
'cudnn-fill', 'cudnn-lrn', 'cudnn-memory', 'cudnn-pooling', 'cudnn-reorder', 'cudnn-scale', 'cudnn-softmax',
3737
'cudnn-sum', 'math-funnelshift', 'thrust-sort_by_key', 'thrust-find', 'thrust-inner_product', 'thrust-reduce_by_key',
38-
'math-bf16-conv', 'math-half-conv',
38+
'math-bf16-conv', 'math-emu-bf16-conv-double', 'math-ext-bf16-conv-double', 'math-half-conv',
3939
'math-bfloat16', 'libcu_atomic', 'test_shared_memory', 'cudnn-reduction', 'cudnn-binary', 'cudnn-bnp1', 'cudnn-bnp2', 'cudnn-bnp3',
4040
'cudnn-normp1', 'cudnn-normp2', 'cudnn-normp3', 'cudnn-convp1', 'cudnn-convp2', 'cudnn-convp3', 'cudnn-convp4', 'cudnn-convp5', 'cudnn-convp6',
4141
'cudnn_mutilple_files', "cusparse_1", "cusparse_2", "cusparse_3", "cusparse_4", "cusparse_5", "cusparse_6", "cusparse_7",

0 commit comments

Comments
 (0)