@@ -114,6 +114,33 @@ void checkResult(const string &FuncName, const vector<__nv_bfloat162> &Inputs,
114
114
checkResult (FuncName, FInputs, Expect, Result, precision);
115
115
}
116
116
117
// Reports one test case whose result is a float2 and records pass/fail.
//
// Prints FuncName, the echoed inputs, the observed {x, y} pair and the accepted
// interval for each component, then passes the verdict to check(). A case
// passes when both components of Result lie strictly within 10^-precision of
// the matching component of Expect.
//
//   FuncName  - label printed in front of the case.
//   Inputs    - argument values echoed in the log; may be empty.
//   Expect    - reference value.
//   Result    - value under test.
//   precision - number of digits printed after the decimal point; also
//               defines the absolute tolerance 10^-precision.
void checkResult(const string &FuncName, const vector<float> &Inputs,
                 const float2 &Expect, const float2 &Result,
                 const int precision) {
  // One tolerance shared by the printed interval and the comparison.
  const double Tolerance = pow(10, -precision);

  cout << FuncName << " (";
  // Guard the first element so an empty input list is not read out of bounds.
  if (!Inputs.empty()) {
    cout << Inputs[0] << " ";
    for (size_t i = 1; i < Inputs.size(); ++i) {
      cout << " , " << Inputs[i];
    }
  }

  // setprecision is sticky; remember the current value so the inputs of later
  // cases are printed the same way as those of the first case.
  const auto SavedPrecision = cout.precision();
  cout << " ) = " << fixed << setprecision(precision) << " {" << Result.x
       << " , " << Result.y << " } (expect {" << Expect.x - Tolerance << " ~ "
       << Expect.x + Tolerance << " , " << Expect.y - Tolerance << " ~ "
       << Expect.y + Tolerance << " })";
  cout.unsetf(ios::fixed);
  cout.precision(SavedPrecision);

  // fabs keeps the comparison in floating point regardless of which abs
  // overloads happen to be visible.
  check(fabs(Result.x - Expect.x) < Tolerance &&
        fabs(Result.y - Expect.y) < Tolerance);
}
133
+
134
// bfloat16 front end for the float/float2 checkResult overload.
//
// Widens every input and both halves of the expected pair to float with
// __bfloat162float, then forwards everything, unchanged otherwise, to the
// overload taking vector<float> and float2.
void checkResult(const string &FuncName, const vector<__nv_bfloat16> &Inputs,
                 const __nv_bfloat162 &Expect, const float2 &Result,
                 const int precision) {
  vector<float> Widened;
  Widened.reserve(Inputs.size());
  for (size_t Idx = 0; Idx < Inputs.size(); ++Idx) {
    Widened.push_back(__bfloat162float(Inputs[Idx]));
  }

  float2 WideExpect;
  WideExpect.x = __bfloat162float(Expect.x);
  WideExpect.y = __bfloat162float(Expect.y);

  checkResult(FuncName, Widened, WideExpect, Result, precision);
}
143
+
117
144
__global__ void bfloat1622float2 (float *const Result, __nv_bfloat162 Input1) {
118
145
auto ret = __bfloat1622float2 (Input1);
119
146
Result[0 ] = ret.x ;
@@ -1073,6 +1100,25 @@ void testUshort_as_bfloat16Cases(
1073
1100
}
1074
1101
}
1075
1102
1103
// Test kernel for the make_bfloat162 API.
//
// Builds a __nv_bfloat162 from Input1 and Input2 and stores its x and y
// halves, converted to float, in Result[0] and Result[1]. Result therefore
// needs room for two floats. There is no thread indexing: every launched
// thread writes the same two slots.
__global__ void make_bfloat162(float *const Result, __nv_bfloat16 Input1,
                               __nv_bfloat16 Input2) {
  const __nv_bfloat162 Packed = make_bfloat162(Input1, Input2);
  Result[0] = __bfloat162float(Packed.x);
  Result[1] = __bfloat162float(Packed.y);
}
1108
+
1109
// Runs the make_bfloat162 kernel once per test case and checks its output.
//
// Each test case is {{Input1, Input2}, {Expected, Precision}}. The kernel is
// launched with a single thread, the two resulting floats are read back from
// a managed buffer, and the case is reported through checkResult.
//
// CUDA failures are not ignored: a failed allocation, launch or
// synchronisation is printed and reported through check(false) (check is
// defined elsewhere in this file; presumably it updates the pass/fail
// counters), and the remaining cases are skipped. The managed buffer is
// released before returning.
void testMake_bfloat162Cases(
    const vector<pair<pair<__nv_bfloat16, __nv_bfloat16>,
                      pair<__nv_bfloat162, int>>> &TestCases) {
  float *Result = nullptr;
  cudaError_t Status = cudaMallocManaged(&Result, sizeof(*Result) * 2);
  if (Status != cudaSuccess) {
    cout << "make_bfloat162: cudaMallocManaged failed: "
         << cudaGetErrorString(Status);
    check(false);
    return;
  }

  for (const auto &TestCase : TestCases) {
    make_bfloat162<<<1, 1>>>(Result, TestCase.first.first,
                             TestCase.first.second);
    // Launch-configuration errors show up in cudaGetLastError; faults inside
    // the kernel show up at the synchronisation point.
    Status = cudaGetLastError();
    if (Status == cudaSuccess) {
      Status = cudaDeviceSynchronize();
    }
    if (Status != cudaSuccess) {
      // Result would hold stale data; do not compare it.
      cout << "make_bfloat162: kernel execution failed: "
           << cudaGetErrorString(Status);
      check(false);
      break;
    }
    checkResult(" make_bfloat162",
                {TestCase.first.first, TestCase.first.second},
                TestCase.second.first, {Result[0], Result[1]},
                TestCase.second.second);
  }

  cudaFree(Result);
}
1121
+
1076
1122
int main () {
1077
1123
testBfloat1622float2Cases ({
1078
1124
{{-0.3 , -0.5 }, {{-0.30078125 , -0.5 }, 16 }},
@@ -1542,6 +1588,12 @@ int main() {
1542
1588
{1000 , {0.0000000000000000000000000000000000013635734469538535 , 52 }},
1543
1589
{62536 , {-63382530011411470074835160268800.0 , -16 }},
1544
1590
});
1591
+ testMake_bfloat162Cases ({
1592
+ {{-0.3 , -0.4 }, {{-0.300048828125 , -0.39990234375 }, 16 }},
1593
+ {{0 , 0.7 }, {{0 , 0.7001953125 }, 16 }},
1594
+ {{1 , 100.6 }, {{1 , 100.625 }, 14 }},
1595
+ {{100.6 , 1 }, {{100.625 , 1 }, 14 }},
1596
+ });
1545
1597
cout << " passed " << passed << " /" << passed + failed << " cases!" << endl;
1546
1598
if (failed) {
1547
1599
cout << " failed!" << endl;
0 commit comments