@@ -109,6 +109,58 @@ bool test_ptx_version() {
109109 return true ;
110110}
111111
// Device-side self-check for cub::BFE (bit-field extract).
//
// Runs four extractions, one each on a uint8_t, uint16_t, unsigned int and
// unsigned long long source, and compares each result with a hand-computed
// constant. Arguments are passed as (source, bit_start, num_bits).
//
// res: device-accessible int that receives the outcome:
//        0      all checks passed
//        1..4   1-based index of the first check that failed
//
// Every thread that executes this kernel stores to *res; test_bfe launches it
// with a single thread (<<<1, 1>>>).
__global__ void bfe_kernel(int *res) {
  int failed_case = 0;

  // The else-if chain stops at the first mismatch, so later extractions are
  // not evaluated once a failure has been found.
  if (cub::BFE((uint8_t)0xF0, 4, 8) != 15) {
    failed_case = 1;
  } else if (cub::BFE((uint16_t)0x0FF0u, 4, 12) != 255) {
    failed_case = 2;
  } else if (cub::BFE(0x00FFFF00u, 8, 16) != 65535u) {
    failed_case = 3;
  } else if (cub::BFE(0x000000FFull, 0, 9) != 255) {
    failed_case = 4;
  }

  *res = failed_case;
}
131+
// Device-side self-check for cub::BFI (bit-field insert).
//
// Performs two inserts into the base value 0x00FF0000 starting at bit 0
// (16 bits of 0x0000FFFF, then 8 bits of 0x000000FF) and compares the output
// operand with the expected constant after each one.
//
// res: device-accessible int that receives the outcome:
//        0      both checks passed
//        1..2   1-based index of the first check that failed
//
// Every thread that executes this kernel stores to *res; test_bfi launches it
// with a single thread (<<<1, 1>>>).
__global__ void bfi_kernel(int *res) {
  unsigned inserted = 0;
  int failed_case = 0;

  cub::BFI(inserted, 0x00FF0000u, 0x0000FFFFu, 0, 16);
  if (inserted != 0x00FFFFFFu) {
    failed_case = 1;
  } else {
    // Only reached when the first insert produced the expected value.
    cub::BFI(inserted, 0x00FF0000u, 0x000000FFu, 0, 8);
    if (inserted != 0x00FF00FFu) {
      failed_case = 2;
    }
  }

  *res = failed_case;
}
147+
// Host driver for bfe_kernel.
//
// Allocates one managed int, launches bfe_kernel with a single thread, waits
// for completion, and reports whether the kernel stored 0 (all checks passed).
//
// Returns true only if the allocation, the launch and the synchronization all
// succeeded AND the kernel wrote 0. Any CUDA error is reported as a test
// failure (false).
bool test_bfe() {
  int *res = nullptr;
  if (cudaMallocManaged(&res, sizeof(int)) != cudaSuccess) {
    return false;
  }

  // Seed with a non-zero sentinel so that a kernel which never ran cannot be
  // mistaken for a pass because the fresh allocation happened to read as 0.
  *res = -1;

  bfe_kernel<<<1, 1>>>(res);

  // cudaGetLastError() catches launch failures; cudaDeviceSynchronize()
  // catches errors raised while the kernel was executing. *res is only read
  // on the host when both succeeded (short-circuit evaluation).
  const bool passed = cudaGetLastError() == cudaSuccess &&
                      cudaDeviceSynchronize() == cudaSuccess &&
                      *res == 0;

  // Release the managed allocation on every path past a successful alloc.
  cudaFree(res);
  return passed;
}
155+
// Host driver for bfi_kernel.
//
// Allocates one managed int, launches bfi_kernel with a single thread, waits
// for completion, and reports whether the kernel stored 0 (all checks passed).
//
// Returns true only if the allocation, the launch and the synchronization all
// succeeded AND the kernel wrote 0. Any CUDA error is reported as a test
// failure (false).
bool test_bfi() {
  int *res = nullptr;
  if (cudaMallocManaged(&res, sizeof(int)) != cudaSuccess) {
    return false;
  }

  // Seed with a non-zero sentinel so that a kernel which never ran cannot be
  // mistaken for a pass because the fresh allocation happened to read as 0.
  *res = -1;

  bfi_kernel<<<1, 1>>>(res);

  // cudaGetLastError() catches launch failures; cudaDeviceSynchronize()
  // catches errors raised while the kernel was executing. *res is only read
  // on the host when both succeeded (short-circuit evaluation).
  const bool passed = cudaGetLastError() == cudaSuccess &&
                      cudaDeviceSynchronize() == cudaSuccess &&
                      *res == 0;

  // Release the managed allocation on every path past a successful alloc.
  cudaFree(res);
  return passed;
}
163+
112164#define TEST (FUNC ) \
113165 if (!FUNC()) { \
114166 printf (#FUNC " failed\n " ); \
@@ -122,5 +174,7 @@ int main() {
122174 TEST (test_device_count);
123175 TEST (test_sync_stream);
124176 TEST (test_ptx_version);
177+ TEST (test_bfe);
178+ TEST (test_bfi);
125179 return 0 ;
126180}
0 commit comments