@@ -278,7 +278,7 @@ kernel3(const float c)
278278{
279279 double a = threadIdx.x ;
280280 float i = 0 ;
281- float d = 0 ;
281+ float d = threadIdx. x ;
282282 float e = 0 ;
283283 int tid_even = threadIdx.x % 2 ;
284284 for (int j = 0 ; j < ITER_NUM; j++)
@@ -388,106 +388,106 @@ kernel3(const float c)
388388 }
389389 else
390390 {
391- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
392- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
393- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
394- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
395- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
396- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
397- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
398- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
399- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
400- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
401- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
402- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
403- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
404- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
405- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
406- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
407- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
408- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
409- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
410- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
411- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
412- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
413- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
414- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
415- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
416- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
417- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
418- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
419- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
420- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
421- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
422- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
423- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
424- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
425- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
426- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
427- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
428- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
429- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
430- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
431- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
432- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
433- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
434- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
435- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
436- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
437- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
438- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
439- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
440- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
441- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
442- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
443- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
444- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
445- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
446- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
447- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
448- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
449- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
450- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
451- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
452- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
453- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
454- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
455- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
456- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
457- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
458- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
459- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
460- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
461- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
462- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
463- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
464- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
465- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
466- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
467- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
468- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
469- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
470- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
471- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
472- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
473- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
474- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
475- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
476- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
477- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
478- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
479- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
480- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
481- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
482- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
483- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
484- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
485- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
486- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
487- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
488- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
489- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
490- asm volatile (" v_fmac_f32 %0, %0, %1 \n " : " +v" (d) : " v " (e ));
391+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
392+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
393+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
394+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
395+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
396+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
397+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
398+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
399+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
400+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
401+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
402+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
403+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
404+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
405+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
406+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
407+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
408+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
409+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
410+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
411+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
412+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
413+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
414+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
415+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
416+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
417+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
418+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
419+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
420+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
421+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
422+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
423+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
424+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
425+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
426+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
427+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
428+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
429+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
430+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
431+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
432+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
433+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
434+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
435+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
436+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
437+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
438+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
439+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
440+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
441+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
442+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
443+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
444+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
445+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
446+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
447+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
448+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
449+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
450+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
451+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
452+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
453+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
454+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
455+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
456+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
457+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
458+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
459+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
460+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
461+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
462+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
463+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
464+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
465+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
466+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
467+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
468+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
469+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
470+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
471+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
472+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
473+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
474+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
475+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
476+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
477+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
478+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
479+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
480+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
481+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
482+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
483+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
484+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
485+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
486+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
487+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
488+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
489+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
490+ asm volatile (" v_rcp_f32 %0, %0\n " : " +v" (d), " =s " (e) : " s " (c ));
491491 }
492492 }
493493}
0 commit comments