@@ -1316,10 +1316,6 @@ void test_cusolverDnSyevdx() {
1316
1316
cudaFree (device_ws_z);
1317
1317
cudaFree (info);
1318
1318
1319
- printf (" a_s:%f,%f,%f,%f\n " , a_s.h_data [0 ], a_s.h_data [1 ], a_s.h_data [2 ], a_s.h_data [3 ]);
1320
- printf (" h_meig_s:%ld\n " , h_meig_s);
1321
- printf (" w_s:%f,%f\n " , w_s.h_data [0 ], w_s.h_data [1 ]);
1322
-
1323
1319
float expect_a[4 ] = {0.894427 ,-0.447214 ,0.447214 ,0.894427 };
1324
1320
int64_t expect_h_meig = 2 ;
1325
1321
float expect_w[2 ] = {0.000000 ,5.000000 };
@@ -1435,10 +1431,6 @@ void test_cusolverDnXsyevdx() {
1435
1431
free (host_ws_z);
1436
1432
cudaFree (info);
1437
1433
1438
- printf (" a_s:%f,%f,%f,%f\n " , a_s.h_data [0 ], a_s.h_data [1 ], a_s.h_data [2 ], a_s.h_data [3 ]);
1439
- printf (" h_meig_s:%ld\n " , h_meig_s);
1440
- printf (" w_s:%f,%f\n " , w_s.h_data [0 ], w_s.h_data [1 ]);
1441
-
1442
1434
float expect_a[4 ] = {0.894427 ,-0.447214 ,0.447214 ,0.894427 };
1443
1435
int64_t expect_h_meig = 2 ;
1444
1436
float expect_w[2 ] = {0.000000 ,5.000000 };
@@ -1461,6 +1453,200 @@ void test_cusolverDnXsyevdx() {
1461
1453
}
1462
1454
}
1463
1455
1456
// Exercises cusolverDnXsyevd (and cusolverDnXsyevd_bufferSize) for four
// data-type combinations -- real float, real double, complex float and
// complex double -- on the same 2x2 input {1, 2, 2, 4}, requesting both
// eigenvalues and eigenvectors (CUSOLVER_EIG_MODE_VECTOR) from the upper
// triangle. The overwritten matrices and the eigenvalue arrays are then
// compared against fixed expected values; on mismatch the global
// `test_passed` flag is cleared.
//
// NOTE(review): return codes of the CUDA / cuSOLVER calls are not checked, and
// the device-side `info` word is overwritten by every solver call and never
// copied back, so a failing call is only noticed through the final comparison.
void test_cusolverDnXsyevd() {
  std::vector<float> a = {1, 2, 2, 4};
  // One input copy per data type; presumably Data<T> converts the float
  // source into T and owns matching h_data / d_data buffers -- TODO confirm.
  Data<float> a_s(a.data(), 4);
  Data<double> a_d(a.data(), 4);
  Data<float2> a_c(a.data(), 4);
  Data<double2> a_z(a.data(), 4);
  // Eigenvalue outputs are real-typed even for the complex inputs.
  Data<float> w_s(2);
  Data<double> w_d(2);
  Data<float> w_c(2);
  Data<double> w_z(2);

  cusolverDnHandle_t handle;
  cusolverDnCreate(&handle);

  a_s.H2D();
  a_d.H2D();
  a_c.H2D();
  a_z.H2D();

  cusolverDnParams_t params;
  cusolverDnCreateParams(&params);

  // Workspace sizes in bytes (device and host) reported by the size query.
  size_t lwork_s;
  size_t lwork_d;
  size_t lwork_c;
  size_t lwork_z;
  size_t lwork_host_s;
  size_t lwork_host_d;
  size_t lwork_host_c;
  size_t lwork_host_z;

  cusolverDnXsyevd_bufferSize(handle, params, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, 2, CUDA_R_32F, a_s.d_data, 2, CUDA_R_32F, w_s.d_data, CUDA_R_32F, &lwork_s, &lwork_host_s);
  cusolverDnXsyevd_bufferSize(handle, params, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, 2, CUDA_R_64F, a_d.d_data, 2, CUDA_R_64F, w_d.d_data, CUDA_R_64F, &lwork_d, &lwork_host_d);
  cusolverDnXsyevd_bufferSize(handle, params, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, 2, CUDA_C_32F, a_c.d_data, 2, CUDA_R_32F, w_c.d_data, CUDA_C_32F, &lwork_c, &lwork_host_c);
  cusolverDnXsyevd_bufferSize(handle, params, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, 2, CUDA_C_64F, a_z.d_data, 2, CUDA_R_64F, w_z.d_data, CUDA_C_64F, &lwork_z, &lwork_host_z);

  void *device_ws_s;
  void *device_ws_d;
  void *device_ws_c;
  void *device_ws_z;
  cudaMalloc(&device_ws_s, lwork_s);
  cudaMalloc(&device_ws_d, lwork_d);
  cudaMalloc(&device_ws_c, lwork_c);
  cudaMalloc(&device_ws_z, lwork_z);
  void *host_ws_s;
  void *host_ws_d;
  void *host_ws_c;
  void *host_ws_z;
  host_ws_s = malloc(lwork_host_s);
  host_ws_d = malloc(lwork_host_d);
  host_ws_c = malloc(lwork_host_c);
  host_ws_z = malloc(lwork_host_z);

  // Single device-side status word shared by all four solver calls.
  int *info;
  cudaMalloc(&info, sizeof(int));

  cusolverDnXsyevd(handle, params, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, 2, CUDA_R_32F, a_s.d_data, 2, CUDA_R_32F, w_s.d_data, CUDA_R_32F, device_ws_s, lwork_s, host_ws_s, lwork_host_s, info);
  cusolverDnXsyevd(handle, params, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, 2, CUDA_R_64F, a_d.d_data, 2, CUDA_R_64F, w_d.d_data, CUDA_R_64F, device_ws_d, lwork_d, host_ws_d, lwork_host_d, info);
  cusolverDnXsyevd(handle, params, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, 2, CUDA_C_32F, a_c.d_data, 2, CUDA_R_32F, w_c.d_data, CUDA_C_32F, device_ws_c, lwork_c, host_ws_c, lwork_host_c, info);
  cusolverDnXsyevd(handle, params, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, 2, CUDA_C_64F, a_z.d_data, 2, CUDA_R_64F, w_z.d_data, CUDA_C_64F, device_ws_z, lwork_z, host_ws_z, lwork_host_z, info);

  // Bring the overwritten matrices and the eigenvalues back to the host
  // (presumably a device-to-host copy inside Data<T>::D2H -- TODO confirm).
  a_s.D2H();
  a_d.D2H();
  a_c.D2H();
  a_z.D2H();
  w_s.D2H();
  w_d.D2H();
  w_c.D2H();
  w_z.D2H();

  // Wait for everything queued on stream 0 before releasing resources.
  cudaStreamSynchronize(0);

  cusolverDnDestroyParams(params);
  cusolverDnDestroy(handle);
  cudaFree(device_ws_s);
  cudaFree(device_ws_d);
  cudaFree(device_ws_c);
  cudaFree(device_ws_z);
  free(host_ws_s);
  free(host_ws_d);
  free(host_ws_c);
  free(host_ws_z);
  cudaFree(info);

  float expect_a[4] = {-0.894427, 0.447214, 0.447214, 0.894427};
  float expect_w[2] = {0.000000, 5.000000};
  if (compare_result(expect_a, a_s.h_data, 4) &&
      compare_result(expect_a, a_d.h_data, 4) &&
      compare_result(expect_a, a_c.h_data, 4) &&
      compare_result(expect_a, a_z.h_data, 4) &&
      compare_result(expect_w, w_s.h_data, 2) &&
      compare_result(expect_w, w_d.h_data, 2) &&
      compare_result(expect_w, w_c.h_data, 2) &&
      compare_result(expect_w, w_z.h_data, 2))
    printf(" DnXsyevd pass\n ");
  else {
    printf(" DnXsyevd fail\n ");
    test_passed = false;
  }
}
1556
+
1557
// Exercises the params-based cusolverDnSyevd (and cusolverDnSyevd_bufferSize)
// for four data-type combinations -- real float, real double, complex float
// and complex double -- on the same 2x2 input {1, 2, 2, 4}, requesting both
// eigenvalues and eigenvectors (CUSOLVER_EIG_MODE_VECTOR) from the upper
// triangle. Unlike the Xsyevd variant, only a device workspace is used.
// The overwritten matrices and the eigenvalue arrays are compared against
// fixed expected values; on mismatch the global `test_passed` flag is cleared.
//
// NOTE(review): return codes of the CUDA / cuSOLVER calls are not checked, and
// the device-side `info` word is overwritten by every solver call and never
// copied back, so a failing call is only noticed through the final comparison.
void test_cusolverDnSyevd() {
  std::vector<float> a = {1, 2, 2, 4};
  // One input copy per data type; presumably Data<T> converts the float
  // source into T and owns matching h_data / d_data buffers -- TODO confirm.
  Data<float> a_s(a.data(), 4);
  Data<double> a_d(a.data(), 4);
  Data<float2> a_c(a.data(), 4);
  Data<double2> a_z(a.data(), 4);
  // Eigenvalue outputs are real-typed even for the complex inputs.
  Data<float> w_s(2);
  Data<double> w_d(2);
  Data<float> w_c(2);
  Data<double> w_z(2);

  cusolverDnHandle_t handle;
  cusolverDnCreate(&handle);

  a_s.H2D();
  a_d.H2D();
  a_c.H2D();
  a_z.H2D();

  cusolverDnParams_t params;
  cusolverDnCreateParams(&params);

  // Device workspace sizes in bytes reported by the size query.
  size_t lwork_s;
  size_t lwork_d;
  size_t lwork_c;
  size_t lwork_z;

  cusolverDnSyevd_bufferSize(handle, params, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, 2, CUDA_R_32F, a_s.d_data, 2, CUDA_R_32F, w_s.d_data, CUDA_R_32F, &lwork_s);
  cusolverDnSyevd_bufferSize(handle, params, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, 2, CUDA_R_64F, a_d.d_data, 2, CUDA_R_64F, w_d.d_data, CUDA_R_64F, &lwork_d);
  cusolverDnSyevd_bufferSize(handle, params, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, 2, CUDA_C_32F, a_c.d_data, 2, CUDA_R_32F, w_c.d_data, CUDA_C_32F, &lwork_c);
  cusolverDnSyevd_bufferSize(handle, params, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, 2, CUDA_C_64F, a_z.d_data, 2, CUDA_R_64F, w_z.d_data, CUDA_C_64F, &lwork_z);

  void *device_ws_s;
  void *device_ws_d;
  void *device_ws_c;
  void *device_ws_z;
  cudaMalloc(&device_ws_s, lwork_s);
  cudaMalloc(&device_ws_d, lwork_d);
  cudaMalloc(&device_ws_c, lwork_c);
  cudaMalloc(&device_ws_z, lwork_z);

  // Single device-side status word shared by all four solver calls.
  int *info;
  cudaMalloc(&info, sizeof(int));

  cusolverDnSyevd(handle, params, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, 2, CUDA_R_32F, a_s.d_data, 2, CUDA_R_32F, w_s.d_data, CUDA_R_32F, device_ws_s, lwork_s, info);
  cusolverDnSyevd(handle, params, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, 2, CUDA_R_64F, a_d.d_data, 2, CUDA_R_64F, w_d.d_data, CUDA_R_64F, device_ws_d, lwork_d, info);
  cusolverDnSyevd(handle, params, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, 2, CUDA_C_32F, a_c.d_data, 2, CUDA_R_32F, w_c.d_data, CUDA_C_32F, device_ws_c, lwork_c, info);
  cusolverDnSyevd(handle, params, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, 2, CUDA_C_64F, a_z.d_data, 2, CUDA_R_64F, w_z.d_data, CUDA_C_64F, device_ws_z, lwork_z, info);

  // Bring the overwritten matrices and the eigenvalues back to the host
  // (presumably a device-to-host copy inside Data<T>::D2H -- TODO confirm).
  a_s.D2H();
  a_d.D2H();
  a_c.D2H();
  a_z.D2H();
  w_s.D2H();
  w_d.D2H();
  w_c.D2H();
  w_z.D2H();

  // Wait for everything queued on stream 0 before releasing resources.
  cudaStreamSynchronize(0);

  cusolverDnDestroyParams(params);
  cusolverDnDestroy(handle);
  cudaFree(device_ws_s);
  cudaFree(device_ws_d);
  cudaFree(device_ws_c);
  cudaFree(device_ws_z);
  cudaFree(info);

  float expect_a[4] = {-0.894427, 0.447214, 0.447214, 0.894427};
  float expect_w[2] = {0.000000, 5.000000};
  if (compare_result(expect_a, a_s.h_data, 4) &&
      compare_result(expect_a, a_d.h_data, 4) &&
      compare_result(expect_a, a_c.h_data, 4) &&
      compare_result(expect_a, a_z.h_data, 4) &&
      compare_result(expect_w, w_s.h_data, 2) &&
      compare_result(expect_w, w_d.h_data, 2) &&
      compare_result(expect_w, w_c.h_data, 2) &&
      compare_result(expect_w, w_z.h_data, 2))
    printf(" DnSyevd pass\n ");
  else {
    printf(" DnSyevd fail\n ");
    test_passed = false;
  }
}
1641
+
1642
// Smoke test for cusolverDnSetAdvOptions: selects CUSOLVER_ALG_1 for the
// CUSOLVERDN_GETRF function on a freshly created params object.
//
// The params object is created before use and destroyed afterwards; passing
// an uninitialized cusolverDnParams_t to the library is undefined behavior.
// The test reports "pass" only when both creation and the option call return
// CUSOLVER_STATUS_SUCCESS; otherwise it reports "fail" and clears the global
// `test_passed` flag, as the other tests in this file do.
void test_cusolverDnSetAdvOptions() {
  cusolverDnFunction_t a = CUSOLVERDN_GETRF;
  cusolverAlgMode_t b = CUSOLVER_ALG_1;
  cusolverDnParams_t p;

  cusolverStatus_t status = cusolverDnCreateParams(&p);
  if (status == CUSOLVER_STATUS_SUCCESS) {
    status = cusolverDnSetAdvOptions(p, a, b);
    // Release the params object whether or not the option call succeeded.
    cusolverDnDestroyParams(p);
  }

  if (status == CUSOLVER_STATUS_SUCCESS)
    printf(" DnSetAdvOptions pass\n ");
  else {
    printf(" DnSetAdvOptions fail\n ");
    test_passed = false;
  }
}
1649
+
1464
1650
int main () {
1465
1651
test_cusolverDnXgetrf ();
1466
1652
test_cusolverDnXgetrfnp ();
@@ -1477,6 +1663,9 @@ int main() {
1477
1663
test_cusolverDnPotrs ();
1478
1664
test_cusolverDnSyevdx ();
1479
1665
test_cusolverDnXsyevdx ();
1666
+ test_cusolverDnXsyevd ();
1667
+ test_cusolverDnSyevd ();
1668
+ test_cusolverDnSetAdvOptions ();
1480
1669
1481
1670
if (test_passed)
1482
1671
return 0 ;
0 commit comments