Skip to content

Commit 2d715a6

Browse files
committed
Added device ger(c), geru
1 parent 076d3af commit 2d715a6

File tree

12 files changed

+1119
-0
lines changed

12 files changed

+1119
-0
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,7 @@ add_library(
358358
src/device_error.cc
359359
src/device_gemm.cc
360360
src/device_gemv.cc
361+
src/device_ger.cc
361362
src/device_hemm.cc
362363
src/device_hemv.cc
363364
src/device_her2k.cc

include/blas/device_blas.hh

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,80 @@ void trsv(
580580
std::complex<double>* x, int64_t incx,
581581
blas::Queue& queue );
582582

583+
//------------------------------------------------------------------------------
584+
// Queue-based ger declarations, one overload per scalar type: float, double,
// std::complex<float>, std::complex<double>. All four share one argument list:
//   layout      - blas::Layout value
//   m, n        - dimensions
//   alpha       - scalar, passed by value
//   x, incx     - read-only vector and its increment
//   y, incy     - read-only vector and its increment
//   A, lda      - the only non-const data pointer, and its leading dimension
//   queue       - blas::Queue, taken by non-const reference
// NOTE(review): presumably the BLAS rank-1 update, conjugating y for the
// complex overloads (the commit title says "ger(c)"). The definitions are not
// visible from this header - confirm against src/device_ger.cc.
void ger(
585+
blas::Layout layout,
586+
int64_t m, int64_t n,
587+
float alpha,
588+
float const* x, int64_t incx,
589+
float const* y, int64_t incy,
590+
float* A, int64_t lda,
591+
blas::Queue& queue );
592+
593+
// double overload; same argument list as the float overload.
void ger(
594+
blas::Layout layout,
595+
int64_t m, int64_t n,
596+
double alpha,
597+
double const* x, int64_t incx,
598+
double const* y, int64_t incy,
599+
double* A, int64_t lda,
600+
blas::Queue& queue );
601+
602+
// std::complex<float> overload.
void ger(
603+
blas::Layout layout,
604+
int64_t m, int64_t n,
605+
std::complex<float> alpha,
606+
std::complex<float> const* x, int64_t incx,
607+
std::complex<float> const* y, int64_t incy,
608+
std::complex<float>* A, int64_t lda,
609+
blas::Queue& queue );
610+
611+
// std::complex<double> overload.
void ger(
612+
blas::Layout layout,
613+
int64_t m, int64_t n,
614+
std::complex<double> alpha,
615+
std::complex<double> const* x, int64_t incx,
616+
std::complex<double> const* y, int64_t incy,
617+
std::complex<double>* A, int64_t lda,
618+
blas::Queue& queue );
619+
620+
//------------------------------------------------------------------------------
621+
// Queue-based geru declarations, one overload per scalar type: float, double,
// std::complex<float>, std::complex<double>. The argument list matches the ger
// declarations: layout, m, n, alpha (by value), read-only x/incx and y/incy,
// writable A with lda, and a blas::Queue reference.
// NOTE(review): presumably the unconjugated rank-1 update, which would only
// differ from ger for the complex overloads. The definitions are not visible
// from this header - confirm against src/device_ger.cc.
void geru(
622+
blas::Layout layout,
623+
int64_t m, int64_t n,
624+
float alpha,
625+
float const* x, int64_t incx,
626+
float const* y, int64_t incy,
627+
float* A, int64_t lda,
628+
blas::Queue& queue );
629+
630+
// double overload; same argument list as the float overload.
void geru(
631+
blas::Layout layout,
632+
int64_t m, int64_t n,
633+
double alpha,
634+
double const* x, int64_t incx,
635+
double const* y, int64_t incy,
636+
double* A, int64_t lda,
637+
blas::Queue& queue );
638+
639+
// std::complex<float> overload.
void geru(
640+
blas::Layout layout,
641+
int64_t m, int64_t n,
642+
std::complex<float> alpha,
643+
std::complex<float> const* x, int64_t incx,
644+
std::complex<float> const* y, int64_t incy,
645+
std::complex<float>* A, int64_t lda,
646+
blas::Queue& queue );
647+
648+
// std::complex<double> overload.
void geru(
649+
blas::Layout layout,
650+
int64_t m, int64_t n,
651+
std::complex<double> alpha,
652+
std::complex<double> const* x, int64_t incx,
653+
std::complex<double> const* y, int64_t incy,
654+
std::complex<double>* A, int64_t lda,
655+
blas::Queue& queue );
656+
583657
//==============================================================================
584658
// Level 3 BLAS
585659

src/cublas_wrappers.cc

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1348,6 +1348,162 @@ void trsv(
13481348
(cuDoubleComplex*) dx, incdx ) );
13491349
}
13501350

1351+
//------------------------------------------------------------------------------
1352+
// ger
1353+
//------------------------------------------------------------------------------
1354+
// cuBLAS wrapper for single-precision real ger.
// Forwards m, n, the vectors with their increments, and dA/ldda unchanged to
// cublasSger on queue.handle(); alpha is passed by address. The status
// returned by cublasSger is the argument of blas_dev_call (defined elsewhere).
// NOTE(review): dx, dy, dA are presumably device pointers (d prefix) - confirm.
// NOTE(review): m, n, incdx, incdy, ldda are int64_t here, while the classic
// cublasSger prototype takes int - confirm callers range-check first,
// otherwise large values are narrowed implicitly at this call.
void ger(
1355+
int64_t m, int64_t n,
1356+
float alpha,
1357+
float const* dx, int64_t incdx,
1358+
float const* dy, int64_t incdy,
1359+
float* dA, int64_t ldda,
1360+
blas::Queue& queue )
1361+
{
1362+
blas_dev_call(
1363+
cublasSger(
1364+
queue.handle(),
1365+
m, n,
1366+
// alpha is a by-value parameter, so this is the address of the local copy.
&alpha,
1367+
dx, incdx,
1368+
dy, incdy,
1369+
dA, ldda ) );
1370+
}
1371+
1372+
//------------------------------------------------------------------------------
1373+
// cuBLAS wrapper for double-precision real ger.
// Forwards every argument unchanged to cublasDger on queue.handle(), with
// alpha passed by address; the returned status is handed to blas_dev_call.
// NOTE(review): dx, dy, dA are presumably device pointers (d prefix) - confirm.
// NOTE(review): the int64_t sizes/strides are passed straight through; the
// classic cublasDger prototype takes int, so confirm callers range-check.
void ger(
1374+
int64_t m, int64_t n,
1375+
double alpha,
1376+
double const* dx, int64_t incdx,
1377+
double const* dy, int64_t incdy,
1378+
double* dA, int64_t ldda,
1379+
blas::Queue& queue )
1380+
{
1381+
blas_dev_call(
1382+
cublasDger(
1383+
queue.handle(),
1384+
m, n,
1385+
// Address of the by-value alpha parameter.
&alpha,
1386+
dx, incdx,
1387+
dy, incdy,
1388+
dA, ldda ) );
1389+
}
1390+
1391+
//------------------------------------------------------------------------------
1392+
// cuBLAS wrapper for single-precision complex ger.
// Calls cublasCgerc (the "c" variant; the complex geru wrapper in this file
// calls cublasCgeru instead) on queue.handle() and hands the returned status
// to blas_dev_call.
// The std::complex<float> arguments are converted to cuComplex* with C-style
// casts. dx and dy are pointers-to-const, so those two casts also drop const.
// NOTE(review): this relies on std::complex<float> and cuComplex having the
// same layout - presumably true, confirm against the cuBLAS headers.
// NOTE(review): the int64_t sizes/strides are passed straight through; the
// classic cublasCgerc prototype takes int, so confirm callers range-check.
void ger(
1393+
int64_t m, int64_t n,
1394+
std::complex<float> alpha,
1395+
std::complex<float> const* dx, int64_t incdx,
1396+
std::complex<float> const* dy, int64_t incdy,
1397+
std::complex<float>* dA, int64_t ldda,
1398+
blas::Queue& queue )
1399+
{
1400+
blas_dev_call(
1401+
cublasCgerc(
1402+
queue.handle(),
1403+
m, n,
1404+
// Address of the by-value alpha parameter, viewed as cuComplex.
(cuComplex*) &alpha,
1405+
(cuComplex*) dx, incdx,
1406+
(cuComplex*) dy, incdy,
1407+
(cuComplex*) dA, ldda ) );
1408+
}
1409+
1410+
//------------------------------------------------------------------------------
1411+
// cuBLAS wrapper for double-precision complex ger.
// Calls cublasZgerc (the "c" variant; the complex geru wrapper in this file
// calls cublasZgeru instead) on queue.handle() and hands the returned status
// to blas_dev_call.
// The std::complex<double> arguments are converted to cuDoubleComplex* with
// C-style casts. dx and dy are pointers-to-const, so those casts drop const.
// NOTE(review): this relies on std::complex<double> and cuDoubleComplex having
// the same layout - presumably true, confirm against the cuBLAS headers.
// NOTE(review): the int64_t sizes/strides are passed straight through; the
// classic cublasZgerc prototype takes int, so confirm callers range-check.
void ger(
1412+
int64_t m, int64_t n,
1413+
std::complex<double> alpha,
1414+
std::complex<double> const* dx, int64_t incdx,
1415+
std::complex<double> const* dy, int64_t incdy,
1416+
std::complex<double>* dA, int64_t ldda,
1417+
blas::Queue& queue )
1418+
{
1419+
blas_dev_call(
1420+
cublasZgerc(
1421+
queue.handle(),
1422+
m, n,
1423+
// Address of the by-value alpha parameter, viewed as cuDoubleComplex.
(cuDoubleComplex*) &alpha,
1424+
(cuDoubleComplex*) dx, incdx,
1425+
(cuDoubleComplex*) dy, incdy,
1426+
(cuDoubleComplex*) dA, ldda ) );
1427+
}
1428+
1429+
//------------------------------------------------------------------------------
1430+
// geru
1431+
//------------------------------------------------------------------------------
1432+
// cuBLAS wrapper for single-precision real geru.
// Calls cublasSger - the same routine the float ger wrapper in this file
// calls - with every argument forwarded unchanged and alpha passed by address.
// The returned status is handed to blas_dev_call.
// NOTE(review): for real types there is presumably no conjugation to
// distinguish geru from ger, which would explain the shared routine - confirm.
// NOTE(review): the int64_t sizes/strides are passed straight through; the
// classic cublasSger prototype takes int, so confirm callers range-check.
void geru(
1433+
int64_t m, int64_t n,
1434+
float alpha,
1435+
float const* dx, int64_t incdx,
1436+
float const* dy, int64_t incdy,
1437+
float* dA, int64_t ldda,
1438+
blas::Queue& queue )
1439+
{
1440+
blas_dev_call(
1441+
cublasSger(
1442+
queue.handle(),
1443+
m, n,
1444+
// Address of the by-value alpha parameter.
&alpha,
1445+
dx, incdx,
1446+
dy, incdy,
1447+
dA, ldda ) );
1448+
}
1449+
1450+
//------------------------------------------------------------------------------
1451+
// cuBLAS wrapper for double-precision real geru.
// Calls cublasDger - the same routine the double ger wrapper in this file
// calls - with every argument forwarded unchanged and alpha passed by address.
// The returned status is handed to blas_dev_call.
// NOTE(review): for real types there is presumably no conjugation to
// distinguish geru from ger, which would explain the shared routine - confirm.
// NOTE(review): the int64_t sizes/strides are passed straight through; the
// classic cublasDger prototype takes int, so confirm callers range-check.
void geru(
1452+
int64_t m, int64_t n,
1453+
double alpha,
1454+
double const* dx, int64_t incdx,
1455+
double const* dy, int64_t incdy,
1456+
double* dA, int64_t ldda,
1457+
blas::Queue& queue )
1458+
{
1459+
blas_dev_call(
1460+
cublasDger(
1461+
queue.handle(),
1462+
m, n,
1463+
// Address of the by-value alpha parameter.
&alpha,
1464+
dx, incdx,
1465+
dy, incdy,
1466+
dA, ldda ) );
1467+
}
1468+
1469+
//------------------------------------------------------------------------------
1470+
// cuBLAS wrapper for single-precision complex geru.
// Calls cublasCgeru (the "u" variant; the complex ger wrapper in this file
// calls cublasCgerc instead) on queue.handle() and hands the returned status
// to blas_dev_call.
// The std::complex<float> arguments are converted to cuComplex* with C-style
// casts. dx and dy are pointers-to-const, so those two casts also drop const.
// NOTE(review): this relies on std::complex<float> and cuComplex having the
// same layout - presumably true, confirm against the cuBLAS headers.
// NOTE(review): the int64_t sizes/strides are passed straight through; the
// classic cublasCgeru prototype takes int, so confirm callers range-check.
void geru(
1471+
int64_t m, int64_t n,
1472+
std::complex<float> alpha,
1473+
std::complex<float> const* dx, int64_t incdx,
1474+
std::complex<float> const* dy, int64_t incdy,
1475+
std::complex<float>* dA, int64_t ldda,
1476+
blas::Queue& queue )
1477+
{
1478+
blas_dev_call(
1479+
cublasCgeru(
1480+
queue.handle(),
1481+
m, n,
1482+
// Address of the by-value alpha parameter, viewed as cuComplex.
(cuComplex*) &alpha,
1483+
(cuComplex*) dx, incdx,
1484+
(cuComplex*) dy, incdy,
1485+
(cuComplex*) dA, ldda ) );
1486+
}
1487+
1488+
//------------------------------------------------------------------------------
1489+
// cuBLAS wrapper for double-precision complex geru.
// Calls cublasZgeru (the "u" variant; the complex ger wrapper in this file
// calls cublasZgerc instead) on queue.handle() and hands the returned status
// to blas_dev_call.
// The std::complex<double> arguments are converted to cuDoubleComplex* with
// C-style casts. dx and dy are pointers-to-const, so those casts drop const.
// NOTE(review): this relies on std::complex<double> and cuDoubleComplex having
// the same layout - presumably true, confirm against the cuBLAS headers.
// NOTE(review): the int64_t sizes/strides are passed straight through; the
// classic cublasZgeru prototype takes int, so confirm callers range-check.
void geru(
1490+
int64_t m, int64_t n,
1491+
std::complex<double> alpha,
1492+
std::complex<double> const* dx, int64_t incdx,
1493+
std::complex<double> const* dy, int64_t incdy,
1494+
std::complex<double>* dA, int64_t ldda,
1495+
blas::Queue& queue )
1496+
{
1497+
blas_dev_call(
1498+
cublasZgeru(
1499+
queue.handle(),
1500+
m, n,
1501+
// Address of the by-value alpha parameter, viewed as cuDoubleComplex.
(cuDoubleComplex*) &alpha,
1502+
(cuDoubleComplex*) dx, incdx,
1503+
(cuDoubleComplex*) dy, incdy,
1504+
(cuDoubleComplex*) dA, ldda ) );
1505+
}
1506+
13511507
//==============================================================================
13521508
// Level 3 BLAS - Device Interfaces
13531509

0 commit comments

Comments
 (0)