Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,10 @@ SERIAL/Sparse/sparse
SERIAL/Stencil/stencil
SERIAL/Synch_p2p/p2p
SERIAL/Transpose/transpose
COBOL/nstream
COBOL/transpose
COBOL/p2p
COBOL/dgemm
dgemm-vector.dSYM
dgemm.dSYM
nstream-opencl.dSYM
Expand Down
Empty file modified C1z/transpose-2d-openacc.c
100755 → 100644
Empty file.
38 changes: 30 additions & 8 deletions Cxx11/dgemm-cublas.cu
Original file line number Diff line number Diff line change
Expand Up @@ -182,13 +182,11 @@ int main(int argc, char * argv[])
/// Read and test input parameters
//////////////////////////////////////////////////////////////////////

int iterations;
int order;
int batches = 0;
int input_copy = 0;
int iterations, order, batches = 0;
bool input_copy = false, random_initialization = false;
try {
if (argc < 2) {
throw "Usage: <# iterations> <matrix order> [<batches>] [<copy input every iteration [0/1]>]";
throw "Usage: <# iterations> <matrix order> [<batches>] [<copy input every iteration [0/1]>] [<random initialization [0/1]>]";
}

iterations = std::atoi(argv[1]);
Expand All @@ -213,6 +211,13 @@ int main(int argc, char * argv[])
throw "ERROR: input_copy was not 0 or 1";
}
}

if (argc > 5) {
random_initialization = std::atoi(argv[5]);
if (random_initialization != 0 && random_initialization != 1) {
throw "ERROR: random_initialization was not 0 or 1";
}
}
}
catch (const char * e) {
std::cout << e << std::endl;
Expand All @@ -229,6 +234,7 @@ int main(int argc, char * argv[])
std::cout << "Batch size = " << batches << " (batched BLAS)" << std::endl;
}
std::cout << "Input copy = " << (input_copy ? "yes" : "no") << std::endl;
std::cout << "Randomized data = " << (random_initialization ? "yes" : "no") << std::endl;

cublasHandle_t h;
prk::check( cublasCreate(&h) );
Expand Down Expand Up @@ -270,10 +276,24 @@ int main(int argc, char * argv[])
prk::CUDA::copyH2Dasync(&(d_a[b*nelems]), h_a, nelems);
prk::CUDA::copyH2Dasync(&(d_b[b*nelems]), h_b, nelems);
}
prk::CUDA::sync();

init<<<dimGrid, dimBlock>>>(order, matrices, d_c);

} else if (random_initialization) {
// Initialize matrices with CURAND uniform distribution (0,1]
curandGenerator_t gen;
prk::check( curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT) );
prk::check( curandSetPseudoRandomGeneratorSeed(gen, 1234ULL) );

// Generate uniform random numbers in (0,1] for matrices A and B
prk::check( curandGenerateUniformDouble(gen, d_a, matrices * nelems) );
prk::check( curandGenerateUniformDouble(gen, d_b, matrices * nelems) );

prk::check( curandDestroyGenerator(gen) );

// Initialize matrix C to zero
init<<<dimGrid, dimBlock>>>(order, matrices, d_c);

} else {

init<<<dimGrid, dimBlock>>>(order, matrices, d_a, d_b, d_c);
Expand Down Expand Up @@ -346,12 +366,14 @@ int main(int argc, char * argv[])
}
residuum /= matrices;

if (residuum < epsilon) {
if (residuum < epsilon || random_initialization) {
#if VERBOSE
std::cout << "Reference checksum = " << reference << "\n"
<< "Actual checksum = " << checksum << std::endl;
#endif
std::cout << "Solution validates" << std::endl;
if (!random_initialization) {
std::cout << "Solution validates" << std::endl;
}
auto avgtime = gemm_time/iterations/matrices;
auto nflops = 2.0 * prk::pow(forder,3);
prk::print_flop_rate_time("FP64", nflops/avgtime, avgtime);
Expand Down
9 changes: 9 additions & 0 deletions Cxx11/prk_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#ifdef PRK_USE_CUBLAS
#include <cublas_v2.h>
#include <curand.h>
#endif

//#include <nvtx3.hpp>
Expand Down Expand Up @@ -58,6 +59,14 @@ namespace prk
std::abort();
}
}

/// @brief Abort the program if a cuRAND call did not succeed.
///
/// On any status other than CURAND_STATUS_SUCCESS, writes
/// "PRK CURAND error: " followed by the streamed status value to std::cerr
/// and calls std::abort(). Returns normally on success.
///
/// Declared inline because this definition lives in a header; without it,
/// including the header from more than one translation unit would produce
/// multiple-definition link errors.
///
/// @param rc Status returned by a cuRAND API call.
inline void check(curandStatus_t rc)
{
if (rc!=CURAND_STATUS_SUCCESS) {
// std::endl flushes the message before the process is aborted.
std::cerr << "PRK CURAND error: " << rc << std::endl;
std::abort();
}
}
#endif

namespace CUDA
Expand Down
35 changes: 28 additions & 7 deletions Cxx11/sgemm-cublas.cu
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@

#include "prk_util.h"
#include "prk_cuda.h"
#include <curand.h>

#if 0
__global__ void init(unsigned order, float * A, float * B, float * C)
Expand Down Expand Up @@ -182,14 +183,12 @@ int main(int argc, char * argv[])
/// Read and test input parameters
//////////////////////////////////////////////////////////////////////

int iterations;
int order;
int batches = 0;
bool input_copy{false};
int iterations, order, batches = 0;
bool input_copy = false, random_initialization = false;
bool tf32{false};
try {
if (argc < 2) {
throw "Usage: <# iterations> <matrix order> [<batches>] [<copy input every iteration [0/1]>] [<use TF32 [0/1]>]";
throw "Usage: <# iterations> <matrix order> [<batches>] [<copy input every iteration [0/1]>] [<use TF32 [0/1]>] [<random initialization [0/1]>]";
}

iterations = std::atoi(argv[1]);
Expand All @@ -215,6 +214,10 @@ int main(int argc, char * argv[])
if (argc > 5) {
tf32 = prk::parse_boolean(std::string(argv[5]));
}

if (argc > 6) {
random_initialization = prk::parse_boolean(std::string(argv[6]));
}
}
catch (const char * e) {
std::cout << e << std::endl;
Expand All @@ -232,6 +235,7 @@ int main(int argc, char * argv[])
}
std::cout << "Input copy = " << (input_copy ? "yes" : "no") << std::endl;
std::cout << "TF32 = " << (tf32 ? "yes" : "no") << std::endl;
std::cout << "Randomized data = " << (random_initialization ? "yes" : "no") << std::endl;

cublasHandle_t h;
prk::check( cublasCreate(&h) );
Expand Down Expand Up @@ -281,6 +285,21 @@ int main(int argc, char * argv[])

init<<<dimGrid, dimBlock>>>(order, matrices, d_c);

} else if (random_initialization) {
// Initialize matrices with CURAND uniform distribution (0,1]
curandGenerator_t gen;
prk::check( curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT) );
prk::check( curandSetPseudoRandomGeneratorSeed(gen, 1234ULL) );

// Generate uniform random numbers in (0,1] for matrices A and B
prk::check( curandGenerateUniform(gen, d_a, matrices * nelems) );
prk::check( curandGenerateUniform(gen, d_b, matrices * nelems) );

prk::check( curandDestroyGenerator(gen) );

// Initialize matrix C to zero
init<<<dimGrid, dimBlock>>>(order, matrices, d_c);

} else {

init<<<dimGrid, dimBlock>>>(order, matrices, d_a, d_b, d_c);
Expand Down Expand Up @@ -358,12 +377,14 @@ int main(int argc, char * argv[])
}
residuum /= matrices;

if (residuum < epsilon) {
if (residuum < epsilon || random_initialization) {
#if VERBOSE
std::cout << "Reference checksum = " << reference << "\n"
<< "Actual checksum = " << checksum << std::endl;
#endif
std::cout << "Solution validates" << std::endl;
if (!random_initialization) {
std::cout << "Solution validates" << std::endl;
}
auto avgtime = gemm_time/iterations/matrices;
auto nflops = 2.0 * prk::pow(forder,3);
std::cout << "Rate (MF/s): " << 1.0e-6 * nflops/avgtime
Expand Down
Loading