Skip to content

Commit 6b92dc4

Browse files
committed
hip - use BASIS_T_1D in codegen
1 parent 9942127 commit 6b92dc4

File tree

8 files changed

+246
-246
lines changed

8 files changed

+246
-246
lines changed

backends/hip-gen/ceed-hip-gen-operator-build.cpp

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -496,42 +496,42 @@ static int CeedOperatorBuildKernelBasis_Hip_gen(std::ostringstream &code, CeedOp
496496
std::string function_name = (dim == 1 ? "Interp" : "InterpTensor") + std::to_string(dim) + "d";
497497

498498
code << " CeedScalar r_c" << var_suffix << "[num_comp" << var_suffix << "*" << (dim >= 3 ? Q_name : "1") << "];\n";
499-
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ">(data, r_e" << var_suffix << ", s_B"
500-
<< var_suffix << ", r_c" << var_suffix << ");\n";
499+
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_e" << var_suffix
500+
<< ", s_B" << var_suffix << ", r_c" << var_suffix << ");\n";
501501
} else {
502502
std::string function_name = is_tensor ? ((dim == 1 ? "Interp" : "InterpTensor") + std::to_string(dim) + "d") : "InterpNonTensor";
503503

504504
code << " CeedScalar r_q" << var_suffix << "[num_comp" << var_suffix << "*" << (is_tensor && (dim >= 3) ? Q_name : "1") << "];\n";
505-
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ">(data, r_e" << var_suffix << ", s_B"
506-
<< var_suffix << ", r_q" << var_suffix << ");\n";
505+
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_e" << var_suffix
506+
<< ", s_B" << var_suffix << ", r_q" << var_suffix << ");\n";
507507
}
508508
break;
509509
case CEED_EVAL_GRAD:
510510
if (is_at_points) {
511511
std::string function_name = (dim == 1 ? "Interp" : "InterpTensor") + std::to_string(dim) + "d";
512512

513513
code << " CeedScalar r_c" << var_suffix << "[num_comp" << var_suffix << "*" << (dim >= 3 ? Q_name : "1") << "];\n";
514-
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ">(data, r_e" << var_suffix << ", s_B"
515-
<< var_suffix << ", r_c" << var_suffix << ");\n";
514+
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_e" << var_suffix
515+
<< ", s_B" << var_suffix << ", r_c" << var_suffix << ");\n";
516516
} else if (use_3d_slices) {
517517
std::string function_name = (dim > 1 ? "InterpTensor" : "Interp") + std::to_string(dim) + "d";
518518

519519
code << " CeedScalar r_q" << var_suffix << "[num_comp" << var_suffix << "*" << Q_name << "];\n";
520-
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ">(data, r_e" << var_suffix << ", s_B"
521-
<< var_suffix << ", r_q" << var_suffix << ");\n";
520+
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_e" << var_suffix
521+
<< ", s_B" << var_suffix << ", r_q" << var_suffix << ");\n";
522522
} else if (is_tensor) {
523523
bool is_collocated = dim == 3 && Q_1d >= P_1d;
524524
std::string function_name = (dim == 1 ? "Grad" : (is_collocated ? "GradTensorCollocated" : "GradTensor")) + std::to_string(dim) + "d";
525525

526526
code << " CeedScalar r_q" << var_suffix << "[num_comp" << var_suffix << "*dim*" << (dim >= 3 ? Q_name : "1") << "];\n";
527-
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ">(data, r_e" << var_suffix << ", s_B"
528-
<< var_suffix << ", s_G" << var_suffix << ", r_q" << var_suffix << ");\n";
527+
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_e" << var_suffix
528+
<< ", s_B" << var_suffix << ", s_G" << var_suffix << ", r_q" << var_suffix << ");\n";
529529
} else {
530530
std::string function_name = "GradNonTensor";
531531

532532
code << " CeedScalar r_q" << var_suffix << "[num_comp" << var_suffix << "*dim];\n";
533-
code << " " << function_name << "<num_comp" << var_suffix << ", dim, " << P_name << ", " << Q_name << ">(data, r_e" << var_suffix
534-
<< ", s_G" << var_suffix << ", r_q" << var_suffix << ");\n";
533+
code << " " << function_name << "<num_comp" << var_suffix << ", dim, " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_e"
534+
<< var_suffix << ", s_G" << var_suffix << ", r_q" << var_suffix << ");\n";
535535
}
536536
break;
537537
case CEED_EVAL_WEIGHT: {
@@ -564,40 +564,40 @@ static int CeedOperatorBuildKernelBasis_Hip_gen(std::ostringstream &code, CeedOp
564564
if (is_at_points) {
565565
std::string function_name = (dim == 1 ? "InterpTranspose" : "InterpTransposeTensor") + std::to_string(dim) + "d";
566566

567-
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ">(data, r_c" << var_suffix << ", s_B"
568-
<< var_suffix << ", r_e" << var_suffix << ");\n";
567+
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_c" << var_suffix
568+
<< ", s_B" << var_suffix << ", r_e" << var_suffix << ");\n";
569569
} else {
570570
std::string function_name =
571571
is_tensor ? ((dim == 1 ? "InterpTranspose" : "InterpTransposeTensor") + std::to_string(dim) + "d") : "InterpTransposeNonTensor";
572572

573-
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ">(data, r_q" << var_suffix << ", s_B"
574-
<< var_suffix << ", r_e" << var_suffix << ");\n";
573+
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_q" << var_suffix
574+
<< ", s_B" << var_suffix << ", r_e" << var_suffix << ");\n";
575575
}
576576
break;
577577
case CEED_EVAL_GRAD:
578578
code << " CeedScalar *r_e" << var_suffix << " = r_e_scratch;\n";
579579
if (is_at_points) {
580580
std::string function_name = (dim == 1 ? "InterpTranspose" : "InterpTransposeTensor") + std::to_string(dim) + "d";
581581

582-
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ">(data, r_c" << var_suffix << ", s_B"
583-
<< var_suffix << ", r_e" << var_suffix << ");\n";
582+
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_c" << var_suffix
583+
<< ", s_B" << var_suffix << ", r_e" << var_suffix << ");\n";
584584
} else if (use_3d_slices) {
585585
std::string function_name = (dim == 1 ? "InterpTranspose" : "InterpTransposeTensor") + std::to_string(dim) + "d";
586586

587-
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ">(data, r_q" << var_suffix << ", s_B"
588-
<< var_suffix << ", r_e" << var_suffix << ");\n";
587+
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_q" << var_suffix
588+
<< ", s_B" << var_suffix << ", r_e" << var_suffix << ");\n";
589589
} else if (is_tensor) {
590590
bool is_collocated = dim == 3 && Q_1d >= P_1d;
591591
std::string function_name =
592592
(dim == 1 ? "GradTranspose" : (is_collocated ? "GradTransposeTensorCollocated" : "GradTransposeTensor")) + std::to_string(dim) + "d";
593593

594-
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ">(data, r_q" << var_suffix << ", s_B"
595-
<< var_suffix << ", s_G" << var_suffix << ", r_e" << var_suffix << ");\n";
594+
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_q" << var_suffix
595+
<< ", s_B" << var_suffix << ", s_G" << var_suffix << ", r_e" << var_suffix << ");\n";
596596
} else {
597597
std::string function_name = "GradTransposeNonTensor";
598598

599-
code << " " << function_name << "<num_comp" << var_suffix << ", dim, " << P_name << ", " << Q_name << ">(data, r_q" << var_suffix
600-
<< ", s_G" << var_suffix << ", r_e" << var_suffix << ");\n";
599+
code << " " << function_name << "<num_comp" << var_suffix << ", dim, " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_q"
600+
<< var_suffix << ", s_G" << var_suffix << ", r_e" << var_suffix << ");\n";
601601
}
602602
break;
603603
// LCOV_EXCL_START
@@ -820,8 +820,8 @@ static int CeedOperatorBuildKernelQFunction_Hip_gen(std::ostringstream &code, Ce
820820
break;
821821
case CEED_EVAL_GRAD:
822822
code << " CeedScalar r_s" << var_suffix << "[num_comp" << var_suffix << "*dim];\n";
823-
code << " GradColloSlice3d<num_comp" << var_suffix << ", " << Q_name << ">(data, q, r_q" << var_suffix << ", s_G" << var_suffix
824-
<< ", r_s" << var_suffix << ");\n";
823+
code << " GradColloSlice3d<num_comp" << var_suffix << ", " << Q_name << ", OP_T_1D>(data, q, r_q" << var_suffix << ", s_G"
824+
<< var_suffix << ", r_s" << var_suffix << ");\n";
825825
break;
826826
case CEED_EVAL_WEIGHT:
827827
code << " CeedScalar r_s" << var_suffix << "[1];\n";
@@ -989,7 +989,7 @@ static int CeedOperatorBuildKernelQFunction_Hip_gen(std::ostringstream &code, Ce
989989
code << " }\n";
990990
break;
991991
case CEED_EVAL_GRAD:
992-
code << " GradColloSliceTranspose3d<num_comp" << var_suffix << ", " << Q_name << ">(data, q, r_s" << var_suffix << ", s_G"
992+
code << " GradColloSliceTranspose3d<num_comp" << var_suffix << ", " << Q_name << ", OP_T_1D>(data, q, r_s" << var_suffix << ", s_G"
993993
<< var_suffix << ", r_q" << var_suffix << ");\n";
994994
break;
995995
// LCOV_EXCL_START
@@ -1216,7 +1216,7 @@ extern "C" int CeedOperatorBuildKernel_Hip_gen(CeedOperator op, bool *is_good_bu
12161216
code << " data.t_id_y = threadIdx.y;\n";
12171217
code << " data.t_id_z = threadIdx.z;\n";
12181218
code << " data.t_id = threadIdx.x + threadIdx.y*blockDim.x + threadIdx.z*blockDim.y*blockDim.x;\n";
1219-
code << " data.slice = slice + data.t_id_z*T_1D" << ((!is_tensor || dim == 1) ? "" : "*T_1D") << ";\n";
1219+
code << " data.slice = slice + data.t_id_z*OP_T_1D" << ((!is_tensor || dim == 1) ? "" : "*OP_T_1D") << ";\n";
12201220

12211221
// -- Determine input mat reuse
12221222
FieldReuse_Hip input_matrix_reuse[CEED_FIELD_MAX];
@@ -1459,7 +1459,7 @@ extern "C" int CeedOperatorBuildKernel_Hip_gen(CeedOperator op, bool *is_good_bu
14591459
{
14601460
bool is_compile_good = false;
14611461

1462-
CeedCallBackend(CeedTryCompile_Hip(ceed, code.str().c_str(), &is_compile_good, &data->module, 2, "T_1D", block_sizes[0], "BLOCK_SIZE",
1462+
CeedCallBackend(CeedTryCompile_Hip(ceed, code.str().c_str(), &is_compile_good, &data->module, 2, "OP_T_1D", block_sizes[0], "BLOCK_SIZE",
14631463
block_sizes[0] * block_sizes[1] * block_sizes[2]));
14641464
if (is_compile_good) {
14651465
*is_good_build = true;

backends/hip-shared/ceed-hip-shared-basis.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,7 @@ static int CeedBasisApplyAtPointsCore_Hip_shared(CeedBasis basis, bool apply_add
354354

355355
if (data->moduleAtPoints) CeedCallHip(ceed, hipModuleUnload(data->moduleAtPoints));
356356
CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
357-
CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->moduleAtPoints, 9, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "T_1D",
357+
CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->moduleAtPoints, 9, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_T_1D",
358358
CeedIntMax(Q_1d, P_1d), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, "BASIS_NUM_NODES", CeedIntPow(P_1d, dim),
359359
"BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_NUM_PTS", max_num_points, "BASIS_INTERP_BLOCK_SIZE",
360360
data->block_sizes[0]));
@@ -692,7 +692,7 @@ int CeedBasisCreateTensorH1_Hip_shared(CeedInt dim, CeedInt P_1d, CeedInt Q_1d,
692692
// Compile basis kernels
693693
const char basis_kernel_source[] = "// Tensor basis source\n#include <ceed/jit-source/hip/hip-shared-basis-tensor.h>\n";
694694

695-
CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 11, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "T_1D",
695+
CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 11, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_T_1D",
696696
CeedIntMax(Q_1d, P_1d), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, "BASIS_NUM_NODES", CeedIntPow(P_1d, dim),
697697
"BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_INTERP_BLOCK_SIZE", data->block_sizes[0], "BASIS_GRAD_BLOCK_SIZE",
698698
data->block_sizes[1], "BASIS_WEIGHT_BLOCK_SIZE", data->block_sizes[2], "BASIS_HAS_COLLOCATED_GRAD",
@@ -768,7 +768,7 @@ int CeedBasisCreateH1_Hip_shared(CeedElemTopology topo, CeedInt dim, CeedInt num
768768

769769
CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
770770
CeedCallBackend(ComputeBasisThreadBlockSizes(dim, num_nodes, num_qpts, num_comp, data->block_sizes));
771-
CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 6, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "T_1D",
771+
CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 6, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_T_1D",
772772
CeedIntMax(num_qpts, num_nodes), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, "BASIS_INTERP_BLOCK_SIZE",
773773
data->block_sizes[0]));
774774
CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp));

0 commit comments

Comments
 (0)