Commit 07223cd

sanjibansg authored and lmoneta committed

feat: on-the-fly broadcasting

1 parent 495a50c commit 07223cd

File tree

tmva/sofie/inc/TMVA/ROperator_BasicBinary.hxx
tmva/sofie/inc/TMVA/ROperator_Comparision.hxx
tmva/sofie/inc/TMVA/ROperator_Conv.hxx
tmva/sofie/inc/TMVA/ROperator_Gemm.hxx
tmva/sofie/inc/TMVA/ROperator_Where.hxx

5 files changed: +143 −70 lines changed

tmva/sofie/inc/TMVA/ROperator_BasicBinary.hxx

Lines changed: 50 additions & 2 deletions
@@ -144,7 +144,7 @@ public:
             fDimShapeA = ConvertShapeToDim(fShapeA);
          } else {
             // Add an intermediate tensor for broadcasting A
-            model.AddIntermediateTensor(fNBroadcastedA, model.GetTensorType(fNA), fShapeY);
+            // model.AddIntermediateTensor(fNBroadcastedA, model.GetTensorType(fNA), fShapeY);
          }
       }
       // Broadcast B to Y
@@ -170,7 +170,7 @@ public:
             fDimShapeB = ConvertShapeToDim(fShapeB);
          } else {
             // Add an intermediate tensor for broadcasting B
-            model.AddIntermediateTensor(fNBroadcastedB, model.GetTensorType(fNB), fShapeY);
+            // model.AddIntermediateTensor(fNBroadcastedB, model.GetTensorType(fNB), fShapeY);
          }
       }
    } else {
@@ -281,6 +281,52 @@ public:
       out << SP << "\n//------ " << BinaryOperatorTrait<T,Op>::Name() << "\n";
       auto length = ConvertDimShapeToLength(fDimShapeY);
       std::string typeName = TensorType<T>::Name();
+
+      // non-dynamic case
+      if (!fShapeA.empty() && !fShapeB.empty() && !fShapeY.empty()) {
+
+         auto stridesA = UTILITY::ComputeStrideFromShape(fShapeA);
+         auto stridesB = UTILITY::ComputeStrideFromShape(fShapeB);
+         auto stridesY = UTILITY::ComputeStrideFromShape(fShapeY);
+
+         std::string compute_idx_A, compute_idx_B, compute_idx_Y;
+         if (std::all_of(fShapeA.begin(), fShapeA.end(), [](size_t x) { return x == 1; })) {
+            compute_idx_A = "0";
+         } else {
+            for (size_t i = 0; i < fShapeA.size(); ++i) {
+               if (fShapeA[i] == 1) continue;
+               compute_idx_A += " idx_" + fNY + std::to_string(i + (fShapeY.size() - fShapeA.size())) + " * " + std::to_string(stridesA[i]) + " +";
+            }
+            compute_idx_A.pop_back();
+         }
+         if (std::all_of(fShapeB.begin(), fShapeB.end(), [](size_t x) { return x == 1; })) {
+            compute_idx_B = "0";
+         } else {
+            for (size_t i = 0; i < fShapeB.size(); ++i) {
+               if (fShapeB[i] == 1) continue;
+               compute_idx_B += " idx_" + fNY + std::to_string(i + (fShapeY.size() - fShapeB.size())) + " * " + std::to_string(stridesB[i]) + " +";
+            }
+            compute_idx_B.pop_back();
+         }
+
+         for (size_t j = 0; j < fShapeY.size(); ++j) {
+            out << SP << "size_t " << "idx_" << fNY << j << ";\n";
+         }
+         out << SP << "for (size_t idx = 0; idx < " << length << "; ++idx) {\n";
+         out << SP << SP << "idx_" << fNY << "0 = idx / " << stridesY[0] << ";\n";
+         compute_idx_Y += "idx_" + fNY + "0 * " + std::to_string(stridesY[0]);
+         std::string modulo_op = "idx % " + std::to_string(stridesY[0]);
+         for (size_t j = 1; j < fShapeY.size(); ++j) {
+            out << SP << SP << "idx_" << fNY << j << " = (" << modulo_op << ") / " << stridesY[j] << ";\n";
+            modulo_op += " % " + std::to_string(stridesY[j]);
+            compute_idx_Y = "idx_" + fNY + std::to_string(j) + " * " + std::to_string(stridesY[j]) + " + " + compute_idx_Y;
+         }
+         out << SP << SP << "tensor_" << fNY << "[" << compute_idx_Y << "] = " << BinaryOperatorTrait<T,Op>::Op("tensor_" + fNA + "[" + compute_idx_A + "]", "tensor_" + fNB + "[" + compute_idx_B + "]") << " ;\n";
+
+      } else {
+      // dynamic case with broadcasting
       // we need to check if we can broadcast (case flag has bit 4 set)
       if (fBroadcastFlag & 4) {
          // need to check if shapes are the same
@@ -337,6 +383,8 @@ public:
          out << SP << SP << "tensor_" << fNY << "[id] = "
              << BinaryOperatorTrait<T,Op>::Op( "tensor_" + nameA + "[id]" , "tensor_" + nameB + "[id]")
              << " ;\n";
+      }
+
       out << SP << "}\n";
       return out.str();
    }
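
To make the new index arithmetic concrete: below is a sketch of the kind of code the non-dynamic branch above would emit for a hypothetical element-wise Add with static shapes A = {1,3}, B = {2,1}, Y = {2,3}. The tensor names, values, and surrounding main() are invented for illustration and are not part of the commit.

```cpp
#include <cstddef>
#include <iostream>

int main() {
   // Hypothetical operands: A broadcasts along axis 0, B along axis 1.
   float tensor_A[3] = {1.f, 2.f, 3.f}; // shape {1,3}, strides {3,1}
   float tensor_B[2] = {10.f, 20.f};    // shape {2,1}, strides {1,1}
   float tensor_Y[6];                   // shape {2,3}, strides {3,1}

   // Shape of the emitted loop: decompose the flat output index into
   // per-axis coordinates using Y's strides, then index each input with
   // only the axes whose extent is not 1 (broadcast axes are dropped).
   size_t idx_Y0;
   size_t idx_Y1;
   for (size_t idx = 0; idx < 6; ++idx) {
      idx_Y0 = idx / 3;
      idx_Y1 = (idx % 3) / 1;
      tensor_Y[idx_Y1 * 1 + idx_Y0 * 3] = tensor_A[idx_Y1 * 1] + tensor_B[idx_Y0 * 1];
   }

   for (float y : tensor_Y) std::cout << y << ' '; // 11 12 13 21 22 23
   std::cout << '\n';
}
```

Unlike the previous approach, no broadcasted copies of A or B are materialized; the broadcast happens purely in index arithmetic inside the loop.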

tmva/sofie/inc/TMVA/ROperator_Comparision.hxx

Lines changed: 38 additions & 31 deletions
@@ -131,10 +131,6 @@ public:
                // Update the data and the shape of A
                model.UpdateInitializedTensor(fNX1, model.GetTensorType(fNX1), fShapeY, broadcastedData);
                fShapeX1 = fShapeY;
-            } else {
-               // Add an intermediate tensor for broadcasting A
-               fNBroadcastedX1 = "Broadcasted" + fNX1;
-               model.AddIntermediateTensor(fNBroadcastedX1, model.GetTensorType(fNX1), fShapeY);
             }
          }
          // Broadcast B to Y
@@ -147,10 +143,6 @@ public:
                // Update the data and the shape of B
                model.UpdateInitializedTensor(fNX2, model.GetTensorType(fNX2), fShapeY, broadcastedData);
                fShapeX2 = fShapeY;
-            } else {
-               // Add an intermediate tensor for broadcasting B
-               fNBroadcastedX2 = "Broadcasted" + fNX2;
-               model.AddIntermediateTensor(fNBroadcastedX2, model.GetTensorType(fNX2), fShapeY);
             }
          }
       } else {
@@ -264,32 +256,47 @@ public:
       out << SP << "\n//------ " << ComparisionTrait<T,Op>::Name() << " " << opName
           << " --> " << ConvertShapeToString(fShapeY) << "\n";
       size_t length = ConvertShapeToLength(fShapeY);
-      // Broadcast A if it's uninitialized
-      if (!fNBroadcastedX1.empty()) {
-         std::string type1 = ConvertTypeToString(fTensorType1);
-         out << SP << "// Broadcasting uninitialized tensor " << fNX1 << "\n";
-         out << SP << "{\n";
-         out << SP << SP << type1 << "* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<" << type1 << ">(tensor_" << fNX1 << ", " << ConvertShapeToString(fShapeX1) << ", " << ConvertShapeToString(fShapeY) << ");\n";
-         out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNBroadcastedX1 << ");\n";
-         out << SP << SP << "delete[] data;\n";
-         out << SP << "}\n";
+
+      auto stridesX1 = UTILITY::ComputeStrideFromShape(fShapeX1);
+      auto stridesX2 = UTILITY::ComputeStrideFromShape(fShapeX2);
+      auto stridesY = UTILITY::ComputeStrideFromShape(fShapeY);
+
+      std::string compute_idx_X1, compute_idx_X2, compute_idx_Y;
+      if (std::all_of(fShapeX1.begin(), fShapeX1.end(), [](size_t x) { return x == 1; })) {
+         compute_idx_X1 = "0";
+      } else {
+         for (size_t i = 0; i < fShapeX1.size(); ++i) {
+            if (fShapeX1[i] == 1) continue;
+            compute_idx_X1 += " idx_" + fNY + std::to_string(i + (fShapeY.size() - fShapeX1.size())) + " * " + std::to_string(stridesX1[i]) + " +";
+         }
+         compute_idx_X1.pop_back();
+      }
+      if (std::all_of(fShapeX2.begin(), fShapeX2.end(), [](size_t x) { return x == 1; })) {
+         compute_idx_X2 = "0";
+      } else {
+         for (size_t i = 0; i < fShapeX2.size(); ++i) {
+            if (fShapeX2[i] == 1) continue;
+            compute_idx_X2 += " idx_" + fNY + std::to_string(i + (fShapeY.size() - fShapeX2.size())) + " * " + std::to_string(stridesX2[i]) + " +";
+         }
+         compute_idx_X2.pop_back();
       }
-      // Broadcast B if it's uninitialized
-      if (!fNBroadcastedX2.empty()) {
-         std::string type2 = ConvertTypeToString(fTensorType2);
-         out << SP << "// Broadcasting uninitialized tensor " << fNX2 << "\n";
-         out << SP << "{\n";
-         out << SP << SP << type2 << "* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<" << type2 << ">(tensor_" << fNX2 << ", " << ConvertShapeToString(fShapeX2) << ", " << ConvertShapeToString(fShapeY) << ");\n";
-         out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNBroadcastedX2 << ");\n";
-         out << SP << SP << "delete[] data;\n";
-         out << SP << "}\n";
+
+      for (size_t j = 0; j < fShapeY.size(); ++j) {
+         out << SP << "size_t " << "idx_" << fNY << j << ";\n";
       }
-      const std::string& nameX1 = fNBroadcastedX1.empty()? fNX1 : fNBroadcastedX1;
-      const std::string& nameX2 = fNBroadcastedX2.empty()? fNX2 : fNBroadcastedX2;
-
-      out << SP << "for (size_t id = 0; id < " << length << " ; id++){\n";
-      out << SP << SP << "fTensor_" << fNY << "[id] = " << ComparisionTrait<T,Op>::Op( "tensor_" + nameX1 + "[id]" , "tensor_" + nameX2 + "[id]") << " ;\n";
+      out << SP << "for (size_t idx = 0; idx < " << length << "; ++idx) {\n";
+      out << SP << SP << "idx_" << fNY << "0 = idx / " << stridesY[0] << ";\n";
+      compute_idx_Y += "idx_" + fNY + "0 * " + std::to_string(stridesY[0]);
+      std::string modulo_op = "idx % " + std::to_string(stridesY[0]);
+      for (size_t j = 1; j < fShapeY.size(); ++j) {
+         out << SP << SP << "idx_" << fNY << j << " = (" << modulo_op << ") / " << stridesY[j] << ";\n";
+         modulo_op += " % " + std::to_string(stridesY[j]);
+         compute_idx_Y = "idx_" + fNY + std::to_string(j) + " * " + std::to_string(stridesY[j]) + " + " + compute_idx_Y;
+      }
+      out << SP << SP << "tensor_" << fNY << "[" << compute_idx_Y << "] = " << ComparisionTrait<T,Op>::Op("tensor_" + fNX1 + "[" + compute_idx_X1 + "]", "tensor_" + fNX2 + "[" + compute_idx_X2 + "]") << " ;\n";
       out << SP << "}\n";
+
       // since output is a boolean need to add the tensor_xxx variable since it is not defined as a pointer to a boolean std::vector
       if (!fIsModelOutput)
          out << SP << "const std::vector<std::uint8_t> & tensor_" << fNY << " = fTensor_" << fNY << ";\n";

tmva/sofie/inc/TMVA/ROperator_Conv.hxx

Lines changed: 1 addition & 0 deletions
@@ -294,6 +294,7 @@ public:
                // we need to add a new intermediate tensor for broadcasted bias tensor
                fNB2 = fNB + "bcast";
                model.AddIntermediateTensor(fNB2, model.GetTensorType(fNB), targetShape);
+               fOutputTensorNames.emplace_back(fNB2);
             }
          }
       }

tmva/sofie/inc/TMVA/ROperator_Gemm.hxx

Lines changed: 1 addition & 0 deletions
@@ -251,6 +251,7 @@ namespace SOFIE{
             fNC2 = fNC + "bcast";
             if (!fIsDynamic) {
                model.AddIntermediateTensor(fNC2, model.GetTensorType(fNC), shapeY);
+               fOutputTensorNames.emplace_back(fNC2);
             }
             else
                model.AddDynamicTensor(fNC2,model.GetTensorType(fNC), fShapeY);

tmva/sofie/inc/TMVA/ROperator_Where.hxx

Lines changed: 53 additions & 37 deletions
@@ -110,9 +110,6 @@ public:
                // Update the data and the shape of A
                model.AddConstantTensor(fNBroadcastedA, model.GetTensorType(fNA), fShapeY, broadcastedData);
                fShapeA = fShapeY;
-            } else {
-               // Add an intermediate tensor for broadcasting A
-               model.AddIntermediateTensor(fNBroadcastedA, model.GetTensorType(fNA), fShapeY);
             }
          }
          // Broadcast B to Y
@@ -126,9 +123,6 @@ public:
                // do not update tensor B but add broadcasted one (since it can be input to some other operators)
                model.AddConstantTensor(fNBroadcastedB, model.GetTensorType(fNB), fShapeY, broadcastedData);
                fShapeB = fShapeY;
-            } else {
-               // Add an intermediate tensor for broadcasting B
-               model.AddIntermediateTensor(fNBroadcastedB, model.GetTensorType(fNB), fShapeY);
             }
          }
          // Broadcast C to Y
@@ -142,9 +136,6 @@ public:
                // do not update tensor C but add broadcasted one (since it can be input to some other operators)
                model.AddConstantTensor(fNBroadcastedC, model.GetTensorType(fNC), fShapeY, broadcastedData);
                fShapeC = fShapeY;
-            } else {
-               // Add an intermediate tensor for broadcasting C
-               model.AddIntermediateTensor(fNBroadcastedC, model.GetTensorType(fNC), fShapeY);
             }
          }
       } else {
@@ -252,39 +243,64 @@ public:
       out << SP << "\n//-------- Where " << opName << " --> " << ConvertShapeToString(fShapeY) << "\n";
       size_t length = ConvertShapeToLength(fShapeY);
       std::string typeName = TensorType<T>::Name();
-      // Broadcast A if it's uninitialized
-      if (fShapeA != fShapeY) {
-         out << SP << "// Broadcasting uninitialized tensor " << fNA << "\n";
-         //out << SP << "{\n";
-         out << SP << "TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<" << typeName << ">(tensor_" << fNA << ", " << ConvertShapeToString(fShapeA) << ", " << ConvertShapeToString(fShapeY)
-             << ", fTensor_" << fNBroadcastedA << ");\n";
+
+      auto stridesA = UTILITY::ComputeStrideFromShape(fShapeA);
+      auto stridesB = UTILITY::ComputeStrideFromShape(fShapeB);
+      auto stridesC = UTILITY::ComputeStrideFromShape(fShapeC);
+      auto stridesY = UTILITY::ComputeStrideFromShape(fShapeY);
+
+      std::string compute_idx_A, compute_idx_B, compute_idx_C, compute_idx_Y;
+
+      if (std::all_of(fShapeA.begin(), fShapeA.end(), [](size_t x) { return x == 1; })) {
+         compute_idx_A = "0";
+      } else {
+         for (size_t i = 0; i < fShapeA.size(); ++i) {
+            if (fShapeA[i] == 1) continue;
+            compute_idx_A += " idx_" + fNY + std::to_string(i + (fShapeY.size() - fShapeA.size())) + " * " + std::to_string(stridesA[i]) + " +";
+         }
+         compute_idx_A.pop_back();
       }
-      // Broadcast B if it's uninitialized
-      if (fShapeB != fShapeY) {
-         out << SP << "// Broadcasting uninitialized tensor " << fNB << "\n";
-         //out << SP << "{\n";
-         out << SP << "TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<" << typeName << ">(tensor_" << fNB << ", " << ConvertShapeToString(fShapeB) << ", " << ConvertShapeToString(fShapeY)
-             << ", fTensor_" << fNBroadcastedB << ");\n";
+      if (std::all_of(fShapeB.begin(), fShapeB.end(), [](size_t x) { return x == 1; })) {
+         compute_idx_B = "0";
+      } else {
+         for (size_t i = 0; i < fShapeB.size(); ++i) {
+            if (fShapeB[i] == 1) continue;
+            compute_idx_B += " idx_" + fNY + std::to_string(i + (fShapeY.size() - fShapeB.size())) + " * " + std::to_string(stridesB[i]) + " +";
+         }
+         compute_idx_B.pop_back();
       }
-      // Broadcast C if it's uninitialized
-      if (fShapeC != fShapeY) {
-         // special case if C is an input tensor
-         if (fIsInputBoolTensor) {
-            size_t inputLength = ConvertShapeToLength(fShapeC);
-            out << SP << "std::vector<std::uint8_t> fTensor_" << fNC << "(tensor_" << fNC << ", tensor_" << fNC << " + " << inputLength << ");\n";
+      if (std::all_of(fShapeC.begin(), fShapeC.end(), [](size_t x) { return x == 1; })) {
+         compute_idx_C = "0";
+      } else {
+         for (size_t i = 0; i < fShapeC.size(); ++i) {
+            if (fShapeC[i] == 1) continue;
+            compute_idx_C += " idx_" + fNY + std::to_string(i + (fShapeY.size() - fShapeC.size())) + " * " + std::to_string(stridesC[i]) + " +";
          }
-         out << SP << "// Broadcasting uninitialized tensor " << fNC << "\n";
-         //out << SP << "{\n";
-         out << SP << "TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<std::uint8_t>(fTensor_" << fNC << ".data(), " << ConvertShapeToString(fShapeC) << ", " << ConvertShapeToString(fShapeY)
-             << ", fTensor_" << fNBroadcastedC << ");\n";
+         compute_idx_C.pop_back();
+      }
+
+      if (fIsInputBoolTensor) {
+         size_t inputLength = ConvertShapeToLength(fShapeC);
+         out << SP << "std::vector<bool> fTensor_" << fNC << "(tensor_" << fNC << ", tensor_" << fNC << " + " << inputLength << ");\n";
       }
-      std::string nameA = fNBroadcastedA.empty()? fNA : fNBroadcastedA;
-      std::string nameB = fNBroadcastedB.empty()? fNB : fNBroadcastedB;
-      std::string nameC = fNBroadcastedC.empty()? fNC : fNBroadcastedC;
-      out << SP << "for (size_t id = 0; id < " << length << " ; id++){\n";
+
+      for (size_t j = 0; j < fShapeY.size(); ++j) {
+         out << SP << "size_t " << "idx_" << fNY << j << ";\n";
+      }
+      out << SP << "for (size_t idx = 0; idx < " << length << "; ++idx) {\n";
+      out << SP << SP << "idx_" << fNY << "0 = idx / " << stridesY[0] << ";\n";
+      compute_idx_Y += "idx_" + fNY + "0 * " + std::to_string(stridesY[0]);
+      std::string modulo_op = "idx % " + std::to_string(stridesY[0]);
+      for (size_t j = 1; j < fShapeY.size(); ++j) {
+         out << SP << SP << "idx_" << fNY << j << " = (" << modulo_op << ") / " << stridesY[j] << ";\n";
+         modulo_op += " % " + std::to_string(stridesY[j]);
+         compute_idx_Y = "idx_" + fNY + std::to_string(j) + " * " + std::to_string(stridesY[j]) + " + " + compute_idx_Y;
+      }
+
       // get output tensor applying condition
-      out << SP << SP << "tensor_" << fNY << "[id] = " << "(fTensor_" << nameC << "[id]) ? tensor_"
-          << nameA << "[id] : tensor_" + nameB + "[id];\n";
+      out << SP << SP << "tensor_" << fNY << "[" << compute_idx_Y << "] = " << "(fTensor_" << fNC << "[" << compute_idx_C << "]) ? tensor_"
+          << fNA << "[" << compute_idx_A << "] : tensor_" + fNB + "[" << compute_idx_B << "];\n";
       out << SP << "}\n";
       return out.str();
    }
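
All of these generated loops assume the inputs were already validated as broadcastable to fShapeY during shape inference. For reference, here is a minimal sketch of the ONNX-style multidirectional broadcasting rule those checks implement; BroadcastShapes is a hypothetical helper, not the function SOFIE calls.

```cpp
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <vector>

// Right-align the two shapes; each axis pair must match or contain a 1.
std::vector<std::size_t> BroadcastShapes(std::vector<std::size_t> a,
                                         std::vector<std::size_t> b) {
   // Pad the shorter shape with leading 1s so both have the same rank.
   if (a.size() < b.size()) a.insert(a.begin(), b.size() - a.size(), 1);
   if (b.size() < a.size()) b.insert(b.begin(), a.size() - b.size(), 1);
   std::vector<std::size_t> y(a.size());
   for (std::size_t i = 0; i < a.size(); ++i) {
      if (a[i] == b[i] || b[i] == 1) y[i] = a[i];
      else if (a[i] == 1)            y[i] = b[i];
      else throw std::runtime_error("shapes are not broadcastable");
   }
   return y;
}

int main() {
   for (auto d : BroadcastShapes({2, 1, 4}, {3, 1})) // prints: 2 3 4
      std::cout << d << ' ';
   std::cout << '\n';
}
```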
