|
| 1 | +#ifndef TMVA_SOFIE_ROPERATOR_GatherND |
| 2 | +#define TMVA_SOFIE_ROPERATOR_GatherND |
| 3 | + |
| 4 | +#include "TMVA/SOFIE_common.hxx" |
| 5 | +#include "TMVA/ROperator.hxx" |
| 6 | +#include "TMVA/RModel.hxx" |
| 7 | + |
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
| 11 | + |
| 12 | +namespace TMVA{ |
| 13 | +namespace Experimental{ |
| 14 | +namespace SOFIE{ |
| 15 | + |
| 16 | +class ROperator_GatherND final : public ROperator |
| 17 | +{ |
| 18 | +private: |
| 19 | + |
| 20 | + size_t fBatchDims = 0; |
| 21 | + std::string fNX; |
| 22 | + std::string fNIndices; |
| 23 | + std::string fNY; |
| 24 | + |
| 25 | + std::vector<Dim> fShapeX; |
| 26 | + std::vector<Dim> fShapeIndices; |
| 27 | + std::vector<Dim> fShapeY; |
| 28 | + |
| 29 | + std::vector<int64_t> fIndices; // indices vector in case they are known at initialization |
| 30 | + |
| 31 | + std::string fType; |
| 32 | + |
| 33 | +public: |
| 34 | + ROperator_GatherND(){} |
| 35 | + ROperator_GatherND(int batch_dims, std::string nameX, std::string nameIndices, std::string nameY): |
| 36 | + fBatchDims(batch_dims), fNX(UTILITY::Clean_name(nameX)), fNIndices(UTILITY::Clean_name(nameIndices)), fNY(UTILITY::Clean_name(nameY)) { |
| 37 | + fInputTensorNames = { fNX, fNIndices }; |
| 38 | + fOutputTensorNames = { fNY }; |
| 39 | + } |
| 40 | + |
| 41 | + void Initialize(RModel& model) override { |
| 42 | + if (!model.CheckIfTensorAlreadyExist(fNX)) { |
| 43 | + throw std::runtime_error("TMVA SOFIE GatherND Op Input Tensor " + fNX + " is not found in model"); |
| 44 | + } |
| 45 | + fShapeX = model.GetDimTensorShape(fNX); |
| 46 | + if (model.Verbose()) |
| 47 | + std::cout << "GatherND - initial shape " << ConvertShapeToString(fShapeX) << " shape of indices " |
| 48 | + << ConvertShapeToString(model.GetDimTensorShape(fNIndices)) << std::endl; |
| 49 | + // fShapeIndices can be dynamic |
| 50 | + fShapeIndices = model.GetDimTensorShape(fNIndices); |
| 51 | + size_t q = fShapeIndices.size(); |
| 52 | + // Axis in range [0, r) where r=rank(X) |
| 53 | + size_t r = fShapeX.size(); |
| 54 | + |
| 55 | + if (q < 1) { |
| 56 | + throw std::runtime_error("TMVA SOFIE GatherND : rank of Indices is < 1"); |
| 57 | + } |
| 58 | + if (r < 1) { |
| 59 | + throw std::runtime_error("TMVA SOFIE GatherND : rank of input tensor is < 1"); |
| 60 | + } |
| 61 | + if (fBatchDims >= std::min(q,r)) { |
| 62 | + throw std::runtime_error("TMVA SOFIE GatherND : invalid batch dim value"); |
| 63 | + } |
| 64 | + if (fBatchDims > 0) { |
| 65 | + for (size_t i = 0; i < fBatchDims; i++) { |
| 66 | + if (fShapeX[i] != fShapeIndices[i]) { |
| 67 | + std::cout << " input shape " << ConvertShapeToString(fShapeX) << " " |
| 68 | + << " index shape " << ConvertShapeToString(fShapeIndices) << std::endl; |
| 69 | + throw std::runtime_error("TMVA SOFIE GatherND : invalid input or index shape for " + std::to_string(i)); |
| 70 | + } |
| 71 | + } |
| 72 | + } |
| 73 | + |
| 74 | + //general case. Assumption is that last dimension of index shape is known (is not dynamic) |
| 75 | + if (fShapeIndices.back().isParam) |
| 76 | + throw std::runtime_error("TMVA SOFIE GatherND : Index_shape(-1) is not known"); |
| 77 | + |
| 78 | + // output shape size (output rank) |
| 79 | + // is (q-1)+r -index_shape[-1] |
| 80 | + size_t last_index_shape = fShapeIndices.back().dim; |
| 81 | + if (last_index_shape < 1 || last_index_shape > r - fBatchDims) { |
| 82 | + throw std::runtime_error("TMVA SOFIE GatherND : Index_shape(-1) has wrong value " + |
| 83 | + std::to_string(last_index_shape)); |
| 84 | + } |
| 85 | + |
| 86 | + size_t output_rank = r + q -1 - last_index_shape - fBatchDims; |
| 87 | + //fShapeY.resize(output_rank); |
| 88 | + // first index shape dimensions are same in output |
| 89 | + fShapeY = std::vector<Dim>(fShapeIndices.begin(), fShapeIndices.end() - 1); |
| 90 | + fShapeY.insert(fShapeY.end(), fShapeX.begin() + fBatchDims + last_index_shape, fShapeX.end()); |
| 91 | + if (fShapeY.size() != output_rank) { |
| 92 | + std::cout << " input shape " << ConvertShapeToString(fShapeX) << " " |
| 93 | + << " index shape " << ConvertShapeToString(fShapeIndices) |
| 94 | + << " output shape " << ConvertShapeToString(fShapeY) |
| 95 | + << " and output rank should be " << output_rank << std::endl; |
| 96 | + throw std::runtime_error("TMVA SOFIE GatherND : Something is wrong in initialization "); |
| 97 | + } |
| 98 | + |
| 99 | + if (!fIsOutputConstant) { |
| 100 | + // Add output tensor |
| 101 | + model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY); |
| 102 | + fType = ConvertTypeToString(model.GetTensorType(fNX)); |
| 103 | + if (model.Verbose()) |
| 104 | + std::cout << "GatherND: input " << fNX << " " << ConvertShapeToString(fShapeX) << " indices " << fNIndices << ConvertShapeToString(fShapeIndices) |
| 105 | + << " -> " << fNY << " with shape " << ConvertShapeToString(fShapeY) << std::endl; |
| 106 | + } |
| 107 | + |
| 108 | + |
| 109 | + |
| 110 | + // // case indices tensor is initialized |
| 111 | + // if (model.IsInitializedTensor(fNIndices)) { |
| 112 | + // // empty shape Indices is a scalar value for the indices |
| 113 | + // size_t indicesLength = ConvertShapeToLength(model.GetTensorShape(fNIndices)); |
| 114 | + // int64_t* indicesData = static_cast<int64_t*>(model.GetInitializedTensorData(fNIndices).get()); |
| 115 | + // //flag index tensor as not writable (not sure this is needed since index tensor might be used in generated code) |
| 116 | + // model.SetNotWritableInitializedTensor(fNIndices); |
| 117 | + // // update indices data in case of negative dim values |
| 118 | + // for (size_t i = 0; i < indicesLength; i++) { |
| 119 | + // // move this at generation time? |
| 120 | + // if (!fShapeX[fAttrAxis].isParam) { |
| 121 | + // if (indicesData[i] < 0) { |
| 122 | + // indicesData[i] += fShapeX[fAttrAxis].dim; |
| 123 | + // } |
| 124 | + // } |
| 125 | + // } |
| 126 | + // // Save in a vector GatherND Indices of size q |
| 127 | + // fIndices = std::vector<int64_t>(indicesData, indicesData + indicesLength); |
| 128 | + // } |
| 129 | + |
| 130 | + // case input is known (type is an integer) and input indices is a scalar (or vector of size 1) |
| 131 | + // if (model.IsInitializedTensor(fNX) && q <= 1 && r == 1 && fIndices.size() > 0) { |
| 132 | + // auto shapeX = ConvertShapeToInt(fShapeX); // we assume model is not dynamic |
| 133 | + // auto shapeY = ConvertShapeToInt(fShapeY); |
| 134 | + // if (model.GetTensorType(fNX) == ETensorType::INT64) { |
| 135 | + // auto inputData = static_cast<int64_t*>(model.GetInitializedTensorData(fNX).get()); |
| 136 | + // // if q <=1 and r = 1 output length = 1 (it is a scalar) |
| 137 | + // std::vector<int64_t> outputData(1); //ConvertShapeToLength(shapeY)); |
| 138 | + // outputData[0] = inputData[fIndices[0]]; |
| 139 | + // model.AddConstantTensor(fNY, shapeY, outputData.data()); |
| 140 | + // if (model.Verbose()) |
| 141 | + // std::cout << "GatherND: " << fNX << " " << ConvertShapeToString(shapeX) << " -> " << fNY << " with shape " << ConvertShapeToString(shapeY) |
| 142 | + // << " and values " << ConvertValuesToString(outputData) << " (constant) " << std::endl; |
| 143 | + // fIsOutputConstant = true; |
| 144 | + // } |
| 145 | + // } |
| 146 | + // // case input is a shape tensor (r is == 1 by definition) and indices are known |
| 147 | + // else if (model.IsShapeTensor(fNX) && q <=1 && fIndices.size() > 0) { |
| 148 | + // auto inputData = model.GetShapeTensorValues(fNX); |
| 149 | + // // if r == 1 and q<=1 then output length is 1 (is a scalar or tensor of size1) |
| 150 | + // std::vector<Dim> outputData(1); |
| 151 | + // outputData[0] = inputData[fIndices[0]]; |
| 152 | + // if (outputData[0].isParam) { |
| 153 | + // fIsOutputConstant = true; |
| 154 | + // // shapeY can be scalar or vector of size1 |
| 155 | + // model.AddShapeTensor(fNY, outputData, fShapeY.size() == 0); |
| 156 | + // if (model.Verbose()) |
| 157 | + // std::cout << "GatherND: " << fNX << " " << ConvertShapeToString(fShapeX) << " -> " << fNY << " with shape " << ConvertShapeToString(fShapeY) |
| 158 | + // << " and values " << ConvertShapeToString(outputData) << " (shape) " << std::endl; |
| 159 | + // } else { |
| 160 | + // int64_t value = static_cast<int64_t>(outputData[0].dim); |
| 161 | + // auto shapeY = ConvertShapeToInt(fShapeY); |
| 162 | + // model.AddConstantTensor(fNY, shapeY, &value); |
| 163 | + // fIsOutputConstant = true; |
| 164 | + // if (model.Verbose()) |
| 165 | + // std::cout << "GatherND: " << fNX << " " << ConvertShapeToString(fShapeX) << " -> " << fNY << " with shape " << ConvertShapeToString(fShapeY) |
| 166 | + // << " and values {" << value << "} (constant) " << std::endl; |
| 167 | + // } |
| 168 | + // } |
| 169 | + |
| 170 | + } |
| 171 | + |
| 172 | + std::string Generate(std::string opName) override { |
| 173 | + if (fIsOutputConstant) { |
| 174 | + // no code to generate here for constant output. Tensor output is defined in Session constructor |
| 175 | + return "//---------------------------------------\n"; |
| 176 | + } |
| 177 | + opName = "op_" + opName; |
| 178 | + std::stringstream out; |
| 179 | + out << "//--------- GatherND " << opName << " --> " << ConvertShapeToString(fShapeY) << "\n"; |
| 180 | + // The shape of the output is q + r - 1 |
| 181 | + size_t r = fShapeX.size(); |
| 182 | + // Indices of shape q |
| 183 | + size_t q = fShapeIndices.size(); |
| 184 | + // Strides |
| 185 | + auto stridesX = UTILITY::ComputeStrideFromShape(fShapeX); |
| 186 | + auto stridesY = UTILITY::ComputeStrideFromShape(fShapeY); |
| 187 | + auto stridesIndices = UTILITY::ComputeStrideFromShape(fShapeIndices); |
| 188 | + |
| 189 | + // case input_index_shape == rank of input |
| 190 | + size_t ss = fShapeIndices.back().dim; |
| 191 | + |
| 192 | + // check for negative indices |
| 193 | + auto indicesLength = ConvertDimShapeToLength(fShapeIndices); |
| 194 | + out << SP << "for (size_t i = 0; i < " << indicesLength << "; i++) {\n"; |
| 195 | + out << SP << SP << "if (tensor_" << fNIndices << "[i] < 0 ) {\n"; |
| 196 | + // corresponding input shape is i % strides[N-1] |
| 197 | + out << SP << SP << SP << "size_t s_i = " << fShapeX[fBatchDims] << ";\n"; |
| 198 | + for (size_t j = 1; j < ss; j++) { |
| 199 | + out << SP << SP << SP << "if (i % " << ss << " == " << j << ") s_i = " << fShapeX[fBatchDims+j] << ";\n"; |
| 200 | + } |
| 201 | + out << SP << SP << SP << "const_cast<int64_t &>(tensor_" << fNIndices << "[i]) += s_i;\n"; |
| 202 | + out << SP << SP << "}\n"; |
| 203 | + out << SP << "}\n"; |
| 204 | + // loop on batch dims |
| 205 | + std::string outIndex; |
| 206 | + std::string inIndex; |
| 207 | + std::string idIndex; |
| 208 | + for (size_t j = 0; j < fBatchDims; j++) { |
| 209 | + std::string index = "i_" + std::to_string(j); |
| 210 | + for (size_t k = 0; k <= j; k++) |
| 211 | + out << SP; |
| 212 | + out << "for (size_t " << index << " = 0; " << index << " < " << fShapeY[j] << "; " << index << "++) {\n"; |
| 213 | + if (j > 0) { |
| 214 | + outIndex += " + "; |
| 215 | + inIndex += " + "; |
| 216 | + idIndex += " + "; |
| 217 | + } |
| 218 | + outIndex += index; |
| 219 | + if (stridesY[j].GetVal() != "1") |
| 220 | + outIndex += " * " + stridesY[j].GetVal(); |
| 221 | + inIndex += index; |
| 222 | + if (stridesX[j].GetVal() != "1") |
| 223 | + inIndex += " * " + stridesX[j].GetVal(); |
| 224 | + idIndex += index; |
| 225 | + if (stridesIndices[j].GetVal() != "1") |
| 226 | + idIndex += " * " + stridesIndices[j].GetVal(); |
| 227 | + } |
| 228 | + // loop between b and q-1 |
| 229 | + for (size_t j = fBatchDims; j < q - 1; j++) { |
| 230 | + std::string index = "i_" + std::to_string(j); |
| 231 | + for (size_t k = 0; k <= j; k++) out << SP; |
| 232 | + out << "for (size_t " << index << " = 0; " << index << " < " << fShapeY[j] << "; " << index << "++) {\n"; |
| 233 | + if (j > 0) { |
| 234 | + outIndex += " + "; |
| 235 | + idIndex += " + "; |
| 236 | + } |
| 237 | + outIndex += index; |
| 238 | + if (stridesY[j].GetVal() != "1") |
| 239 | + outIndex += " * " + stridesY[j].GetVal(); |
| 240 | + idIndex += index; |
| 241 | + if (stridesIndices[j].GetVal() != "1") |
| 242 | + idIndex += " * " + stridesIndices[j].GetVal(); |
| 243 | + } |
| 244 | + for (size_t k = 0; k <= q - 1; k++) out << SP; |
| 245 | + out << "size_t inputIndex = " << inIndex; |
| 246 | + std::string indexIndex = idIndex; |
| 247 | + for (size_t l = 0; l < ss; l++) { |
| 248 | + if (l > 0) |
| 249 | + indexIndex = idIndex + " + " + std::to_string(l); |
| 250 | + // compute input index using index tensors |
| 251 | + if (!indexIndex.empty() || l>0) |
| 252 | + out << " + "; |
| 253 | + out << "tensor_" << fNIndices << "[" << indexIndex << "]"; |
| 254 | + if (stridesX[fBatchDims + l].GetVal() != "1") out |
| 255 | + << " * " << stridesX[fBatchDims + l]; |
| 256 | + } |
| 257 | + out << ";\n"; |
| 258 | + for (size_t k = 0; k <= q - 1; k++) out << SP; |
| 259 | + // case slice is a scalar |
| 260 | + if (ss == r - fBatchDims) { |
| 261 | + out << "tensor_" << fNY << "[" << outIndex << "] = " |
| 262 | + << "tensor_" << fNX << "[inputIndex];\n"; |
| 263 | + } else { |
| 264 | + // we make a copy of slice |
| 265 | + out << "std::copy(tensor_" << fNX << " + inputIndex, tensor_" << fNX << " + inputIndex + " |
| 266 | + << stridesX[fBatchDims + ss - 1] << "," |
| 267 | + << "tensor_" << fNY << "+" << outIndex << ");\n"; |
| 268 | + } |
| 269 | + // close the loops |
| 270 | + |
| 271 | + // end loops j_k, j_{k + 1}, ..., j_{r - 2} |
| 272 | + for (size_t j = q-1; j > 0; j--) { |
| 273 | + for (size_t k = 0; k <j; k++) out << SP; |
| 274 | + out << "}\n"; |
| 275 | + } |
| 276 | + |
| 277 | + return out.str(); |
| 278 | + } |
| 279 | + |
| 280 | +}; |
| 281 | + |
| 282 | +}//SOFIE |
| 283 | +}//Experimental |
| 284 | +}//TMVA |
| 285 | + |
| 286 | + |
#endif //TMVA_SOFIE_ROPERATOR_GatherND