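cleanup: normalize indentation to tabs, strip trailing whitespace, remove commented-out debug code, and use std::abs (with <cmath>) in HcalProducerFPGA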

kpedro88 · kpedro88 · commit c84fcba195a8 · 2020-07-30T17:09:56.000-05:00
diff --git a/TensorRT/interface/TRTClientFPGA.h b/TensorRT/interface/TRTClientFPGA.h
@@ -55,7 +55,7 @@ class TRTClientFPGA : public Client {
 		unsigned ninput_;
 		unsigned noutput_;
 		std::unique_ptr<nic::InferContext> context_;
-		std::shared_ptr<nic::InferContext::Input> nicinput_; 
+		std::shared_ptr<nic::InferContext::Input> nicinput_;
 };
 typedef TRTClientFPGA<SonicClientSync<std::vector<unsigned short>>> TRTClientFPGASync;
 typedef TRTClientFPGA<SonicClientPseudoAsync<std::vector<unsigned short>>> TRTClientFPGAPseudoAsync;
diff --git a/TensorRT/plugins/HcalProducerFPGA.cc b/TensorRT/plugins/HcalProducerFPGA.cc
@@ -3,6 +3,7 @@
 #include <sstream>
 #include <string>
 #include <fstream>
+#include <cmath>
 
 #include "SonicCMS/Core/interface/SonicEDProducer.h"
 #include "SonicCMS/TensorRT/interface/TRTClientFPGA.h"
@@ -22,61 +23,23 @@ class HcalProducerFPGA : public SonicEDProducer<Client>
 		//needed because base class has dependent scope
 		using typename SonicEDProducer<Client>::Input;
 		using typename SonicEDProducer<Client>::Output;
-                //std::vector<float> fInVals; => debug
-                //std::vector<float> fPrVals;
-                //int fCount;
 		explicit HcalProducerFPGA(edm::ParameterSet const& cfg) : SonicEDProducer<Client>(cfg), topN_(cfg.getParameter<unsigned>("topN")) {
 			//for debugging
 			this->setDebugName("HcalProducerFPGA");
-			/* Validation on temporary files
-			std::ifstream lFIn("tb_input_features.dat");
-			std::ifstream lFPr("tb_output_predictions.dat");
-			std::string iline;
-			std::string pline;
-			bool hit_end = false;
-			bool valid_data = true;
-			for (int istream = 0; istream < 16000; istream++) {
-			  if (valid_data && !hit_end){
-			    if(std::getline(lFIn,iline) && std::getline(lFPr,pline)) {
-			      char* cstr=const_cast<char*>(iline.c_str());
-			      char* current;
-			      std::vector<float> in;
-			      current=strtok(cstr," ");
-			      while(current!=NULL){
-				fInVals.push_back(atof(current));
-				current=strtok(NULL," ");
-			      }
-			      cstr=const_cast<char*>(pline.c_str());
-			      current=strtok(cstr," ");
-			      while(current!=NULL){
-				fPrVals.push_back(atof(current));
-				current=strtok(NULL," ");
-			      }
-			    }
-			  }
-	                }			
-			fCount = 0;
-			*/
-			
 		}
 		void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override {
 			auto ninput = client_.ninput();
 			auto batchSize = client_.batchSize();
 			iInput = Input(ninput*batchSize, 0.f);
 			//make some random channels
-			for(unsigned ib = 0; ib < batchSize; ib++) { 
-			  //Current Hcal setup takes 11 inputs but it is sent in chunks of 16 to the FPGA 
-			  iInput[ib*ninput+0] = f_to_ui<16,6>(1);
-			  iInput[ib*ninput+1] = f_to_ui<16,6>(2);
-			  iInput[ib*ninput+3] = f_to_ui<16,6>(int(rand() % 30)-15); 
-			  iInput[ib*ninput+4] = f_to_ui<16,6>(int(rand() % 36)-36); 
-			  for(unsigned i0 = 5; i0 < 18; i0++) iInput[ib*ninput+i0] = f_to_ui<16,8>(1);
-			  for(unsigned i0 = 18; i0 < ninput; i0++) iInput[ib*ninput+i0] = f_to_ui<16,8>(0);
-			  /* for debug
-			  for(unsigned i0 = 0;  i0 < 11; i0++)     iInput[ib*ninput+i0] = f_to_ui<32,14>(fInVals[i0+11*fCount]);
-			  for(unsigned i0 = 11; i0 < ninput; i0++) iInput[ib*ninput+i0] = 0;
-			  fCount++;
-			  */
+			for(unsigned ib = 0; ib < batchSize; ib++) {
+				//Current Hcal setup takes 11 inputs but it is sent in chunks of 16 to the FPGA
+				iInput[ib*ninput+0] = f_to_ui<16,6>(1);
+				iInput[ib*ninput+1] = f_to_ui<16,6>(2);
+				iInput[ib*ninput+3] = f_to_ui<16,6>(int(rand() % 30)-15);
+				iInput[ib*ninput+4] = f_to_ui<16,6>(int(rand() % 36)-36);
+				for(unsigned i0 = 5; i0 < 18; i0++) iInput[ib*ninput+i0] = f_to_ui<16,8>(1);
+				for(unsigned i0 = 18; i0 < ninput; i0++) iInput[ib*ninput+i0] = f_to_ui<16,8>(0);
 			}
 		}
 		void produce(edm::Event& iEvent, edm::EventSetup const& iSetup, Output const& iOutput) override {
@@ -119,34 +82,33 @@ class HcalProducerFPGA : public SonicEDProducer<Client>
 
 	private:
 		using SonicEDProducer<Client>::client_;
-                template<unsigned int B, unsigned int I>
-                unsigned short f_to_ui(float f) {
-                    bool isPos = f > 0.;
-                    short tmpIs = int(fabs(f));
-                    unsigned short tmpI = tmpIs;
-                    if (not isPos) {
-		      //unsigned int comp = ((unsigned int)((1<<(sizeof(unsigned int)*8-I+1))-1)<<I);
-		      unsigned short comp = ((unsigned short)((1<<(sizeof(unsigned short)*4-I+1))-1)<<I);
-		      tmpI = -tmpIs;
-		      tmpI = tmpI-comp;
-                    }
-                    float tmpF = fabs(f) - float(tmpIs);
-                    unsigned short fracs = tmpF*float(1<<(B-I));
-                    unsigned short val = (tmpI << (B-I)) + fracs;
-                    return val;
-                }
-                template<unsigned int B, unsigned int I>
-                float ui_to_f(const unsigned short ui) {
-		  unsigned short i = ui >> (B-I);
-		  unsigned short mask = (1 << (B-I))-1;
-		  unsigned short dec = ui & mask;
-		  float lDec = float(dec)/float(1 << (B-I));
-		  return float(i)+lDec;
-		} 
-                uint32_t merge(unsigned short iA,unsigned short iB) {
-		  uint32_t result = (uint32_t) iA << 16 | iB;
-		  return result;
-		} 
+		template<unsigned int B, unsigned int I>
+		unsigned short f_to_ui(float f) {
+			bool isPos = f > 0.;
+			short tmpIs = int(std::abs(f));
+			unsigned short tmpI = tmpIs;
+			if (not isPos) {
+				unsigned short comp = ((unsigned short)((1<<(sizeof(unsigned short)*4-I+1))-1)<<I);
+				tmpI = -tmpIs;
+				tmpI = tmpI-comp;
+			}
+			float tmpF = std::abs(f) - float(tmpIs);
+			unsigned short fracs = tmpF*float(1<<(B-I));
+			unsigned short val = (tmpI << (B-I)) + fracs;
+			return val;
+		}
+		template<unsigned int B, unsigned int I>
+		float ui_to_f(const unsigned short ui) {
+			unsigned short i = ui >> (B-I);
+			unsigned short mask = (1 << (B-I))-1;
+			unsigned short dec = ui & mask;
+			float lDec = float(dec)/float(1 << (B-I));
+			return float(i)+lDec;
+		}
+		uint32_t merge(unsigned short iA,unsigned short iB) {
+			uint32_t result = (uint32_t) iA << 16 | iB;
+			return result;
+		}
 		unsigned topN_;
 };
 
diff --git a/TensorRT/src/TRTClient.cc b/TensorRT/src/TRTClient.cc
@@ -30,7 +30,7 @@ TRTClient<Client>::TRTClient(const edm::ParameterSet& params) :
 
 template <typename Client>
 void TRTClient<Client>::setup() {
-	if(!fSetup) { 
+	if(!fSetup) {
 		auto err = nic::InferGrpcContext::Create(&context_, url_, modelName_, -1, false);
 		if(!err.IsOk()) throw cms::Exception("BadGrpc") << "unable to create inference context: " << err;
 
@@ -49,7 +49,7 @@ void TRTClient<Client>::setup() {
 	const std::vector<std::shared_ptr<nic::InferContext::Input>>& nicinputs = context_->Inputs();
 	nicinput_ = nicinputs[0];
 	nicinput_->Reset();
-	
+
 	auto t2 = std::chrono::high_resolution_clock::now();
 	std::vector<int64_t> input_shape;
 	for(unsigned i0 = 0; i0 < batchSize_; i0++) {
diff --git a/TensorRT/src/TRTClientFPGA.cc b/TensorRT/src/TRTClientFPGA.cc
@@ -22,51 +22,46 @@ TRTClientFPGA<Client>::TRTClientFPGA(const edm::ParameterSet& params) :
 	ninput_(params.getParameter<unsigned>("ninput")),
 	noutput_(params.getParameter<unsigned>("noutput"))
 {
-  fSetup = false;
+	fSetup = false;
 }
 
 template <typename Client>
 void TRTClientFPGA<Client>::setup() {
-  if(!fSetup) { 
-    auto err = nic::InferGrpcContext::Create(&context_, url_, modelName_, -1, false);
-    if(!err.IsOk()) throw cms::Exception("BadGrpc") << "unable to create inference context: " << err;
-    std::unique_ptr<nic::InferContext::Options> options;
-    nic::InferContext::Options::Create(&options);
-    
-    options->SetBatchSize(batchSize_);
-    for (const auto& output : context_->Outputs()) {
-      options->AddRawResult(output);
-    }
-    context_->SetRunOptions(*options);
-    fSetup = true;
-  }
-  const std::vector<std::shared_ptr<nic::InferContext::Input>>& nicinputs = context_->Inputs();
-  nicinput_ = nicinputs[0];
-  nicinput_->Reset();
-  
-  auto t2 = std::chrono::high_resolution_clock::now();
-  for(unsigned i0 = 0; i0 < batchSize_; i0++) {
-   nic::Error err1 = nicinput_->SetRaw(reinterpret_cast<const uint8_t*>(&(this->input_[i0*ninput_])), ninput_ * sizeof(unsigned short));
-   //nic::Error err1 = nicinput_->SetRaw(reinterpret_cast<const uint8_t*>(this->input_.data()), ninput_ * sizeof(unsigned short));
-  }
-  //nic::Error err1 = nicinput_->SetRaw(reinterpret_cast<const uint8_t*>(this->input_.data()), batchSize_*ninput_ * sizeof(unsigned short));
-  auto t3 = std::chrono::high_resolution_clock::now();
-  edm::LogInfo("TRTClientFPGA") << "Image array time: " << std::chrono::duration_cast<std::chrono::microseconds>(t3-t2).count();
+	if(!fSetup) {
+		auto err = nic::InferGrpcContext::Create(&context_, url_, modelName_, -1, false);
+		if(!err.IsOk()) throw cms::Exception("BadGrpc") << "unable to create inference context: " << err;
+		std::unique_ptr<nic::InferContext::Options> options;
+		nic::InferContext::Options::Create(&options);
+
+		options->SetBatchSize(batchSize_);
+		for (const auto& output : context_->Outputs()) {
+			options->AddRawResult(output);
+		}
+		context_->SetRunOptions(*options);
+		fSetup = true;
+	}
+	const std::vector<std::shared_ptr<nic::InferContext::Input>>& nicinputs = context_->Inputs();
+	nicinput_ = nicinputs[0];
+	nicinput_->Reset();
+
+	auto t2 = std::chrono::high_resolution_clock::now();
+	for(unsigned i0 = 0; i0 < batchSize_; i0++) {
+		nic::Error err1 = nicinput_->SetRaw(reinterpret_cast<const uint8_t*>(&(this->input_[i0*ninput_])), ninput_ * sizeof(unsigned short));
+	}
+	auto t3 = std::chrono::high_resolution_clock::now();
+	edm::LogInfo("TRTClientFPGA") << "Image array time: " << std::chrono::duration_cast<std::chrono::microseconds>(t3-t2).count();
 }
 
 template <typename Client>
 void TRTClientFPGA<Client>::getResults(const std::unique_ptr<nic::InferContext::Result>& result) {
 	auto t2 = std::chrono::high_resolution_clock::now();
 	unsigned short tmp=0;
 	this->output_.resize(noutput_*batchSize_,tmp);
-	//for(unsigned i0 = 0; i0 < batchSize_; i0++) { 
 	const uint8_t* r0;
 	size_t content_byte_size;
 	result->GetRaw(0, &r0, &content_byte_size);
 	const unsigned int *lVal = reinterpret_cast<const unsigned int*>(r0);
 	memcpy(this->output_.data(),&lVal[0],content_byte_size*batchSize_);
-	//for(unsigned i1 = 0; i1 < noutput_; i1++) this->output_[i0*noutput_+i1] = lVal[i1]; //This should be replaced with a memcpy
-	//}
 	auto t3 = std::chrono::high_resolution_clock::now();
 	edm::LogInfo("TRTClientFPGA") << "Output time: " << std::chrono::duration_cast<std::chrono::microseconds>(t3-t2).count();
 }