Skip to content

Commit c84fcba

Browse files
committed
cleanup
1 parent 6cca508 commit c84fcba

File tree

4 files changed

+63
-106
lines changed

4 files changed

+63
-106
lines changed

TensorRT/interface/TRTClientFPGA.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ class TRTClientFPGA : public Client {
5555
unsigned ninput_;
5656
unsigned noutput_;
5757
std::unique_ptr<nic::InferContext> context_;
58-
std::shared_ptr<nic::InferContext::Input> nicinput_;
58+
std::shared_ptr<nic::InferContext::Input> nicinput_;
5959
};
6060
typedef TRTClientFPGA<SonicClientSync<std::vector<unsigned short>>> TRTClientFPGASync;
6161
typedef TRTClientFPGA<SonicClientPseudoAsync<std::vector<unsigned short>>> TRTClientFPGAPseudoAsync;

TensorRT/plugins/HcalProducerFPGA.cc

Lines changed: 36 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <sstream>
44
#include <string>
55
#include <fstream>
6+
#include <cmath>
67

78
#include "SonicCMS/Core/interface/SonicEDProducer.h"
89
#include "SonicCMS/TensorRT/interface/TRTClientFPGA.h"
@@ -22,61 +23,23 @@ class HcalProducerFPGA : public SonicEDProducer<Client>
2223
//needed because base class has dependent scope
2324
using typename SonicEDProducer<Client>::Input;
2425
using typename SonicEDProducer<Client>::Output;
25-
//std::vector<float> fInVals; => debug
26-
//std::vector<float> fPrVals;
27-
//int fCount;
2826
// Constructs the producer from its configuration: the parameter set is
// forwarded to the SonicEDProducer base and the "topN" parameter is cached
// in topN_.
explicit HcalProducerFPGA(edm::ParameterSet const& cfg)
	: SonicEDProducer<Client>(cfg),
	  topN_(cfg.getParameter<unsigned>("topN"))
{
	// label used by the base class for debugging output
	this->setDebugName("HcalProducerFPGA");
}
6230
// Fills iInput with synthetic per-channel features for every batch entry.
//
// iInput is resized to ninput*batchSize elements (all zero) and, for each
// batch entry, the following slots are written in fixed-point form:
//   slot 0      -> 1            (<16,6> encoding)
//   slot 1      -> 2            (<16,6> encoding)
//   slot 3      -> random integer in [-15, 14]  (<16,6> encoding)
//   slot 4      -> random integer in [-36, -1]  (<16,6> encoding)
//   slots 5..17 -> 1            (<16,8> encoding)
//   slots 18..  -> 0            (<16,8> encoding)
// Slot 2 is never written and keeps its zero initialisation.
//
// Every write is bounded by ninput: the original code wrote slots up to 17
// unconditionally, which runs past the end of the buffer on the last batch
// entry whenever the client is configured with fewer than 18 inputs.
// iEvent and iSetup are not used.
void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override {
	auto ninput = client_.ninput();
	auto batchSize = client_.batchSize();
	iInput = Input(ninput*batchSize, 0.f);

	// end of the run of slots filled with 1, clamped to the real input width
	const unsigned onesEnd = ninput < 18 ? ninput : 18;

	//make some random channels
	for(unsigned ib = 0; ib < batchSize; ib++) {
		const auto base = ib*ninput;
		// store a value only when the slot exists for this configuration
		auto store = [&](unsigned slot, unsigned short value) {
			if(slot < ninput) iInput[base+slot] = value;
		};

		//Current Hcal setup takes 11 inputs but it is sent in chunks of 16 to the FPGA
		store(0, f_to_ui<16,6>(1));
		store(1, f_to_ui<16,6>(2));
		// rand() is always called twice per entry, in this order, so the random
		// sequence does not depend on ninput
		store(3, f_to_ui<16,6>(int(rand() % 30)-15));
		store(4, f_to_ui<16,6>(int(rand() % 36)-36));
		for(unsigned i0 = 5; i0 < onesEnd; i0++) iInput[base+i0] = f_to_ui<16,8>(1);
		for(unsigned i0 = 18; i0 < ninput; i0++) iInput[base+i0] = f_to_ui<16,8>(0);
	}
}
8245
void produce(edm::Event& iEvent, edm::EventSetup const& iSetup, Output const& iOutput) override {
@@ -119,34 +82,33 @@ class HcalProducerFPGA : public SonicEDProducer<Client>
11982

12083
private:
12184
using SonicEDProducer<Client>::client_;
122-
template<unsigned int B, unsigned int I>
123-
unsigned short f_to_ui(float f) {
124-
bool isPos = f > 0.;
125-
short tmpIs = int(fabs(f));
126-
unsigned short tmpI = tmpIs;
127-
if (not isPos) {
128-
//unsigned int comp = ((unsigned int)((1<<(sizeof(unsigned int)*8-I+1))-1)<<I);
129-
unsigned short comp = ((unsigned short)((1<<(sizeof(unsigned short)*4-I+1))-1)<<I);
130-
tmpI = -tmpIs;
131-
tmpI = tmpI-comp;
132-
}
133-
float tmpF = fabs(f) - float(tmpIs);
134-
unsigned short fracs = tmpF*float(1<<(B-I));
135-
unsigned short val = (tmpI << (B-I)) + fracs;
136-
return val;
137-
}
138-
template<unsigned int B, unsigned int I>
139-
float ui_to_f(const unsigned short ui) {
140-
unsigned short i = ui >> (B-I);
141-
unsigned short mask = (1 << (B-I))-1;
142-
unsigned short dec = ui & mask;
143-
float lDec = float(dec)/float(1 << (B-I));
144-
return float(i)+lDec;
145-
}
146-
uint32_t merge(unsigned short iA,unsigned short iB) {
147-
uint32_t result = (uint32_t) iA << 16 | iB;
148-
return result;
149-
}
85+
// Encodes a float as a two's-complement fixed-point word.
//
// Template parameters:
//   B - total number of bits of the fixed-point format
//   I - number of integer bits; the remaining B-I bits hold the fraction
// Parameter:
//   f - value to encode
// Returns:
//   the low 16 bits of trunc(|f| * 2^(B-I)), negated modulo 2^16 when f < 0.
//   Values outside the representable range wrap around; non-finite input
//   (NaN, +/-inf) encodes as 0.
//
// The previous implementation added the fractional bits to the already
// negated integer part, so a negative value with a fractional part came out
// wrong (-1.5 was encoded as -0.5). Scaling the magnitude first and negating
// the whole word fixes that while giving identical results for every
// non-negative and every integer-valued input.
template<unsigned int B, unsigned int I>
unsigned short f_to_ui(float f) {
	static_assert(I <= B, "integer bits cannot exceed the total width");
	if (!std::isfinite(f)) return 0;

	// magnitude expressed in units of 2^-(B-I), truncated toward zero
	const double scaled = std::trunc(std::ldexp(static_cast<double>(std::abs(f)), static_cast<int>(B - I)));
	// reduce modulo 2^16 before converting so the cast can never overflow
	const unsigned int magnitude = static_cast<unsigned int>(std::fmod(scaled, 65536.0));
	// unsigned negation is well-defined and yields the two's-complement pattern
	const unsigned int bits = (f < 0.f) ? (0u - magnitude) : magnitude;
	return static_cast<unsigned short>(bits & 0xFFFFu);
}
100+
// Decodes a fixed-point word with B total bits and I integer bits into a
// float. The word is read as an unsigned quantity: the bits above the
// fraction form the integer part and the low B-I bits form the fraction.
// No sign handling is performed.
template<unsigned int B, unsigned int I>
float ui_to_f(const unsigned short ui) {
	const unsigned short whole = ui >> (B-I);
	const unsigned short fracMask = (1 << (B-I))-1;
	const unsigned short fracBits = ui & fracMask;
	const float fraction = float(fracBits)/float(1 << (B-I));
	return float(whole)+fraction;
}
108+
// Packs two 16-bit words into one 32-bit word: iA occupies the upper 16 bits
// and iB the lower 16 bits.
uint32_t merge(unsigned short iA,unsigned short iB) {
	const uint32_t upper = static_cast<uint32_t>(iA) << 16;
	return upper | iB;
}
150112
unsigned topN_;
151113
};
152114

TensorRT/src/TRTClient.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ TRTClient<Client>::TRTClient(const edm::ParameterSet& params) :
3030

3131
template <typename Client>
3232
void TRTClient<Client>::setup() {
33-
if(!fSetup) {
33+
if(!fSetup) {
3434
auto err = nic::InferGrpcContext::Create(&context_, url_, modelName_, -1, false);
3535
if(!err.IsOk()) throw cms::Exception("BadGrpc") << "unable to create inference context: " << err;
3636

@@ -49,7 +49,7 @@ void TRTClient<Client>::setup() {
4949
const std::vector<std::shared_ptr<nic::InferContext::Input>>& nicinputs = context_->Inputs();
5050
nicinput_ = nicinputs[0];
5151
nicinput_->Reset();
52-
52+
5353
auto t2 = std::chrono::high_resolution_clock::now();
5454
std::vector<int64_t> input_shape;
5555
for(unsigned i0 = 0; i0 < batchSize_; i0++) {

TensorRT/src/TRTClientFPGA.cc

Lines changed: 24 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -22,51 +22,46 @@ TRTClientFPGA<Client>::TRTClientFPGA(const edm::ParameterSet& params) :
2222
ninput_(params.getParameter<unsigned>("ninput")),
2323
noutput_(params.getParameter<unsigned>("noutput"))
2424
{
25-
fSetup = false;
25+
fSetup = false;
2626
}
2727

2828
template <typename Client>
2929
void TRTClientFPGA<Client>::setup() {
30-
if(!fSetup) {
31-
auto err = nic::InferGrpcContext::Create(&context_, url_, modelName_, -1, false);
32-
if(!err.IsOk()) throw cms::Exception("BadGrpc") << "unable to create inference context: " << err;
33-
std::unique_ptr<nic::InferContext::Options> options;
34-
nic::InferContext::Options::Create(&options);
35-
36-
options->SetBatchSize(batchSize_);
37-
for (const auto& output : context_->Outputs()) {
38-
options->AddRawResult(output);
39-
}
40-
context_->SetRunOptions(*options);
41-
fSetup = true;
42-
}
43-
const std::vector<std::shared_ptr<nic::InferContext::Input>>& nicinputs = context_->Inputs();
44-
nicinput_ = nicinputs[0];
45-
nicinput_->Reset();
46-
47-
auto t2 = std::chrono::high_resolution_clock::now();
48-
for(unsigned i0 = 0; i0 < batchSize_; i0++) {
49-
nic::Error err1 = nicinput_->SetRaw(reinterpret_cast<const uint8_t*>(&(this->input_[i0*ninput_])), ninput_ * sizeof(unsigned short));
50-
//nic::Error err1 = nicinput_->SetRaw(reinterpret_cast<const uint8_t*>(this->input_.data()), ninput_ * sizeof(unsigned short));
51-
}
52-
//nic::Error err1 = nicinput_->SetRaw(reinterpret_cast<const uint8_t*>(this->input_.data()), batchSize_*ninput_ * sizeof(unsigned short));
53-
auto t3 = std::chrono::high_resolution_clock::now();
54-
edm::LogInfo("TRTClientFPGA") << "Image array time: " << std::chrono::duration_cast<std::chrono::microseconds>(t3-t2).count();
30+
if(!fSetup) {
31+
auto err = nic::InferGrpcContext::Create(&context_, url_, modelName_, -1, false);
32+
if(!err.IsOk()) throw cms::Exception("BadGrpc") << "unable to create inference context: " << err;
33+
std::unique_ptr<nic::InferContext::Options> options;
34+
nic::InferContext::Options::Create(&options);
35+
36+
options->SetBatchSize(batchSize_);
37+
for (const auto& output : context_->Outputs()) {
38+
options->AddRawResult(output);
39+
}
40+
context_->SetRunOptions(*options);
41+
fSetup = true;
42+
}
43+
const std::vector<std::shared_ptr<nic::InferContext::Input>>& nicinputs = context_->Inputs();
44+
nicinput_ = nicinputs[0];
45+
nicinput_->Reset();
46+
47+
auto t2 = std::chrono::high_resolution_clock::now();
48+
for(unsigned i0 = 0; i0 < batchSize_; i0++) {
49+
nic::Error err1 = nicinput_->SetRaw(reinterpret_cast<const uint8_t*>(&(this->input_[i0*ninput_])), ninput_ * sizeof(unsigned short));
50+
}
51+
auto t3 = std::chrono::high_resolution_clock::now();
52+
edm::LogInfo("TRTClientFPGA") << "Image array time: " << std::chrono::duration_cast<std::chrono::microseconds>(t3-t2).count();
5553
}
5654

5755
template <typename Client>
5856
void TRTClientFPGA<Client>::getResults(const std::unique_ptr<nic::InferContext::Result>& result) {
5957
auto t2 = std::chrono::high_resolution_clock::now();
6058
unsigned short tmp=0;
6159
this->output_.resize(noutput_*batchSize_,tmp);
62-
//for(unsigned i0 = 0; i0 < batchSize_; i0++) {
6360
const uint8_t* r0;
6461
size_t content_byte_size;
6562
result->GetRaw(0, &r0, &content_byte_size);
6663
const unsigned int *lVal = reinterpret_cast<const unsigned int*>(r0);
6764
memcpy(this->output_.data(),&lVal[0],content_byte_size*batchSize_);
68-
//for(unsigned i1 = 0; i1 < noutput_; i1++) this->output_[i0*noutput_+i1] = lVal[i1]; //This should be replaced with a memcpy
69-
//}
7065
auto t3 = std::chrono::high_resolution_clock::now();
7166
edm::LogInfo("TRTClientFPGA") << "Output time: " << std::chrono::duration_cast<std::chrono::microseconds>(t3-t2).count();
7267
}

0 commit comments

Comments
 (0)