Skip to content

Commit 97ae4c4

Browse files
authored
Merge pull request #7 from hls-fpga-machine-learning/trt_fpga_rebase
Trt fpga rebase
2 parents ff1b4cf + 6f5d62e commit 97ae4c4

File tree

6 files changed

+415
-9
lines changed

6 files changed

+415
-9
lines changed

TensorRT/interface/TRTClient.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ class TRTClient : public Client {
5555

5656
protected:
5757
void predictImpl() override;
58-
58+
bool fSetup;
5959
//helper for common ops
6060
void setup();
6161

TensorRT/interface/TRTClientFPGA.h

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#ifndef SonicCMS_TensorRT_TRTClientFPGA
2+
#define SonicCMS_TensorRT_TRTClientFPGA
3+
4+
#include "FWCore/ParameterSet/interface/ParameterSet.h"
5+
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
6+
#include "SonicCMS/Core/interface/SonicClientSync.h"
7+
#include "SonicCMS/Core/interface/SonicClientPseudoAsync.h"
8+
#include "SonicCMS/Core/interface/SonicClientAsync.h"
9+
10+
#include <vector>
11+
#include <string>
12+
13+
#include "request_grpc.h"
14+
15+
namespace nic = nvidia::inferenceserver::client;
16+
17+
template <typename Client>
18+
class TRTClientFPGA : public Client {
19+
public:
20+
//constructor
21+
TRTClientFPGA(const edm::ParameterSet& params);
22+
23+
//helper
24+
void getResults(const std::unique_ptr<nic::InferContext::Result>& result);
25+
26+
//accessors
27+
unsigned ninput() const { return ninput_; }
28+
unsigned noutput() const { return noutput_; }
29+
unsigned batchSize() const { return batchSize_; }
30+
31+
//for fillDescriptions
32+
static void fillPSetDescription(edm::ParameterSetDescription& iDesc) {
33+
edm::ParameterSetDescription descClient;
34+
descClient.add<unsigned>("ninput");
35+
descClient.add<unsigned>("noutput");
36+
descClient.add<unsigned>("batchSize");
37+
descClient.add<std::string>("address");
38+
descClient.add<unsigned>("port");
39+
descClient.add<unsigned>("timeout");
40+
descClient.add<std::string>("modelName");
41+
iDesc.add<edm::ParameterSetDescription>("Client",descClient);
42+
}
43+
44+
protected:
45+
void predictImpl() override;
46+
bool fSetup;
47+
//helper for common ops
48+
void setup();
49+
50+
//members
51+
std::string url_;
52+
unsigned timeout_;
53+
std::string modelName_;
54+
unsigned batchSize_;
55+
unsigned ninput_;
56+
unsigned noutput_;
57+
std::unique_ptr<nic::InferContext> context_;
58+
std::shared_ptr<nic::InferContext::Input> nicinput_;
59+
};
60+
//concrete client flavours, each instantiated with std::vector<unsigned short>
using TRTClientFPGASync = TRTClientFPGA<SonicClientSync<std::vector<unsigned short>>>;
using TRTClientFPGAPseudoAsync = TRTClientFPGA<SonicClientPseudoAsync<std::vector<unsigned short>>>;
using TRTClientFPGAAsync = TRTClientFPGA<SonicClientAsync<std::vector<unsigned short>>>;
63+
64+
#endif
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
#include <vector>
2+
#include <map>
3+
#include <sstream>
4+
#include <string>
5+
#include <fstream>
6+
#include <cmath>
7+
8+
#include "SonicCMS/Core/interface/SonicEDProducer.h"
9+
#include "SonicCMS/TensorRT/interface/TRTClientFPGA.h"
10+
#include "FWCore/Framework/interface/Event.h"
11+
#include "FWCore/ParameterSet/interface/ParameterSet.h"
12+
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
13+
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
14+
#include "FWCore/Framework/interface/EventSetup.h"
15+
#include "FWCore/Framework/interface/Frameworkfwd.h"
16+
#include "FWCore/Framework/interface/MakerMacros.h"
17+
#include "FWCore/MessageLogger/interface/MessageLogger.h"
18+
19+
template <typename Client>
20+
class HcalProducerFPGA : public SonicEDProducer<Client>
21+
{
22+
public:
23+
//needed because base class has dependent scope
24+
using typename SonicEDProducer<Client>::Input;
25+
using typename SonicEDProducer<Client>::Output;
26+
//Forwards the configuration to the Sonic base class and reads the "topN" parameter.
explicit HcalProducerFPGA(edm::ParameterSet const& cfg)
  : SonicEDProducer<Client>(cfg),
    topN_(cfg.getParameter<unsigned>("topN"))
{
  //for debugging
  this->setDebugName("HcalProducerFPGA");
}
30+
//Builds the request payload: batchSize records of ninput fixed-point words each.
//The content is synthetic (constants plus two rand() draws per record); the event is not read.
void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override {
  const auto ninput = client_.ninput();
  const auto batchSize = client_.batchSize();
  iInput = Input(ninput*batchSize, 0.f);
  //the fixed layout below touches slots 0..17 of a record; never write past a shorter one
  const unsigned lastOne = ninput < 18u ? ninput : 18u;
  //make some random channels
  for(unsigned ib = 0; ib < batchSize; ib++) {
    const auto base = ib*ninput;
    //Current Hcal setup takes 11 inputs but it is sent in chunks of 16 to the FPGA
    if(ninput > 0u) iInput[base+0] = f_to_ui<16,6>(1);
    if(ninput > 1u) iInput[base+1] = f_to_ui<16,6>(2);
    //both values are drawn unconditionally so the rand() sequence does not depend on ninput
    const int rnd3 = int(rand() % 30)-15;
    //NOTE(review): this yields [-36,-1], but a 6-bit integer field only holds [-32,31],
    //so the lowest values wrap - confirm the intended range
    const int rnd4 = int(rand() % 36)-36;
    if(ninput > 3u) iInput[base+3] = f_to_ui<16,6>(rnd3);
    if(ninput > 4u) iInput[base+4] = f_to_ui<16,6>(rnd4);
    for(unsigned i0 = 5; i0 < lastOne; i0++) iInput[base+i0] = f_to_ui<16,8>(1);
    for(unsigned i0 = 18; i0 < ninput; i0++) iInput[base+i0] = f_to_ui<16,8>(0);
  }
}
45+
//Nothing is put into the event here; the server output is only passed to findTopN().
void produce(edm::Event& iEvent, edm::EventSetup const& iSetup, Output const& iOutput) override {
  //check the results
  findTopN(iOutput);
}
49+
//no resources of its own to release
~HcalProducerFPGA() override = default;
50+
//to ensure distinct cfi names - specialized below
51+
static std::string getCfiName();
52+
//Registers the module description under the name returned by getCfiName():
//the client's parameters plus this producer's own "topN" (default 5).
static void fillDescriptions(edm::ConfigurationDescriptions & descriptions) {
  edm::ParameterSetDescription producerDesc;
  Client::fillPSetDescription(producerDesc);
  producerDesc.add<unsigned>("topN",5);
  descriptions.add(getCfiName(),producerDesc);
}
58+
//Just putting something in for the hell of it
59+
void findTopN(const Output& scores) const {
60+
auto dim = client_.noutput();
61+
//int batchsize = client_.batchSize();
62+
for(unsigned i0 = 0; i0 < client_.batchSize(); i0++) {
63+
//match score to type by index, then put in largest-first map
64+
std::map<float,std::string,std::greater<float>> score_map;
65+
for(unsigned i = 0; i < (unsigned)dim; ++i){
66+
std::stringstream pSS; pSS << "Dummy Channel " << i;
67+
score_map.emplace(scores[i0*dim+i],pSS.str());
68+
}
69+
//get top n
70+
std::stringstream msg;
71+
msg << "Scores:\n";
72+
unsigned counter = 0;
73+
for(const auto& item: score_map){
74+
msg << item.second << " : " << item.first << "\n";
75+
++counter;
76+
if(counter>=topN_) break;
77+
}
78+
edm::LogInfo("HcalProducerFPGA") << msg.str();
79+
}
80+
}
81+
82+
83+
private:
84+
using SonicEDProducer<Client>::client_;
85+
//Encodes f as a fixed-point word with (B-I) fractional bits.
//The value is scaled by 2^(B-I), rounded toward negative infinity and reduced modulo 2^16,
//so negative inputs come out in 16-bit two's complement.
//B: total width in bits (at most 16), I: bits in front of the binary point.
//Returns the encoded word; values outside the representable range wrap around.
template<unsigned int B, unsigned int I>
unsigned short f_to_ui(float f) {
  static_assert(I <= B && B <= 16, "f_to_ui packs into an unsigned short: need I <= B <= 16");
  //multiplying by a power of two is exact in floating point
  const float scaled = f*float(1u << (B-I));
  //floor, not truncation: the fractional part of a negative number must lower the result
  const long long quantized = static_cast<long long>(std::floor(scaled));
  //unsigned narrowing is modulo 2^16, which is exactly the two's complement bit pattern
  return static_cast<unsigned short>(quantized);
}
100+
//Decodes a fixed-point word with (B-I) fractional bits into a float.
//The word is read as an unsigned quantity: bits above the binary point become the
//integer part and the low (B-I) bits the fraction.
//NOTE(review): no sign handling, so this does not invert negative f_to_ui results - confirm that is intended
template<unsigned int B, unsigned int I>
float ui_to_f(const unsigned short ui) {
  const unsigned int nFrac = B-I;
  const unsigned short fracMask = (1 << nFrac)-1;
  const unsigned short whole = ui >> nFrac;
  const unsigned short frac = ui & fracMask;
  const float fracValue = float(frac)/float(1 << nFrac);
  return float(whole)+fracValue;
}
108+
//Packs two 16-bit words into one 32-bit word: iA in the upper half, iB in the lower half.
uint32_t merge(unsigned short iA,unsigned short iB) {
  const uint32_t upper = static_cast<uint32_t>(iA) << 16;
  return upper | iB;
}
112+
unsigned topN_;
113+
};
114+
115+
//one producer type per client flavour
using HcalProducerFPGASync = HcalProducerFPGA<TRTClientFPGASync>;
using HcalProducerFPGAAsync = HcalProducerFPGA<TRTClientFPGAAsync>;
using HcalProducerFPGAPseudoAsync = HcalProducerFPGA<TRTClientFPGAPseudoAsync>;
118+
119+
template<> std::string HcalProducerFPGASync::getCfiName() { return "HcalProducerFPGASync"; }
120+
template<> std::string HcalProducerFPGAAsync::getCfiName() { return "HcalProducerFPGAAsync"; }
121+
template<> std::string HcalProducerFPGAPseudoAsync::getCfiName() { return "HcalProducerFPGAPseudoAsync"; }
122+
123+
DEFINE_FWK_MODULE(HcalProducerFPGASync);
124+
DEFINE_FWK_MODULE(HcalProducerFPGAAsync);
125+
DEFINE_FWK_MODULE(HcalProducerFPGAPseudoAsync);
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
from FWCore.ParameterSet.VarParsing import VarParsing
import FWCore.ParameterSet.Config as cms
import os, sys, json

# Command-line options (on top of the standard "analysis" set).
options = VarParsing("analysis")
# alternative server addresses, kept for reference:
#options.register("address", "ailab01.fnal.gov", VarParsing.multiplicity.singleton, VarParsing.varType.string)
#options.register("address", "prp-gpu-1.t2.ucsd.edu", VarParsing.multiplicity.singleton, VarParsing.varType.string)
#options.register("address", "18.4.112.82", VarParsing.multiplicity.singleton, VarParsing.varType.string)
#options.register("address", "34.221.148.178", VarParsing.multiplicity.singleton, VarParsing.varType.string)
options.register("address", "ailab01.fnal.gov", VarParsing.multiplicity.singleton, VarParsing.varType.string)
options.register("port", 5001, VarParsing.multiplicity.singleton, VarParsing.varType.int)
options.register("timeout", 30, VarParsing.multiplicity.singleton, VarParsing.varType.int)
options.register("params", "", VarParsing.multiplicity.singleton, VarParsing.varType.string)
options.register("threads", 1, VarParsing.multiplicity.singleton, VarParsing.varType.int)
options.register("streams", 1, VarParsing.multiplicity.singleton, VarParsing.varType.int)
options.register("batchsize", 16000, VarParsing.multiplicity.singleton, VarParsing.varType.int)
options.register("modelname","facile", VarParsing.multiplicity.singleton, VarParsing.varType.string)
options.register("mode", "Async", VarParsing.multiplicity.singleton, VarParsing.varType.string)
options.parseArguments()

# An optional JSON file overrides the server address and port.
if len(options.params)>0:
    with open(options.params,'r') as pfile:
        pdict = json.load(pfile)
    options.address = pdict["address"]
    options.port = int(pdict["port"])
    print("server = "+options.address+":"+str(options.port))

# check mode
# The values must match the module names registered by the plugin
# (HcalProducerFPGASync / HcalProducerFPGAAsync / HcalProducerFPGAPseudoAsync).
allowed_modes = {
    "Async": "HcalProducerFPGAAsync",
    "Sync": "HcalProducerFPGASync",
    "PseudoAsync": "HcalProducerFPGAPseudoAsync",
}
if options.mode not in allowed_modes:
    raise ValueError("Unknown mode: "+options.mode)

process = cms.Process('imageTest')

#--------------------------------------------------------------------------------
# Import of standard configurations
#================================================================================
process.load('FWCore/MessageService/MessageLogger_cfi')
process.load('Configuration/StandardSequences/GeometryDB_cff')
process.load('Configuration/StandardSequences/MagneticField_38T_cff')

#process.load("Configuration.StandardSequences.FrontierConditions_GlobalTag_cff")
#process.GlobalTag.globaltag = cms.string('100X_upgrade2018_realistic_v10')

process.maxEvents = cms.untracked.PSet( input = cms.untracked.int32(options.maxEvents) )
process.source = cms.Source("PoolSource",
    fileNames = cms.untracked.vstring('file:../../Core/data/store_mc_RunIISpring18MiniAOD_BulkGravTohhTohbbhbb_narrow_M-2000_13TeV-madgraph_MINIAODSIM_100X_upgrade2018_realistic_v10-v1_30000_24A0230C-B530-E811-ADE3-14187741120B.root')
)

if len(options.inputFiles)>0: process.source.fileNames = options.inputFiles

################### EDProducer ##############################
process.HcalProducer = cms.EDProducer(allowed_modes[options.mode],
    topN = cms.uint32(5),
    Client = cms.PSet(
        ninput = cms.uint32(32),
        noutput = cms.uint32(1),
        batchSize = cms.uint32(options.batchsize),
        address = cms.string(options.address),
        port = cms.uint32(options.port),
        timeout = cms.uint32(options.timeout),
        modelName = cms.string(options.modelname),
    )
)

# Let it run
process.p = cms.Path(
    process.HcalProducer
)

process.MessageLogger.cerr.FwkReport.reportEvery = 1
# message categories to let through on cerr
keep_msgs = ['TRTClient']#,'HcalProducerFPGA']
for msg in keep_msgs:
    process.MessageLogger.categories.append(msg)
    setattr(process.MessageLogger.cerr,msg,
        cms.untracked.PSet(
            optionalPSet = cms.untracked.bool(True),
            limit = cms.untracked.int32(10000000),
        )
    )

if options.threads>0:
    if not hasattr(process,"options"):
        process.options = cms.untracked.PSet()
    process.options.numberOfThreads = cms.untracked.uint32(options.threads)
    process.options.numberOfStreams = cms.untracked.uint32(options.streams if options.streams>0 else 0)

TensorRT/src/TRTClient.cc

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,21 +25,26 @@ TRTClient<Client>::TRTClient(const edm::ParameterSet& params) :
2525
ninput_(params.getParameter<unsigned>("ninput")),
2626
noutput_(params.getParameter<unsigned>("noutput"))
2727
{
28+
fSetup = false;
2829
}
2930

3031
template <typename Client>
3132
void TRTClient<Client>::setup() {
32-
auto err = nic::InferGrpcContext::Create(&context_, url_, modelName_, -1, false);
33-
if (!err.IsOk()) throw cms::Exception("BadGrpc") << "unable to create inference context: " << err;
33+
if(!fSetup) {
34+
auto err = nic::InferGrpcContext::Create(&context_, url_, modelName_, -1, false);
35+
if(!err.IsOk()) throw cms::Exception("BadGrpc") << "unable to create inference context: " << err;
3436

35-
std::unique_ptr<nic::InferContext::Options> options;
36-
nic::InferContext::Options::Create(&options);
37+
std::unique_ptr<nic::InferContext::Options> options;
38+
nic::InferContext::Options::Create(&options);
3739

38-
options->SetBatchSize(batchSize_);
39-
for (const auto& output : context_->Outputs()) {
40-
options->AddRawResult(output);
40+
options->SetBatchSize(batchSize_);
41+
for (const auto& output : context_->Outputs()) {
42+
options->AddRawResult(output);
43+
}
44+
context_->SetRunOptions(*options);
45+
46+
fSetup = true;
4147
}
42-
context_->SetRunOptions(*options);
4348

4449
const std::vector<std::shared_ptr<nic::InferContext::Input>>& nicinputs = context_->Inputs();
4550
nicinput_ = nicinputs[0];

0 commit comments

Comments
 (0)