Skip to content

Commit 9bb355b

Browse files
C++ ORT (#3)
1 parent b4d0691 commit 9bb355b

File tree

16 files changed

+1385
-0
lines changed

16 files changed

+1385
-0
lines changed

.gitignore

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,11 @@
33
*.ckpt
44
*.onnx
55
*.tar.gz
6+
*.o
7+
*.a
8+
*.so
9+
*.out
10+
*.bak
11+
*.pb.*
12+
core.*
613
__pycache__

C++/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
client
2+
infer_server

C++/Makefile

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# Build for the brpc-based news-classification demo:
#   infer_server — brpc server wrapping the ONNX BERT model
#   client       — brpc client sending classification requests
CXX = g++
PROTOC = protoc
BRPC_PATH= ~/github/incubator-brpc/
HDRS+=$(BRPC_PATH)/output/include
LIBS+=$(BRPC_PATH)/output/lib

# Compile-only flags (language level, warnings-relevant codegen, include paths).
CXXFLAGS = -std=c++11 -pthread -O2 -fPIC -fno-omit-frame-pointer -I $(HDRS) -I ~/local/onnxruntime/include -I ~/local -I ~/local/include -I .
# Link inputs. Libraries must come AFTER the object files on the link line:
# GNU ld resolves symbols left to right, so the original placement of the
# -l flags inside CXXFLAGS (before $^) could produce undefined references.
LDFLAGS = -L $(LIBS) -L ~/local/onnxruntime/lib -L ~/local/lib
LDLIBS = -lprotobuf -lgflags -lbrpc -lonnxruntime -lutf8proc

BIN = infer_server client
PROTOS = $(wildcard *.proto)
PROTO_OBJS = $(PROTOS:.proto=.pb.o)
MODEL_OBJ = model.o
TOKEN_OBJ = tokenization.o

# ALL and clean produce no files named "ALL"/"clean"; mark them phony so a
# stray file with either name cannot break the build.
.PHONY: ALL clean

ALL: $(BIN)

client: client.cpp $(PROTO_OBJS)
	@echo "> Linking $@"
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) $(LDLIBS)

infer_server: server.cpp $(PROTO_OBJS) $(MODEL_OBJ) $(TOKEN_OBJ)
	@echo "> Linking $@"
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) $(LDLIBS)

# One protoc invocation emits both the .pb.cc and the .pb.h (grouped
# pattern-rule targets).
%.pb.cc %.pb.h: %.proto
	@echo "> Generating $@"
	$(PROTOC) --cpp_out=. --proto_path=. $<

%.o: %.cc
	@echo "> Compiling $@"
	$(CXX) -c $(CXXFLAGS) $< -o $@

%.o: %.cpp
	@echo "> Compiling $@"
	$(CXX) -c $(CXXFLAGS) $< -o $@

# Tokenizer/model sources live under util/ but their objects are built
# into the top-level directory.
%.o: util/%.cpp
	@echo "> Compiling $@"
	$(CXX) -c $(CXXFLAGS) $< -o $@

clean:
	rm -rf *.o *.pb.* $(BIN)

C++/README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# 依赖
2+
- [boost](https://www.boost.org/)
- [utf8proc](https://github.com/guodongxiaren/utf8proc)
3+
强烈不建议使用Github上面的boost项的Release(缺少submodule)
4+
5+
g++ token.cpp -std=c++11 -I ~/local/ -I ~/local/include -L ~/local/lib/ -lutf8proc
6+
export LD_LIBRARY_PATH=~/local/lib:$LD_LIBRARY_PATH
7+
8+
g++ ort_pred.cpp -I ~/local/onnxruntime/include --std=c++11 -L ~/local/onnxruntime/lib -lonnxruntime

C++/a.cpp

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
#include <iostream>
2+
#include <algorithm>
3+
#include <vector>
4+
#include <chrono>
5+
#include <string>
6+
#include <vector>
7+
#include <onnxruntime_cxx_api.h>
8+
#include "util/tokenization.h"
9+
10+
using namespace std;
11+
12+
// Category labels for the news classifier; index i corresponds to
// logit i of the model's output tensor.
const static std::vector<std::string> key = {
    "finance",
    "realty",
    "stocks",
    "education",
    "science",
    "society",
    "politics",
    "sports",
    "game",
    "entertainment"
};

// Returns the index of the largest element of v, or -1 if v is empty.
template <typename T>
int argmax(const std::vector<T>& v) {
    if (v.empty()) {
        return -1;
    }
    // max_element yields an iterator; the difference is a ptrdiff_t, so
    // narrow explicitly to int (label counts here are tiny).
    return static_cast<int>(std::max_element(v.begin(), v.end()) - v.begin());
}

// Iterator-range overload: index of the largest element in [a, b).
// Fix: the original returned 0 for an empty range, which is
// indistinguishable from "first element is the max"; return -1 instead,
// consistent with the vector overload.
template <typename T>
int argmax(T a, T b) {
    if (a == b) {
        return -1;
    }
    return static_cast<int>(std::max_element(a, b) - a);
}
36+
class Model {
37+
public:
38+
Model(const std::string& vocab_path) {
39+
tokenizer_ = new FullTokenizer(vocab_path);
40+
}
41+
42+
std::vector<std::vector<int64_t>> build_input(const std::string& text) {
43+
auto tokens = tokenizer_->tokenize(text);
44+
auto token_ids = tokenizer_->convertTokensToIds(tokens);
45+
46+
std::vector<std::vector<int64_t>> res;
47+
48+
std::vector<int64_t> input(32);
49+
std::vector<int64_t> mask(32);
50+
input[0] = 101;
51+
mask[0] = 1;
52+
for (int i = 0; i < token_ids.size() && i < 31; ++i) {
53+
input[i+1] = token_ids[i];
54+
mask[i+1] = token_ids[i] > 0;
55+
}
56+
res.push_back(std::move(input));
57+
res.push_back(std::move(mask));
58+
return res;
59+
}
60+
FullTokenizer* tokenizer_ = nullptr;
61+
};
62+
63+
// Standalone smoke test: tokenize one hard-coded Chinese headline, run it
// through the ONNX BERT classifier with the CUDA execution provider, and
// print the raw logits plus the predicted category label.
int main()
{
    const char* text = "李稻葵:过去2年抗疫为每人增寿10天";
    // NOTE(review): both paths below are hard-coded to the author's machine.
    const char* vocab_path = "/home/guodong/bert_pretrain/vocab.txt";
    Model model(vocab_path);
    // res[0] = token ids, res[1] = attention mask (both length 32).
    auto res = model.build_input(text);

    Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");
    Ort::SessionOptions session_options;

    // Default-constructed CUDA options (device 0); the aggregate-init
    // alternative is kept below for reference.
    OrtCUDAProviderOptions cuda_options; //= {
    //     0,
    //     //OrtCudnnConvAlgoSearch::EXHAUSTIVE,
    //     OrtCudnnConvAlgoSearchExhaustive,
    //     std::numeric_limits<size_t>::max(),
    //     0,
    //     true
    //     };

    session_options.AppendExecutionProvider_CUDA(cuda_options);
    const char* model_path = "/home/guodong/github/Bert-Chinese-Text-Classification-Pytorch/model.onnx";

    Ort::Session session(env, model_path, session_options);
    // print model input layer (node names, types, shape etc.)
    Ort::AllocatorWithDefaultOptions allocator;

    // Sanity-print the number of model inputs and outputs.
    size_t num_input_nodes = session.GetInputCount();
    std::cout<< num_input_nodes <<std::endl;
    std::cout<< session.GetOutputCount() <<std::endl;

    // Both tensors are shaped [batch=1, seq_len=32].
    std::vector<int64_t> input_node_dims = {1, 32};

    auto& input_tensor_values = res[0];
    auto& mask_tensor_values = res[1];

    //size_t input_tensor_size = 32;
    // Debug dump of the ids and mask actually fed to the model.
    for (auto i : input_tensor_values) {
        std::cout << i << "\t" ;
    }
    std::cout<<std::endl;
    for (auto i : mask_tensor_values) {
        std::cout << i << "\t" ;
    }
    std::cout<<std::endl;

    // Create input tensors as non-owning views over the vectors above, so
    // the vectors must outlive the Run() call (they do — same scope).
    auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);

    Ort::Value input_tensor = Ort::Value::CreateTensor<int64_t>(memory_info, input_tensor_values.data(),
        input_tensor_values.size(), input_node_dims.data(), 2);

    Ort::Value mask_tensor = Ort::Value::CreateTensor<int64_t>(memory_info, mask_tensor_values.data(),
        mask_tensor_values.size(), input_node_dims.data(), 2);

    std::vector<Ort::Value> ort_inputs;
    ort_inputs.push_back(std::move(input_tensor));
    ort_inputs.push_back(std::move(mask_tensor));

    // Node names must match those baked into the exported ONNX graph.
    std::vector<const char*> input_node_names = {"ids", "mask"};
    std::vector<const char*> output_node_names = {"output"};
    auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), ort_inputs.data(),
        ort_inputs.size(), output_node_names.data(), 1);

    // Raw logits; pointer is owned by output_tensors[0].
    float* floatarr = output_tensors[0].GetTensorMutableData<float>();

    // 10 logits — one per entry of `key`.
    for (int i=0; i<10; i++)
    {
        std::cout<<floatarr[i]<<std::endl;
    }
    std::cout<< key[argmax(floatarr, floatarr+10)] << std::endl;

    return 0;
}

C++/client.cpp

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
//
2+
#include <gflags/gflags.h>
3+
#include <butil/logging.h>
4+
#include <butil/time.h>
5+
#include <brpc/channel.h>
6+
#include "infer.pb.h"
7+
8+
// Command-line flags controlling the RPC client (parsed via gflags in main).
DEFINE_string(attachment, "", "Carry this along with requests");
DEFINE_string(protocol, "baidu_std", "Protocol type. Defined in src/brpc/options.proto");
DEFINE_string(connection_type, "", "Connection type. Available values: single, pooled, short");
DEFINE_string(server, "0.0.0.0:8000", "IP Address of server");
DEFINE_string(load_balancer, "", "The algorithm for load balancing");
DEFINE_int32(timeout_ms, 100, "RPC timeout in milliseconds");
DEFINE_int32(max_retry, 3, "Max retries(not including the first RPC)");
DEFINE_int32(interval_ms, 1000, "Milliseconds between consecutive requests");
16+
17+
// brpc client: repeatedly sends one hard-coded news title to the
// InferService and logs the predicted category, until asked to quit.
int main(int argc, char* argv[]) {
    // Parse gflags. We recommend you to use gflags as well.
    //GFLAGS_NS::ParseCommandLineFlags(&argc, &argv, true);
    gflags::ParseCommandLineFlags(&argc, &argv, true);

    // A Channel represents a communication line to a Server. Notice that
    // Channel is thread-safe and can be shared by all threads in your program.
    brpc::Channel channel;

    // Initialize the channel, NULL means using default options.
    brpc::ChannelOptions options;
    options.protocol = FLAGS_protocol;
    options.connection_type = FLAGS_connection_type;
    options.timeout_ms = FLAGS_timeout_ms/*milliseconds*/;
    options.max_retry = FLAGS_max_retry;
    if (channel.Init(FLAGS_server.c_str(), FLAGS_load_balancer.c_str(), &options) != 0) {
        LOG(ERROR) << "Fail to initialize channel";
        return -1;
    }

    // Normally, you should not call a Channel directly, but instead construct
    // a stub Service wrapping it. stub can be shared by all threads as well.
    guodongxiaren::InferService_Stub stub(&channel);

    // Send a request and wait for the response every 1 second
    // (interval configurable via --interval_ms).
    int log_id = 0;
    while (!brpc::IsAskedToQuit()) {
        // We will receive response synchronously, safe to put variables
        // on stack.
        guodongxiaren::NewsClassifyRequest request;
        guodongxiaren::NewsClassifyResponse response;
        brpc::Controller cntl;

        request.set_title("李稻葵:过去2年抗疫为每人增寿10天");

        cntl.set_log_id(log_id ++); // set by user

        // Set attachment which is wired to network directly instead of
        // being serialized into protobuf messages.
        cntl.request_attachment().append(FLAGS_attachment);

        // Because `done'(last parameter) is NULL, this function waits until
        // the response comes back or error occurs(including timedout).
        stub.NewsClassify(&cntl, &request, &response, NULL);
        if (!cntl.Failed()) {
            LOG(INFO) << "Received response from " << cntl.remote_side()
                << " to " << cntl.local_side()
                << ": " << response.result() << " (attached="
                << cntl.response_attachment() << ")"
                << " latency=" << cntl.latency_us() << "us";
        } else {
            LOG(WARNING) << cntl.ErrorText();
        }
        usleep(FLAGS_interval_ms * 1000L);
    }

    LOG(INFO) << "NewsClassifyClient is going to quit";
    return 0;
}

C++/infer.proto

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// RPC contract for the news-title classification service.
syntax="proto2";
package guodongxiaren;

// Generate abstract service base classes (required by brpc servers).
option cc_generic_services = true;

message NewsClassifyRequest {
    // News headline to classify.
    required string title = 1;
};

message NewsClassifyResponse {
    // Predicted category label (e.g. "sports", "finance").
    required string result = 1;
};

service InferService {
    rpc NewsClassify(NewsClassifyRequest) returns (NewsClassifyResponse);
};

C++/ort_pred.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#include <iostream>
2+
#include <algorithm>
3+
#include <vector>
4+
#include <chrono>
5+
#include <string>
6+
#include <vector>
7+
#include "util/model.h"
8+
9+
using namespace std;
10+
11+
// Interactive CLI: reads one news title per line from stdin, classifies it
// with the ONNX BERT model wrapped by util/model.h, and prints the predicted
// label together with the per-request latency.
int main() {
    // NOTE(review): paths are hard-coded to the author's machine.
    const char* vocab_path = "/home/guodong/bert_pretrain/vocab.txt";
    const char* model_path = "/home/guodong/github/Bert-Chinese-Text-Classification-Pytorch/model.onnx";

    Model model(model_path, vocab_path);

    //const char* text = "李稻葵:过去2年抗疫为每人增寿10天";
    //int idx = model.predict(text);

    std::string line;
    while (std::getline(std::cin, line)) {
        // gettimeofday_us: presumably a microsecond wall-clock helper
        // declared in util/model.h — TODO confirm.
        auto a = gettimeofday_us();
        std::string r = model.predict(line);
        auto b = gettimeofday_us();
        std::cout << line << " is " << r << " cost:" << (b-a) <<" us" <<std::endl;
    }

    return 0;
}

0 commit comments

Comments
 (0)