#include <memory>
#include <iostream>
#include <string>
#include <type_traits>
#include "model.hpp"
#include "NvInfer.h"
#include "NvOnnxParser.h"
#include "utils.hpp"
#include "cuda_runtime.h"
using namespace std;
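
/*
 * TensorRT hands all of its messages to a user-supplied nvinfer1::ILogger;
 * both the builder and the runtime require one. This implementation prefixes
 * each severity with a color tag (the RED/BLUE/YELLOW/PURPLE/CLEAR macros are
 * presumably defined in utils.hpp) and prints messages at severity kINFO or
 * higher, i.e. everything except kVERBOSE.
 */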
class Logger : public nvinfer1::ILogger {
public:
    virtual void log(Severity severity, const char* msg) noexcept override {
        string str;
        switch (severity) {
            case Severity::kINTERNAL_ERROR: str = RED    "[fatal]: " CLEAR; break;
            case Severity::kERROR:          str = RED    "[error]: " CLEAR; break;
            case Severity::kWARNING:        str = BLUE   "[warn]: "  CLEAR; break;
            case Severity::kINFO:           str = YELLOW "[info]: "  CLEAR; break;
            case Severity::kVERBOSE:        str = PURPLE "[verb]: "  CLEAR; break;
        }
        if (severity <= Severity::kINFO)
            cout << str << string(msg) << endl;
    }
};
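
/*
 * Deleter for TensorRT interface objects. Since TensorRT 8, destroy() is
 * deprecated and plain `delete` is the supported way to free these objects;
 * centralizing it in a deleter documents ownership in one place.
 */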
struct InferDeleter {
    template <typename T>
    void operator()(T* obj) const {
        delete obj;
    }
};
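
/*
 * Alias so that make_unique<T>(rawPtr) wraps a raw pointer returned by a
 * TensorRT factory function in a unique_ptr that uses InferDeleter. Note this
 * is NOT std::make_unique: it adopts an existing pointer rather than
 * constructing a new object.
 */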
template <typename T>
using make_unique = std::unique_ptr<T, InferDeleter>;
Model::Model(string onnxPath) {
    if (!fileExists(onnxPath)) {
        LOGE("%s not found. Program terminated", onnxPath.c_str());
        exit(1);
    }
    mOnnxPath   = onnxPath;
    mEnginePath = getEnginePath(mOnnxPath);
}
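
/*
 * build() turns the ONNX file into a TensorRT engine and caches the serialized
 * result at mEnginePath, so the expensive optimization step only runs once per
 * model.
 */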
bool Model::build() {
    if (fileExists(mEnginePath)) {
        LOG("%s has been generated!", mEnginePath.c_str());
        return true;
    } else {
        LOG("%s not found. Building engine...", mEnginePath.c_str());
    }
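    /*
     * The builder chain: IBuilder creates the network definition and the build
     * config, and the ONNX parser populates the network from the .onnx file.
     * Wrapping them in unique_ptr releases everything when build() returns.
     */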
    Logger logger;
    auto builder = make_unique<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(logger));
    // createNetworkV2(1) sets the kEXPLICIT_BATCH bit; spell it out for clarity
    auto network = make_unique<nvinfer1::INetworkDefinition>(builder->createNetworkV2(
        1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH)));
    auto config  = make_unique<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
    auto parser  = make_unique<nvonnxparser::IParser>(nvonnxparser::createParser(*network, logger));
    config->setMaxWorkspaceSize(1 << 28);  // 256 MiB of scratch space for the builder
    if (!parser->parseFromFile(mOnnxPath.c_str(), 1)) {
        LOGE("ERROR: failed to parse %s", mOnnxPath.c_str());
        return false;
    }
    // buildSerializedNetwork() optimizes and serializes in one step, replacing
    // the deprecated buildEngineWithConfig() + serialize() pair.
    auto plan = make_unique<nvinfer1::IHostMemory>(builder->buildSerializedNetwork(*network, *config));
    if (!plan) {
        LOGE("ERROR: failed to build serialized engine for %s", mOnnxPath.c_str());
        return false;
    }
    auto runtime = make_unique<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(logger));
    // Cache the serialized engine so later runs can skip the build entirely
    auto f = fopen(mEnginePath.c_str(), "wb");
    fwrite(plan->data(), 1, plan->size(), f);
    fclose(f);
    mEngine = shared_ptr<nvinfer1::ICudaEngine>(
        runtime->deserializeCudaEngine(plan->data(), plan->size()), InferDeleter());
    mInputDims  = network->getInput(0)->getDimensions();
    mOutputDims = network->getOutput(0)->getDimensions();
    return true;
}
bool Model::infer() {
    /*
     * What infer() has to do:
     *   1. load the engine file => create runtime, engine, context
     *   2. copy the input host->device
     *   3. run inference through the context
     *   4. copy the output device->host
     */

    /* 1. Load the engine => create runtime, engine, context */
    if (!fileExists(mEnginePath)) {
        LOGE("ERROR: %s not found", mEnginePath.c_str());
        return false;
    }
    /* The serialized engine is read from disk into a vector of unsigned char */
    vector<unsigned char> modelData = loadFile(mEnginePath);

    Logger logger;
    auto runtime = make_unique<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(logger));
    auto engine  = make_unique<nvinfer1::ICudaEngine>(runtime->deserializeCudaEngine(modelData.data(), modelData.size()));
    auto context = make_unique<nvinfer1::IExecutionContext>(engine->createExecutionContext());
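    /*
     * Ownership note: context depends on engine, which depends on runtime. The
     * unique_ptrs above are destroyed in reverse declaration order at the end
     * of infer(), which is exactly the teardown order TensorRT expects.
     */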
    // Binding indices are ordered inputs first, then outputs:
    // [input[0], input[1], ..., output[0], output[1], ...]
    auto input_dims  = context->getBindingDimensions(0);
    auto output_dims = context->getBindingDimensions(1);
    LOG("input dim shape is: %s", printDims(input_dims).c_str());
    LOG("output dim shape is: %s", printDims(output_dims).c_str());
    /* 2. host->device transfer */
    cudaStream_t stream;
    cudaStreamCreate(&stream);
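    /*
     * Everything below (both copies and the inference itself) is issued on this
     * one stream, so it executes in order on the GPU; the host only blocks at
     * the final cudaStreamSynchronize().
     */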
    /* Data in host memory */
    float input_host[]{0.0193, 0.2616, 0.7713, 0.3785, 0.9980, 0.9008, 0.4766, 0.1663, 0.8045, 0.6552};
    float output_host[5];

    /* Data in device memory */
    float* input_device  = nullptr;
    float* output_device = nullptr;
    int input_size  = 10;
    int output_size = 5;
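    /*
     * The sizes are hard-coded to match the toy model (a 10-float input and a
     * 5-float output); a general implementation would compute them from
     * input_dims / output_dims instead.
     */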
    /* Allocate device memory and copy the input from host to device */
    cudaMalloc(&input_device, sizeof(input_host));
    cudaMalloc(&output_device, sizeof(output_host));
    cudaMemcpyAsync(input_device, input_host, sizeof(input_host), cudaMemcpyKind::cudaMemcpyHostToDevice, stream);
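    /*
     * enqueueV2() launches the whole network asynchronously on `stream`; it
     * takes the bindings as an array of raw device pointers, ordered by the
     * binding indices queried above.
     */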
    /* 3. Run inference, then synchronize at the end */
    float* bindings[] = {input_device, output_device};
    bool success = context->enqueueV2((void**)bindings, stream, nullptr);
    if (!success) {
        LOGE("ERROR: inference failed");
        return false;
    }
    /* 4. device->host transfer */
    cudaMemcpyAsync(output_host, output_device, sizeof(output_host), cudaMemcpyKind::cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);
LOG("input data is: %s", printTensor(input_host, input_size).c_str());
LOG("output data is: %s", printTensor(output_host, output_size).c_str());
LOG("finished inference");
return true;
}
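
/*
 * A minimal usage sketch, assuming a main.cpp elsewhere in the project and a
 * hypothetical model path:
 *
 *     Model model("models/sample.onnx");
 *     if (model.build())
 *         model.infer();
 */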