> 文章列表 > 3、picodet c++版onnxruntime推理及reshape和transpose的c++实现

3、picodet c++版onnxruntime推理及reshape和transpose的c++实现

3、picodet c++版onnxruntime推理及reshape和transpose的c++实现

文章目录

  • 1、完整onnx c++推理
  • 2、裁剪后模型的推理
    • 2.1 分类reshape和transpose用python模拟c++
    • 2.2 回归的reshape和transpose的python模拟
  • 3、softmax改进

1、完整onnx c++推理

这里指的完整是指在用paddle export.py benchmark=True时的导出的模型,模型没有post和nms,推理方法可以直接参考:https://github.com/hpc203/picodet-onnxruntime

我做了一点小修改,代码如下:

#define _CRT_SECURE_NO_WARNINGS
#include <algorithm>
#include <array>
#include <cmath>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
#include <math.h>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
//#include <cuda_provider_factory.h>
#include <onnxruntime_cxx_api.h>

//using namespace cv;
//using namespace std;
//using namespace Ort;

/// One detection: corner coordinates (in network-input pixels until detect()
/// rescales them), the winning class score and the winning class index.
typedef struct BoxInfo
{
    float x1;
    float y1;
    float x2;
    float y2;
    float score;
    int label;
} BoxInfo;

/// PicoDet detector running on ONNX Runtime.
///
/// The model is expected to be exported without post-processing/NMS: the first
/// half of its outputs are per-level class scores and the second half are the
/// matching per-level box distributions (this is how the constructor and
/// detect() index the outputs).
class PicoDet
{
public:
    /// @param model_path     path to the .onnx file
    /// @param classesFile    text file with one class name per line
    /// @param nms_threshold  IoU at or above which a lower-scored box is suppressed
    /// @param objThreshold   minimum class score for a grid cell to produce a box
    PicoDet(std::string model_path, std::string classesFile, float nms_threshold, float objThreshold);
    ~PicoDet();
    // The object owns ORT-allocated name strings, so copying would double-free them.
    PicoDet(const PicoDet&) = delete;
    PicoDet& operator=(const PicoDet&) = delete;

    /// Runs the model on cv_image and draws the surviving boxes and labels onto it.
    void detect(cv::Mat& cv_image);

private:
    float score_threshold = 0.5;
    float nms_threshold = 0.5;
    std::vector<std::string> class_names;
    int num_class;

    cv::Mat resize_image(cv::Mat srcimg, int* newh, int* neww, int* top, int* left);
    std::vector<float> input_image_;
    void normalize_(cv::Mat img);
    void softmax_(const float* x, float* y, int length);
    void generate_proposal(std::vector<BoxInfo>& generate_boxes, const int stride_, const float* out_score, const float* out_box);
    void nms(std::vector<BoxInfo>& input_boxes);

    const bool keep_ratio = false;
    int inpWidth;
    int inpHeight;
    int num_outs;
    int reg_max;
    std::vector<int> stride;
    //const float mean[3] = { 103.53, 116.28, 123.675 };
    //const float stds[3] = { 57.375, 57.12, 58.395 };
    const float mean[3] = { 0.0, 0.0, 0.0 };
    const float stds[3] = { 255.0, 255.0, 255.0 };

    // Declaration order matters: the session is declared after env so that it
    // is destroyed before env.
    Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "picodet");
    std::unique_ptr<Ort::Session> ort_session;
    Ort::SessionOptions sessionOptions = Ort::SessionOptions();
    std::vector<char*> input_names;   // allocated by ORT, released in ~PicoDet()
    std::vector<char*> output_names;  // allocated by ORT, released in ~PicoDet()
    std::vector<std::vector<int64_t>> input_node_dims;  // >=1 inputs
    std::vector<std::vector<int64_t>> output_node_dims; // >=1 outputs
};

PicoDet::PicoDet(std::string model_path, std::string classesFile, float nms_threshold, float objThreshold)
{
    std::ifstream ifs(classesFile.c_str());
    if (!ifs.is_open())
    {
        std::cerr << "warning: cannot open class names file: " << classesFile << '\n';
    }
    std::string line;
    while (std::getline(ifs, line)) this->class_names.push_back(line);
    this->num_class = static_cast<int>(class_names.size());
    this->nms_threshold = nms_threshold;
    this->score_threshold = objThreshold;

    //OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0);
    sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
#ifdef _WIN32
    // ORTCHAR_T is wchar_t on Windows, so the path has to be widened there.
    const std::wstring widestr(model_path.begin(), model_path.end());
    ort_session = std::make_unique<Ort::Session>(env, widestr.c_str(), sessionOptions);
#else
    ort_session = std::make_unique<Ort::Session>(env, model_path.c_str(), sessionOptions);
#endif

    const size_t numInputNodes = ort_session->GetInputCount();
    const size_t numOutputNodes = ort_session->GetOutputCount();
    Ort::AllocatorWithDefaultOptions allocator;
    for (size_t i = 0; i < numInputNodes; i++)
    {
        input_names.push_back(ort_session->GetInputName(i, allocator));
        Ort::TypeInfo input_type_info = ort_session->GetInputTypeInfo(i);
        auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
        input_node_dims.push_back(input_tensor_info.GetShape());
    }
    for (size_t i = 0; i < numOutputNodes; i++)
    {
        output_names.push_back(ort_session->GetOutputName(i, allocator));
        Ort::TypeInfo output_type_info = ort_session->GetOutputTypeInfo(i);
        auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
        auto output_dims = output_tensor_info.GetShape();
        output_node_dims.push_back(output_dims);
        /*for (size_t j = 0; j < output_dims.size(); j++)
        {
            std::cout << output_dims[j] << ",";
        }
        std::cout << std::endl;*/
    }

    // The code below indexes input dims [2]/[3] and the first box output, so
    // fail with a clear message instead of reading out of bounds.
    if (input_node_dims.empty() || input_node_dims[0].size() < 4)
    {
        throw std::runtime_error("PicoDet: model input is expected to have 4 dimensions");
    }
    if (numOutputNodes < 2)
    {
        throw std::runtime_error("PicoDet: model is expected to have score and box outputs");
    }
    this->inpHeight = static_cast<int>(input_node_dims[0][2]);
    this->inpWidth = static_cast<int>(input_node_dims[0][3]);
    if (this->inpHeight <= 0 || this->inpWidth <= 0)
    {
        throw std::runtime_error("PicoDet: model input height/width must be fixed and positive");
    }

    // First half of the outputs: class scores; second half: box distributions.
    this->num_outs = static_cast<int>(numOutputNodes / 2);
    const std::vector<int64_t>& box_dims = output_node_dims[this->num_outs];
    if (box_dims.empty())
    {
        throw std::runtime_error("PicoDet: box output has no shape information");
    }
    // Last box dimension is 4 * (reg_max + 1).
    this->reg_max = static_cast<int>(box_dims.back() / 4 - 1);
    for (int i = 0; i < this->num_outs; i++)
    {
        stride.push_back(8 << i); // 8, 16, 32, 64, ...
    }
}

PicoDet::~PicoDet()
{
    // Strings returned by GetInputName/GetOutputName belong to the caller and
    // must be released through the allocator that produced them.
    Ort::AllocatorWithDefaultOptions allocator;
    for (char* name : input_names) allocator.Free(name);
    for (char* name : output_names) allocator.Free(name);
}

/// Resizes srcimg to the network input size.
/// When keep_ratio is set and the image is not square, the aspect ratio is kept
/// and the shorter side is padded with zeros; *top / *left receive the padding
/// offset. They are left untouched otherwise, so the caller must pre-initialise them.
/// *newh / *neww always receive the size of the resized (unpadded) image.
cv::Mat PicoDet::resize_image(cv::Mat srcimg, int* newh, int* neww, int* top, int* left)
{
    const int srch = srcimg.rows;
    const int srcw = srcimg.cols;
    *newh = this->inpHeight;
    *neww = this->inpWidth;
    cv::Mat dstimg;
    // NOTE: the interpolation flag is the 6th argument of cv::resize. Passing
    // it 4th (as fx) silently leaves the default INTER_LINEAR in effect.
    if (this->keep_ratio && srch != srcw)
    {
        const float hw_scale = static_cast<float>(srch) / srcw;
        if (hw_scale > 1)
        {
            *newh = this->inpHeight;
            *neww = static_cast<int>(this->inpWidth / hw_scale);
            cv::resize(srcimg, dstimg, cv::Size(*neww, *newh), 0, 0, cv::INTER_AREA);
            *left = static_cast<int>((this->inpWidth - *neww) * 0.5);
            cv::copyMakeBorder(dstimg, dstimg, 0, 0, *left, this->inpWidth - *neww - *left, cv::BORDER_CONSTANT, 0);
        }
        else
        {
            *newh = static_cast<int>(this->inpHeight * hw_scale);
            *neww = this->inpWidth;
            cv::resize(srcimg, dstimg, cv::Size(*neww, *newh), 0, 0, cv::INTER_AREA);
            *top = static_cast<int>((this->inpHeight - *newh) * 0.5);
            cv::copyMakeBorder(dstimg, dstimg, *top, this->inpHeight - *newh - *top, 0, 0, cv::BORDER_CONSTANT, 0);
        }
    }
    else
    {
        cv::resize(srcimg, dstimg, cv::Size(*neww, *newh), 0, 0, cv::INTER_AREA);
    }
    return dstimg;
}

/// Converts an 8-bit, 3-channel interleaved image into the planar (channel,
/// row, column) float buffer input_image_, applying (pix - mean) / std per
/// channel. Channel order is left as stored in img.
void PicoDet::normalize_(cv::Mat img)
{
    // The pixel access below reads uchar triplets; reject anything else
    // instead of reading out of bounds.
    CV_Assert(img.type() == CV_8UC3);
    //    img.convertTo(img, CV_32F);
    const int row = img.rows;
    const int col = img.cols;
    this->input_image_.resize(row * col * img.channels());
    for (int c = 0; c < 3; c++)
    {
        for (int i = 0; i < row; i++)
        {
            const uchar* src_row = img.ptr<uchar>(i);
            for (int j = 0; j < col; j++)
            {
                const float pix = src_row[j * 3 + c];
                this->input_image_[c * row * col + i * col + j] = (pix / 255.0 - mean[c] / 255.0) / (stds[c] / 255.0);
                //this->input_image_[c * row * col + i * col + j] = (pix - mean[c]) / stds[c];
            }
        }
    }
}

/// Softmax of x[0..length) written to y[0..length).
/// The maximum is subtracted first so that exp() cannot overflow; the result
/// is mathematically unchanged.
void PicoDet::softmax_(const float* x, float* y, int length)
{
    if (length <= 0) return;
    const float max_val = *std::max_element(x, x + length);
    float sum = 0;
    for (int i = 0; i < length; i++)
    {
        y[i] = std::exp(x[i] - max_val);
        sum += y[i];
    }
    for (int i = 0; i < length; i++)
    {
        y[i] /= sum;
    }
}

/// Decodes one feature level into boxes appended to generate_boxes.
/// out_score is read as [grid_y][grid_x][num_class] and out_box as
/// [grid_y][grid_x][4][reg_max + 1] (the layout of the un-pruned model).
void PicoDet::generate_proposal(std::vector<BoxInfo>& generate_boxes, const int stride_, const float* out_score, const float* out_box)
{
    const int num_grid_y = (int)std::ceil((float)this->inpHeight / stride_);
    const int num_grid_x = (int)std::ceil((float)this->inpWidth / stride_);
    std::cout << "num_grid_x=" << num_grid_x << ",num_grid_y=" << num_grid_y << std::endl;
    const int reg_1max = reg_max + 1;
    // Scratch buffer for one softmax-ed distribution, reused for every box.
    std::vector<float> dist(reg_1max > 0 ? reg_1max : 0);
    //std::cout << "score:" << std::endl;
    for (int i = 0; i < num_grid_y; i++)
    {
        for (int j = 0; j < num_grid_x; j++)
        {
            int max_ind = 0;
            float max_score = 0;
            for (int k = 0; k < num_class; k++)
            {
                /* Layout of the original (un-pruned) model output. */
                float score = out_score[i * num_grid_x * num_class + j * num_class + k];
                /* For the model with reshape/transpose pruned away, use the
                   line below instead (pick exactly one of the two): index
                   (i, j, k) of the transposed tensor maps to (k, i, j). */
                //float score = std::sqrt(out_score[k*num_grid_y*num_grid_x+i*num_grid_x+j]);
                //std::cout <<score << " ";
                if (score > max_score)
                {
                    max_score = score;
                    max_ind = k;
                }
            }
            if (max_score >= score_threshold)
            {
                std::cout << "box:" << std::endl;
                //const float* pbox = out_box + idx * reg_1max * 4;
                float dis_pred[4];
                for (int k = 0; k < 4; k++)
                {
                    /* Original model: the reg_1max bins of side k are contiguous. */
                    const float* tmp = out_box + i * num_grid_x * reg_1max * 4 + j * reg_1max * 4 + k * reg_1max;
                    //std::cout << "r:" << *tmp << std::endl;
                    /* Pruned model (no reshape/transpose): gather the bins with
                       a stride of num_grid_y * num_grid_x instead. */
                    //std::vector<float> tmp(reg_1max);
                    //for (int m = 0; m < reg_1max; m++)
                    //{
                    //    tmp[m] = out_box[k * num_grid_y * num_grid_x * reg_1max + i * num_grid_x + j + m * num_grid_y * num_grid_x];
                    //}
                    //std::cout << "r:" << *tmp << std::endl;
                    softmax_(tmp, dist.data(), reg_1max);
                    // Expected bin index under the distribution, scaled to pixels.
                    float dis = 0.f;
                    for (int l = 0; l < reg_1max; l++)
                    {
                        dis += l * dist[l];
                    }
                    dis_pred[k] = dis * stride_;
                }
                const float pb_cx = (j + 0.5f) * stride_ - 0.5;
                const float pb_cy = (i + 0.5f) * stride_ - 0.5;
                const float x0 = pb_cx - dis_pred[0];
                const float y0 = pb_cy - dis_pred[1];
                const float x1 = pb_cx + dis_pred[2];
                const float y1 = pb_cy + dis_pred[3];
                generate_boxes.push_back(BoxInfo{ x0, y0, x1, y1, max_score, max_ind });
            }
        }
    }
}

/// Greedy non-maximum suppression, class-agnostic: boxes are sorted by
/// descending score and any later box whose IoU with a kept box is
/// >= nms_threshold is dropped. input_boxes is replaced by the survivors.
void PicoDet::nms(std::vector<BoxInfo>& input_boxes)
{
    std::sort(input_boxes.begin(), input_boxes.end(),
              [](const BoxInfo& a, const BoxInfo& b) { return a.score > b.score; });
    const size_t count = input_boxes.size();
    std::vector<float> vArea(count);
    for (size_t i = 0; i < count; ++i)
    {
        vArea[i] = (input_boxes[i].x2 - input_boxes[i].x1 + 1) * (input_boxes[i].y2 - input_boxes[i].y1 + 1);
    }

    std::vector<char> isSuppressed(count, 0);
    for (size_t i = 0; i < count; ++i)
    {
        if (isSuppressed[i]) { continue; }
        for (size_t j = i + 1; j < count; ++j)
        {
            if (isSuppressed[j]) { continue; }
            const float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1);
            const float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1);
            const float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2);
            const float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2);

            const float w = (std::max)(float(0), xx2 - xx1 + 1);
            const float h = (std::max)(float(0), yy2 - yy1 + 1);
            const float inter = w * h;
            const float ovr = inter / (vArea[i] + vArea[j] - inter);

            if (ovr >= this->nms_threshold)
            {
                isSuppressed[j] = 1;
            }
        }
    }

    // Explicit compaction: does not depend on the order in which an algorithm
    // would invoke a stateful predicate.
    std::vector<BoxInfo> kept;
    kept.reserve(count);
    for (size_t i = 0; i < count; ++i)
    {
        if (!isSuppressed[i]) kept.push_back(input_boxes[i]);
    }
    input_boxes.swap(kept);
}

void PicoDet::detect(cv::Mat& srcimg)
{
    if (srcimg.empty())
    {
        std::cerr << "PicoDet::detect: empty input image" << '\n';
        return;
    }
    int newh = 0, neww = 0, top = 0, left = 0;
    cv::Mat cv_image = srcimg.clone();
    cv::Mat dst = this->resize_image(cv_image, &newh, &neww, &top, &left);
    this->normalize_(dst);
    std::array<int64_t, 4> input_shape_{ 1, 3, this->inpHeight, this->inpWidth };

    auto allocator_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    Ort::Value input_tensor_ = Ort::Value::CreateTensor<float>(allocator_info, input_image_.data(), input_image_.size(), input_shape_.data(), input_shape_.size());

    // Run inference.
    std::vector<Ort::Value> ort_outputs = ort_session->Run(Ort::RunOptions{ nullptr }, &input_names[0], &input_tensor_, 1, output_names.data(), output_names.size());

    // Generate proposals from every feature level.
    std::vector<BoxInfo> generate_boxes;
    for (int i = 0; i < this->num_outs; i++)
    {
        //auto cls_shape = this->output_node_dims[i];
        const float* cls_score = ort_outputs[i].GetTensorMutableData<float>();
        //std::vector<int64_t> new_cls_shape = { cls_shape[0],cls_shape[1],cls_shape[2] * cls_shape[3] };
        const float* bbox_pred = ort_outputs[i + this->num_outs].GetTensorMutableData<float>();
        //auto reg_shape = this->output_node_dims[i+this->num_outs];
        generate_proposal(generate_boxes, stride[i], cls_score, bbox_pred);
    }

    // Perform non maximum suppression to eliminate redundant overlapping boxes with
    // lower confidences
    nms(generate_boxes);

    // Map boxes from network-input coordinates back to the source image.
    const float ratioh = (float)cv_image.rows / newh;
    const float ratiow = (float)cv_image.cols / neww;
    for (size_t i = 0; i < generate_boxes.size(); ++i)
    {
        const BoxInfo& box = generate_boxes[i];
        const int xmin = (int)std::max((box.x1 - left) * ratiow, 0.f);
        const int ymin = (int)std::max((box.y1 - top) * ratioh, 0.f);
        const int xmax = (int)std::min((box.x2 - left) * ratiow, (float)cv_image.cols);
        const int ymax = (int)std::min((box.y2 - top) * ratioh, (float)cv_image.rows);
        cv::rectangle(srcimg, cv::Point(xmin, ymin), cv::Point(xmax, ymax), cv::Scalar(0, 0, 255), 2);

        std::string label = cv::format("%.2f", box.score);
        // Fall back to the numeric index if the names file is shorter than the
        // model's class count.
        const bool has_name = box.label >= 0 && static_cast<size_t>(box.label) < this->class_names.size();
        const std::string name = has_name ? this->class_names[box.label] : std::to_string(box.label);
        label = name + ":" + label;
        cv::putText(srcimg, label, cv::Point(xmin, ymin - 5), cv::FONT_HERSHEY_SIMPLEX, 0.75, cv::Scalar(0, 255, 0), 1);
    }
}

int main()
{
    try
    {
        PicoDet mynet("picodet_xs_320_voc_256_20230405_shape.onnx", "ball.names", 0.5, 0.5);
        /// choice = ["picodet_m_320_coco.onnx", "picodet_m_416_coco.onnx", "picodet_s_320_coco.onnx", "picodet_s_416_coco.onnx"]
        //PicoDet mynet("Cpicodet_xs_320_voc_256_20230405_shape_sim_prune.onnx", "ball.names", 0.5, 0.5);
        std::string imgpath = "test.jpg";
        cv::Mat srcimg = cv::imread(imgpath);
        if (srcimg.empty())
        {
            std::cerr << "cannot read image: " << imgpath << '\n';
            return 1;
        }
        mynet.detect(srcimg);
        cv::imwrite("test_result.jpg", srcimg);
        static const std::string kWinName = "Deep learning object detection in ONNXRuntime";
        cv::namedWindow(kWinName, cv::WINDOW_NORMAL);
        cv::imshow(kWinName, srcimg);
        cv::waitKey(0);
        cv::destroyAllWindows();
    }
    catch (const std::exception& e)
    {
        // Ort::Exception and cv::Exception both derive from std::exception.
        std::cerr << "error: " << e.what() << '\n';
        return 1;
    }
    return 0;
}

2、裁剪后模型的推理

这里主要是把reshape和transpose这两个算子给去掉了,需要用代码来实现

这部分只要把裁剪的部分加下即可,我们用的是直接从原始模型上进行裁剪的,与原始模型相比,差的部分就是:

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-tbslpGvo-1681886941953)(attachment:image.png)]

所以把这部分加上就可以了

2.1 分类reshape和transpose用python模拟c++

如图,有两个头,分别是分类和位置回归。对于分类头,onnxruntime的输出形状为 1xcxkxk -> reshape -> 1xcx(k*k) -> transpose -> 1x(k*k)xc;
对于位置回归头 1x32xkxk -> reshape -> 1x32x(k*k) -> transpose -> 1x(k*k)x32

k是每个检测头最后的大小,如输入是256,每个头的stride分别是[8,16,32,64],那么应有k就是[32,16,8,4],直接c++实现,有些搞不明白,先用python来实现。
我们选用k为4来做实验,类别数为2,batchsize为1。python要模拟c++,按照内存分布来说python要flatten,在内存中连续分布。

import numpy as np

num_grid_x = 4  # width
num_grid_y = 4  # height
num_cls = 2     # number of classes
# Output of the pruned model, laid out as (c, k, k).
a = np.arange(num_cls * num_grid_x * num_grid_y).reshape(num_cls, num_grid_y, num_grid_x)
aa = a.flatten()
# What the removed reshape + transpose would have produced: (k, k, c).
b = a.transpose(1, 2, 0)
bb = b.flatten()
# a is the pruned model's output and b the full model's output. The goal is to
# walk b in its natural (i, j, k) order while actually reading from a, which is
# what emulating the reshape/transpose in code amounts to.
for i in range(num_grid_y):
    for j in range(num_grid_x):
        for k in range(num_cls):
            print(b[i, j, k], "  ", a[k, i, j])
0    0
16    16
1    1
17    17
2    2
18    18
3    3
19    19
4    4
20    20
5    5
21    21
6    6
22    22
7    7
23    23
8    8
24    24
9    9
25    25
10    10
26    26
11    11
27    27
12    12
28    28
13    13
29    29
14    14
30    30
15    15
31    31

可以看到结果是相同的,那么如果输出不是正方形,会怎么样,看下边代码:

num_grid_x = 4  # width
num_grid_y = 3  # height
num_cls = 2     # number of classes
# Output of the pruned model, laid out as (c, grid_y, grid_x).
a = np.arange(num_cls * num_grid_x * num_grid_y).reshape(num_cls, num_grid_y, num_grid_x)
aa = a.flatten()
# What the removed reshape + transpose would have produced: (grid_y, grid_x, c).
b = a.transpose(1, 2, 0)
bb = b.flatten()
# Same check as for the square grid, this time with a non-square feature map:
# walk b in (i, j, k) order while reading the matching element from a.
for i in range(num_grid_y):
    for j in range(num_grid_x):
        for k in range(num_cls):
            print(b[i, j, k], "  ", a[k, i, j])
0    0
12    12
1    1
13    13
2    2
14    14
3    3
15    15
4    4
16    16
5    5
17    17
6    6
18    18
7    7
19    19
8    8
20    20
9    9
21    21
10    10
22    22
11    11
23    23

同样是没有问题的,也就是说可以得出结论:kxkxc 与cxkxk索引对应关系是 i,j,k对应k,i,j,也可以理解成kxkxc变成cxkxk是transpose 0,1,2变成1,2,0,ijk变成kij
接着用c语言的思路来实现

num_grid_x = 4  # width
num_grid_y = 4  # height
num_cls = 2     # number of classes
# Output of the pruned model, laid out as (c, grid_y, grid_x).
a = np.arange(num_cls * num_grid_x * num_grid_y).reshape(num_cls, num_grid_y, num_grid_x)
aa = a.flatten()
# What the removed reshape + transpose would have produced: (grid_y, grid_x, c).
b = a.transpose(1, 2, 0)
bb = b.flatten()
# C-style version: both tensors are addressed through their flat buffers, the
# way a C/C++ program sees the raw output pointer.
for i in range(num_grid_y):
    for j in range(num_grid_x):
        for k in range(num_cls):
            # Flat offset of element (i, j, k) in the (grid_y, grid_x, c) layout.
            tb = i * num_grid_x * num_cls + j * num_cls + k
            # Flat offset of the same element, (k, i, j), in the (c, grid_y, grid_x) layout.
            ta = k * num_grid_y * num_grid_x + i * num_grid_x + j
            print(bb[tb], " ", aa[ta])
0   0
16   16
1   1
17   17
2   2
18   18
3   3
19   19
4   4
20   20
5   5
21   21
6   6
22   22
7   7
23   23
8   8
24   24
9   9
25   25
10   10
26   26
11   11
27   27
12   12
28   28
13   13
29   29
14   14
30   30
15   15
31   31

关于c代码,可以查看我上边的PicoDet::generate_proposal里代码

2.2 回归的reshape和transpose的python模拟

前边32是固定的

num_grid_x = 4
num_grid_y = 4
# a is what the pruned model returns. (32, k, k) and (4, 8, k, k) describe the
# same flat buffer, so the second reshape only splits the 32 channels into
# 4 sides x 8 bins. The grid axes stay in (grid_y, grid_x) order.
a = np.arange(4 * 8 * num_grid_x * num_grid_y).reshape(32, num_grid_y, num_grid_x).reshape(4, 8, num_grid_y, num_grid_x)
# Layout the full model would have produced: (grid_y, grid_x, 4, 8).
b = a.transpose(2, 3, 0, 1)
for i in range(num_grid_y):
    for j in range(num_grid_x):
        for k in range(4):
            # The 8 bins of side k at cell (i, j), read from both layouts.
            print(b[i, j, k], "  ", a[k, :, i, j])
[  0  16  32  48  64  80  96 112]    [  0  16  32  48  64  80  96 112]
[128 144 160 176 192 208 224 240]    [128 144 160 176 192 208 224 240]
[256 272 288 304 320 336 352 368]    [256 272 288 304 320 336 352 368]
[384 400 416 432 448 464 480 496]    [384 400 416 432 448 464 480 496]
[  1  17  33  49  65  81  97 113]    [  1  17  33  49  65  81  97 113]
[129 145 161 177 193 209 225 241]    [129 145 161 177 193 209 225 241]
[257 273 289 305 321 337 353 369]    [257 273 289 305 321 337 353 369]
[385 401 417 433 449 465 481 497]    [385 401 417 433 449 465 481 497]
[  2  18  34  50  66  82  98 114]    [  2  18  34  50  66  82  98 114]
[130 146 162 178 194 210 226 242]    [130 146 162 178 194 210 226 242]
[258 274 290 306 322 338 354 370]    [258 274 290 306 322 338 354 370]
[386 402 418 434 450 466 482 498]    [386 402 418 434 450 466 482 498]
[  3  19  35  51  67  83  99 115]    [  3  19  35  51  67  83  99 115]
[131 147 163 179 195 211 227 243]    [131 147 163 179 195 211 227 243]
[259 275 291 307 323 339 355 371]    [259 275 291 307 323 339 355 371]
[387 403 419 435 451 467 483 499]    [387 403 419 435 451 467 483 499]
[  4  20  36  52  68  84 100 116]    [  4  20  36  52  68  84 100 116]
[132 148 164 180 196 212 228 244]    [132 148 164 180 196 212 228 244]
[260 276 292 308 324 340 356 372]    [260 276 292 308 324 340 356 372]
[388 404 420 436 452 468 484 500]    [388 404 420 436 452 468 484 500]
[  5  21  37  53  69  85 101 117]    [  5  21  37  53  69  85 101 117]
[133 149 165 181 197 213 229 245]    [133 149 165 181 197 213 229 245]
[261 277 293 309 325 341 357 373]    [261 277 293 309 325 341 357 373]
[389 405 421 437 453 469 485 501]    [389 405 421 437 453 469 485 501]
[  6  22  38  54  70  86 102 118]    [  6  22  38  54  70  86 102 118]
[134 150 166 182 198 214 230 246]    [134 150 166 182 198 214 230 246]
[262 278 294 310 326 342 358 374]    [262 278 294 310 326 342 358 374]
[390 406 422 438 454 470 486 502]    [390 406 422 438 454 470 486 502]
[  7  23  39  55  71  87 103 119]    [  7  23  39  55  71  87 103 119]
[135 151 167 183 199 215 231 247]    [135 151 167 183 199 215 231 247]
[263 279 295 311 327 343 359 375]    [263 279 295 311 327 343 359 375]
[391 407 423 439 455 471 487 503]    [391 407 423 439 455 471 487 503]
[  8  24  40  56  72  88 104 120]    [  8  24  40  56  72  88 104 120]
[136 152 168 184 200 216 232 248]    [136 152 168 184 200 216 232 248]
[264 280 296 312 328 344 360 376]    [264 280 296 312 328 344 360 376]
[392 408 424 440 456 472 488 504]    [392 408 424 440 456 472 488 504]
[  9  25  41  57  73  89 105 121]    [  9  25  41  57  73  89 105 121]
[137 153 169 185 201 217 233 249]    [137 153 169 185 201 217 233 249]
[265 281 297 313 329 345 361 377]    [265 281 297 313 329 345 361 377]
[393 409 425 441 457 473 489 505]    [393 409 425 441 457 473 489 505]
[ 10  26  42  58  74  90 106 122]    [ 10  26  42  58  74  90 106 122]
[138 154 170 186 202 218 234 250]    [138 154 170 186 202 218 234 250]
[266 282 298 314 330 346 362 378]    [266 282 298 314 330 346 362 378]
[394 410 426 442 458 474 490 506]    [394 410 426 442 458 474 490 506]
[ 11  27  43  59  75  91 107 123]    [ 11  27  43  59  75  91 107 123]
[139 155 171 187 203 219 235 251]    [139 155 171 187 203 219 235 251]
[267 283 299 315 331 347 363 379]    [267 283 299 315 331 347 363 379]
[395 411 427 443 459 475 491 507]    [395 411 427 443 459 475 491 507]
[ 12  28  44  60  76  92 108 124]    [ 12  28  44  60  76  92 108 124]
[140 156 172 188 204 220 236 252]    [140 156 172 188 204 220 236 252]
[268 284 300 316 332 348 364 380]    [268 284 300 316 332 348 364 380]
[396 412 428 444 460 476 492 508]    [396 412 428 444 460 476 492 508]
[ 13  29  45  61  77  93 109 125]    [ 13  29  45  61  77  93 109 125]
[141 157 173 189 205 221 237 253]    [141 157 173 189 205 221 237 253]
[269 285 301 317 333 349 365 381]    [269 285 301 317 333 349 365 381]
[397 413 429 445 461 477 493 509]    [397 413 429 445 461 477 493 509]
[ 14  30  46  62  78  94 110 126]    [ 14  30  46  62  78  94 110 126]
[142 158 174 190 206 222 238 254]    [142 158 174 190 206 222 238 254]
[270 286 302 318 334 350 366 382]    [270 286 302 318 334 350 366 382]
[398 414 430 446 462 478 494 510]    [398 414 430 446 462 478 494 510]
[ 15  31  47  63  79  95 111 127]    [ 15  31  47  63  79  95 111 127]
[143 159 175 191 207 223 239 255]    [143 159 175 191 207 223 239 255]
[271 287 303 319 335 351 367 383]    [271 287 303 319 335 351 367 383]
[399 415 431 447 463 479 495 511]    [399 415 431 447 463 479 495 511]

上边代码,因为是四维的,但只用三维的来操作,只是为了做到32变成4*8,获取4份连续的8个数(8个可能的位置)。换成c代码的思路来看下边两份代码:第一份在裁剪后的输出里直接连续取8个数,结果与完整模型对不上;第二份按 num_grid_x*num_grid_y 的步长取8个数,结果才一致:

num_grid_x = 4
num_grid_y = 4
# Pruned model output viewed as (4 sides, 8 bins, grid_y, grid_x).
a = np.arange(4 * 8 * num_grid_x * num_grid_y).reshape(32, num_grid_y, num_grid_x).reshape(4, 8, num_grid_y, num_grid_x)
aa = a.flatten()
# Full model layout: (grid_y, grid_x, 4, 8).
b = a.transpose(2, 3, 0, 1)
bb = b.flatten()
for i in range(num_grid_y):
    for j in range(num_grid_x):
        for k in range(4):
            # print(b[i,j,k],"  ",a[k,:,i,j])
            # Start of the 8 contiguous bins in the full-model buffer.
            t1 = i * num_grid_x * 32 + j * 32 + k * 8
            # Offset of bin 0 of side k at cell (i, j) in the pruned buffer.
            t2 = k * 8 * num_grid_x * num_grid_y + i * num_grid_x + j
            # Naive attempt: read 8 *contiguous* values from the pruned buffer.
            # This does not reproduce bb[t1:t1+8], because in aa the bins are
            # num_grid_y * num_grid_x elements apart, not adjacent.
            print(f"{bb[t1:t1+8]} {aa[t2:t2+8]}")
[  0  16  32  48  64  80  96 112] [0 1 2 3 4 5 6 7]
[128 144 160 176 192 208 224 240] [128 129 130 131 132 133 134 135]
[256 272 288 304 320 336 352 368] [256 257 258 259 260 261 262 263]
[384 400 416 432 448 464 480 496] [384 385 386 387 388 389 390 391]
[  1  17  33  49  65  81  97 113] [1 2 3 4 5 6 7 8]
[129 145 161 177 193 209 225 241] [129 130 131 132 133 134 135 136]
[257 273 289 305 321 337 353 369] [257 258 259 260 261 262 263 264]
[385 401 417 433 449 465 481 497] [385 386 387 388 389 390 391 392]
[  2  18  34  50  66  82  98 114] [2 3 4 5 6 7 8 9]
[130 146 162 178 194 210 226 242] [130 131 132 133 134 135 136 137]
[258 274 290 306 322 338 354 370] [258 259 260 261 262 263 264 265]
[386 402 418 434 450 466 482 498] [386 387 388 389 390 391 392 393]
[  3  19  35  51  67  83  99 115] [ 3  4  5  6  7  8  9 10]
[131 147 163 179 195 211 227 243] [131 132 133 134 135 136 137 138]
[259 275 291 307 323 339 355 371] [259 260 261 262 263 264 265 266]
[387 403 419 435 451 467 483 499] [387 388 389 390 391 392 393 394]
[  4  20  36  52  68  84 100 116] [ 4  5  6  7  8  9 10 11]
[132 148 164 180 196 212 228 244] [132 133 134 135 136 137 138 139]
[260 276 292 308 324 340 356 372] [260 261 262 263 264 265 266 267]
[388 404 420 436 452 468 484 500] [388 389 390 391 392 393 394 395]
[  5  21  37  53  69  85 101 117] [ 5  6  7  8  9 10 11 12]
[133 149 165 181 197 213 229 245] [133 134 135 136 137 138 139 140]
[261 277 293 309 325 341 357 373] [261 262 263 264 265 266 267 268]
[389 405 421 437 453 469 485 501] [389 390 391 392 393 394 395 396]
[  6  22  38  54  70  86 102 118] [ 6  7  8  9 10 11 12 13]
[134 150 166 182 198 214 230 246] [134 135 136 137 138 139 140 141]
[262 278 294 310 326 342 358 374] [262 263 264 265 266 267 268 269]
[390 406 422 438 454 470 486 502] [390 391 392 393 394 395 396 397]
[  7  23  39  55  71  87 103 119] [ 7  8  9 10 11 12 13 14]
[135 151 167 183 199 215 231 247] [135 136 137 138 139 140 141 142]
[263 279 295 311 327 343 359 375] [263 264 265 266 267 268 269 270]
[391 407 423 439 455 471 487 503] [391 392 393 394 395 396 397 398]
[  8  24  40  56  72  88 104 120] [ 8  9 10 11 12 13 14 15]
[136 152 168 184 200 216 232 248] [136 137 138 139 140 141 142 143]
[264 280 296 312 328 344 360 376] [264 265 266 267 268 269 270 271]
[392 408 424 440 456 472 488 504] [392 393 394 395 396 397 398 399]
[  9  25  41  57  73  89 105 121] [ 9 10 11 12 13 14 15 16]
[137 153 169 185 201 217 233 249] [137 138 139 140 141 142 143 144]
[265 281 297 313 329 345 361 377] [265 266 267 268 269 270 271 272]
[393 409 425 441 457 473 489 505] [393 394 395 396 397 398 399 400]
[ 10  26  42  58  74  90 106 122] [10 11 12 13 14 15 16 17]
[138 154 170 186 202 218 234 250] [138 139 140 141 142 143 144 145]
[266 282 298 314 330 346 362 378] [266 267 268 269 270 271 272 273]
[394 410 426 442 458 474 490 506] [394 395 396 397 398 399 400 401]
[ 11  27  43  59  75  91 107 123] [11 12 13 14 15 16 17 18]
[139 155 171 187 203 219 235 251] [139 140 141 142 143 144 145 146]
[267 283 299 315 331 347 363 379] [267 268 269 270 271 272 273 274]
[395 411 427 443 459 475 491 507] [395 396 397 398 399 400 401 402]
[ 12  28  44  60  76  92 108 124] [12 13 14 15 16 17 18 19]
[140 156 172 188 204 220 236 252] [140 141 142 143 144 145 146 147]
[268 284 300 316 332 348 364 380] [268 269 270 271 272 273 274 275]
[396 412 428 444 460 476 492 508] [396 397 398 399 400 401 402 403]
[ 13  29  45  61  77  93 109 125] [13 14 15 16 17 18 19 20]
[141 157 173 189 205 221 237 253] [141 142 143 144 145 146 147 148]
[269 285 301 317 333 349 365 381] [269 270 271 272 273 274 275 276]
[397 413 429 445 461 477 493 509] [397 398 399 400 401 402 403 404]
[ 14  30  46  62  78  94 110 126] [14 15 16 17 18 19 20 21]
[142 158 174 190 206 222 238 254] [142 143 144 145 146 147 148 149]
[270 286 302 318 334 350 366 382] [270 271 272 273 274 275 276 277]
[398 414 430 446 462 478 494 510] [398 399 400 401 402 403 404 405]
[ 15  31  47  63  79  95 111 127] [15 16 17 18 19 20 21 22]
[143 159 175 191 207 223 239 255] [143 144 145 146 147 148 149 150]
[271 287 303 319 335 351 367 383] [271 272 273 274 275 276 277 278]
[399 415 431 447 463 479 495 511] [399 400 401 402 403 404 405 406]
num_grid_x = 4
num_grid_y = 4
# Pruned model output viewed as (4 sides, 8 bins, grid_y, grid_x).
a = np.arange(4 * 8 * num_grid_x * num_grid_y).reshape(32, num_grid_y, num_grid_x).reshape(4, 8, num_grid_y, num_grid_x)
aa = a.flatten()
b = a.transpose(2, 3, 0, 1)  # num_grid_y num_grid_x 4 8
bb = b.flatten()
# Distance in aa between two consecutive bins of the same side and cell.
plane = num_grid_x * num_grid_y
for i in range(num_grid_y):
    for j in range(num_grid_x):
        for k in range(4):
            # print(b[i,j,k],"  ",a[k,:,i,j])
            # Start of the 8 contiguous bins in the full-model buffer.
            t1 = i * num_grid_x * 32 + j * 32 + k * 8
            # Offset of bin 0 of side k at cell (i, j) in the pruned buffer.
            t2 = k * 8 * plane + i * num_grid_x + j
            # Strided read: one value every `plane` elements, 8 values in total.
            print(f"{bb[t1:t1+8]} {aa[t2:t2+8*plane:plane]}")
[  0  16  32  48  64  80  96 112] [  0  16  32  48  64  80  96 112]
[128 144 160 176 192 208 224 240] [128 144 160 176 192 208 224 240]
[256 272 288 304 320 336 352 368] [256 272 288 304 320 336 352 368]
[384 400 416 432 448 464 480 496] [384 400 416 432 448 464 480 496]
[  1  17  33  49  65  81  97 113] [  1  17  33  49  65  81  97 113]
[129 145 161 177 193 209 225 241] [129 145 161 177 193 209 225 241]
[257 273 289 305 321 337 353 369] [257 273 289 305 321 337 353 369]
[385 401 417 433 449 465 481 497] [385 401 417 433 449 465 481 497]
[  2  18  34  50  66  82  98 114] [  2  18  34  50  66  82  98 114]
[130 146 162 178 194 210 226 242] [130 146 162 178 194 210 226 242]
[258 274 290 306 322 338 354 370] [258 274 290 306 322 338 354 370]
[386 402 418 434 450 466 482 498] [386 402 418 434 450 466 482 498]
[  3  19  35  51  67  83  99 115] [  3  19  35  51  67  83  99 115]
[131 147 163 179 195 211 227 243] [131 147 163 179 195 211 227 243]
[259 275 291 307 323 339 355 371] [259 275 291 307 323 339 355 371]
[387 403 419 435 451 467 483 499] [387 403 419 435 451 467 483 499]
[  4  20  36  52  68  84 100 116] [  4  20  36  52  68  84 100 116]
[132 148 164 180 196 212 228 244] [132 148 164 180 196 212 228 244]
[260 276 292 308 324 340 356 372] [260 276 292 308 324 340 356 372]
[388 404 420 436 452 468 484 500] [388 404 420 436 452 468 484 500]
[  5  21  37  53  69  85 101 117] [  5  21  37  53  69  85 101 117]
[133 149 165 181 197 213 229 245] [133 149 165 181 197 213 229 245]
[261 277 293 309 325 341 357 373] [261 277 293 309 325 341 357 373]
[389 405 421 437 453 469 485 501] [389 405 421 437 453 469 485 501]
[  6  22  38  54  70  86 102 118] [  6  22  38  54  70  86 102 118]
[134 150 166 182 198 214 230 246] [134 150 166 182 198 214 230 246]
[262 278 294 310 326 342 358 374] [262 278 294 310 326 342 358 374]
[390 406 422 438 454 470 486 502] [390 406 422 438 454 470 486 502]
[  7  23  39  55  71  87 103 119] [  7  23  39  55  71  87 103 119]
[135 151 167 183 199 215 231 247] [135 151 167 183 199 215 231 247]
[263 279 295 311 327 343 359 375] [263 279 295 311 327 343 359 375]
[391 407 423 439 455 471 487 503] [391 407 423 439 455 471 487 503]
[  8  24  40  56  72  88 104 120] [  8  24  40  56  72  88 104 120]
[136 152 168 184 200 216 232 248] [136 152 168 184 200 216 232 248]
[264 280 296 312 328 344 360 376] [264 280 296 312 328 344 360 376]
[392 408 424 440 456 472 488 504] [392 408 424 440 456 472 488 504]
[  9  25  41  57  73  89 105 121] [  9  25  41  57  73  89 105 121]
[137 153 169 185 201 217 233 249] [137 153 169 185 201 217 233 249]
[265 281 297 313 329 345 361 377] [265 281 297 313 329 345 361 377]
[393 409 425 441 457 473 489 505] [393 409 425 441 457 473 489 505]
[ 10  26  42  58  74  90 106 122] [ 10  26  42  58  74  90 106 122]
[138 154 170 186 202 218 234 250] [138 154 170 186 202 218 234 250]
[266 282 298 314 330 346 362 378] [266 282 298 314 330 346 362 378]
[394 410 426 442 458 474 490 506] [394 410 426 442 458 474 490 506]
[ 11  27  43  59  75  91 107 123] [ 11  27  43  59  75  91 107 123]
[139 155 171 187 203 219 235 251] [139 155 171 187 203 219 235 251]
[267 283 299 315 331 347 363 379] [267 283 299 315 331 347 363 379]
[395 411 427 443 459 475 491 507] [395 411 427 443 459 475 491 507]
[ 12  28  44  60  76  92 108 124] [ 12  28  44  60  76  92 108 124]
[140 156 172 188 204 220 236 252] [140 156 172 188 204 220 236 252]
[268 284 300 316 332 348 364 380] [268 284 300 316 332 348 364 380]
[396 412 428 444 460 476 492 508] [396 412 428 444 460 476 492 508]
[ 13  29  45  61  77  93 109 125] [ 13  29  45  61  77  93 109 125]
[141 157 173 189 205 221 237 253] [141 157 173 189 205 221 237 253]
[269 285 301 317 333 349 365 381] [269 285 301 317 333 349 365 381]
[397 413 429 445 461 477 493 509] [397 413 429 445 461 477 493 509]
[ 14  30  46  62  78  94 110 126] [ 14  30  46  62  78  94 110 126]
[142 158 174 190 206 222 238 254] [142 158 174 190 206 222 238 254]
[270 286 302 318 334 350 366 382] [270 286 302 318 334 350 366 382]
[398 414 430 446 462 478 494 510] [398 414 430 446 462 478 494 510]
[ 15  31  47  63  79  95 111 127] [ 15  31  47  63  79  95 111 127]
[143 159 175 191 207 223 239 255] [143 159 175 191 207 223 239 255]
[271 287 303 319 335 351 367 383] [271 287 303 319 335 351 367 383]
[399 415 431 447 463 479 495 511] [399 415 431 447 463 479 495 511]

所以要用c代码的话:

// reg_1max = 8; tmp receives the 8 values gathered for side k of grid cell (i, j).
// NOTE(review): tmp is allocated with new[] and this fragment never frees it;
// the surrounding code needs a matching delete[] tmp once the buffer is no longer used.
float* tmp = new float[reg_1max]; 
for (int m = 0; m < reg_1max; m++)
{
// Element m is read num_grid_y * num_grid_x positions after element m - 1, i.e. a
// strided gather from out_box rather than a contiguous copy.
tmp[m] = out_box[k * reg_1max* num_grid_y * num_grid_x  + i * num_grid_x + j + m * num_grid_y * num_grid_x];
}

理解一下(下面用K表示网格边长,i、j是网格的行、列下标,k是0~3的边序号,m是0~7的位置序号):KxKx4x8 布局对应的偏移是 i*K*4*8 + j*4*8 + k*8,这是8个数的首地址,接着连续取8个即可。
4x8xKxK 布局对应的是python的 a[k,:,i,j],冒号就是全取,c对应的偏移是:k*8*K*K + i*K + j + m*K*K,这个m就是冒号所在维度上的位置,取m=0,1,2,3,4,5,6,7,分别表示这一维度下的8个数。

3、softmax改进

换了一个softmax的实现方式

#define _CRT_SECURE_NO_WARNINGS
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
//#include <cuda_provider_factory.h>
#include <onnxruntime_cxx_api.h>
//using namespace cv;
//using namespace std;
//using namespace Ort;typedef struct BoxInfo
{float x1;float y1;float x2;float y2;float score;int label;
} BoxInfo;class PicoDet
{
public:PicoDet(std::string model_path, std::string classesFile, float nms_threshold, float objThreshold);void detect(cv::Mat& cv_image);
private:float score_threshold = 0.5;float nms_threshold = 0.5;std::vector<std::string> class_names;int num_class;cv::Mat resize_image(cv::Mat srcimg, int* newh, int* neww, int* top, int* left);std::vector<float> input_image_;void normalize_(cv::Mat img);inline float fast_exp(float x);template <typename _Tp>int activation_function_softmax(const _Tp* src, _Tp* dst, int length);//void softmax_(const float* x, float* y, int length);void generate_proposal(std::vector<BoxInfo>& generate_boxes, const int stride_, const float* out_score, const float* out_box);void nms(std::vector<BoxInfo>& input_boxes);const bool keep_ratio = false;int inpWidth;int inpHeight;int num_outs;int reg_max;std::vector<int> stride;//const float mean[3] = { 103.53, 116.28, 123.675 };//const float stds[3] = { 57.375, 57.12, 58.395 };const float mean[3] = { 0.0, 0.0, 0.0 };const float stds[3] = { 255.0, 255.0, 255.0 };Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "picodet");Ort::Session* ort_session = nullptr;Ort::SessionOptions sessionOptions = Ort::SessionOptions();std::vector<char*> input_names;std::vector<char*> output_names;std::vector<std::vector<int64_t>> input_node_dims; // >=1 outputsstd::vector<std::vector<int64_t>> output_node_dims; // >=1 outputs
};
/// Approximate exp(x).
///
/// The value (1 << 23) * (log2(e) * x + bias) is used directly as the bit
/// pattern of an IEEE-754 single: the integer part of the scaled input lands
/// in the exponent field and the fraction in the mantissa.
///
/// Out-of-range inputs are clamped instead of being converted blindly:
/// converting a negative or NaN double to uint32_t is undefined behaviour,
/// which the unguarded formula hits for x below roughly -88.
inline float PicoDet::fast_exp(float x) {
    const double scaled = (1 << 23) * (1.4426950409 * x + 126.93490512f);
    if (!(scaled > 0.0)) {
        return 0.0f; // underflow (also taken for NaN input)
    }
    // 0x7F7FFFFF is the largest finite float bit pattern.
    const double kMaxFiniteBits = 2139095039.0;
    const std::uint32_t bits = static_cast<std::uint32_t>(std::min(scaled, kMaxFiniteBits));
    float result;
    // memcpy is the well-defined way to reinterpret the bits; reading the
    // inactive member of a union is not.
    std::memcpy(&result, &bits, sizeof(result));
    return result;
}

/// Softmax of src[0..length) written to dst[0..length).
///
/// The maximum is subtracted before exponentiation, so every argument passed
/// to fast_exp is <= 0 and the denominator always contains the term for the
/// maximum element.
///
/// @return 0 on success, -1 when there is nothing to process (null pointer or
///         length <= 0); dst is left untouched in that case.
template <typename T>
int PicoDet::activation_function_softmax(const T* src, T* dst, int length) {
    if (src == nullptr || dst == nullptr || length <= 0) {
        return -1; // std::max_element on an empty range must not be dereferenced
    }
    const T alpha = *std::max_element(src, src + length);
    T denominator{ 0 };
    for (int i = 0; i < length; ++i) {
        dst[i] = fast_exp(src[i] - alpha);
        denominator += dst[i];
    }
    for (int i = 0; i < length; ++i) {
        dst[i] /= denominator;
    }
    return 0;
}
PicoDet::PicoDet(std::string model_path, std::string classesFile, float nms_threshold, float objThreshold)
{std::ifstream ifs(classesFile.c_str());std::string line;while (std::getline(ifs, line)) this->class_names.push_back(line);this->num_class = class_names.size();this->nms_threshold = nms_threshold;this->score_threshold = objThreshold;std::wstring widestr = std::wstring(model_path.begin(), model_path.end());//OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0);sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);ort_session = new Ort::Session(env, widestr.c_str(), sessionOptions);size_t numInputNodes = ort_session->GetInputCount();size_t numOutputNodes = ort_session->GetOutputCount();Ort::AllocatorWithDefaultOptions allocator;for (int i = 0; i < numInputNodes; i++){input_names.push_back(ort_session->GetInputName(i, allocator));Ort::TypeInfo input_type_info = ort_session->GetInputTypeInfo(i);auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();auto input_dims = input_tensor_info.GetShape();input_node_dims.push_back(input_dims);}for (int i = 0; i < numOutputNodes; i++){output_names.push_back(ort_session->GetOutputName(i, allocator));Ort::TypeInfo output_type_info = ort_session->GetOutputTypeInfo(i);auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();auto output_dims = output_tensor_info.GetShape();output_node_dims.push_back(output_dims);/*for (int j = 0; j < output_dims.size(); j++){cout << output_dims[j] << ",";}cout << endl;*/}this->inpHeight = input_node_dims[0][2];this->inpWidth = input_node_dims[0][3];this->num_outs = int(numOutputNodes * 0.5);this->reg_max = output_node_dims[this->num_outs][output_node_dims[this->num_outs].size() - 1] / 4 - 1;for (int i = 0; i < this->num_outs; i++){stride.push_back(int(8 * pow(2, i)));}
}cv::Mat PicoDet::resize_image(cv::Mat srcimg, int* newh, int* neww, int* top, int* left)
{int srch = srcimg.rows, srcw = srcimg.cols;*newh = this->inpHeight;*neww = this->inpWidth;cv::Mat dstimg;if (this->keep_ratio && srch != srcw) {float hw_scale = (float)srch / srcw;if (hw_scale > 1) {*newh = this->inpHeight;*neww = int(this->inpWidth / hw_scale);resize(srcimg, dstimg, cv::Size(*neww, *newh), cv::INTER_AREA);*left = int((this->inpWidth - *neww) * 0.5);copyMakeBorder(dstimg, dstimg, 0, 0, *left, this->inpWidth - *neww - *left, cv::BORDER_CONSTANT, 0);}else {*newh = (int)this->inpHeight * hw_scale;*neww = this->inpWidth;resize(srcimg, dstimg, cv::Size(*neww, *newh), cv::INTER_AREA);*top = (int)(this->inpHeight - *newh) * 0.5;copyMakeBorder(dstimg, dstimg, *top, this->inpHeight - *newh - *top, 0, 0, cv::BORDER_CONSTANT, 0);}}else {cv::resize(srcimg, dstimg, cv::Size(*neww, *newh), cv::INTER_AREA);}return dstimg;
}void PicoDet::normalize_(cv::Mat img)
{//    img.convertTo(img, CV_32F);int row = img.rows;int col = img.cols;this->input_image_.resize(row * col * img.channels());for (int c = 0; c < 3; c++){for (int i = 0; i < row; i++){for (int j = 0; j < col; j++){float pix = img.ptr<uchar>(i)[j * 3 + c];this->input_image_[c * row * col + i * col + j] = (pix / 255.0 - mean[c] / 255.0) / (stds[c] / 255.0);//this->input_image_[c * row * col + i * col + j] = (pix - mean[c]) / stds[c];}}}
}
/*
void PicoDet::softmax_(const float* x, float* y, int length)
{float sum = 0;int i = 0;for (i = 0; i < length; i++){y[i] = exp(x[i]);sum += y[i];}for (i = 0; i < length; i++){y[i] /= sum;}
}
*/void PicoDet::generate_proposal(std::vector<BoxInfo>& generate_boxes, const int stride_, const float* out_score, const float* out_box)
{const int num_grid_y = (int)ceil((float)this->inpHeight / stride_);const int num_grid_x = (int)ceil((float)this->inpWidth / stride_);cout << "num_grid_x=" << num_grid_x << ",num_grid_y=" << num_grid_y << endl;const int reg_1max = reg_max + 1;//std::cout << "score:" << std::endl;for (int i = 0; i < num_grid_y; i++){for (int j = 0; j < num_grid_x; j++){int max_ind = 0;float max_score = 0;for (int k = 0; k < num_class; k++){   /*这个代码是原始的输出*///float score = out_score[i * num_grid_x * num_class + j * num_class + k];/*以下代码是去掉reshape和transpose的,用C来实现这个功能的这两部分代码选一个即可,可以理解成ijk对应kij*/float score = std::sqrt(out_score[k*num_grid_y*num_grid_x+i*num_grid_x+j]);//std::cout <<score << " ";if (score > max_score){max_score = score;max_ind = k;}}if (max_score >= score_threshold){std::cout << "box:" << std::endl;//const float* pbox = out_box + idx * reg_1max * 4;float dis_pred[4];float* y = new float[reg_1max];for (int k = 0; k < 4; k++){/*原始模型*///const float* tmp = out_box + i * num_grid_x * reg_1max * 4 + j * reg_1max * 4 + k * reg_1max;//std::cout << "r:" << *tmp << std::endl;/*换用没有reshape transpose的*/float* tmp = new float[reg_1max];for (int m = 0; m < reg_1max; m++){tmp[m] = out_box[k * num_grid_y * num_grid_x * reg_1max + i * num_grid_x + j + m * num_grid_y * num_grid_x];}//std::cout << "r:" << *tmp << std::endl;//softmax_(tmp, y, reg_1max);activation_function_softmax(tmp, y, reg_1max);float dis = 0.f;for (int l = 0; l < reg_1max; l++){dis += l * y[l];}dis_pred[k] = dis * stride_;}delete[] y;float pb_cx = (j + 0.5f) * stride_ - 0.5;float pb_cy = (i + 0.5f) * stride_ - 0.5;float x0 = pb_cx - dis_pred[0];float y0 = pb_cy - dis_pred[1];float x1 = pb_cx + dis_pred[2];float y1 = pb_cy + dis_pred[3];generate_boxes.push_back(BoxInfo{ x0, y0, x1, y1, max_score, max_ind });}}}
}void PicoDet::nms(std::vector<BoxInfo>& input_boxes)
{sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; });std::vector<float> vArea(input_boxes.size());for (int i = 0; i < int(input_boxes.size()); ++i){vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);}std::vector<bool> isSuppressed(input_boxes.size(), false);for (int i = 0; i < int(input_boxes.size()); ++i){if (isSuppressed[i]) { continue; }for (int j = i + 1; j < int(input_boxes.size()); ++j){if (isSuppressed[j]) { continue; }float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1);float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1);float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2);float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2);float w = (std::max)(float(0), xx2 - xx1 + 1);float h = (std::max)(float(0), yy2 - yy1 + 1);float inter = w * h;float ovr = inter / (vArea[i] + vArea[j] - inter);if (ovr >= this->nms_threshold){isSuppressed[j] = true;}}}// return post_nms;int idx_t = 0;input_boxes.erase(remove_if(input_boxes.begin(), input_boxes.end(), [&idx_t, &isSuppressed](const BoxInfo& f) { return isSuppressed[idx_t++]; }), input_boxes.end());
}void PicoDet::detect(cv::Mat& srcimg)
{int newh = 0, neww = 0, top = 0, left = 0;cv::Mat cv_image = srcimg.clone();cv::Mat dst = this->resize_image(cv_image, &newh, &neww, &top, &left);this->normalize_(dst);std::array<int64_t, 4> input_shape_{ 1, 3, this->inpHeight, this->inpWidth };auto allocator_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);Ort::Value input_tensor_ = Ort::Value::CreateTensor<float>(allocator_info, input_image_.data(), input_image_.size(), input_shape_.data(), input_shape_.size());std::vector<Ort::Value> ort_outputs = ort_session->Run(Ort::RunOptions{ nullptr }, &input_names[0], &input_tensor_, 1, output_names.data(), output_names.size());   // ???????/generate proposalsstd::vector<BoxInfo> generate_boxes;for (int i = 0; i < this->num_outs; i++){//auto cls_shape = this->output_node_dims[i];const float* cls_score = ort_outputs[i].GetTensorMutableData<float>();//std::vector<int64_t> new_cls_shape = { cls_shape[0],cls_shape[1],cls_shape[2] * cls_shape[3] };const float* bbox_pred = ort_outputs[i + this->num_outs].GetTensorMutableData<float>();//auto reg_shape = this->output_node_dims[i+this->num_outs];generate_proposal(generate_boxes, stride[i], cls_score, bbox_pred);} Perform non maximum suppression to eliminate redundant overlapping boxes with lower confidencesnms(generate_boxes);float ratioh = (float)cv_image.rows / newh;float ratiow = (float)cv_image.cols / neww;for (size_t i = 0; i < generate_boxes.size(); ++i){int xmin = (int)std::max((generate_boxes[i].x1 - left) * ratiow, 0.f);int ymin = (int)std::max((generate_boxes[i].y1 - top) * ratioh, 0.f);int xmax = (int)std::min((generate_boxes[i].x2 - left) * ratiow, (float)cv_image.cols);int ymax = (int)std::min((generate_boxes[i].y2 - top) * ratioh, (float)cv_image.rows);rectangle(srcimg, cv::Point(xmin, ymin), cv::Point(xmax, ymax), cv::Scalar(0, 0, 255), 2);std::string label = cv::format("%.2f", generate_boxes[i].score);label = this->class_names[generate_boxes[i].label] + ":" + label;putText(srcimg, label, 
cv::Point(xmin, ymin - 5), cv::FONT_HERSHEY_SIMPLEX, 0.75, cv::Scalar(0, 255, 0), 1);}
}int main()
{//PicoDet mynet("C:/Users/tl/Desktop/demo_ncnn/ncnn_our/picodet_xs_320_voc_256_20230405_shape.onnx", "C:/Users/tl/Desktop/demo_ncnn/ncnn_our/ball.names", 0.5, 0.5);  /// choice = ["picodet_m_320_coco.onnx", "picodet_m_416_coco.onnx", "picodet_s_320_coco.onnx", "picodet_s_416_coco.onnx"]PicoDet mynet("C:/Users/tl/Desktop/demo_ncnn/ncnn_our/picodet_xs_320_voc_256_20230405_shape_sim_prune.onnx", "C:/Users/tl/Desktop/demo_ncnn/ncnn_our/ball.names", 0.5, 0.5);std::string imgpath = "C:/Users/tl/Desktop/demo_ncnn/ncnn_our/test.jpg";cv::Mat srcimg = cv::imread(imgpath);mynet.detect(srcimg);cv::imwrite("C:/Users/tl/Desktop/demo_ncnn/ncnn_our/test_result.jpg", srcimg);static const std::string kWinName = "Deep learning object detection in ONNXRuntime";cv::namedWindow(kWinName, cv::WINDOW_NORMAL);cv::imshow(kWinName, srcimg);cv::waitKey(0);cv::destroyAllWindows();
}