Convolution result different from that of Caffe


I tried writing the convolution function myself to compare the result with Caffe's. I used bottom and top blob data and weights extracted while running a demo program, and I am confident the extracted data are correct. Here is the convolution test code I wrote, but the result is different. In this example I have 64 input feature maps and 64 output feature maps, and I use a 3x3 kernel. The program reads the bottom data from 64 files, reads the weights and biases for the 64x64 set of 3x3 kernels, performs the convolution, and saves the top data into 64 files. It is a very simple program. I would appreciate it if anyone could spot a mistake here and let me know.

// written by Chan Kim for simple convolution test
/*
 layer 2 type Convolution num_bottoms 1 num_tops 1
 layer 2 bottom 0 shape "1 64 600 800 (30720000)"
 layer 2 top 0 shape = "1 64 600 800 (30720000)"
 layer 2 kernel_size = 3
 bottom size : 4*64*600*800 = 0x7530000 Bytes
 top size    : 4*64*600*800 = 0x7530000 Bytes
 kernel size : 4*3*3*64*64  = 0x24000 Bytes
 in word size, bottom : 0x1d4c000 kernel : 0x9000 Words
*/
#include <stdio.h>

#define NUM_IFM 64
#define NUM_OFM 64
#define HEIGHT  600
#define WIDTH   800
#define K       3   // 3x3 kernel

float bottom[1][NUM_IFM][HEIGHT][WIDTH];
float top[1][NUM_IFM][HEIGHT][WIDTH];
float weights[NUM_OFM][NUM_IFM][K][K];
float bias[NUM_OFM];
float conv[1][NUM_IFM][HEIGHT][WIDTH]; // result
char str[80];
float kern[K][K];
float in_square[K][K];
float sum;

#define layer 2 // for test

int main(void)
{
  FILE *file;
  int ifm_idx;
  int ofm_idx;
  int orix;
  int ocix;
  int c;  // int, not char, so the EOF comparison works
  int r;
  int rix, cix;
  char line[80];
  char *ll;
  float v0, v1, v2, v3, v4, v5, v6, v7;
  int kyi, kxi;
  int orixm, orixp;
  int ocixm, ocixp;
  int kx, ky;
  int i;

  // --------------------------------------------------------
  // reading blob data from files into blob memory
  // --------------------------------------------------------
  printf("starting!\n");
  for (ifm_idx = 0; ifm_idx < NUM_IFM; ifm_idx++) {
    sprintf(str, "/home/ckim/Neuro/convhw/ext-z840/L%02d_Convolution_B0_FN%03d.txt", layer, ifm_idx);
    file = fopen(str, "r");
    c = fgetc(file);
    rix = -1;
    while (c != EOF) {
      if (c == '#') {          // row header line '### kr = .. ##'
        ll = fgets(line, 80, file);
        c = fgetc(file);
        rix++;
        cix = 0;
      } else {                 // data line with 8 values
        r = ungetc(c, file);
        r = fscanf(file, "%f %f %f %f %f %f %f %f",
                   &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7);
        bottom[0][ifm_idx][rix][cix++] = v0;
        bottom[0][ifm_idx][rix][cix++] = v1;
        bottom[0][ifm_idx][rix][cix++] = v2;
        bottom[0][ifm_idx][rix][cix++] = v3;
        bottom[0][ifm_idx][rix][cix++] = v4;
        bottom[0][ifm_idx][rix][cix++] = v5;
        bottom[0][ifm_idx][rix][cix++] = v6;
        bottom[0][ifm_idx][rix][cix++] = v7;
        c = fgetc(file);
      }
    }
    printf("file %s read..\n", str);
    fclose(file);
  }

  // --------------------------------------------------------
  // reading weights from files into blob memory
  // --------------------------------------------------------
  sprintf(str, "/home/ckim/Neuro/convhw/ext1/L%02d_Convolution_Weights.txt", layer);
  file = fopen(str, "r");
  ll = fgets(line, 80, file); // read the layer config line
  for (ofm_idx = 0; ofm_idx < NUM_OFM; ofm_idx++) {   //for(ifm_idx = 0; ifm_idx < NUM_IFM; ifm_idx++) {
    ll = fgets(line, 80, file); // read the line '## For output map ofm_idx'
    for (ifm_idx = 0; ifm_idx < NUM_IFM; ifm_idx++) { //for(ofm_idx = 0; ofm_idx < NUM_OFM; ofm_idx++) {
      ll = fgets(line, 80, file); // read the line '## for input map ifm_idx'
      for (kyi = 0; kyi < K; kyi++) {
        fscanf(file, "%f %f %f", &v0, &v1, &v2); // K=3 always
        weights[ofm_idx][ifm_idx][kyi][0] = v0;
        weights[ofm_idx][ifm_idx][kyi][1] = v1;
        weights[ofm_idx][ifm_idx][kyi][2] = v2;
        ll = fgets(line, 80, file); // read off remaining line
      }
    }
    // read bias values
  }
  for (i = 0, ofm_idx = 0; i < NUM_OFM/8; i++) {
    ll = fgets(line, 80, file);
    r = fscanf(file, "%f %f %f %f %f %f %f %f",
               &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7);
    bias[ofm_idx++] = v0;
    bias[ofm_idx++] = v1;
    bias[ofm_idx++] = v2;
    bias[ofm_idx++] = v3;
    bias[ofm_idx++] = v4;
    bias[ofm_idx++] = v5;
    bias[ofm_idx++] = v6;
    bias[ofm_idx++] = v7;
  }
  fclose(file);

  // --------------------------------------------------------
  // perform convolution
  // --------------------------------------------------------
  printf("starting convolution..\n");
  for (ofm_idx = 0; ofm_idx < NUM_OFM; ofm_idx++) {
    printf("making OFM %d\n", ofm_idx);
    for (orix = 0; orix < HEIGHT; orix++) {
      for (ocix = 0; ocix < WIDTH; ocix++) {
        sum = 0.; // for single point
        // for each input map
        for (ifm_idx = 0; ifm_idx < NUM_IFM; ifm_idx++) {
          // get kernel
          kern[0][0] = weights[ofm_idx][ifm_idx][0][0];
          kern[0][1] = weights[ofm_idx][ifm_idx][0][1];
          kern[0][2] = weights[ofm_idx][ifm_idx][0][2];
          kern[1][0] = weights[ofm_idx][ifm_idx][1][0];
          kern[1][1] = weights[ofm_idx][ifm_idx][1][1];
          kern[1][2] = weights[ofm_idx][ifm_idx][1][2];
          kern[2][0] = weights[ofm_idx][ifm_idx][2][0];
          kern[2][1] = weights[ofm_idx][ifm_idx][2][1];
          kern[2][2] = weights[ofm_idx][ifm_idx][2][2];
          // zero-value padding (used in caffe)
          in_square[0][0] = (orix == 0 || ocix == 0)              ? 0 : bottom[0][ifm_idx][orix][ocix];
          in_square[0][1] = (orix == 0)                           ? 0 : bottom[0][ifm_idx][orix][ocix];
          in_square[0][2] = (orix == 0 || ocix == WIDTH-1)        ? 0 : bottom[0][ifm_idx][orix][ocix];
          in_square[1][0] = (ocix == 0)                           ? 0 : bottom[0][ifm_idx][orix][ocix];
          in_square[1][1] =                                             bottom[0][ifm_idx][orix][ocix];
          in_square[1][2] = (ocix == WIDTH-1)                     ? 0 : bottom[0][ifm_idx][orix][ocix];
          in_square[2][0] = (orix == HEIGHT-1 || ocix == 0)       ? 0 : bottom[0][ifm_idx][orix][ocix];
          in_square[2][1] = (orix == HEIGHT-1)                    ? 0 : bottom[0][ifm_idx][orix][ocix];
          in_square[2][2] = (orix == HEIGHT-1 || ocix == WIDTH-1) ? 0 : bottom[0][ifm_idx][orix][ocix];
          // apply kernel
          for (ky = 0; ky < K; ky++) {
            for (kx = 0; kx < K; kx++) {
              sum += in_square[ky][kx] * kern[ky][kx];
            }
          }
          //// add bias
          //sum += bias[ifm_idx];
        } // ifm_idx
        // add bias
        sum += bias[ofm_idx];
        // store result
        conv[0][ofm_idx][orix][ocix] = sum;
      } // ocix
    } // orix
    printf("OFM %d\n", ofm_idx);
    sprintf(str, "./result/L%02d_Convolution_T0_FN%03d.txt", layer, ofm_idx);
    file = fopen(str, "w");
    printf("writing convolution result to file %s..\n", str);
    for (rix = 0; rix < HEIGHT; rix++) {
      fprintf(file, "### kr = %d ##\n", rix);
      for (cix = 0; cix < WIDTH; cix++) {
        fprintf(file, "%f ", conv[0][ofm_idx][rix][cix]);
        if (cix % 8 == 7) fprintf(file, "\n");
      }
    }
    fclose(file);
  }
  printf("Convolution finished\n");
  return 0;
}

UPDATE: I tried transposing the kernel and reversing the input-output relationship of the extracted kernel, all four combinations, but it still does not match. Maybe something is wrong in the way I extracted the values from the Caffe execution. I am adding how I extracted the bottom, top, and weight data from the convolution layer in the caffe/src/caffe/net.cpp file. They are extracted after the layer's processing is done.
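For reference, my understanding is that Caffe's Convolution layer actually computes cross-correlation (the kernel is never flipped), and a weight blob is stored contiguously in (No, Ni, Ky, Kx) row-major order, so if the indexing is right no transposition should be needed. The flat offset of any weight can be spot-checked against the nested indexing with a small helper (a sketch; weight_offset is just an illustrative name):

/* Sketch, assuming Caffe's row-major (No, Ni, Ky, Kx) weight layout:
   flat offset of weight (n, c, y, x), for spot-checking that the
   extracted text files and the test program index the same element. */
int weight_offset(int n, int c, int y, int x, int Ni, int Ky, int Kx)
{
    return ((n * Ni + c) * Ky + y) * Kx + x;
}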

//#define LayerShapeExtract
//#define BlobExtract
//#define WeightExtract
#define LayerExtNum 2
#define EXT_ALL_LAYERS 0

int ccc; // for debug

template <typename Dtype>
Dtype Net<Dtype>::ForwardFromTo(int start, int end) {
  CHECK_GE(start, 0);
  CHECK_LT(end, layers_.size());
  Dtype loss = 0;
  LOG(INFO) << "## : net_input_blobs_.size() : " << net_input_blobs_.size() << std::endl;
  if (debug_info_) {
    for (int i = 0; i < net_input_blobs_.size(); ++i) { InputDebugInfo(i); }
  }
  for (int i = start; i <= end; ++i) {
    ccc = i;
    printf("ccc = %d\n", ccc);
    // LOG(ERROR) << "Forwarding " << layer_names_[i];
    Dtype layer_loss = layers_[i]->Forward(bottom_vecs_[i], top_vecs_[i]); // Layer::Forward
    loss += layer_loss;
    if (debug_info_) { ForwardDebugInfo(i); }

#ifdef LayerShapeExtract
    // -------------------------------- start of shape extract ---------------------
    FILE *lsfp;
    lsfp = fopen("ext/layers.txt", "w");
    fprintf(lsfp, "####### Layer Shape ########\n");
    fprintf(lsfp, "start = %d, end = %d\n", start, end);
    for (int li = start; li <= end; ++li) {
      fprintf(lsfp, "layer %d type %s num_bottoms %zu num_tops %zu\n",
              li, layers_[li]->type(), bottom_vecs_[li].size(), top_vecs_[li].size());
      for (int bi = 0; bi < bottom_vecs_[li].size(); bi++) {
        fprintf(lsfp, "layer %d bottom %d shape \"%s\"\n",
                li, bi, bottom_vecs_[li][bi]->shape_string().c_str());
      }
      for (int ti = 0; ti < top_vecs_[li].size(); ti++) {
        fprintf(lsfp, "layer %d top %d shape = \"%s\"\n",
                li, ti, top_vecs_[li][ti]->shape_string().c_str());
      }
      if (strcmp(layers_[li]->type(), "Convolution") == 0) {
        fprintf(lsfp, "layer %d kernel_size = %d\n",
                li, layers_[li]->layer_param().convolution_param().kernel_size());
      }
    }
    fclose(lsfp);
    //printf("##Net::ForwardFromTo ==> ####### End of Layer Shape ########\n");
    // -------------------------------- end of shape extract -----------------------
#endif

#ifdef BlobExtract
    // -------------------------------- start of blob extract ----------------------
    if (i == LayerExtNum || EXT_ALL_LAYERS) { //print!!
      char fname[50];
      FILE *extfp1;
      for (int j = 0; j < bottom_vecs_[i].size(); j++) {
        Blob<Dtype>* bp = bottom_vecs_[i][j];
        const Dtype *dptr = bp->cpu_data();
        if (bp->shape().size() == 4) {
          for (int kn = 0; kn < bp->shape()[0]; kn++) {       // Blob
            for (int kd = 0; kd < bp->shape()[1]; kd++) {     // Ni
              sprintf(fname, "ext/L%02d_%s_B%d_FN%03d.txt", i, layers_[i]->type(), j, kd);
              extfp1 = fopen(fname, "w");
              for (int kr = 0; kr < bp->shape()[2]; kr++) {   // Ny
                fprintf(extfp1, "### kr = %d ##\n", kr);
                for (int kc = 0; kc < bp->shape()[3]; kc++) { // Nx
                  fprintf(extfp1, "%f ", *dptr++);
                  if (kc % 8 == 7 || kc == bp->shape()[3]-1) fprintf(extfp1, "\n");
                }
              }
              fclose(extfp1);
            }
          }
        } // if size 4
        else if (bp->shape().size() == 2) {
          for (int kn = 0; kn < bp->shape()[0]; kn++) { // Blob
            sprintf(fname, "ext/L%02d_%s_B%d_FN%03d.txt", i, layers_[i]->type(), j, kn);
            extfp1 = fopen(fname, "w");
            for (int kd = 0; kd < bp->shape()[1]; kd++) { // Ni
              fprintf(extfp1, "%f ", *dptr++);
              if (kd % 8 == 7 || kd == bp->shape()[1]-1) fprintf(extfp1, "\n");
            }
            fclose(extfp1);
          }
        } // if size 2
        else {
          printf("BSS != 4 or 2, i = %d, j = %d, sz = %zd\n", i, j, bp->shape().size());
        }
      } // for j
      for (int j = 0; j < top_vecs_[i].size(); j++) {
        Blob<Dtype>* tp = top_vecs_[i][j];
        const Dtype *dptr = tp->cpu_data();
        if (tp->shape().size() == 4) { // Blob
          for (int kn = 0; kn < tp->shape()[0]; kn++) {       // Blob
            for (int kd = 0; kd < tp->shape()[1]; kd++) {     // Ni
              sprintf(fname, "ext/L%02d_%s_T%d_FN%03d.txt", i, layers_[i]->type(), j, kd);
              extfp1 = fopen(fname, "w");
              for (int kr = 0; kr < tp->shape()[2]; kr++) {   // Ny
                fprintf(extfp1, "### kr = %d ##\n", kr);
                for (int kc = 0; kc < tp->shape()[3]; kc++) { // Nx
                  fprintf(extfp1, "%f ", *dptr++);
                  if (kc % 8 == 7 || kc == tp->shape()[3]-1) fprintf(extfp1, "\n");
                }
              }
              fclose(extfp1);
            }
          }
        } // if size 4
        else if (tp->shape().size() == 2) {
          for (int kn = 0; kn < tp->shape()[0]; kn++) { // Blob
            sprintf(fname, "ext/L%02d_%s_T%d_FN%03d.txt", i, layers_[i]->type(), j, kn);
            extfp1 = fopen(fname, "w");
            for (int kd = 0; kd < tp->shape()[1]; kd++) { // Ni
              fprintf(extfp1, "%f ", *dptr++);
              if (kd % 8 == 7) fprintf(extfp1, "\n");
            }
            fclose(extfp1);
          }
        } // if size 2
        else {
          printf("BSS != 4 or 2, i = %d, j = %d, sz = %zd\n", i, j, tp->shape().size());
        }
      } // for j
    } // print!!
    // -------------------------------- end of blob extract ------------------------
#endif

#ifdef WeightExtract
    // -------------------------------- start of weight extract --------------------
    if (i == LayerExtNum || EXT_ALL_LAYERS) { //print!!
      char fname[50];
      FILE *extfp1;
      printf("#*#* Ext layer type = %s\n", layers_[i]->type());
      if (strcmp(layers_[i]->type(), "Convolution") == 0) {
        printf("It's Convolution Layer!\n");
        vector<shared_ptr<Blob<Dtype> > >& lyr_blobs = layers_[i]->blobs(); // Layer
        printf("layer blob size = %zd\n", lyr_blobs.size());
        //const LayerParameter& lyr_param = layers()[i]->layer_param(); // vector shared_ptr Layer
        printf("lyr_blobs.size() = %zd\n", lyr_blobs.size());
        printf("lyr_blobs[0].shape = %s\n", lyr_blobs[0]->shape_string().c_str());
        printf("lyr_blobs[1].shape = %s\n", lyr_blobs[1]->shape_string().c_str()); // Blob
        Blob<Dtype> *wp = lyr_blobs[0].get(); // weight  // shared_ptr
        Blob<Dtype> *bp = lyr_blobs[1].get(); // bias
        printf("No Ni ky kx = %d %d %d %d\n",
               wp->shape()[0], wp->shape()[1], wp->shape()[2], wp->shape()[3]); // Blob
        printf("Nb = %d\n", bp->shape()[0]);
        int No = wp->shape()[0];
        int Ni = wp->shape()[1];
        int Ky = wp->shape()[2];
        int Kx = wp->shape()[3];
        int Nb = bp->shape()[0];
        const Dtype *wptr = wp->cpu_data();
        const Dtype *bptr = bp->cpu_data();
        // save weights first
        sprintf(fname, "ext/L%02d_%s_Weights.txt", i, layers_[i]->type());
        extfp1 = fopen(fname, "w");
        fprintf(extfp1, "## Layer 0 Conv Weights (No = %d, Ni = %d, Ky = %d, Kx = %d, Nb = %d\n",
                No, Ni, Ky, Kx, Nb);
        for (int n = 0; n < No; n++) {
          fprintf(extfp1, "## For output map %d ##\n", n);
          for (int c = 0; c < Ni; c++) {
            fprintf(extfp1, "## for input map %d ##\n", c);
            for (int y = 0; y < Ky; y++) {
              for (int x = 0; x < Kx; x++) {
                fprintf(extfp1, "%f ", *wptr++);
              }
              fprintf(extfp1, "\n");
            }
            fprintf(extfp1, "\n");
          }
        }
        fprintf(extfp1, "## Bias values for outputs ##\n");
        for (int n = 0; n < No; n++) {
          fprintf(extfp1, "%f ", *bptr++);
          if (n % 8 == 7) fprintf(extfp1, "\n");
        }
        fclose(extfp1);
      }
    } // print!!
    // -------------------------------- end of weight extract ----------------------
#endif
  }
  return loss;
}

ADD: I ran VGG_ILSVRC_16_layers from the py-faster-rcnn code. The parameters of the layer I extracted the data from are as follows:

layer {
  name: "conv1_2"
  type: "Convolution"
  bottom: "conv1_1"
  top: "conv1_2"
  convolution_param {
    num_output: 64
    pad: 1
    kernel_size: 3
  }
}
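With pad: 1, kernel_size: 3, and the default stride of 1, the output spatial size equals the input size, which is why the test program above uses HEIGHT x WIDTH for both bottom and top. A quick check with the standard output-size formula (illustrative):

/* out = (in + 2*pad - kernel) / stride + 1 */
int out_h = (600 + 2*1 - 3) / 1 + 1; /* = 600 */
int out_w = (800 + 2*1 - 3) / 1 + 1; /* = 800 */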

ADD: A moderator deleted my fixed code, so I am adding the fixed kernel-weight reading code here. (The trailing \n in the fscanf format skips the newline and the blank separator line that the extractor writes after each 3x3 kernel; the per-row fgets in my original reader consumed only the newline, so the parse drifted out of alignment after the first kernel.)

for (ofm_idx = 0; ofm_idx < NUM_OFM; ofm_idx++) {
  ll = fgets(line, 80, file); // read the line '## For output map ofm_idx'
  for (ifm_idx = 0; ifm_idx < NUM_IFM; ifm_idx++) {
    ll = fgets(line, 80, file); // read the line '## for input map ifm_idx'
    for (kyi = 0; kyi < K; kyi++) {
      fscanf(file, "%f %f %f\n", &v0, &v1, &v2); // K=3 always
      weights[ofm_idx][ifm_idx][kyi][0] = v0;
      weights[ofm_idx][ifm_idx][kyi][1] = v1;
      weights[ofm_idx][ifm_idx][kyi][2] = v2;
    }
  }
  // read bias values
}
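To confirm the parse stays aligned, a simple debug aid (illustrative only) is to dump the first and last kernels right after reading and compare them by eye against the first and last 3x3 blocks in the weights file:

/* debug aid: print the first and last parsed kernels for eyeballing
   against the head and tail of L02_Convolution_Weights.txt */
for (kyi = 0; kyi < K; kyi++)
    printf("w[0][0][%d] = %f %f %f\n", kyi,
           weights[0][0][kyi][0], weights[0][0][kyi][1], weights[0][0][kyi][2]);
for (kyi = 0; kyi < K; kyi++)
    printf("w[last][last][%d] = %f %f %f\n", kyi,
           weights[NUM_OFM-1][NUM_IFM-1][kyi][0],
           weights[NUM_OFM-1][NUM_IFM-1][kyi][1],
           weights[NUM_OFM-1][NUM_IFM-1][kyi][2]);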

Accepted answer

I think the bug lies in fetching the bottom data (input) when performing the convolution; the correct code should be:

// bottom data index for convolution
int src_r, src_c;
// pads and strides along row and column
int row_pad = 1, col_pad = 1, row_stride = 1, col_stride = 1;

for (ofm_idx = 0; ofm_idx < NUM_OFM; ofm_idx++) {
  for (orix = 0; orix < HEIGHT; orix++) {
    for (ocix = 0; ocix < WIDTH; ocix++) {
      sum = 0.;
      for (ifm_idx = 0; ifm_idx < NUM_IFM; ifm_idx++) {
        ...
        // zero-value padding (used in caffe)
        // The right indexing for bottom data,
        // your original code is wrong here
        src_r = orix * row_stride - row_pad;
        src_c = ocix * col_stride - col_pad;
        in_square[0][0] = (src_r < 0     || src_c < 0     || src_r >= HEIGHT     || src_c >= WIDTH)     ? 0 : bottom[0][ifm_idx][src_r][src_c];
        in_square[0][1] = (src_r < 0     || src_c + 1 < 0 || src_r >= HEIGHT     || src_c + 1 >= WIDTH) ? 0 : bottom[0][ifm_idx][src_r][src_c + 1];
        in_square[0][2] = (src_r < 0     || src_c + 2 < 0 || src_r >= HEIGHT     || src_c + 2 >= WIDTH) ? 0 : bottom[0][ifm_idx][src_r][src_c + 2];
        in_square[1][0] = (src_r + 1 < 0 || src_c < 0     || src_r + 1 >= HEIGHT || src_c >= WIDTH)     ? 0 : bottom[0][ifm_idx][src_r + 1][src_c];
        in_square[1][1] = (src_r + 1 < 0 || src_c + 1 < 0 || src_r + 1 >= HEIGHT || src_c + 1 >= WIDTH) ? 0 : bottom[0][ifm_idx][src_r + 1][src_c + 1];
        in_square[1][2] = (src_r + 1 < 0 || src_c + 2 < 0 || src_r + 1 >= HEIGHT || src_c + 2 >= WIDTH) ? 0 : bottom[0][ifm_idx][src_r + 1][src_c + 2];
        in_square[2][0] = (src_r + 2 < 0 || src_c < 0     || src_r + 2 >= HEIGHT || src_c >= WIDTH)     ? 0 : bottom[0][ifm_idx][src_r + 2][src_c];
        in_square[2][1] = (src_r + 2 < 0 || src_c + 1 < 0 || src_r + 2 >= HEIGHT || src_c + 1 >= WIDTH) ? 0 : bottom[0][ifm_idx][src_r + 2][src_c + 1];
        in_square[2][2] = (src_r + 2 < 0 || src_c + 2 < 0 || src_r + 2 >= HEIGHT || src_c + 2 >= WIDTH) ? 0 : bottom[0][ifm_idx][src_r + 2][src_c + 2];
        // apply kernel
        ...
      }
    }
  }
}
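For what it's worth, the same padded fetch can be written as plain loops over the kernel window instead of the unrolled in_square assignments; a minimal equivalent sketch under the same assumptions (stride 1, pad 1, 3x3 kernel, zero for out-of-range taps, which matches Caffe's zero padding):

// generalized inner loop for output pixel (orix, ocix) of one input map
for (ky = 0; ky < K; ky++) {
    for (kx = 0; kx < K; kx++) {
        int src_r = orix * row_stride - row_pad + ky;
        int src_c = ocix * col_stride - col_pad + kx;
        float v = (src_r < 0 || src_r >= HEIGHT || src_c < 0 || src_c >= WIDTH)
                      ? 0.0f : bottom[0][ifm_idx][src_r][src_c];
        sum += v * weights[ofm_idx][ifm_idx][ky][kx];
    }
}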
