Spevis: 4

Detection Kernel.

   1: __global__ void Compute_win(float*His_Img,float*Fea_vector)   2: {   3:     //Notice constant number here will change as window size changes   4:     __shared__ float cache_his[105][36];       5:     6:     //Thread index Index should be less     7:     unsigned int thread_index =  threadIdx.x + __umul24(threadIdx.y,Win_Attr.win_width);   8:     //Block Index    9:     unsigned int block_index = blockIdx.x + __umul24(blockIdx.y,gridDim.x);  10:    11:     unsigned int tid_x =  threadIdx.x + blockIdx.x;  12:     unsigned int tid_y =  threadIdx.y + blockIdx.y;  13:    14:    15:     if(tid_x + 1 < Img_Attr.Image_width&&tid_y  + 1< Img_Attr.Image_height&&threadIdx.x < Win_Attr.win_width&&threadIdx.y <Win_Attr.win_height)  16:     {  17:         unsigned int index_0 = tid_x  + tid_y*Img_Attr.Image_width;  18:         unsigned int index_1 = (tid_x + 1) + tid_y*Img_Attr.Image_width;  19:         unsigned int index_2 = (tid_x) + (tid_y + 1)*Img_Attr.Image_width;  20:         unsigned int index_3 = (tid_x + 1) + (tid_y + 1)*Img_Attr.Image_width;  21:           22:         float norm_2 = 0;  23:         unsigned int j = 0;  24:           25:         for(int Bin_id = 0; Bin_id < K ;Bin_id++)  26:         {  27:             cache_his[thread_index][j++] = His_Img[index_0 + Bin_id*Img_Attr.Image_size];  28:             cache_his[thread_index][j++] = His_Img[index_1 + Bin_id*Img_Attr.Image_size];  29:             cache_his[thread_index][j++] = His_Img[index_2 + Bin_id*Img_Attr.Image_size];  30:             cache_his[thread_index][j++] = His_Img[index_3 + Bin_id*Img_Attr.Image_size];  31:         }  32:         for(int i = 0; i <  K*BLOCK_SIZE*BLOCK_SIZE; i++)  33:             norm_2 += cache_his[thread_index][i]*cache_his[thread_index][i];  34:         norm_2 = sqrtf(norm_2);  35:           36:         unsigned int index = block_index*Win_Attr.win_width*Win_Attr.win_height*K*BLOCK_SIZE*BLOCK_SIZE+ thread_index*K*BLOCK_SIZE*BLOCK_SIZE;  37:         for(int i = 0; i <  
K*BLOCK_SIZE*BLOCK_SIZE; i++)  38:         {  39:               40:             //cache_his[thread_index][i] = cache_his[thread_index][j]/norm_2;              41:             if(norm_2 >= 0.001f)  42:                 Fea_vector[index] = cache_his[thread_index][i]/norm_2;  43:             else   44:                 Fea_vector[index] = 0.0f;  45:    46:             index ++;  47:         }  48:    49:     }  50:    51:    52: }

SVM integration is turning out to be harder than I expected: I have to write extra code to generate the data in the format that libsvm requires.

The HOG features look good; however, I cannot verify them with complete confidence, although I have tested them with some artificial examples.

Spevis

Monday, April 18, 2011

4

No comments:

Post a Comment