Monday, April 18, 2011

4

Detection Kernel.

   1: __global__ void Compute_win(float*His_Img,float*Fea_vector)
   2: {
   3:     //Notice constant number here will change as window size changes
   4:     __shared__ float cache_his[105][36];    
   5:  
   6:     //Thread index Index should be less  
   7:     unsigned int thread_index =  threadIdx.x + __umul24(threadIdx.y,Win_Attr.win_width);
   8:     //Block Index 
   9:     unsigned int block_index = blockIdx.x + __umul24(blockIdx.y,gridDim.x);
  10:  
  11:     unsigned int tid_x =  threadIdx.x + blockIdx.x;
  12:     unsigned int tid_y =  threadIdx.y + blockIdx.y;
  13:  
  14:  
  15:     if(tid_x + 1 < Img_Attr.Image_width&&tid_y  + 1< Img_Attr.Image_height&&threadIdx.x < Win_Attr.win_width&&threadIdx.y <Win_Attr.win_height)
  16:     {
  17:         unsigned int index_0 = tid_x  + tid_y*Img_Attr.Image_width;
  18:         unsigned int index_1 = (tid_x + 1) + tid_y*Img_Attr.Image_width;
  19:         unsigned int index_2 = (tid_x) + (tid_y + 1)*Img_Attr.Image_width;
  20:         unsigned int index_3 = (tid_x + 1) + (tid_y + 1)*Img_Attr.Image_width;
  21:         
  22:         float norm_2 = 0;
  23:         unsigned int j = 0;
  24:         
  25:         for(int Bin_id = 0; Bin_id < K ;Bin_id++)
  26:         {
  27:             cache_his[thread_index][j++] = His_Img[index_0 + Bin_id*Img_Attr.Image_size];
  28:             cache_his[thread_index][j++] = His_Img[index_1 + Bin_id*Img_Attr.Image_size];
  29:             cache_his[thread_index][j++] = His_Img[index_2 + Bin_id*Img_Attr.Image_size];
  30:             cache_his[thread_index][j++] = His_Img[index_3 + Bin_id*Img_Attr.Image_size];
  31:         }
  32:         for(int i = 0; i <  K*BLOCK_SIZE*BLOCK_SIZE; i++)
  33:             norm_2 += cache_his[thread_index][i]*cache_his[thread_index][i];
  34:         norm_2 = sqrtf(norm_2);
  35:         
  36:         unsigned int index = block_index*Win_Attr.win_width*Win_Attr.win_height*K*BLOCK_SIZE*BLOCK_SIZE+ thread_index*K*BLOCK_SIZE*BLOCK_SIZE;
  37:         for(int i = 0; i <  K*BLOCK_SIZE*BLOCK_SIZE; i++)
  38:         {
  39:             
  40:             //cache_his[thread_index][i] = cache_his[thread_index][j]/norm_2;            
  41:             if(norm_2 >= 0.001f)
  42:                 Fea_vector[index] = cache_his[thread_index][i]/norm_2;
  43:             else 
  44:                 Fea_vector[index] = 0.0f;
  45:  
  46:             index ++;
  47:         }
  48:  
  49:     }
  50:  
  51:  
  52: }

SVM integration seems to be harder than I expected. I have to write extra code to generate data in the format that libsvm requires.


The HOG features look good; however, I cannot verify them with complete confidence, although I have tested them with some artificial examples.


Capture

No comments:

Post a Comment