Detection Kernel.
// Compute_win: builds one L2-normalised descriptor per (block, thread) pair.
//
// Layout assumed by the indexing below:
//   * blockIdx.(x,y) is the origin of a window inside the histogram image and
//     threadIdx.(x,y) is the offset inside that window, so the sampled
//     position is (blockIdx.x + threadIdx.x, blockIdx.y + threadIdx.y).
//   * His_Img is planar: bin b of position p lives at p + b*Img_Attr.Image_size.
//   * Each thread gathers the 2x2 neighbourhood (x, x+1) x (y, y+1) for every
//     one of the K bins (4*K values), L2-normalises that vector and writes
//     K*BLOCK_SIZE*BLOCK_SIZE floats to Fea_vector.
//   * Fea_vector is laid out as
//     [block][thread][K*BLOCK_SIZE*BLOCK_SIZE], with
//     Win_Attr.win_width*Win_Attr.win_height thread slots per block.
//
// Shared memory: MAX_WIN_THREADS * MAX_FEATURE_LEN floats, used purely as
// per-thread scratch (no thread reads another thread's row, so no barrier is
// needed).
//
// NOTE(review): the gather writes exactly 4*K values while the normalisation
// reads K*BLOCK_SIZE*BLOCK_SIZE values; these only agree when BLOCK_SIZE == 2.
// Confirm against the definition of BLOCK_SIZE.
__global__ void Compute_win(float*His_Img,float*Fea_vector)
{
    // Notice: these constants must change as the window size changes.
    enum { MAX_WIN_THREADS = 105, MAX_FEATURE_LEN = 36 };
    __shared__ float cache_his[MAX_WIN_THREADS][MAX_FEATURE_LEN];

    // Flat thread index inside the window (row-major over the window).
    const unsigned int thread_index = threadIdx.x + threadIdx.y * Win_Attr.win_width;
    // Flat block index over the grid. A plain multiply is used instead of
    // __umul24, which only considers the low 24 bits of each operand.
    const unsigned int block_index = blockIdx.x + blockIdx.y * gridDim.x;

    // Position sampled by this thread in the histogram image.
    const unsigned int tid_x = threadIdx.x + blockIdx.x;
    const unsigned int tid_y = threadIdx.y + blockIdx.y;

    const unsigned int feature_len  = (unsigned int)(K * BLOCK_SIZE * BLOCK_SIZE);
    const unsigned int gathered_len = 4u * (unsigned int)K;

    // The +1 neighbours must stay inside the image and the thread must belong
    // to the window.
    const bool inside_image  = tid_x + 1 < Img_Attr.Image_width &&
                               tid_y + 1 < Img_Attr.Image_height;
    const bool inside_window = threadIdx.x < Win_Attr.win_width &&
                               threadIdx.y < Win_Attr.win_height;
    // Refuse to touch the scratch array when the launch/window configuration
    // does not fit its hard-coded extents; the original code would silently
    // write out of bounds in that case.
    const bool fits_scratch  = thread_index < MAX_WIN_THREADS &&
                               feature_len  <= MAX_FEATURE_LEN &&
                               gathered_len <= MAX_FEATURE_LEN;

    if (inside_image && inside_window && fits_scratch)
    {
        float *row = cache_his[thread_index];

        // Offsets of the 2x2 neighbourhood inside one bin plane.
        const unsigned int index_0 = tid_x       + tid_y       * Img_Attr.Image_width;
        const unsigned int index_1 = (tid_x + 1) + tid_y       * Img_Attr.Image_width;
        const unsigned int index_2 = tid_x       + (tid_y + 1) * Img_Attr.Image_width;
        const unsigned int index_3 = (tid_x + 1) + (tid_y + 1) * Img_Attr.Image_width;

        // Gather: four neighbours per bin, bins interleaved in order.
        unsigned int j = 0;
        for (int Bin_id = 0; Bin_id < K; Bin_id++)
        {
            const size_t plane = (size_t)Bin_id * Img_Attr.Image_size;
            row[j++] = His_Img[index_0 + plane];
            row[j++] = His_Img[index_1 + plane];
            row[j++] = His_Img[index_2 + plane];
            row[j++] = His_Img[index_3 + plane];
        }

        // L2 norm of the gathered vector.
        float norm_2 = 0.0f;
        for (unsigned int i = 0; i < feature_len; i++)
            norm_2 += row[i] * row[i];
        norm_2 = sqrtf(norm_2);

        // Output offset computed in size_t so that large grids do not wrap
        // a 32-bit index.
        const size_t win_threads = (size_t)Win_Attr.win_width * Win_Attr.win_height;
        float *out = Fea_vector +
                     ((size_t)block_index * win_threads + thread_index) * feature_len;

        // Near-zero vectors are emitted as all zeros instead of being divided
        // by a tiny norm. The test is loop-invariant, so it is done once.
        const bool usable = norm_2 >= 0.001f;
        for (unsigned int i = 0; i < feature_len; i++)
            out[i] = usable ? row[i] / norm_2 : 0.0f;
    }
}

SVM integration seems to be harder than I expected. I have to write extra code to generate data to fit the requirements of libsvm.
The HOG features look good; however, I cannot verify them with complete confidence, although I have tested them with some artificial examples.
No comments:
Post a Comment