Detection Kernel.
// Builds one L2-normalised descriptor per thread and stores it in Fea_vector.
//
// Layout implied by the indexing below:
//   * each CUDA block handles the detection window anchored at
//     (blockIdx.x, blockIdx.y);
//   * each thread (threadIdx.x, threadIdx.y) handles the descriptor anchored
//     at that offset inside the window, i.e. at image position
//     (threadIdx.x + blockIdx.x, threadIdx.y + blockIdx.y).
//
// His_Img    : read as K consecutive planes of Img_Attr.Image_size floats,
//              each plane addressed as x + y*Img_Attr.Image_width.
// Fea_vector : receives K*BLOCK_SIZE*BLOCK_SIZE floats per thread; the slice
//              for (block_index, thread_index) starts at
//              (block_index*win_width*win_height + thread_index) * feature_len.
//
// NOTE(review): threads rejected by the guards below write nothing, so their
// slice of Fea_vector keeps whatever it held before the launch - presumably
// the host clears the buffer first; confirm against the caller.
__global__ void Compute_win(float*His_Img,float*Fea_vector)
{
    // Capacity of the staging buffer: one row per thread of the window, one
    // column per descriptor value. These must be raised together with the
    // window size / descriptor length; the guard below refuses to run past
    // them instead of silently corrupting shared memory.
    const unsigned int MAX_WIN_THREADS = 105;
    const unsigned int MAX_FEATURES = 36;
    __shared__ float cache_his[MAX_WIN_THREADS][MAX_FEATURES];

    const unsigned int cells_per_side = BLOCK_SIZE;
    const unsigned int feature_len = K*cells_per_side*cells_per_side;

    // Flat thread index inside the window and flat window index inside the grid.
    // Plain multiplies replace __umul24, which only honours the low 24 bits.
    const unsigned int thread_index = threadIdx.x + threadIdx.y*Win_Attr.win_width;
    const unsigned int block_index = blockIdx.x + blockIdx.y*gridDim.x;

    // Image coordinates of the top-left cell this thread reads.
    const unsigned int tid_x = threadIdx.x + blockIdx.x;
    const unsigned int tid_y = threadIdx.y + blockIdx.y;

    const bool inside_window = threadIdx.x < Win_Attr.win_width
                            && threadIdx.y < Win_Attr.win_height;
    // The whole cells_per_side x cells_per_side neighbourhood must lie in the
    // image (for cells_per_side == 2 this is the original "tid + 1 < size").
    const bool inside_image = tid_x + cells_per_side <= Img_Attr.Image_width
                           && tid_y + cells_per_side <= Img_Attr.Image_height;
    const bool fits_cache = thread_index < MAX_WIN_THREADS
                         && feature_len <= MAX_FEATURES;

    // No barrier is used in this kernel, so an early exit is safe.
    if(!inside_window || !inside_image || !fits_cache)
        return;

    // Each thread only ever touches its own row of the staging buffer.
    float* row = cache_his[thread_index];

    // Gather the neighbourhood for every bin. For BLOCK_SIZE == 2 the order is
    // (x,y), (x+1,y), (x,y+1), (x+1,y+1) per bin, exactly as before; the loop
    // now loads as many values as the later loops consume.
    unsigned int j = 0;
    for(int Bin_id = 0; Bin_id < K; Bin_id++)
    {
        const size_t plane = (size_t)Bin_id*Img_Attr.Image_size;
        for(unsigned int dy = 0; dy < cells_per_side; dy++)
        {
            const size_t line = (size_t)(tid_y + dy)*Img_Attr.Image_width;
            for(unsigned int dx = 0; dx < cells_per_side; dx++)
                row[j++] = His_Img[plane + line + (tid_x + dx)];
        }
    }

    // Euclidean length of the gathered descriptor.
    float norm_2 = 0.0f;
    for(unsigned int i = 0; i < feature_len; i++)
        norm_2 += row[i]*row[i];
    norm_2 = sqrtf(norm_2);

    // 64-bit offset: the product of window count, window area and descriptor
    // length can exceed 32 bits on large images.
    const size_t win_threads = (size_t)Win_Attr.win_width*Win_Attr.win_height;
    float* out = Fea_vector + ((size_t)block_index*win_threads + thread_index)*feature_len;

    // Descriptors with (almost) no energy are emitted as all zeros rather than
    // being divided by a tiny norm. The test is hoisted out of the loop.
    const bool usable = norm_2 >= 0.001f;
    for(unsigned int i = 0; i < feature_len; i++)
        out[i] = usable ? row[i]/norm_2 : 0.0f;
}
SVM integration is turning out to be harder than I expected: I have to write extra code to generate data in the format that libsvm requires.
The HOG features look good; however, I cannot verify them with complete confidence, although I have tested them with some artificial examples.
No comments:
Post a Comment