How to synchronize cuda threads when they are in the same loop and we need to synchronize them to ex
Posted
by Vickey
on Stack Overflow
See other posts from Stack Overflow
or by Vickey
Published on 2010-05-12T07:03:39Z
Indexed on
2010/05/12
7:44 UTC
Read the original article
Hit count: 264
Hi all, I have written some code and now I want to implement it on a CUDA GPU, but I'm new to synchronization, so please help me with this; it's a little urgent for me. Below I'm presenting the code. I want LOOP 1 to be executed by all threads (hence I want this portion to take advantage of CUDA), and the remaining portion (everything other than LOOP 1) is to be executed by only a single thread.
/*
 * Queue-driven tree construction over linked lists of points.
 *
 * This is a fragment: master_Q, num_mas, cur_count_Q, parent, aloc_mem,
 * tree_index, data, point_len, il2, top_level, max_level, max_distance and
 * point_set are all declared outside of it.
 *
 * Each pass of the outer do/while takes the list stored in the top entry of
 * master_Q, repeatedly picks an element of that list, turns it into a tree
 * node in aloc_mem, and pushes the points that are too far from it back onto
 * master_Q for later processing.
 *
 * NOTE(review): the structure looks like a cover-tree-style build, with
 * il2 presumably 1/ln(2) so that il2 * log(x) is log2(x) -- confirm.
 * NOTE(review): the first statement reads master_Q[*num_mas - 1], so the
 * fragment assumes *num_mas >= 1 on entry.
 */
do {
    /* Take the list held by the top entry of master_Q and pop that entry. */
    point_set = master_Q[(*num_mas) - 1].q;
    List *temp = point_set;   /* scan cursor over point_set */
    List *pa = point_set;     /* node that precedes temp, used for unlinking */
    if (master_Q[num_mas[0] - 1].max) {
        max_level = (int) (ceilf(il2 * log(master_Q[num_mas[0] - 1].max)));
    }
    *num_mas = (*num_mas) - 1;

    while (point_set) {
        List *insert_ele = temp;

        /* Scan for the first element that may be inserted at max_level. */
        while (temp) {
            insert_ele = temp;
            if ((insert_ele->dist[insert_ele->dist_index - 1] <= pow(2, max_level - 1)) || (top_level == max_level)) {
                /* Unlink the chosen element from point_set. */
                if (point_set == temp) {
                    point_set = temp->next;
                    pa = temp->next;
                } else {
                    pa->next = temp->next;
                }
                temp = NULL;   /* terminates the scan once this branch is done */

                List *new_point_set = point_set;
                float maximum_dist = 0;

                if (parent->p_index != insert_ele->point_index) {
                    List *tmp = new_point_set;
                    float *b = &(data[(insert_ele->point_index) * point_len]);

                    /*
                     * LOOP 1: for every remaining point, compute its Euclidean
                     * distance to the chosen point, append it to that point's
                     * dist[] stack and track the largest distance seen.
                     * Iterations write only to their own list node, apart from
                     * the running maximum.
                     */
                    while (tmp) {
                        float *c = &(data[(tmp->point_index) * point_len]);
                        float sum = 0.;
                        int j = 0;

                        /* Two coordinates per iteration. */
                        for (; j + 1 < point_len; j += 2) {
                            float d1 = b[j] - c[j];
                            float d2 = b[j + 1] - c[j + 1];
                            d1 *= d1;
                            d2 *= d2;
                            sum = sum + d1 + d2;
                        }
                        /*
                         * Odd point_len: handle the last coordinate on its own
                         * instead of reading one float past the point.
                         */
                        if (j < point_len) {
                            float d1 = b[j] - c[j];
                            sum = sum + d1 * d1;
                        }

                        tmp->dist[tmp->dist_index] = sqrt(sum);
                        if (maximum_dist < tmp->dist[tmp->dist_index]) {
                            maximum_dist = tmp->dist[tmp->dist_index];
                        }
                        tmp->dist_index = tmp->dist_index + 1;
                        tmp = tmp->next;
                    }
                    max_distance = maximum_dist;
                }

                /*
                 * Repeatedly split new_point_set into a "far" list (latest
                 * distance > 2^(max_level-1)) and the points that stay, adding
                 * one tree node for insert_ele per pass. The loop ends when a
                 * pass finds maxp == 0, which clears both loop variables.
                 */
                while (new_point_set || insert_ele) {
                    List *far, *par, *tmp, *tmp_new;
                    far = NULL;       /* head of the far list */
                    par = NULL;       /* last node kept in new_point_set; set before first use */
                    tmp = new_point_set;
                    tmp_new = NULL;   /* tail of the far list */
                    float level_dist = pow(2, max_level - 1);
                    float maxdist = 0, maxp = 0;   /* largest far / kept distance */

                    while (tmp) {
                        if (tmp->dist[(tmp->dist_index) - 1] > level_dist) {
                            if (maxdist < tmp->dist[tmp->dist_index - 1]) {
                                maxdist = tmp->dist[tmp->dist_index - 1];
                            }
                            /* Unlink tmp from new_point_set ... */
                            if (tmp == new_point_set) {
                                new_point_set = tmp->next;
                                par = tmp->next;
                            } else {
                                par->next = tmp->next;
                            }
                            /* ... and append it to the far list. */
                            if (far == NULL) {
                                far = tmp;
                                tmp_new = far;
                            } else {
                                tmp_new->next = tmp;
                                tmp_new = tmp;
                            }
                            /* Drop the distance pushed by LOOP 1 for far points. */
                            if (parent->p_index != insert_ele->point_index) {
                                tmp->dist_index = tmp->dist_index - 1;
                            }
                            tmp = tmp->next;
                            tmp_new->next = NULL;
                        } else {
                            par = tmp;
                            if (maxp < tmp->dist[(tmp->dist_index) - 1]) {
                                maxp = tmp->dist[(tmp->dist_index) - 1];
                            }
                            tmp = tmp->next;
                        }
                    }

                    if (0 == maxp) {
                        /*
                         * No kept point has a non-zero distance: emit the node
                         * for insert_ele, then make every kept point a child.
                         */
                        tmp = new_point_set;
                        aloc_mem[*tree_index].p_index = insert_ele->point_index;
                        aloc_mem[*tree_index].no_child = 0;
                        aloc_mem[*tree_index].level = max_level--;
                        parent->children_index[parent->no_child++] = *tree_index;
                        parent = &(aloc_mem[*tree_index]);
                        tree_index[0] = tree_index[0] + 1;
                        while (tmp) {
                            aloc_mem[*tree_index].p_index = tmp->point_index;
                            aloc_mem[(*tree_index)].no_child = 0;
                            aloc_mem[(*tree_index)].level = master_Q[(*cur_count_Q) - 1].level;
                            parent->children_index[parent->no_child] = *tree_index;
                            parent->no_child = parent->no_child + 1;
                            (*tree_index)++;
                            tmp = tmp->next;
                        }
                        cur_count_Q[0] = cur_count_Q[0] - 1;
                        new_point_set = NULL;
                    }

                    /* Push the far list (possibly NULL) for later processing. */
                    master_Q[*num_mas].q = far;
                    master_Q[*num_mas].parent = parent;
                    master_Q[*num_mas].valid = true;
                    master_Q[*num_mas].max = maxdist;
                    master_Q[*num_mas].level = max_level;
                    num_mas[0] = num_mas[0] + 1;

                    if (0 != maxp) {
                        /* Emit a node for insert_ele and descend into it. */
                        aloc_mem[*tree_index].p_index = insert_ele->point_index;
                        aloc_mem[*tree_index].no_child = 0;
                        aloc_mem[*tree_index].level = max_level;
                        parent->children_index[parent->no_child++] = *tree_index;
                        parent = &(aloc_mem[*tree_index]);
                        tree_index[0] = tree_index[0] + 1;

                        /*
                         * maxp is non-zero here, so the level is always derived
                         * from it; jump down to that level when it is lower
                         * than the next one, otherwise step down by one.
                         */
                        int new_level = ((int) (ceilf(il2 * log(maxp)))) + 1;
                        if (new_level < (max_level - 1)) {
                            max_level = new_level;
                        } else {
                            max_level--;
                        }
                    }

                    if (0 == maxp) {
                        insert_ele = NULL;
                    }
                }
            } else {
                if (NULL == temp->next) {
                    /*
                     * Reached the end without finding a candidate: push the
                     * whole list back.
                     * NOTE(review): .max is not written here, so the entry
                     * keeps whatever value that slot held before -- confirm
                     * this is intended.
                     */
                    master_Q[*num_mas].q = point_set;
                    master_Q[*num_mas].parent = parent;
                    master_Q[*num_mas].valid = true;
                    master_Q[*num_mas].level = max_level;
                    num_mas[0] = num_mas[0] + 1;
                }
                pa = temp;
                temp = temp->next;
            }
        }

        if ((*num_mas) > 1) {
            /*
             * Move every element of the top list to the front of the list in
             * the entry below it, then pop the top entry.
             */
            List *temp2 = master_Q[(*num_mas) - 1].q;
            while (temp2) {
                List *temp3 = master_Q[(*num_mas) - 2].q;
                master_Q[(*num_mas) - 2].q = temp2;
                if ((master_Q[(*num_mas) - 1].parent)->p_index != (master_Q[(*num_mas) - 2].parent)->p_index) {
                    temp2->dist_index = temp2->dist_index - 1;
                }
                temp2 = temp2->next;   /* advance before the link is overwritten */
                master_Q[(*num_mas) - 2].q->next = temp3;
            }
            num_mas[0] = num_mas[0] - 1;
        }

        /* Load the (new) top entry as the current working set and pop it. */
        point_set = master_Q[(*num_mas) - 1].q;
        temp = point_set;
        pa = point_set;
        parent = master_Q[(*num_mas) - 1].parent;
        max_level = master_Q[(*num_mas) - 1].level;
        if (master_Q[(*num_mas) - 1].max) {
            /* Cap max_level at the level derived from the entry's max distance. */
            if (max_level > ((int) (ceilf(il2 * log(master_Q[(*num_mas) - 1].max)))) + 1) {
                max_level = ((int) (ceilf(il2 * log(master_Q[(*num_mas) - 1].max)))) + 1;
            }
        }
        num_mas[0] = num_mas[0] - 1;
    }
} while (*num_mas > 0);
© Stack Overflow or respective owner