/home/kcamus/qaas_runs/169-443-9681/intel/AMG/build/AMG/AMG/parcsr_ls/par_multi_interp.c: 891 - 1134
--------------------------------------------------------------------------------

891:       #pragma omp parallel private(i,my_thread_num,num_threads,thread_start,thread_stop,cnt_nz,cnt_nz_offd,i1,j,j1,j_start,j_end,k1,k,P_marker,P_marker_offd)
[...]
900:           my_thread_num = hypre_GetThreadNum();
901:           num_threads = hypre_NumActiveThreads();
902:           thread_start = (pass_length/num_threads)*my_thread_num;
903:           if (my_thread_num == num_threads-1)
904:           {  thread_stop = pass_length; }
905:           else
906:           {  thread_stop = (pass_length/num_threads)*(my_thread_num+1); }
907:           thread_start += pass_pointer[pass];
908:           thread_stop += pass_pointer[pass];
909: 
910:           /* Local initializations */
911:           cnt_nz = 0;
912:           cnt_nz_offd = 0;
913:        
914:           /* This block of code is meant to be moved to the top of the parallel region
915:            * starting before the loop over num_passes. */
916:           P_marker = hypre_CTAlloc(HYPRE_Int, n_coarse); /* marks points to see if they're counted */
917:           for (i=0; i < n_coarse; i++)
918:           {   P_marker[i] = -1; }
919:           if (new_num_cols_offd == local_index+1) 
920:           {
921:              P_marker_offd = hypre_CTAlloc(HYPRE_Int,new_num_cols_offd);
922:              for (i=0; i < new_num_cols_offd; i++)
923:              {   P_marker_offd[i] = -1; }
924:           }
925:           else if (n_coarse_offd) 
926:           {
927:              P_marker_offd = hypre_CTAlloc(HYPRE_Int, n_coarse_offd);
[...]
939:           for (i=thread_start; i < thread_stop; i++)
940:           {
941:              i1 = pass_array[i];
942:              P_diag_start[i1] = cnt_nz;
943:              P_offd_start[i1] = cnt_nz_offd;
944:              for (j=S_diag_i[i1]; j < S_diag_i[i1+1]; j++)
945:              {
946:                 j1 = S_diag_j[j];
947:                 if (assigned[j1] == pass-1)
948:                 {
949:                    j_start = P_diag_start[j1];
950:                    j_end = j_start+P_diag_i[j1+1];
951:                    for (k=j_start; k < j_end; k++)
952:                    {
953:                       k1 = P_diag_pass[pass-1][k];
954:                       if (P_marker[k1] != i1)
955:                       {
956:                          cnt_nz++;
957:                          P_diag_i[i1+1]++;
958:                          P_marker[k1] = i1;
959:                       }
960:                    }
961:                    j_start = P_offd_start[j1];
962:                    j_end = j_start+P_offd_i[j1+1];
963:                    for (k=j_start; k < j_end; k++)
964:                    {
965:                       k1 = P_offd_pass[pass-1][k];
966:                       if (P_marker_offd[k1] != i1)
967:                       {
968:                          cnt_nz_offd++;
969:                          P_offd_i[i1+1]++;
970:                          P_marker_offd[k1] = i1;
[...]
976:              for (j=S_offd_i[i1]; j < S_offd_i[i1+1]; j++)
977:              {
978:                 j1 = S_offd_j[j];
979:                 if (assigned_offd[j1] == pass-1)
980:                 {
981:                    j_start = Pext_start[j1];
982:                    j_end = j_start+Pext_i[j1+1];
983:                    for (k=j_start; k < j_end; k++)
984:                    {
985:                       k1 = Pext_pass[pass][k];
986:                       if (k1 < 0)
987:                       {
988:                          if (P_marker[-k1-1] != i1)
989:                          {
990:                             cnt_nz++;
991:                             P_diag_i[i1+1]++;
992:                             P_marker[-k1-1] = i1;
993:                          }
994:                       }
995:                       else if (P_marker_offd[k1] != i1)
996:                       {
997:                          cnt_nz_offd++;
998:                          P_offd_i[i1+1]++;
999:                          P_marker_offd[k1] = i1;
[...]
1008:           if(my_thread_num == 0)
1009:           {   max_num_threads[0] = num_threads; }
1010:           cnt_nz_offd_per_thread[my_thread_num] = cnt_nz_offd;
1011:           cnt_nz_per_thread[my_thread_num] = cnt_nz;
1012: #ifdef HYPRE_USING_OPENMP
1013: #pragma omp barrier
1014: #endif
1015:           if(my_thread_num == 0)
1016:           {
1017:               for(i = 1; i < max_num_threads[0]; i++)
1018:               {
1019:                   cnt_nz_offd_per_thread[i] += cnt_nz_offd_per_thread[i-1];
1020:                   cnt_nz_per_thread[i] += cnt_nz_per_thread[i-1];
1021:               }
1022:           }
1023: #ifdef HYPRE_USING_OPENMP
1024: #pragma omp barrier
1025: #endif
1026:           if(my_thread_num > 0)
1027:           {
1028:               /* update this thread's section of P_diag_start and P_offd_start 
1029:                * with the num of nz's counted by previous threads */
1030:               for (i=thread_start; i < thread_stop; i++)
1031:                {
1032:                    i1 = pass_array[i];
1033:                    P_diag_start[i1] += cnt_nz_per_thread[my_thread_num-1];
1034:                    P_offd_start[i1] += cnt_nz_offd_per_thread[my_thread_num-1];
[...]
1040:               cnt_nz = cnt_nz_per_thread[max_num_threads[0]-1];
1041:               cnt_nz_offd = cnt_nz_offd_per_thread[max_num_threads[0]-1];
1042:               
1043:               /* Update the total nz counts */
1044:               total_nz += cnt_nz;
1045:               total_nz_offd += cnt_nz_offd;
1046:               
1047:               /* Allocate P_diag_pass and P_offd_pass for all threads */
1048:               P_diag_pass[pass] = hypre_CTAlloc(HYPRE_Int, cnt_nz);
1049:               if (cnt_nz_offd)
1050:                  P_offd_pass[pass] = hypre_CTAlloc(HYPRE_Int, cnt_nz_offd);
1051:               else if (num_procs > 1)
1052:                  P_offd_pass[pass] = NULL;
1053:           }
1054: #ifdef HYPRE_USING_OPENMP
1055: #pragma omp barrier
1056: #endif
1057: 
1058:           /* offset cnt_nz and cnt_nz_offd to point to the starting
1059:            * point in P_diag_pass and P_offd_pass for each thread */
1060:           if(my_thread_num > 0)
1061:           {
1062:               cnt_nz = cnt_nz_per_thread[my_thread_num-1];
1063:               cnt_nz_offd = cnt_nz_offd_per_thread[my_thread_num-1];
1064:           }
1065:           else
1066:           {
1067:               cnt_nz = 0;
1068:               cnt_nz_offd = 0;
1069:           }
1070: 
1071:           /* Set P_diag_pass and P_offd_pass */
1072:           for (i=thread_start; i < thread_stop; i++)
1073:           {
1074:              i1 = pass_array[i];
1075:              for (j=S_diag_i[i1]; j < S_diag_i[i1+1]; j++)
1076:              {
1077:                 j1 = S_diag_j[j];
1078:                 if (assigned[j1] == pass-1)
1079:                 {
1080:                    j_start = P_diag_start[j1];
1081:                    j_end = j_start+P_diag_i[j1+1];
1082:                    for (k=j_start; k < j_end; k++)
1083:                    {
1084:                       k1 = P_diag_pass[pass-1][k];
1085:                       if (P_marker[k1] != -i1-1)
1086:                       {
1087:                           P_diag_pass[pass][cnt_nz++] = k1;
1088:                           P_marker[k1] = -i1-1;
1089:                       }
1090:                    }
1091:                    j_start = P_offd_start[j1];
1092:                    j_end = j_start+P_offd_i[j1+1];
1093:                    for (k=j_start; k < j_end; k++)
1094:                    {
1095:                       k1 = P_offd_pass[pass-1][k];
1096:                       if (P_marker_offd[k1] != -i1-1)
1097:                       {
1098:                          P_offd_pass[pass][cnt_nz_offd++] = k1;
1099:                          P_marker_offd[k1] = -i1-1;
1100:                       }
1101:                    }
1102:                 }
1103:              }
1104:              for (j=S_offd_i[i1]; j < S_offd_i[i1+1]; j++)
1105:              {
1106:                 j1 = S_offd_j[j];
1107:                 if (assigned_offd[j1] == pass-1)
1108:                 {
1109:                    j_start = Pext_start[j1];
1110:                    j_end = j_start+Pext_i[j1+1];
1111:                    for (k=j_start; k < j_end; k++)
1112:                    {
1113:                       k1 = Pext_pass[pass][k];
1114:                       if (k1 < 0)
1115:                       {
1116:                          if (P_marker[-k1-1] != -i1-1)
1117:                          {
1118:                             P_diag_pass[pass][cnt_nz++] = -k1-1;
1119:                             P_marker[-k1-1] = -i1-1;
1120:                          }
1121:                       }
1122:                       else if (P_marker_offd[k1] != -i1-1)
1123:                       {
1124:                          P_offd_pass[pass][cnt_nz_offd++] = k1;
1125:                          P_marker_offd[k1] = -i1-1;
[...]
1132:           hypre_TFree(P_marker);
1133:           if ( (n_coarse_offd) || (new_num_cols_offd  == local_index+1) )
1134:           {    hypre_TFree(P_marker_offd); }
