/home/kcamus/qaas_runs/169-443-9681/intel/AMG/build/AMG/AMG/parcsr_ls/par_multi_interp.c: 41 - 2061
--------------------------------------------------------------------------------

41: {
42: #ifdef HYPRE_PROFILE
43:    hypre_profile_times[HYPRE_TIMER_ID_MULTIPASS_INTERP] -= hypre_MPI_Wtime();
44: #endif
45: 
46:    MPI_Comm	           comm = hypre_ParCSRMatrixComm(A); 
47:    hypre_ParCSRCommPkg    *comm_pkg = hypre_ParCSRMatrixCommPkg(S);
48:    hypre_ParCSRCommHandle *comm_handle;
49:    hypre_ParCSRCommPkg    *tmp_comm_pkg;
50: 
51:    hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
52:    HYPRE_Real      *A_diag_data = hypre_CSRMatrixData(A_diag);
53:    HYPRE_Int             *A_diag_i = hypre_CSRMatrixI(A_diag);
54:    HYPRE_Int             *A_diag_j = hypre_CSRMatrixJ(A_diag);
55: 
56:    hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
57:    HYPRE_Real      *A_offd_data = NULL;
58:    HYPRE_Int             *A_offd_i = hypre_CSRMatrixI(A_offd);
59:    HYPRE_Int             *A_offd_j = NULL;
60:    HYPRE_Int		   *col_map_offd_A = hypre_ParCSRMatrixColMapOffd(A);
61:    HYPRE_Int		    num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);
62: 
63:    hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
64:    HYPRE_Int             *S_diag_i = hypre_CSRMatrixI(S_diag);
65:    HYPRE_Int             *S_diag_j = hypre_CSRMatrixJ(S_diag);
66: 
67:    hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
68:    HYPRE_Int             *S_offd_i = hypre_CSRMatrixI(S_offd);
69:    HYPRE_Int             *S_offd_j = NULL;
70:    HYPRE_Int		   *col_map_offd_S = hypre_ParCSRMatrixColMapOffd(S);
71:    HYPRE_Int		    num_cols_offd_S = hypre_CSRMatrixNumCols(S_offd);
[...]
83:    HYPRE_Real      *P_offd_data = NULL;
84:    HYPRE_Int             *P_offd_i; /*at first counter of nonzero cols for each row,
85: 				finally will be pointer to start of row */
86:    HYPRE_Int             *P_offd_j = NULL;
87: 
88:    HYPRE_Int              num_sends = 0;
89:    HYPRE_Int             *int_buf_data = NULL;
90:    HYPRE_Int             *send_map_start;
91:    HYPRE_Int             *send_map_elmt;
92:    HYPRE_Int             *send_procs;
93:    HYPRE_Int              num_recvs = 0;
94:    HYPRE_Int             *recv_vec_start;
95:    HYPRE_Int             *recv_procs;
96:    HYPRE_Int             *new_recv_vec_start = NULL;
97:    HYPRE_Int            **Pext_send_map_start = NULL;
98:    HYPRE_Int            **Pext_recv_vec_start = NULL;
99:    HYPRE_Int             *Pext_start = NULL;
100:    HYPRE_Int             *P_ncols = NULL;
101:    
102:    HYPRE_Int             *CF_marker_offd = NULL;
103:    HYPRE_Int             *dof_func_offd = NULL;
104:    HYPRE_Int             *P_marker;
105:    HYPRE_Int             *P_marker_offd = NULL;
106:    HYPRE_Int             *C_array;
107:    HYPRE_Int             *C_array_offd = NULL;
108:    HYPRE_Int             *pass_array = NULL; /* contains points ordered according to pass */
[...]
114:    HYPRE_Int            **P_offd_pass = NULL;
115:    HYPRE_Int            **Pext_pass = NULL;
116:    HYPRE_Int            **new_elmts = NULL; /* new neighbors generated in each pass */
117:    HYPRE_Int             *new_counter = NULL; /* contains no. of new neighbors for
118: 					each pass */
119:    HYPRE_Int             *loc = NULL; /* contains locations for new neighbor 
120: 			connections in int_o_buffer to avoid searching */
121:    HYPRE_Int             *Pext_i = NULL; /*contains P_diag_i and P_offd_i info for nonzero
122: 				cols of off proc neighbors */
123:    HYPRE_Int             *Pext_send_buffer = NULL; /* used to collect global nonzero
124: 				col ids in P_diag for send_map_elmts */
125: 
126:    HYPRE_Int             *map_S_to_new = NULL;
127:    /*HYPRE_Int             *map_A_to_new = NULL;*/
128:    HYPRE_Int             *map_A_to_S = NULL;
129:    HYPRE_Int             *new_col_map_offd = NULL;
130:    HYPRE_Int             *col_map_offd_P = NULL;
131:    HYPRE_Int             *permute = NULL;
[...]
146:    HYPRE_Int             *fine_to_coarse = NULL;
147:    HYPRE_Int             *fine_to_coarse_offd = NULL;
148: 
149:    HYPRE_Int             *assigned = NULL;
150:    HYPRE_Int             *assigned_offd = NULL;
[...]
185:    HYPRE_Int              local_index = -1;
186:    HYPRE_Int              new_num_cols_offd = 0;
[...]
194:    HYPRE_Int * max_num_threads = hypre_CTAlloc(HYPRE_Int, 1);
[...]
202:    max_num_threads[0] = hypre_NumThreads();
203:    cnt_nz_per_thread = hypre_CTAlloc(HYPRE_Int, max_num_threads[0]);
204:    cnt_nz_offd_per_thread = hypre_CTAlloc(HYPRE_Int, max_num_threads[0]);
205:    for(i=0; i < max_num_threads[0]; i++)
206:    {
207:        cnt_nz_offd_per_thread[i] = 0;
208:        cnt_nz_per_thread[i] = 0;
[...]
216:    hypre_MPI_Comm_size(comm,&num_procs);
217:    hypre_MPI_Comm_rank(comm,&my_id);
218: 
219: #ifdef HYPRE_NO_GLOBAL_PARTITION
220:    my_first_cpt = num_cpts_global[0];
221:    /*   total_global_cpts = 0; */
222:     if (my_id == (num_procs -1)) total_global_cpts = num_cpts_global[1];
223:     hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_INT, num_procs-1, comm); 
[...]
229:    if (!comm_pkg)
230:    {
231:       comm_pkg = hypre_ParCSRMatrixCommPkg(A);
232:       if (!comm_pkg)
233:       {
234:           hypre_MatvecCommPkgCreate(A);
235: 
236:           comm_pkg = hypre_ParCSRMatrixCommPkg(A);
237:       }
238:       col_offd_S_to_A = NULL;
239:    }
240: 
241:    if (col_offd_S_to_A)
[...]
252:    if (num_cols_offd_A)
253:    {
254:       A_offd_data = hypre_CSRMatrixData(A_offd);
255:       A_offd_j    = hypre_CSRMatrixJ(A_offd);
256:    }
257: 
258:    if (num_cols_offd)
259:       S_offd_j    = hypre_CSRMatrixJ(S_offd);
260: 
261:    n_fine = hypre_CSRMatrixNumRows(A_diag);
[...]
267:    if (n_fine) fine_to_coarse = hypre_CTAlloc(HYPRE_Int, n_fine);
268: 
269:    n_coarse = 0;
270:    n_SF = 0;
271: #ifdef HYPRE_USING_OPENMP
272: #pragma omp parallel for private(i) reduction(+:n_coarse,n_SF ) HYPRE_SMP_SCHEDULE
[...]
278:    pass_array_size = n_fine-n_coarse-n_SF;
279:    if (pass_array_size) pass_array = hypre_CTAlloc(HYPRE_Int, pass_array_size);
280:    pass_pointer = hypre_CTAlloc(HYPRE_Int, max_num_passes+1);
281:    if (n_fine) assigned = hypre_CTAlloc(HYPRE_Int, n_fine);
282:    P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1);
283:    P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1);
284:    if (n_coarse) C_array = hypre_CTAlloc(HYPRE_Int, n_coarse);
285: 
286:    if (num_cols_offd)
287:    {
288:       CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
289:       if (num_functions > 1) dof_func_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
290:    }
291: 
292:    if (num_procs > 1)
293:    {
294:       num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
295:       send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
296:       send_map_start = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
297:       send_map_elmt = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
298:       num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
299:       recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
300:       recv_vec_start = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
301:       if (send_map_start[num_sends])
302:          int_buf_data = hypre_CTAlloc(HYPRE_Int, send_map_start[num_sends]);
303:    }
304: 
305:    
306:    index = 0;
307:    for (i=0; i < num_sends; i++)
308:    {
309:       start = send_map_start[i];
310:       for (j = start; j < send_map_start[i+1]; j++)
311:          int_buf_data[index++] = CF_marker[send_map_elmt[j]];
312:    }
313:    if (num_procs > 1)
314:    {
315:       comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data,
316: 	 CF_marker_offd);
317:       hypre_ParCSRCommHandleDestroy(comm_handle);
318:    }
319: 
320:    if (num_functions > 1)
321:    {
322:       index = 0;
323:       for (i=0; i < num_sends; i++)
324:       {
325:          start = send_map_start[i];
326:          for (j = start; j < send_map_start[i+1]; j++)
327: 	    int_buf_data[index++] = dof_func[send_map_elmt[j]];
328:       }
329:       if (num_procs > 1)
330:       {
331:          comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data,
332: 	    dof_func_offd);
333:          hypre_ParCSRCommHandleDestroy(comm_handle);
334:       }
335:    }
336: 
337:    n_coarse_offd = 0;
338:    n_SF_offd = 0;
339: #ifdef HYPRE_USING_OPENMP
340: #pragma omp parallel for private(i) reduction(+:n_coarse_offd,n_SF_offd) HYPRE_SMP_SCHEDULE
[...]
346:    if (num_cols_offd)
347:    {
348:       assigned_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
349:       map_S_to_new = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
350:       fine_to_coarse_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
351:       new_col_map_offd = hypre_CTAlloc(HYPRE_Int, n_coarse_offd);
[...]
381:    cnt = 0;
382:    p_cnt = pass_array_size-1;
383:    P_diag_i[0] = 0;
384:    P_offd_i[0] = 0;
    /* First sweep over all fine points:
     *  - C points (CF_marker == 1) get consecutive local coarse indices
     *    (fine_to_coarse), are recorded in C_array, belong to pass 0
     *    (assigned == 0), and contribute exactly one diagonal entry to P.
     *  - F points (CF_marker == -1) are queued from the BACK of pass_array
     *    (filled toward the front via p_cnt--); their pass is still unknown
     *    (assigned == -1) and their P rows start empty.
     *  - Any other marker (points excluded from interpolation; presumably
     *    the n_SF special points counted earlier — TODO confirm) gets an
     *    empty row and is not placed in pass_array. */
385:    for (i = 0; i < n_fine; i++)
386:    {
387:       if (CF_marker[i] == 1)
388:       {
389:          fine_to_coarse[i] = cnt; /* this C point is assigned index
390:                                      coarse_counter on coarse grid,
391:                                      and in column of P */
392:          C_array[cnt++] = i;
393:          assigned[i] = 0;
394:          P_diag_i[i+1] = 1; /* one element in row i1 of P */
395:          P_offd_i[i+1] = 0;
396:       }
397:       else if (CF_marker[i] == -1)
398:       {
399:          pass_array[p_cnt--] = i;
400:          P_diag_i[i+1] = 0;
401:          P_offd_i[i+1] = 0;
402:          assigned[i] = -1;
403:          fine_to_coarse[i] = -1;
404:       }
405:       else
406:       {
407:          P_diag_i[i+1] = 0;
408:          P_offd_i[i+1] = 0;
409:          assigned[i] = -1;
410:          fine_to_coarse[i] = -1;
411:       }
412:    }
413: 
    /* Ship each send-map point's coarse index to the neighbors that see it
     * as an off-processor column.  Local indices are shifted by my_first_cpt
     * to global coarse numbering; -1 (not a C point) is passed through
     * unshifted so receivers can still recognize non-C points. */
414:    index = 0;
415:    for (i=0; i < num_sends; i++)
416:    {
417:       start = send_map_start[i];
418:       for (j = start; j < send_map_start[i+1]; j++)
419:       {
420:          int_buf_data[index] = fine_to_coarse[send_map_elmt[j]];
421:          if (int_buf_data[index] > -1) 
422:             int_buf_data[index] += my_first_cpt;
423:          index++;
424:       }
425:    }
426:    if (num_procs > 1)
427:    {
        /* job 11: integer exchange; result lands in fine_to_coarse_offd,
         * one entry per off-processor column of S */
428:       comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data,
429:          fine_to_coarse_offd);
430:       hypre_ParCSRCommHandleDestroy(comm_handle);
431:    }
432: 
433:    new_recv_vec_start = hypre_CTAlloc(HYPRE_Int,num_recvs+1);
434: 
435:    if (n_coarse_offd)
436:       C_array_offd = hypre_CTAlloc(HYPRE_Int,n_coarse_offd);
437: 
    /* Renumber off-processor C points consecutively, walking the receive
     * vectors in order so new_recv_vec_start[j]..new_recv_vec_start[j+1]
     * brackets the C points received from neighbor j:
     *  - map_S_to_new[i]     : offd S column -> compressed C-point index
     *  - C_array_offd[cnt]   : compressed index -> offd S column
     *  - new_col_map_offd    : compressed index -> global coarse id
     * Non-C offd points get -1 markers and stay unassigned (pass unknown). */
438:    cnt = 0;
439:    new_recv_vec_start[0] = 0;
440:    for (j = 0; j < num_recvs; j++)
441:    {
442:       for (i = recv_vec_start[j]; i < recv_vec_start[j+1]; i++)
443:       {
444:          if (CF_marker_offd[i] == 1)
445:          {
446:             map_S_to_new[i] = cnt;
447:             C_array_offd[cnt] = i;
448:             new_col_map_offd[cnt++] = fine_to_coarse_offd[i];
449:             assigned_offd[i] = 0; 
450:          }
451:          else
452:          {
453:             assigned_offd[i] = -1;
454:             map_S_to_new[i] = -1;
455:          }
456:       }
457:       new_recv_vec_start[j+1] = cnt;
458:    }
459: 
460:    cnt = 0;
461:    hypre_TFree(fine_to_coarse_offd);
462: 
463:    if (col_offd_S_to_A)
464:    {
465:       map_A_to_S = hypre_CTAlloc(HYPRE_Int,num_cols_offd_A);
466:       for (i=0; i < num_cols_offd_A; i++)
467:       {
468:         if (cnt < num_cols_offd && col_map_offd_A[i] == col_map_offd[cnt])
469:            map_A_to_S[i] = cnt++;
[...]
    /* Determine pass 1 (F points strongly connected to at least one C point)
     * and count their interpolation nonzeros per row. */
479:    pass_pointer[0] = 0;
480:    pass_pointer[1] = 0;
481:    total_nz = n_coarse;  /* accumulates total number of nonzeros in P_diag */
482:    total_nz_offd = 0; /* accumulates total number of nonzeros in P_offd */
483: 
484:    cnt = 0;
485:    cnt_offd = 0;
486:    cnt_nz = 0;
487:    cnt_nz_offd = 0;
    /* Scan the unassigned tail of pass_array backwards.  The loop bound
     * (i > cnt-1) shrinks as points are promoted to the front. */
488:    for (i = pass_array_size-1; i > cnt-1; i--)
489:    {
490:      i1 = pass_array[i];
     /* each strong local connection to a C point becomes a P_diag entry */
491:      for (j=S_diag_i[i1]; j < S_diag_i[i1+1]; j++)
492:      {
493:         j1 = S_diag_j[j];
494:         if (CF_marker[j1] == 1)
495:         {
496:            P_diag_i[i1+1]++;
497:            cnt_nz++;
498:            assigned[i1] = 1;
499:         }
500:      }
     /* likewise for strong connections to off-processor C points */
501:      for (j=S_offd_i[i1]; j < S_offd_i[i1+1]; j++)
502:      {
503:         j1 = S_offd_j[j];
504:         if (CF_marker_offd[j1] == 1)
505:         {
506:            P_offd_i[i1+1]++;
507:            cnt_nz_offd++;
508:            assigned[i1] = 1;
509:         }
510:      }
     /* Promote a pass-1 point: swap it with the element at position cnt.
      * The i++ cancels the loop's i--, so the displaced element (still
      * unclassified) is re-examined on the next iteration. */
511:      if (assigned[i1] == 1)
512:      {
513:         pass_array[i++] = pass_array[cnt];
514:         pass_array[cnt++] = i1;
515:      }
516:    }
517: 
    /* pass_array[pass_pointer[1]..pass_pointer[2]) now holds pass-1 points */
518:    pass_pointer[2] = cnt;
[...]
525:    index = 0;
526:    for (i=0; i < num_sends; i++)
527:    {
528:       start = send_map_start[i];
529:       for (j = start; j < send_map_start[i+1]; j++)
530:       {    int_buf_data[index++] = assigned[send_map_elmt[j]]; }
531:    }
532:    if (num_procs > 1)
533:    {
534:       comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data,
535:                      assigned_offd);
536:       hypre_ParCSRCommHandleDestroy(comm_handle);
[...]
544:    pass = 2;
545:    local_pass_array_size = pass_array_size - cnt;
546:    hypre_MPI_Allreduce(&local_pass_array_size, &global_pass_array_size, 1, HYPRE_MPI_INT,
547:                        hypre_MPI_SUM, comm);
548:    while (global_pass_array_size && pass < max_num_passes)
549:    {
550:       for (i = pass_array_size-1; i > cnt-1; i--)
551:       {
552:          i1 = pass_array[i];
553:          no_break = 1;
554:          for (j=S_diag_i[i1]; j < S_diag_i[i1+1]; j++)
555:          {
556:             j1 = S_diag_j[j];
557:             if (assigned[j1] == pass-1)
[...]
568:             for (j=S_offd_i[i1]; j < S_offd_i[i1+1]; j++)
569:             {
570:                j1 = S_offd_j[j];
571:                if (assigned_offd[j1] == pass-1)
572:                {
573:                   pass_array[i++] = pass_array[cnt];
574:                   pass_array[cnt++] = i1; 
575:                   assigned[i1] = pass;
576:                   break;
[...]
583:       pass++;
584:       pass_pointer[pass] = cnt;
585: 
586:       local_pass_array_size = pass_array_size - cnt;
587:       hypre_MPI_Allreduce(&local_pass_array_size, &global_pass_array_size, 1, HYPRE_MPI_INT,
588:                           hypre_MPI_SUM, comm);
589:       index = 0;
590:       for (i=0; i < num_sends; i++)
591:       {
592:          start = send_map_start[i];
593:          for (j = start; j < send_map_start[i+1]; j++)
594:          {   int_buf_data[index++] = assigned[send_map_elmt[j]]; }
595:       }
596:       if (num_procs > 1)
597:       {
598:          comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data,
599:                        assigned_offd);
600:          hypre_ParCSRCommHandleDestroy(comm_handle);
601:       }
602:    }
603: 
604:    hypre_TFree(int_buf_data);
605: 
606:    num_passes = pass;
607: 
608:    P_diag_pass = hypre_CTAlloc(HYPRE_Int*,num_passes); /* P_diag_pass[i] will contain
609:                                  all column numbers for points of pass i */
610: 
611:    P_diag_pass[1] = hypre_CTAlloc(HYPRE_Int,cnt_nz);
612: 
613:    P_diag_start = hypre_CTAlloc(HYPRE_Int, n_fine); /* P_diag_start[i] contains
614:            pointer to begin of column numbers in P_pass for point i,
615:            P_diag_i[i+1] contains number of columns for point i */
616: 
617:    P_offd_start = hypre_CTAlloc(HYPRE_Int, n_fine);
618: 
619:    if (num_procs > 1)
620:    {
621:       P_offd_pass = hypre_CTAlloc(HYPRE_Int*,num_passes);
622: 
623:       if (cnt_nz_offd)
624:          P_offd_pass[1] = hypre_CTAlloc(HYPRE_Int,cnt_nz_offd);
625:       else
626:          P_offd_pass[1] = NULL;
627: 
628:       new_elmts = hypre_CTAlloc(HYPRE_Int*,num_passes);
629: 
630:       new_counter = hypre_CTAlloc(HYPRE_Int, num_passes+1);
631: 
632:       new_counter[0] = 0;
633:       new_counter[1] = n_coarse_offd;
634:       new_num_cols_offd = n_coarse_offd;
635: 
636:       new_elmts[0] = new_col_map_offd;
[...]
    /* Second sweep over pass-1 points: record the actual column indices
     * counted above.  P_diag_pass[1] holds local coarse indices
     * (fine_to_coarse), P_offd_pass[1] holds compressed off-processor
     * C-point indices (map_S_to_new).  P_diag_start / P_offd_start remember
     * where each row's entries begin inside these pass arrays. */
643:    cnt_nz = 0;
644:    cnt_nz_offd = 0;
645:    /* JBS: Possible candidate for threading */
646:    for (i=pass_pointer[1]; i < pass_pointer[2]; i++)
647:    {
648:       i1 = pass_array[i];
649:       P_diag_start[i1] = cnt_nz;
650:       P_offd_start[i1] = cnt_nz_offd;
651:       for (j=S_diag_i[i1]; j < S_diag_i[i1+1]; j++)
652:       {
653:          j1 = S_diag_j[j];
654:          if (CF_marker[j1] == 1)
655:          {   P_diag_pass[1][cnt_nz++] = fine_to_coarse[j1]; }
656:       }
657:       for (j=S_offd_i[i1]; j < S_offd_i[i1+1]; j++)
658:       {
659:          j1 = S_offd_j[j];
660:          if (CF_marker_offd[j1] == 1)
661:          {   P_offd_pass[1][cnt_nz_offd++] = map_S_to_new[j1]; }
662:       }
663:    }
664: 
665: 
    /* running totals later used to size P_diag_j / P_offd_j */
666:    total_nz += cnt_nz;
667:    total_nz_offd += cnt_nz_offd;
668: 
669:    if (num_procs > 1)
670:    {
671:       tmp_comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg,1);
672:       Pext_send_map_start = hypre_CTAlloc(HYPRE_Int*,num_passes);
673:       Pext_recv_vec_start = hypre_CTAlloc(HYPRE_Int*,num_passes);
674:       Pext_pass = hypre_CTAlloc(HYPRE_Int*,num_passes);
675:       Pext_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd+1);
676:       if (num_cols_offd) Pext_start = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
677:       if (send_map_start[num_sends])
678:          P_ncols = hypre_CTAlloc(HYPRE_Int,send_map_start[num_sends]);
679: #ifdef HYPRE_USING_OPENMP
680: #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
681: #endif 
682:       for (i=0; i < num_cols_offd+1; i++)
683:       {   Pext_i[i] = 0; }
684: #ifdef HYPRE_USING_OPENMP
685: #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
[...]
691:    old_Pext_send_size = 0;
692:    old_Pext_recv_size = 0;
693:    for (pass=2; pass < num_passes; pass++)
694:    {
695:       
696:       if (num_procs > 1)
697:       {
698:          Pext_send_map_start[pass] = hypre_CTAlloc(HYPRE_Int, num_sends+1);
699:          Pext_recv_vec_start[pass] = hypre_CTAlloc(HYPRE_Int, num_recvs+1);
700:          Pext_send_size = 0;
701:          Pext_send_map_start[pass][0] = 0;
702:          
703:          for (i=0; i < num_sends; i++)
704:          {
705: #ifdef HYPRE_USING_OPENMP
706: #pragma omp parallel for private(j,j1) reduction(+:Pext_send_size) HYPRE_SMP_SCHEDULE
[...]
717:             Pext_send_map_start[pass][i+1] = Pext_send_size;
718:          }
719: 
720:          comm_handle = hypre_ParCSRCommHandleCreate (11, comm_pkg,
721:                                               P_ncols, &Pext_i[1]);
722:          hypre_ParCSRCommHandleDestroy(comm_handle);
723: 
724:          if (Pext_send_size > old_Pext_send_size)
725:          {
726:             hypre_TFree(Pext_send_buffer);
727:             Pext_send_buffer = hypre_CTAlloc(HYPRE_Int, Pext_send_size);
728:          }
729:          old_Pext_send_size = Pext_send_size;
730:       }
731: 
732:       cnt_offd = 0;
733:       for (i=0; i < num_sends; i++)
734:       {
735:          for (j=send_map_start[i]; j < send_map_start[i+1]; j++)
736:          {
737:             j1 = send_map_elmt[j];
738:             if (assigned[j1] == pass-1)
739:             {
740:                j_start = P_diag_start[j1];
741:                j_end = j_start+P_diag_i[j1+1];
742:                for (k=j_start; k < j_end; k++)
743:                {
744:                 Pext_send_buffer[cnt_offd++] = my_first_cpt
745:                     +P_diag_pass[pass-1][k];
746:                }
747:                j_start = P_offd_start[j1];
748:                j_end = j_start+P_offd_i[j1+1];
749:                for (k=j_start; k < j_end; k++)
750:                {
751:                  k1 = P_offd_pass[pass-1][k];
752:                  k3 = 0;
753:                  while (k3 < pass-1)
754:                  {
755:                     if (k1 < new_counter[k3+1])
756:                     {
757:                        k2 = k1-new_counter[k3];
758:                        Pext_send_buffer[cnt_offd++] = new_elmts[k3][k2];
759:                        break;
[...]
768:       if (num_procs > 1)
769:       {
770:          Pext_recv_size = 0;
771:          Pext_recv_vec_start[pass][0] = 0;
772:          cnt_offd = 0;
773:          for (i=0; i < num_recvs; i++)
774:          {
775:             for (j=recv_vec_start[i]; j<recv_vec_start[i+1]; j++)
776:             {
777:                if (assigned_offd[j] == pass-1)
778:                {
779:                   Pext_start[j] = cnt_offd;
780:                   cnt_offd += Pext_i[j+1];
781:                }
782:             }
783:             Pext_recv_size = cnt_offd;
784:             Pext_recv_vec_start[pass][i+1] = Pext_recv_size;
785:          }
786: 
787:          hypre_ParCSRCommPkgComm(tmp_comm_pkg) = comm;
788:          hypre_ParCSRCommPkgNumSends(tmp_comm_pkg) = num_sends;
789:          hypre_ParCSRCommPkgSendProcs(tmp_comm_pkg) = send_procs;
790:          hypre_ParCSRCommPkgSendMapStarts(tmp_comm_pkg) = 
791:             Pext_send_map_start[pass];
792:          hypre_ParCSRCommPkgNumRecvs(tmp_comm_pkg) = num_recvs;
793:          hypre_ParCSRCommPkgRecvProcs(tmp_comm_pkg) = recv_procs;
794:          hypre_ParCSRCommPkgRecvVecStarts(tmp_comm_pkg) = 
795:             Pext_recv_vec_start[pass];
796: 
797:          if (Pext_recv_size)
798:          {
799:             Pext_pass[pass] = hypre_CTAlloc(HYPRE_Int, Pext_recv_size);
800:             new_elmts[pass-1] = hypre_CTAlloc(HYPRE_Int,Pext_recv_size);
801:          }
802:          else
803:          {
804:             Pext_pass[pass] = NULL;
805:             new_elmts[pass-1] = NULL;
806:          }
807: 
808:          comm_handle = hypre_ParCSRCommHandleCreate (11, tmp_comm_pkg,
809:             Pext_send_buffer, Pext_pass[pass]);
810:          hypre_ParCSRCommHandleDestroy(comm_handle);
811: 
812:          if (Pext_recv_size > old_Pext_recv_size)
813:          {
814:             hypre_TFree(loc);
815:             loc = hypre_CTAlloc(HYPRE_Int,Pext_recv_size);
816:          }
817:          old_Pext_recv_size = Pext_recv_size;
[...]
823:       for (i=0; i < num_recvs; i++)
824:       {
825:          for (j=recv_vec_start[i]; j < recv_vec_start[i+1]; j++)
826:          {
827:             if (assigned_offd[j] == pass-1)
828:             {
829:                for (j1 = cnt_offd; j1 < cnt_offd+Pext_i[j+1]; j1++)
830:                {
831:                   k1 = Pext_pass[pass][j1];
832:                   k2 = k1 - my_first_cpt;
833:                   if (k2 > -1 && k2 < n_coarse)
834:                   {  Pext_pass[pass][j1] = -k2-1; }
835:                   else
836:                   {
837:                      not_found = 1;
838:                      k3 = 0;
839:                      while (k3 < pass-1 && not_found)
840:                      {
841:                         k2 = hypre_BinarySearch(new_elmts[k3], k1, 
842:                                 (new_counter[k3+1]-new_counter[k3]));
843:                         if (k2 > -1)
844:                         {
845:                             Pext_pass[pass][j1] = k2 + new_counter[k3];
[...]
853:                      if (not_found)
854:                      {
855:                          new_elmts[pass-1][cnt_new] = Pext_pass[pass][j1];
856:                          loc[cnt_new++] = j1;
857:                      }
858:                   }
859:                }
860:                cnt_offd += Pext_i[j+1];
861:             }
862:          }
863:       }
864: 
865:       if (cnt_new)
866:       {
867:          hypre_qsort2i(new_elmts[pass-1],loc,0,cnt_new-1);
868:          cnt = 0;
869:          local_index = new_counter[pass-1];
870:          Pext_pass[pass][loc[0]] = local_index;
871:          
872:          for (i=1; i < cnt_new; i++)
873:          {
874:             if (new_elmts[pass-1][i] > new_elmts[pass-1][cnt])
875:             {
876:                new_elmts[pass-1][++cnt] = new_elmts[pass-1][i];
877:                local_index++;
878:             }
879:             Pext_pass[pass][loc[i]] = local_index;
880:          }
881:          new_counter[pass] = local_index+1;
882:       }
883:       else if (num_procs > 1)
884:          new_counter[pass] = new_counter[pass-1];
885:       
886:       if (new_num_cols_offd < local_index+1)
887:       {    new_num_cols_offd = local_index+1; }
888: 
889:       pass_length = pass_pointer[pass+1] - pass_pointer[pass];
890:       #ifdef HYPRE_USING_OPENMP
891:       #pragma omp parallel private(i,my_thread_num,num_threads,thread_start,thread_stop,cnt_nz,cnt_nz_offd,i1,j,j1,j_start,j_end,k1,k,P_marker,P_marker_offd)
[...]
1140:    hypre_TFree(loc);
1141:    hypre_TFree(P_ncols);
1142:    hypre_TFree(Pext_send_buffer);
1143:    hypre_TFree(new_recv_vec_start);
1144:    hypre_TFree(cnt_nz_per_thread);
1145:    hypre_TFree(cnt_nz_offd_per_thread);
1146:    hypre_TFree(max_num_threads);
1147:    
1148:    P_diag_j = hypre_CTAlloc(HYPRE_Int,total_nz);
1149:    P_diag_data = hypre_CTAlloc(HYPRE_Real,total_nz);
1150: 
1151:    
1152:    if (total_nz_offd)
1153:    {
1154:       P_offd_j = hypre_CTAlloc(HYPRE_Int,total_nz_offd);
1155:       P_offd_data = hypre_CTAlloc(HYPRE_Real,total_nz_offd);
1156:    }
1157: 
    /* Convert per-row nonzero COUNTS (accumulated in P_diag_i[i+1] /
     * P_offd_i[i+1]) into CSR row pointers by an in-place prefix sum. */
1158:    for (i=0; i < n_fine; i++)
1159:    {
1160:       P_diag_i[i+1] += P_diag_i[i];
1161:       P_offd_i[i+1] += P_offd_i[i];
[...]
1167: #pragma omp parallel for private(i,i1) HYPRE_SMP_SCHEDULE
[...]
1177:    if (weight_option) /*if this is set, weights are separated into
1178:                         negative and positive offdiagonals and accumulated 
1179:                         accordingly */
1180:    {
1181:         
1182:        pass_length = pass_pointer[2]-pass_pointer[1];
1183: #ifdef HYPRE_USING_OPENMP
1184: #pragma omp parallel private(thread_start,thread_stop,my_thread_num,num_threads,P_marker,P_marker_offd,i,i1,sum_C_pos,sum_C_neg,sum_N_pos,sum_N_neg,j_start,j_end,j,k1,cnt,j1,cnt_offd,diagonal,alfa,beta)
[...]
1304:       if (n_coarse) hypre_TFree(C_array);
1305:       hypre_TFree(C_array_offd);
1306:       hypre_TFree(P_diag_pass[1]);
1307:       if (num_procs > 1) hypre_TFree(P_offd_pass[1]);
1308:   
1309: 
1310:       for (pass = 2; pass < num_passes; pass++)
1311:       {
1312: 
1313:          if (num_procs > 1)
1314:          {
1315:             Pext_send_size = Pext_send_map_start[pass][num_sends];
1316:             if (Pext_send_size > old_Pext_send_size)
1317:             {
1318:                hypre_TFree(Pext_send_data);
1319:                Pext_send_data = hypre_CTAlloc(HYPRE_Real, Pext_send_size);
1320:             }
1321:             old_Pext_send_size = Pext_send_size;
1322: 
1323:             cnt_offd = 0;
1324:             for (i=0; i < num_sends; i++)
1325:             {
1326:                for (j=send_map_start[i]; j < send_map_start[i+1]; j++)
1327:                {
1328:                   j1 = send_map_elmt[j];
1329:                   if (assigned[j1] == pass-1)
1330:                   {
1331:                      j_start = P_diag_i[j1];
1332:                      j_end = P_diag_i[j1+1];
1333:                      for (k=j_start; k < j_end; k++)
1334:                      {   Pext_send_data[cnt_offd++] = P_diag_data[k]; }
1335:                      j_start = P_offd_i[j1];
1336:                      j_end = P_offd_i[j1+1];
1337:                      for (k=j_start; k < j_end; k++)
1338:                      {  Pext_send_data[cnt_offd++] = P_offd_data[k]; }
1339:                   }
1340:                }
1341:             }
1342:  
1343:             hypre_ParCSRCommPkgNumSends(tmp_comm_pkg) = num_sends;
1344:             hypre_ParCSRCommPkgSendMapStarts(tmp_comm_pkg) = 
1345:                 Pext_send_map_start[pass];
1346:             hypre_ParCSRCommPkgNumRecvs(tmp_comm_pkg) = num_recvs;
1347:             hypre_ParCSRCommPkgRecvVecStarts(tmp_comm_pkg) = 
1348:                 Pext_recv_vec_start[pass];
1349: 
1350:             Pext_recv_size = Pext_recv_vec_start[pass][num_recvs];
1351: 
1352:             if (Pext_recv_size > old_Pext_recv_size)
1353:             {
1354:                hypre_TFree(Pext_data);
1355:                Pext_data = hypre_CTAlloc(HYPRE_Real, Pext_recv_size);
1356:             }
1357:             old_Pext_recv_size = Pext_recv_size;
1358: 
1359:             comm_handle = hypre_ParCSRCommHandleCreate (1, tmp_comm_pkg,
1360:                 Pext_send_data, Pext_data);
1361:             hypre_ParCSRCommHandleDestroy(comm_handle);
1362: 
1363:             hypre_TFree(Pext_send_map_start[pass]);
1364:             hypre_TFree(Pext_recv_vec_start[pass]);
1365:          }
1366:          
1367:          pass_length = pass_pointer[pass+1]-pass_pointer[pass];
1368: #ifdef HYPRE_USING_OPENMP
1369: #pragma omp parallel private(thread_start,thread_stop,my_thread_num,num_threads,P_marker,P_marker_offd,i,i1,sum_C_neg,sum_C_pos,sum_N_neg,sum_N_pos,j_start,j_end,cnt,j,k1,cnt_offd,j1,k,alfa,beta,diagonal,C_array,C_array_offd)
[...]
1562:          hypre_TFree(P_diag_pass[pass]);
1563:          if (num_procs > 1)
1564:          {
1565:             hypre_TFree(P_offd_pass[pass]);
1566:             hypre_TFree(Pext_pass[pass]);
[...]
1573:        pass_length = pass_pointer[2]-pass_pointer[1];
1574: #ifdef HYPRE_USING_OPENMP
1575: #pragma omp parallel private(thread_start,thread_stop,my_thread_num,num_threads,k,k1,i,i1,j,j1,sum_C,sum_N,j_start,j_end,cnt,tmp_marker,tmp_marker_offd,cnt_offd,diagonal,alfa)
[...]
1669:       if (n_coarse) hypre_TFree(C_array);
1670:       hypre_TFree(C_array_offd);
1671:       hypre_TFree(P_diag_pass[1]);
1672:       if (num_procs > 1) hypre_TFree(P_offd_pass[1]);
1673:       
1674:       for (pass = 2; pass < num_passes; pass++)
1675:       {
1676: 
1677:          if (num_procs > 1)
1678:          {
1679:             Pext_send_size = Pext_send_map_start[pass][num_sends];
1680:             if (Pext_send_size > old_Pext_send_size)
1681:             {
1682:                hypre_TFree(Pext_send_data);
1683:                Pext_send_data = hypre_CTAlloc(HYPRE_Real, Pext_send_size);
1684:             }
1685:             old_Pext_send_size = Pext_send_size;
1686: 
1687:             cnt_offd = 0;
1688:             for (i=0; i < num_sends; i++)
1689:             {
1690:                for (j=send_map_start[i]; j < send_map_start[i+1]; j++)
1691:                {
1692:                   j1 = send_map_elmt[j];
1693:                   if (assigned[j1] == pass-1)
1694:                   {
1695:                      j_start = P_diag_i[j1];
1696:                      j_end = P_diag_i[j1+1];
1697:                      for (k=j_start; k < j_end; k++)
1698:                      {
1699:                         Pext_send_data[cnt_offd++] = P_diag_data[k];
1700:                      }
1701:                      j_start = P_offd_i[j1];
1702:                      j_end = P_offd_i[j1+1];
1703:                      for (k=j_start; k < j_end; k++)
1704:                      {
1705:                         Pext_send_data[cnt_offd++] = P_offd_data[k];
[...]
1711:             hypre_ParCSRCommPkgNumSends(tmp_comm_pkg) = num_sends;
1712:             hypre_ParCSRCommPkgSendMapStarts(tmp_comm_pkg) = 
1713:                 Pext_send_map_start[pass];
1714:             hypre_ParCSRCommPkgNumRecvs(tmp_comm_pkg) = num_recvs;
1715:             hypre_ParCSRCommPkgRecvVecStarts(tmp_comm_pkg) = 
1716:                 Pext_recv_vec_start[pass];
1717: 
1718:             Pext_recv_size = Pext_recv_vec_start[pass][num_recvs];
1719: 
1720:             if (Pext_recv_size > old_Pext_recv_size)
1721:             {
1722:                hypre_TFree(Pext_data);
1723:                Pext_data = hypre_CTAlloc(HYPRE_Real, Pext_recv_size);
1724:             }
1725:             old_Pext_recv_size = Pext_recv_size;
1726: 
1727:             comm_handle = hypre_ParCSRCommHandleCreate (1, tmp_comm_pkg,
1728:                 Pext_send_data, Pext_data);
1729:             hypre_ParCSRCommHandleDestroy(comm_handle);
1730: 
1731:             hypre_TFree(Pext_send_map_start[pass]);
1732:             hypre_TFree(Pext_recv_vec_start[pass]);
1733:          }
1734: 
1735:          pass_length = pass_pointer[pass+1]-pass_pointer[pass];
1736: #ifdef HYPRE_USING_OPENMP
1737: #pragma omp parallel private(thread_start,thread_stop,my_thread_num,num_threads,k,k1,i,i1,j,j1,sum_C,sum_N,j_start,j_end,cnt,tmp_marker,tmp_marker_offd,cnt_offd,diagonal,alfa,tmp_array,tmp_array_offd)
[...]
1884:          hypre_TFree(P_diag_pass[pass]);
1885:          if (num_procs > 1)
1886:          {
1887:             hypre_TFree(P_offd_pass[pass]);
1888:             hypre_TFree(Pext_pass[pass]);
1889:          }
1890:       }
1891:    }
1892: 
1893:    hypre_TFree(CF_marker_offd);
1894:    hypre_TFree(Pext_send_map_start);
1895:    hypre_TFree(Pext_recv_vec_start);
1896:    hypre_TFree(dof_func_offd);
1897:    hypre_TFree(Pext_send_data);
1898:    hypre_TFree(Pext_data);
1899:    hypre_TFree(P_diag_pass);
1900:    hypre_TFree(P_offd_pass);
1901:    hypre_TFree(Pext_pass);
1902:    hypre_TFree(P_diag_start);
1903:    hypre_TFree(P_offd_start);
1904:    hypre_TFree(Pext_start);
1905:    hypre_TFree(Pext_i);
1906:    hypre_TFree(fine_to_coarse);
1907:    hypre_TFree(assigned);
1908:    hypre_TFree(assigned_offd);
1909:    hypre_TFree(pass_pointer);
1910:    hypre_TFree(pass_array);
1911:    hypre_TFree(map_S_to_new);
1912:    hypre_TFree(map_A_to_S);
1913:    if (num_procs > 1) hypre_TFree(tmp_comm_pkg);
1914: 
1915:     P = hypre_ParCSRMatrixCreate(comm,
[...]
1922:                                 P_offd_i[n_fine]);
1923:    P_diag = hypre_ParCSRMatrixDiag(P);
1924:    hypre_CSRMatrixData(P_diag) = P_diag_data;
1925:    hypre_CSRMatrixI(P_diag) = P_diag_i;
1926:    hypre_CSRMatrixJ(P_diag) = P_diag_j;
1927:    P_offd = hypre_ParCSRMatrixOffd(P);
1928:    hypre_CSRMatrixData(P_offd) = P_offd_data;
1929:    hypre_CSRMatrixI(P_offd) = P_offd_i;
1930:    hypre_CSRMatrixJ(P_offd) = P_offd_j;
1931:    hypre_ParCSRMatrixOwnsRowStarts(P) = 0;
1932: 
   /* Compress P: remove coefficients smaller than trunc_factor * Max,
      and/or keep at most <P_max_elmts> absolutely-largest coefficients per row */
1935: 
1936:    if (trunc_factor != 0.0 || P_max_elmts != 0)
1937:    {
1938:       hypre_BoomerAMGInterpTruncation(P, trunc_factor, P_max_elmts);
[...]
1944:       P_offd_j = hypre_CSRMatrixJ(P_offd);
1945:    }
1946:    P_offd_size = P_offd_i[n_fine];
1947: 
1948:    num_cols_offd_P = 0;
1949:    if (P_offd_size)
1950:    {
1951:       if (new_num_cols_offd > num_cols_offd)
1952:       {   P_marker_offd = hypre_CTAlloc(HYPRE_Int,new_num_cols_offd); }
1953:       else
1954:       {   P_marker_offd = hypre_CTAlloc(HYPRE_Int,num_cols_offd); }
1955: #ifdef HYPRE_USING_OPENMP
1956: #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
1957: #endif
1958:       for (i=0; i < new_num_cols_offd; i++)
1959:       {   P_marker_offd[i] = 0; }
1960:  
1961:       num_cols_offd_P = 0;
1962:       for (i=0; i < P_offd_size; i++)
1963:       {
1964:          index = P_offd_j[i];
1965:          if (!P_marker_offd[index])
1966:          {
1967:             num_cols_offd_P++;
1968:             P_marker_offd[index] = 1;
1969:          }
1970:       }
1971: 
1972:       col_map_offd_P = hypre_CTAlloc(HYPRE_Int,num_cols_offd_P);
1973:       permute = hypre_CTAlloc(HYPRE_Int, new_counter[num_passes-1]);
1974: 
1975: #ifdef HYPRE_USING_OPENMP
1976: #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
1977: #endif
1978:       for (i=0; i < new_counter[num_passes-1]; i++)
1979:          permute[i] = -1;
1980: 
1981:       cnt = 0;
1982:       for (i=0; i < num_passes-1; i++)
1983:       {
1984:          for (j=new_counter[i]; j < new_counter[i+1]; j++)
1985:          {
1986:             if (P_marker_offd[j])
1987:             {
1988:                col_map_offd_P[cnt] = new_elmts[i][j-new_counter[i]];
1989:                permute[j] = col_map_offd_P[cnt++];
1990:             }
1991:          }
1992:       }
1993: 
1994:       hypre_qsort0(col_map_offd_P,0,num_cols_offd_P-1);
1995: 
1996: #ifdef HYPRE_USING_OPENMP
1997: #pragma omp parallel for private(i,k1) HYPRE_SMP_SCHEDULE
[...]
2007: #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
2008: #endif
2009:       for (i=0; i < P_offd_size; i++)
2010:       {   P_offd_j[i] = permute[P_offd_j[i]]; }
2011: 
2012:       hypre_TFree(P_marker_offd);
2013:    }
2014:    if (num_procs > 1)
2015:    {
2016:       for (i=0; i < num_passes-1; i++)
2017:          hypre_TFree(new_elmts[i]);
2018:    }
2019:    hypre_TFree(permute);
2020:    hypre_TFree(new_elmts);
2021:    hypre_TFree(new_counter);
2022: 
2023:    if (num_cols_offd_P)
2024:    {
2025:         hypre_ParCSRMatrixColMapOffd(P) = col_map_offd_P;
2026:         hypre_CSRMatrixNumCols(P_offd) = num_cols_offd_P;
2027:    }
2028: 
2029:    if (n_SF)
2030:    {
2031: #ifdef HYPRE_USING_OPENMP
2032: #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
[...]
2038:    if (num_procs > 1)
2039:    {
2040:         hypre_MatvecCommPkgCreate(P);
2041:    }
2042: 
2043:    *P_ptr = P;
[...]
2061: }
