/home/kcamus/qaas_runs/169-443-9681/intel/AMG/build/AMG/AMG/parcsr_ls/par_lr_interp.c: 1196 - 1757
--------------------------------------------------------------------------------

1196: #pragma omp parallel private(i,my_thread_num,num_threads,start,stop,coarse_counter,jj_counter,jj_counter_offd, P_marker, P_marker_offd,jj,kk,i1,k1,loc_col,jj_begin_row,jj_begin_row_offd,jj_end_row,jj_end_row_offd,diagonal,sum,sgn,jj1,i2,distribute,strong_f_marker)
[...]
1217:        strong_f_marker = -2;
1218:        coarse_counter = 0;
1219:        jj_counter = start_indexing;
1220:        jj_counter_offd = start_indexing;
1221:        if (n_fine)
1222:        {  
1223:            P_marker = hypre_CTAlloc(HYPRE_Int, n_fine);
1224:            for (i = 0; i < n_fine; i++)     
1225:            {  P_marker[i] = -1; }
1226:        }
1227:        if (full_off_procNodes)
1228:        {  
1229:          P_marker_offd = hypre_CTAlloc(HYPRE_Int, full_off_procNodes); 
1230:          for (i = 0; i < full_off_procNodes; i++)
1231:          {  P_marker_offd[i] = -1;}
1232:        }
1233: 
1234:        /* this thread's row range */
1235:        my_thread_num = hypre_GetThreadNum();
1236:        num_threads = hypre_NumActiveThreads();
1237:        start = (n_fine/num_threads)*my_thread_num;
1238:        if (my_thread_num == num_threads-1)
1239:        {  stop = n_fine; }
1240:        else
1241:        {  stop = (n_fine/num_threads)*(my_thread_num+1); }
1242: 
1243:        /* loop over rows */
1244:        for (i = start; i < stop; i++)
1245:        {
1246:          P_diag_i[i] = jj_counter;
1247:          if (num_procs > 1)
1248:            P_offd_i[i] = jj_counter_offd;
1249:          
1250:          if (CF_marker[i] >= 0)
1251:          {
1252:            jj_counter++;
1253:            fine_to_coarse[i] = coarse_counter;
1254:            coarse_counter++;
[...]
1262:          else if (CF_marker[i] != -3)
1263:          {
1264:            for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
1265:            {
1266:              i1 = S_diag_j[jj];           
1267:              if (CF_marker[i1] >= 0)
1268:              { /* i1 is a C point */
1269:                if (P_marker[i1] < P_diag_i[i])
1270:                {
1271:                  P_marker[i1] = jj_counter;
1272:                  jj_counter++;
1273:                }
1274:              }
1275:              else if (CF_marker[i1] != -3)
1276:              { /* i1 is an F point, loop through its strong neighbors */
1277:                for (kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
1278:                {
1279:                  k1 = S_diag_j[kk];
1280:                  if (CF_marker[k1] >= 0)
1281:                  {
1282:                    if(P_marker[k1] < P_diag_i[i])
1283:                    {
1284:                      P_marker[k1] = jj_counter;
1285:                      jj_counter++;
1286:                    }
1287:                  } 
1288:                }
1289:                if(num_procs > 1)
1290:                {
1291:                  for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
1292:                  {
1293:                    if(col_offd_S_to_A)
1294:                      k1 = col_offd_S_to_A[S_offd_j[kk]];
1295:                    else
1296:                      k1 = S_offd_j[kk];
1297:                    if (CF_marker_offd[k1] >= 0)
1298:                    {
1299:                      if(P_marker_offd[k1] < P_offd_i[i])
1300:                      {
1301:                        tmp_CF_marker_offd[k1] = 1;
1302:                        P_marker_offd[k1] = jj_counter_offd;
1303:                        jj_counter_offd++;
[...]
1311:            if (num_procs > 1)
1312:            {
1313:              for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
1314:              {
1315:                i1 = S_offd_j[jj];           
1316:                if(col_offd_S_to_A)
1317:                  i1 = col_offd_S_to_A[i1];
1318:                if (CF_marker_offd[i1] >= 0)
1319:                {
1320:                  if(P_marker_offd[i1] < P_offd_i[i])
1321:                  {
1322:                    tmp_CF_marker_offd[i1] = 1;
1323:                    P_marker_offd[i1] = jj_counter_offd;
1324:                    jj_counter_offd++;
1325:                  }
1326:                }
1327:                else if (CF_marker_offd[i1] != -3)
1328:                { /* F point; look at neighbors of i1. Sop contains global col
1329:                   * numbers and entries that could be in S_diag or S_offd or
1330:                   * neither. */
1331:                  for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
1332:                  {
1333:                    k1 = Sop_j[kk];
1334:                    if(k1 >= col_1 && k1 < col_n)
1335:                    { /* In S_diag */
1336:                      loc_col = k1-col_1;
1337:                      if(P_marker[loc_col] < P_diag_i[i])
1338:                      {
1339:                        P_marker[loc_col] = jj_counter;
1340:                        jj_counter++;
1341:                      }
1342:                    }
1343:                    else
1344:                    {
1345:                      loc_col = -k1 - 1; 
1346:                      if(P_marker_offd[loc_col] < P_offd_i[i])
1347:                      {
1348:                        P_marker_offd[loc_col] = jj_counter_offd;
1349:                        tmp_CF_marker_offd[loc_col] = 1;
1350:                        jj_counter_offd++;
[...]
1363: #pragma omp barrier
1364: #endif
1365:        P_diag_i[stop] = jj_counter; 
1366:        P_offd_i[stop] = jj_counter_offd;
1367:        fine_to_coarse_offset[my_thread_num] = coarse_counter;
1368:        diag_offset[my_thread_num] = jj_counter;
1369:        offd_offset[my_thread_num] = jj_counter_offd;
1370: 
1371:        /* Stitch P_diag_i, P_offd_i and fine_to_coarse together */
1372: #ifdef HYPRE_USING_OPENMP
1373: #pragma omp barrier
1374: #endif
1375:        if(my_thread_num == 0)
1376:        { 
1377:          /* Calculate the offset for P_diag_i and P_offd_i for each thread */
1378:          for (i = 1; i < num_threads; i++)
1379:          { 
1380:            diag_offset[i] = diag_offset[i-1] + diag_offset[i];
1381:            fine_to_coarse_offset[i] = fine_to_coarse_offset[i-1] + fine_to_coarse_offset[i];
1382:            offd_offset[i] = offd_offset[i-1] + offd_offset[i];
1383:          }
1384:        }
1385: #ifdef HYPRE_USING_OPENMP
1386: #pragma omp barrier
1387: #endif
1388:        
1389:        if(my_thread_num > 0)
1390:        {
1391:          /* update row pointer array with offset,
1392:           * making sure to update the row stop index */
1393:          for (i = start+1; i <= stop; i++)
1394:          {
1395:             P_diag_i[i] += diag_offset[my_thread_num-1];
1396:             P_offd_i[i] += offd_offset[my_thread_num-1];
1397:          }
1398:          /* update fine_to_coarse by offsetting with the offset 
1399:           * from the preceding thread */
1400:          for (i = start; i < stop; i++)
1401:          {  
1402:            if(fine_to_coarse[i] >= 0)
1403:            { fine_to_coarse[i] += fine_to_coarse_offset[my_thread_num-1]; }
1404:          }
1405:        }
1406: #ifdef HYPRE_USING_OPENMP
1407: #pragma omp barrier
1408: #endif
1409:       
1410:        if(my_thread_num == 0)
1411:        {
1412:          if (debug_flag==4)
1413:          {
1414:             wall_time = time_getWallclockSeconds() - wall_time;
1415:             hypre_printf("Proc = %d     determine structure    %f\n",
1416:                           my_id, wall_time);
1417:             fflush(NULL);
[...]
1423:          if (debug_flag== 4) wall_time = time_getWallclockSeconds();
1424: 
1425:          P_diag_size =  P_diag_i[n_fine];
1426:          P_offd_size = P_offd_i[n_fine];
1427: 
1428:          if (P_diag_size)
1429:          {   
1430:             P_diag_j    = hypre_CTAlloc(HYPRE_Int, P_diag_size);
1431:             P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size);
1432:          }
1433: 
1434:          if (P_offd_size)
1435:          {   
1436:             P_offd_j    = hypre_CTAlloc(HYPRE_Int, P_offd_size);
1437:             P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size);
1438:          }
1439:        }
1440: 
1441:        /* Fine to coarse mapping */
1442:        if(num_procs > 1   &&   my_thread_num == 0)
1443:        {
1444:          for (i = 0; i < n_fine; i++)
1445:            fine_to_coarse[i] += my_first_cpt;
1446:          
1447:          hypre_alt_insert_new_nodes(comm_pkg, extend_comm_pkg, fine_to_coarse, 
1448:                   full_off_procNodes, 
1449:                   fine_to_coarse_offd);
1450: 
1451:          for (i = 0; i < n_fine; i++)
1452:            fine_to_coarse[i] -= my_first_cpt;
1453:        }
1454: 
1455:        for (i = 0; i < n_fine; i++)     
1456:          P_marker[i] = -1;
1457:          
1458:        for (i = 0; i < full_off_procNodes; i++)
1459:          P_marker_offd[i] = -1;
[...]
1467: #pragma omp barrier
1468: #endif
1469:        for (i = start; i < stop; i++)
1470:        {
1471:          jj_begin_row = P_diag_i[i];        
1472:          jj_begin_row_offd = P_offd_i[i];
1473:          jj_counter = jj_begin_row;
1474:          jj_counter_offd = jj_begin_row_offd;
[...]
1480:          if (CF_marker[i] >= 0)
1481:          {
1482:            P_diag_j[jj_counter]    = fine_to_coarse[i];
1483:            P_diag_data[jj_counter] = one;
[...]
1491:          else if (CF_marker[i] != -3)
1492:          {
1493:            strong_f_marker--;
1494:            for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
[...]
1503:              if (CF_marker[i1] >= 0)
1504:              {
1505:                if (P_marker[i1] < jj_begin_row)
1506:                {
1507:                  P_marker[i1] = jj_counter;
1508:                  P_diag_j[jj_counter]    = fine_to_coarse[i1];
1509:                  P_diag_data[jj_counter] = zero;
1510:                  jj_counter++;
1511:                }
1512:              }
1513:              else  if (CF_marker[i1] != -3)
1514:              {
1515:                P_marker[i1] = strong_f_marker;
1516:                for (kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
1517:                {
1518:                  k1 = S_diag_j[kk];
1519:                  if (CF_marker[k1] >= 0)
1520:                  {
1521:                    if(P_marker[k1] < jj_begin_row)
1522:                    {
1523:                      P_marker[k1] = jj_counter;
1524:                      P_diag_j[jj_counter] = fine_to_coarse[k1];
1525:                      P_diag_data[jj_counter] = zero;
1526:                      jj_counter++;
1527:                    }
1528:                  }
1529:                }
1530:                if(num_procs > 1)
1531:                {
1532:                  for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
1533:                  {
1534:                    if(col_offd_S_to_A)
1535:                     k1 = col_offd_S_to_A[S_offd_j[kk]];
1536:                    else
1537:                     k1 = S_offd_j[kk];
1538:                    if(CF_marker_offd[k1] >= 0)
1539:                    {
1540:                      if(P_marker_offd[k1] < jj_begin_row_offd)
1541:                      {
1542:                        P_marker_offd[k1] = jj_counter_offd;
1543:                        P_offd_j[jj_counter_offd] = k1;
1544:                        P_offd_data[jj_counter_offd] = zero;
1545:                        jj_counter_offd++;
[...]
1553:            if ( num_procs > 1)
1554:            {
1555:              for (jj=S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
1556:              {
1557:                i1 = S_offd_j[jj];
1558:                if(col_offd_S_to_A)
1559:                  i1 = col_offd_S_to_A[i1];
1560:                if ( CF_marker_offd[i1] >= 0)
1561:                {
1562:                  if(P_marker_offd[i1] < jj_begin_row_offd)
1563:                  {
1564:                    P_marker_offd[i1] = jj_counter_offd;
1565:                    P_offd_j[jj_counter_offd] = i1;
1566:                    P_offd_data[jj_counter_offd] = zero;
1567:                    jj_counter_offd++;
1568:                  }
1569:                }
1570:                else if (CF_marker_offd[i1] != -3)
1571:                {
1572:                  P_marker_offd[i1] = strong_f_marker;
1573:                  for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
1574:                  {
1575:                    k1 = Sop_j[kk];
1576:                    /* Find local col number */
1577:                    if(k1 >= col_1 && k1 < col_n)
1578:                    {
1579:                      loc_col = k1-col_1;
1580:                      if(P_marker[loc_col] < jj_begin_row)
1581:                      {
1582:                        P_marker[loc_col] = jj_counter;
1583:                        P_diag_j[jj_counter] = fine_to_coarse[loc_col];
1584:                        P_diag_data[jj_counter] = zero;
1585:                        jj_counter++;
1586:                      }
1587:                    }
1588:                    else
1589:                    { 
1590:                      loc_col = -k1 - 1;
1591:                      if(P_marker_offd[loc_col] < jj_begin_row_offd)
1592:                      {
1593:                        P_marker_offd[loc_col] = jj_counter_offd;
1594:                        P_offd_j[jj_counter_offd]=loc_col;
1595:                        P_offd_data[jj_counter_offd] = zero;
1596:                        jj_counter_offd++;
[...]
1607:            diagonal = A_diag_data[A_diag_i[i]];
1608:            
1609:            for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
1610:            { /* i1 is a c-point and strongly influences i, accumulate
1611:               * a_(i,i1) into interpolation weight */
1612:              i1 = A_diag_j[jj];
1613:              if (P_marker[i1] >= jj_begin_row)
1614:              {
1615:                P_diag_data[P_marker[i1]] += A_diag_data[jj];
1616:              }
1617:              else if(P_marker[i1] == strong_f_marker)
1618:              {
1619:                sum = zero;
1620:                sgn = 1;
1621:                if(A_diag_data[A_diag_i[i1]] < 0) sgn = -1;
1622:                /* Loop over row of A for point i1 and calculate the sum
1623:                 * of the connections to c-points that strongly influence i. */
1624:                for(jj1 = A_diag_i[i1]+1; jj1 < A_diag_i[i1+1]; jj1++)
1625:                {
1626:                  i2 = A_diag_j[jj1];
1627:                  if((P_marker[i2] >= jj_begin_row || i2 == i) && (sgn*A_diag_data[jj1]) < 0)
1628:                    sum += A_diag_data[jj1];
1629:                }
1630:                if(num_procs > 1)
1631:                {
1632:                  for(jj1 = A_offd_i[i1]; jj1< A_offd_i[i1+1]; jj1++)
1633:                  {
1634:                    i2 = A_offd_j[jj1];
1635:                    if(P_marker_offd[i2] >= jj_begin_row_offd &&
1636:                       (sgn*A_offd_data[jj1]) < 0)
1637:                       sum += A_offd_data[jj1];
1638:                  }
1639:                }
1640:                if(sum != 0)
1641:                {
1642:                  distribute = A_diag_data[jj]/sum;
1643:                  /* Loop over row of A for point i1 and do the distribution */
1644:                  for(jj1 = A_diag_i[i1]+1; jj1 < A_diag_i[i1+1]; jj1++)
1645:                  {
1646:                    i2 = A_diag_j[jj1];
1647:                    if(P_marker[i2] >= jj_begin_row && (sgn*A_diag_data[jj1]) < 0)
1648:                       P_diag_data[P_marker[i2]] += 
1649:                       distribute*A_diag_data[jj1];
1650:                        if(i2 == i && (sgn*A_diag_data[jj1]) < 0)
1651:                          diagonal += distribute*A_diag_data[jj1];
1652:                  }
1653:                  if(num_procs > 1)
1654:                  {
1655:                    for(jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
1656:                    {
1657:                      i2 = A_offd_j[jj1];
1658:                      if(P_marker_offd[i2] >= jj_begin_row_offd &&
1659:                                 (sgn*A_offd_data[jj1]) < 0)
1660:                        P_offd_data[P_marker_offd[i2]] +=
[...]
1667:                  diagonal += A_diag_data[jj];
1668:                }
1669:              }
1670:              /* neighbor i1 weakly influences i, accumulate a_(i,i1) into
1671:               * diagonal */
1672:              else if (CF_marker[i1] != -3)
1673:              {
1674:                if(num_functions == 1 || dof_func[i] == dof_func[i1])
1675:                  diagonal += A_diag_data[jj];
1676:              }
1677:            }
1678:            if(num_procs > 1)
1679:            {
1680:              for(jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
1681:              {
1682:                i1 = A_offd_j[jj];
1683:                if(P_marker_offd[i1] >= jj_begin_row_offd)
1684:                  P_offd_data[P_marker_offd[i1]] += A_offd_data[jj];
1685:                else if(P_marker_offd[i1] == strong_f_marker)
1686:                {
1687:                  sum = zero;
1688:                  for(jj1 = A_ext_i[i1]; jj1 < A_ext_i[i1+1]; jj1++)
1689:                  {
1690:                    k1 = A_ext_j[jj1];
1691:                    if(k1 >= col_1 && k1 < col_n)
1692:                    { /* diag */
1693:                      loc_col = k1 - col_1;
1694:                      if(P_marker[loc_col] >= jj_begin_row || loc_col == i)
1695:                        sum += A_ext_data[jj1];
1696:                    }
1697:                    else
1698:                    { 
1699:                      loc_col = -k1 - 1;
1700:                      if(P_marker_offd[loc_col] >= jj_begin_row_offd)
1701:                        sum += A_ext_data[jj1];
1702:                    }
1703:                  }
1704:                  if(sum != 0)
1705:                  {
1706:                    distribute = A_offd_data[jj] / sum;
1707:                    for(jj1 = A_ext_i[i1]; jj1 < A_ext_i[i1+1]; jj1++)
1708:                    {
1709:                      k1 = A_ext_j[jj1];
1710:                      if(k1 >= col_1 && k1 < col_n)
1711:                      { /* diag */
1712:                        loc_col = k1 - col_1;
1713:                        if(P_marker[loc_col] >= jj_begin_row)
1714:                          P_diag_data[P_marker[loc_col]] += distribute*
1715:                           A_ext_data[jj1];
1716:                                if(loc_col == i)
1717:                                  diagonal += distribute*A_ext_data[jj1];
1718:                      }
1719:                      else
1720:                      { 
1721:                        loc_col = -k1 - 1;
1722:                        if(P_marker_offd[loc_col] >= jj_begin_row_offd)
1723:                          P_offd_data[P_marker_offd[loc_col]] += distribute*
[...]
1730:                    diagonal += A_offd_data[jj];
1731:                  }
1732:                }
1733:                else if (CF_marker_offd[i1] != -3)
1734:                {
1735:                  if(num_functions == 1 || dof_func[i] == dof_func_offd[i1])
1736:                    diagonal += A_offd_data[jj];
1737:                }
1738:              }
1739:            }
1740:            if (diagonal)
1741:            {
1742:              for(jj = jj_begin_row; jj < jj_end_row; jj++)
1743:                 P_diag_data[jj] /= -diagonal;
1744:              for(jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
1745:                 P_offd_data[jj] /= -diagonal;
1746:            }
1747:          }
1748:          strong_f_marker--;
[...]
1754:        if (n_fine)
1755:        {  hypre_TFree(P_marker); }  
1756:        if (full_off_procNodes)
1757:        {  hypre_TFree(P_marker_offd); }
