/home/kcamus/qaas_runs/169-443-9681/intel/AMG/build/AMG/AMG/parcsr_ls/par_strength.c: 1668 - 2230
--------------------------------------------------------------------------------

1668: #pragma omp parallel private(i1,i2,i3,jj1,jj2,index)
1669: #endif
1670:    {
1671:       HYPRE_Int my_thread_num = hypre_GetThreadNum();
1672: 
1673:       HYPRE_Int i1_begin, i1_end;
1674:       hypre_GetSimpleThreadPartition(&i1_begin, &i1_end, num_cols_diag_S);
1675: 
1676:       HYPRE_Int *C_temp_diag_j = NULL, *C_temp_offd_j = NULL;
1677:       HYPRE_Int *C_temp_diag_data = NULL, *C_temp_offd_data = NULL;
1678: 
1679:       if (num_paths > 1)
1680:       {
1681:          C_temp_diag_j = C_temp_diag_j_array + num_coarse*my_thread_num;
1682:          C_temp_offd_j = C_temp_offd_j_array + num_cols_offd_C*my_thread_num;
1683: 
1684:          C_temp_diag_data = C_temp_diag_data_array + num_coarse*my_thread_num;
1685:          C_temp_offd_data = C_temp_offd_data_array + num_cols_offd_C*my_thread_num;
1686:       }
1687: 
1688:       HYPRE_Int *S_marker = NULL, *S_marker_offd = NULL;
1689:       if (num_coarse) S_marker = S_marker_array + num_coarse*my_thread_num;
1690:       if (num_cols_offd_C) S_marker_offd = S_marker_offd_array + num_cols_offd_C*my_thread_num;
1691:       for (i1 = 0; i1 < num_coarse; i1++)
1692:       {
1693:          S_marker[i1] = -1;
1694:       }
1695:       for (i1 = 0; i1 < num_cols_offd_C; i1++)
1696:       {
1697:          S_marker_offd[i1] = -1;
[...]
1705:       HYPRE_Int num_nonzeros_diag = 0;
1706:       HYPRE_Int num_nonzeros_offd = 0;
1707: 
1708:       HYPRE_Int ic_begin = num_coarse_prefix_sum[my_thread_num];
1709:       HYPRE_Int ic_end = num_coarse_prefix_sum[my_thread_num + 1];
1710:       HYPRE_Int ic;
1711: 
1712:       if (num_paths == 1)
1713:       {
1714:          for (ic = ic_begin; ic < ic_end; ic++)
[...]
1720:              HYPRE_Int i1 = coarse_to_fine[ic];
1721:        
1722:              HYPRE_Int jj_row_begin_diag = num_nonzeros_diag;
1723:              HYPRE_Int jj_row_begin_offd = num_nonzeros_offd;
1724: 
1725:              C_diag_i[ic] = num_nonzeros_diag;
1726:              if (num_cols_offd_C)
1727:              {
1728:                 C_offd_i[ic] = num_nonzeros_offd;
1729:              }
1730: 
1731:              for (jj1 = S_diag_i[i1]; jj1 < S_diag_i[i1+1]; jj1++)
1732:              {
1733:                  i2 = S_diag_j[jj1];
1734:                  if (CF_marker[i2] > 0)
1735:                  {
1736:                     index = fine_to_coarse[i2];
1737:                     if (S_marker[index] < jj_row_begin_diag)
1738:                     {
1739:                        S_marker[index] = num_nonzeros_diag;
1740:                        num_nonzeros_diag++;
1741:                     }
1742:                  }
1743:                  for (jj2 = S_diag_i[i2]; jj2 < S_diag_i[i2+1]; jj2++)
1744:                  {
1745:                     i3 = S_diag_j[jj2];
1746:                     if (CF_marker[i3] > 0)
1747:                     {
1748:                        index = fine_to_coarse[i3];
1749:                        if (index != ic && S_marker[index] < jj_row_begin_diag)
1750:                        {
1751:                           S_marker[index] = num_nonzeros_diag;
1752:                           num_nonzeros_diag++;
1753:                        }
1754:                     }
1755:                  }
1756:                  for (jj2 = S_offd_i[i2]; jj2 < S_offd_i[i2+1]; jj2++)
1757:                  {
1758:                     i3 = S_offd_j[jj2];
1759:                     if (CF_marker_offd[i3] > 0)
1760:                     {
1761:                        index = map_S_to_C[i3];
1762:                        if (S_marker_offd[index] < jj_row_begin_offd)
1763:                        {
1764:                           S_marker_offd[index] = num_nonzeros_offd;
1765:                           num_nonzeros_offd++;
1766:                        }
1767:                     }
1768:                  }
1769:              }
1770:              for (jj1 = S_offd_i[i1]; jj1 < S_offd_i[i1+1]; jj1++)
1771:              {
1772:                  i2 = S_offd_j[jj1];
1773:                  if (CF_marker_offd[i2] > 0)
1774:                  {
1775:                     index = map_S_to_C[i2];
1776:                     if (S_marker_offd[index] < jj_row_begin_offd)
1777:                     {
1778:                        S_marker_offd[index] = num_nonzeros_offd;
1779:                        num_nonzeros_offd++;
1780:                     }
1781:                  }
1782:                  for (jj2 = S_ext_diag_i[i2]; jj2 < S_ext_diag_i[i2+1]; jj2++)
1783:                  {
1784:                     i3 = S_ext_diag_j[jj2];
1785:                     if (i3 != ic && S_marker[i3] < jj_row_begin_diag)
1786:                     {
1787:                        S_marker[i3] = num_nonzeros_diag;
1788:                        num_nonzeros_diag++;
1789:                     }
1790:                  }
1791:                  for (jj2 = S_ext_offd_i[i2]; jj2 < S_ext_offd_i[i2+1]; jj2++)
1792:                  {
1793:                     i3 = S_ext_offd_j[jj2];
1794:                     if (S_marker_offd[i3] < jj_row_begin_offd)
1795:                     {
1796:                        S_marker_offd[i3] = num_nonzeros_offd;
1797:                        num_nonzeros_offd++;
[...]
1806:          for (ic = ic_begin; ic < ic_end; ic++)
[...]
1812:              HYPRE_Int i1 = coarse_to_fine[ic];
1813:        
1814:              HYPRE_Int jj_row_begin_diag = jj_count_diag;
1815:              HYPRE_Int jj_row_begin_offd = jj_count_offd;
1816: 
1817:              C_diag_i[ic] = num_nonzeros_diag;
1818:              if (num_cols_offd_C)
1819:              {
1820:                 C_offd_i[ic] = num_nonzeros_offd;
1821:              }
1822: 
1823:              for (jj1 = S_diag_i[i1]; jj1 < S_diag_i[i1+1]; jj1++)
1824:              {
1825:                  i2 = S_diag_j[jj1];
1826:                  if (CF_marker[i2] > 0)
1827:                  {
1828:                     index = fine_to_coarse[i2];
1829:                     if (S_marker[index] < jj_row_begin_diag)
1830:                     {
1831:                        S_marker[index] = jj_count_diag;
1832:                        C_temp_diag_data[jj_count_diag - jj_row_begin_diag] = 2;
1833:                        jj_count_diag++;
1834:                     }
1835:                     else
1836:                     {
1837:                        C_temp_diag_data[S_marker[index] - jj_row_begin_diag] += 2;
1838:                     }
1839:                  }
1840:                  for (jj2 = S_diag_i[i2]; jj2 < S_diag_i[i2+1]; jj2++)
1841:                  {
1842:                     i3 = S_diag_j[jj2];
1843:                     if (CF_marker[i3] > 0 && fine_to_coarse[i3] != ic)
1844:                     {
1845:                        index = fine_to_coarse[i3];
1846:                        if (S_marker[index] < jj_row_begin_diag)
1847:                        {
1848:                           S_marker[index] = jj_count_diag;
1849:                           C_temp_diag_data[jj_count_diag - jj_row_begin_diag] = 1;
1850:                           jj_count_diag++;
1851:                        }
1852:                        else
1853:                        {
1854:                           C_temp_diag_data[S_marker[index] - jj_row_begin_diag]++;
1855:                        }
1856:                     }
1857:                  }
1858:                  for (jj2 = S_offd_i[i2]; jj2 < S_offd_i[i2+1]; jj2++)
1859:                  {
1860:                     i3 = S_offd_j[jj2];
1861:                     if (CF_marker_offd[i3] > 0)
1862:                     {
1863:                        index = map_S_to_C[i3];
1864:                        if (S_marker_offd[index] < jj_row_begin_offd)
1865:                        {
1866:                           S_marker_offd[index] = jj_count_offd;
1867:                           C_temp_offd_data[jj_count_offd - jj_row_begin_offd] = 1;
1868:                           jj_count_offd++;
1869:                        }
1870:                        else
1871:                        {
1872:                           C_temp_offd_data[S_marker_offd[index] - jj_row_begin_offd]++;
1873:                        }
1874:                     }
1875:                  }
1876:              }
1877:              for (jj1 = S_offd_i[i1]; jj1 < S_offd_i[i1+1]; jj1++)
1878:              {
1879:                  i2 = S_offd_j[jj1];
1880:                  if (CF_marker_offd[i2] > 0)
1881:                  {
1882:                     index = map_S_to_C[i2];
1883:                     if (S_marker_offd[index] < jj_row_begin_offd)
1884:                     {
1885:                        S_marker_offd[index] = jj_count_offd;
1886:                        C_temp_offd_data[jj_count_offd - jj_row_begin_offd] = 2;
1887:                        jj_count_offd++;
1888:                     }
1889:                     else
1890:                     {
1891:                        C_temp_offd_data[S_marker_offd[index] - jj_row_begin_offd] += 2;
1892:                     }
1893:                  }
1894:                  for (jj2 = S_ext_diag_i[i2]; jj2 < S_ext_diag_i[i2+1]; jj2++)
1895:                  {
1896:                     i3 = S_ext_diag_j[jj2];
1897:                     if (i3 != ic)
1898:                     {
1899:                        if (S_marker[i3] < jj_row_begin_diag)
1900:                        {
1901:                           S_marker[i3] = jj_count_diag;
1902:                           C_temp_diag_data[jj_count_diag - jj_row_begin_diag] = 1;
1903:                           jj_count_diag++;
1904:                        }
1905:                        else
1906:                        {
1907:                           C_temp_diag_data[S_marker[i3] - jj_row_begin_diag]++;
1908:                        }
1909:                     }
1910:                  }
1911:                  for (jj2 = S_ext_offd_i[i2]; jj2 < S_ext_offd_i[i2+1]; jj2++)
1912:                  {
1913:                     i3 = S_ext_offd_j[jj2];
1914:                     if (S_marker_offd[i3] < jj_row_begin_offd)
1915:                     {
1916:                        S_marker_offd[i3] = jj_count_offd;
1917:                        C_temp_offd_data[jj_count_offd - jj_row_begin_offd] = 1;
1918:                        jj_count_offd++;
1919:                     }
1920:                     else
1921:                     {
1922:                        C_temp_offd_data[S_marker_offd[i3] - jj_row_begin_offd]++;
1923:                     }
1924:                  }
1925:              }
1926: 
1927:              for (jj1 = jj_row_begin_diag; jj1 < jj_count_diag; jj1++)
1928:              {
1929:                  if (C_temp_diag_data[jj1 - jj_row_begin_diag] >= num_paths)
1930:                  {
1931:                     ++num_nonzeros_diag;
1932:                  }
1933:                  C_temp_diag_data[jj1 - jj_row_begin_diag] = 0;
1934:              }
1935:              for (jj1 = jj_row_begin_offd; jj1 < jj_count_offd; jj1++)
1936:              {
1937:                  if (C_temp_offd_data[jj1 - jj_row_begin_offd] >= num_paths)
1938:                  {
1939:                     ++num_nonzeros_offd;
1940:                  }
1941:                  C_temp_offd_data[jj1 - jj_row_begin_offd] = 0;
1942:              }
1943:          } /* for each row */
1944:       } /* num_paths > 1 */
1945: 
1946:       hypre_prefix_sum_pair(
1947:          &num_nonzeros_diag, &C_diag_i[num_coarse],
1948:          &num_nonzeros_offd, &C_offd_i[num_coarse],
1949:          prefix_sum_workspace);
1950: 
1951:       for (i1 = 0; i1 < num_coarse; i1++)
1952:       {
1953:          S_marker[i1] = -1;
1954:       }
1955:       for (i1 = 0; i1 < num_cols_offd_C; i1++)
1956:       {
1957:          S_marker_offd[i1] = -1;
1958:       }
1959: 
1960: #ifdef HYPRE_USING_OPENMP
1961: #pragma omp barrier
1962: #pragma omp master
1963: #endif
1964:       {
1965:          if (C_diag_i[num_coarse])
1966:          {
1967:             C_diag_j = hypre_TAlloc(HYPRE_Int, C_diag_i[num_coarse]);
1968:          }
1969:          if (C_offd_i[num_coarse])
1970:          {
1971:             C_offd_j = hypre_TAlloc(HYPRE_Int, C_offd_i[num_coarse]);
[...]
1978:       for (ic = ic_begin; ic < ic_end - 1; ic++)
1979:       {
1980:          if (C_diag_i[ic+1] == C_diag_i[ic] && C_offd_i[ic+1] == C_offd_i[ic])
1981:             CF_marker[coarse_to_fine[ic]] = 2;
1982: 
1983:          C_diag_i[ic] += num_nonzeros_diag;
1984:          C_offd_i[ic] += num_nonzeros_offd;
1985:       }
1986:       if (ic_begin < ic_end)
1987:       {
1988:          C_diag_i[ic] += num_nonzeros_diag;
1989:          C_offd_i[ic] += num_nonzeros_offd;
1990: 
1991:          HYPRE_Int next_C_diag_i = prefix_sum_workspace[2*(my_thread_num + 1)];
1992:          HYPRE_Int next_C_offd_i = prefix_sum_workspace[2*(my_thread_num + 1) + 1];
1993: 
1994:          if (next_C_diag_i == C_diag_i[ic] && next_C_offd_i == C_offd_i[ic])
1995:             CF_marker[coarse_to_fine[ic]] = 2;
1996:       }
1997: 
1998:       if (num_paths == 1)
1999:       {
2000:          for (ic = ic_begin; ic < ic_end; ic++)
[...]
2006:              HYPRE_Int i1 = coarse_to_fine[ic];
2007:        
2008:              HYPRE_Int jj_row_begin_diag = num_nonzeros_diag;
2009:              HYPRE_Int jj_row_begin_offd = num_nonzeros_offd;
2010: 
2011:              for (jj1 = S_diag_i[i1]; jj1 < S_diag_i[i1+1]; jj1++)
2012:              {
2013:                  i2 = S_diag_j[jj1];
2014:                  if (CF_marker[i2] > 0)
2015:                  {
2016:                     index = fine_to_coarse[i2];
2017:                     if (S_marker[index] < jj_row_begin_diag)
2018:                     {
2019:                        S_marker[index] = num_nonzeros_diag;
2020:                        C_diag_j[num_nonzeros_diag] = index;
2021:                        num_nonzeros_diag++;
2022:                     }
2023:                  }
2024:                  for (jj2 = S_diag_i[i2]; jj2 < S_diag_i[i2+1]; jj2++)
2025:                  {
2026:                     i3 = S_diag_j[jj2];
2027:                     if (CF_marker[i3] > 0)
2028:                     {
2029:                        index = fine_to_coarse[i3];
2030:                        if (index != ic && S_marker[index] < jj_row_begin_diag)
2031:                        {
2032:                           S_marker[index] = num_nonzeros_diag;
2033:                           C_diag_j[num_nonzeros_diag] = index;
2034:                           num_nonzeros_diag++;
2035:                        }
2036:                     }
2037:                  }
2038:                  for (jj2 = S_offd_i[i2]; jj2 < S_offd_i[i2+1]; jj2++)
2039:                  {
2040:                     i3 = S_offd_j[jj2];
2041:                     if (CF_marker_offd[i3] > 0)
2042:                     {
2043:                        index = map_S_to_C[i3];
2044:                        if (S_marker_offd[index] < jj_row_begin_offd)
2045:                        {
2046:                           S_marker_offd[index] = num_nonzeros_offd;
2047:                           C_offd_j[num_nonzeros_offd] = index;
2048:                           num_nonzeros_offd++;
2049:                        }
2050:                     }
2051:                  }
2052:              }
2053:              for (jj1 = S_offd_i[i1]; jj1 < S_offd_i[i1+1]; jj1++)
2054:              {
2055:                  i2 = S_offd_j[jj1];
2056:                  if (CF_marker_offd[i2] > 0)
2057:                  {
2058:                     index = map_S_to_C[i2];
2059:                     if (S_marker_offd[index] < jj_row_begin_offd)
2060:                     {
2061:                        S_marker_offd[index] = num_nonzeros_offd;
2062:                        C_offd_j[num_nonzeros_offd] = index;
2063:                        num_nonzeros_offd++;
2064:                     }
2065:                  }
2066:                  for (jj2 = S_ext_diag_i[i2]; jj2 < S_ext_diag_i[i2+1]; jj2++)
2067:                  {
2068:                     i3 = S_ext_diag_j[jj2];
2069:                     if (i3 != ic && S_marker[i3] < jj_row_begin_diag)
2070:                     {
2071:                        S_marker[i3] = num_nonzeros_diag;
2072:                        C_diag_j[num_nonzeros_diag] = i3;
2073:                        num_nonzeros_diag++;
2074:                     }
2075:                  }
2076:                  for (jj2 = S_ext_offd_i[i2]; jj2 < S_ext_offd_i[i2+1]; jj2++)
2077:                  {
2078:                     i3 = S_ext_offd_j[jj2];
2079:                     if (S_marker_offd[i3] < jj_row_begin_offd)
2080:                     {
2081:                        S_marker_offd[i3] = num_nonzeros_offd;
2082:                        C_offd_j[num_nonzeros_offd] = i3;
2083:                        num_nonzeros_offd++;
[...]
2095:          for (ic = ic_begin; ic < ic_end; ic++)
[...]
2101:              HYPRE_Int i1 = coarse_to_fine[ic];
2102:        
2103:              HYPRE_Int jj_row_begin_diag = jj_count_diag;
2104:              HYPRE_Int jj_row_begin_offd = jj_count_offd;
2105: 
2106:              for (jj1 = S_diag_i[i1]; jj1 < S_diag_i[i1+1]; jj1++)
2107:              {
2108:                  i2 = S_diag_j[jj1];
2109:                  if (CF_marker[i2] > 0)
2110:                  {
2111:                     index = fine_to_coarse[i2];
2112:                     if (S_marker[index] < jj_row_begin_diag)
2113:                     {
2114:                        S_marker[index] = jj_count_diag;
2115:                        C_temp_diag_j[jj_count_diag - jj_row_begin_diag] = index;
2116:                        C_temp_diag_data[jj_count_diag - jj_row_begin_diag] = 2;
2117:                        jj_count_diag++;
2118:                     }
2119:                     else
2120:                     {
2121:                        C_temp_diag_data[S_marker[index] - jj_row_begin_diag] += 2;
2122:                     }
2123:                  }
2124:                  for (jj2 = S_diag_i[i2]; jj2 < S_diag_i[i2+1]; jj2++)
2125:                  {
2126:                     i3 = S_diag_j[jj2];
2127:                     if (CF_marker[i3] > 0 && fine_to_coarse[i3] != ic)
2128:                     {
2129:                        index = fine_to_coarse[i3];
2130:                        if (S_marker[index] < jj_row_begin_diag)
2131:                        {
2132:                           S_marker[index] = jj_count_diag;
2133:                           C_temp_diag_j[jj_count_diag - jj_row_begin_diag] = index;
2134:                           C_temp_diag_data[jj_count_diag - jj_row_begin_diag] = 1;
2135:                           jj_count_diag++;
2136:                        }
2137:                        else
2138:                        {
2139:                           C_temp_diag_data[S_marker[index] - jj_row_begin_diag]++;
2140:                        }
2141:                     }
2142:                  }
2143:                  for (jj2 = S_offd_i[i2]; jj2 < S_offd_i[i2+1]; jj2++)
2144:                  {
2145:                     i3 = S_offd_j[jj2];
2146:                     if (CF_marker_offd[i3] > 0)
2147:                     {
2148:                        index = map_S_to_C[i3];
2149:                        if (S_marker_offd[index] < jj_row_begin_offd)
2150:                        {
2151:                           S_marker_offd[index] = jj_count_offd;
2152:                           C_temp_offd_j[jj_count_offd - jj_row_begin_offd] = index;
2153:                           C_temp_offd_data[jj_count_offd - jj_row_begin_offd] = 1;
2154:                           jj_count_offd++;
2155:                        }
2156:                        else
2157:                        {
2158:                           C_temp_offd_data[S_marker_offd[index] - jj_row_begin_offd]++;
2159:                        }
2160:                     }
2161:                  }
2162:              }
2163:              for (jj1 = S_offd_i[i1]; jj1 < S_offd_i[i1+1]; jj1++)
2164:              {
2165:                  i2 = S_offd_j[jj1];
2166:                  if (CF_marker_offd[i2] > 0)
2167:                  {
2168:                     index = map_S_to_C[i2];
2169:                     if (S_marker_offd[index] < jj_row_begin_offd)
2170:                     {
2171:                        S_marker_offd[index] = jj_count_offd;
2172:                        C_temp_offd_j[jj_count_offd - jj_row_begin_offd] = index;
2173:                        C_temp_offd_data[jj_count_offd - jj_row_begin_offd] = 2;
2174:                        jj_count_offd++;
2175:                     }
2176:                     else
2177:                     {
2178:                        C_temp_offd_data[S_marker_offd[index] - jj_row_begin_offd] += 2;
2179:                     }
2180:                  }
2181:                  for (jj2 = S_ext_diag_i[i2]; jj2 < S_ext_diag_i[i2+1]; jj2++)
2182:                  {
2183:                     i3 = S_ext_diag_j[jj2];
2184:                     if (i3 != ic)
2185:                     {
2186:                        if (S_marker[i3] < jj_row_begin_diag)
2187:                        {
2188:                           S_marker[i3] = jj_count_diag;
2189:                           C_temp_diag_j[jj_count_diag - jj_row_begin_diag] = i3;
2190:                           C_temp_diag_data[jj_count_diag - jj_row_begin_diag] = 1;
2191:                           jj_count_diag++;
2192:                        }
2193:                        else
2194:                        {
2195:                           C_temp_diag_data[S_marker[i3] - jj_row_begin_diag]++;
2196:                        }
2197:                     }
2198:                  }
2199:                  for (jj2 = S_ext_offd_i[i2]; jj2 < S_ext_offd_i[i2+1]; jj2++)
2200:                  {
2201:                     i3 = S_ext_offd_j[jj2];
2202:                     if (S_marker_offd[i3] < jj_row_begin_offd)
2203:                     {
2204:                        S_marker_offd[i3] = jj_count_offd;
2205:                        C_temp_offd_j[jj_count_offd - jj_row_begin_offd] = i3;
2206:                        C_temp_offd_data[jj_count_offd - jj_row_begin_offd] = 1;
2207:                        jj_count_offd++;
2208:                     }
2209:                     else
2210:                     {
2211:                        C_temp_offd_data[S_marker_offd[i3] - jj_row_begin_offd]++;
2212:                     }
2213:                  }
2214:              }
2215: 
2216:              for (jj1 = jj_row_begin_diag; jj1 < jj_count_diag; jj1++)
2217:              {
2218:                  if (C_temp_diag_data[jj1 - jj_row_begin_diag] >= num_paths)
2219:                  {
2220:                     C_diag_j[num_nonzeros_diag++] = C_temp_diag_j[jj1 - jj_row_begin_diag];
2221:                  }
2222:                  C_temp_diag_data[jj1 - jj_row_begin_diag] = 0;
2223:              }
2224:              for (jj1 = jj_row_begin_offd; jj1 < jj_count_offd; jj1++)
2225:              {
2226:                  if (C_temp_offd_data[jj1 - jj_row_begin_offd] >= num_paths)
2227:                  {
2228:                     C_offd_j[num_nonzeros_offd++] = C_temp_offd_j[jj1 - jj_row_begin_offd];
2229:                  }
2230:                  C_temp_offd_data[jj1 - jj_row_begin_offd] = 0;
