_cqa_text_report = {
  paths = {
    {
      hint = {
        {
          details = "Calling (and then returning from) a function prevents many compiler optimizations (like vectorization), breaks control flow (which reduces pipeline performance) and executes extra instructions to save/restore the registers used inside it, which is very expensive (dozens of cycles). Consider to inline small functions.\n - GOMP_atomic_start@plt: 1 occurrences\n - omp_get_num_threads@plt: 1 occurrences\n - omp_get_thread_num@plt: 1 occurrences\n",
          title = "CALL instructions",
          txt = "Detected function call instructions.\n",
        },
        {
          details = "These instructions generate more than one micro-operation and only one of them can be decoded during a cycle and the extra micro-operations increase pressure on execution units.\n - IDIV: 1 occurrences\n - VPINSRQ: 1 occurrences\n - VZEROUPPER: 1 occurrences\n",
          title = "Complex instructions",
          txt = "Detected COMPLEX INSTRUCTIONS.\n",
        },
        {
          workaround = "Avoid mixing data with different types. In particular, check if the type of constants is the same as array elements.",
          details = " - CQTO: 1 occurrences\n",
          title = "Conversion instructions",
          txt = "Detected expensive conversion instructions.",
        },
        {
          title = "Type of elements and instruction set",
          txt = "",
        },
        {
          title = "Matching between your function (in the source code) and the binary function",
          txt = "The binary function does not contain any FP arithmetical operations.\nThe binary function is loading 560 bytes.\nThe binary function is storing 32 bytes.",
        },
      },
      expert = {
        {
          title = "General properties",
          txt = "nb instructions    : 239\nnb uops            : 302\nloop length        : 1128\nused x86 registers : 14\nused mmx registers : 0\nused xmm registers : 16\nused ymm registers : 9\nused zmm registers : 16\nnb stack references: 2\n",
        },
        {
          title = "Front-end",
          txt = "MACRO FUSION NOT POSSIBLE\nFIT IN UOP CACHE\nmicro-operation queue: 75.50 cycles\nfront end            : 75.50 cycles\n",
        },
        {
          title = "Back-end",
          txt = "       | P0    | P1    | P2    | P3    | P4   | P5    | P6    | P7\n--------------------------------------------------------------------\nuops   | 57.50 | 57.50 | 10.00 | 10.00 | 9.00 | 57.50 | 57.50 | 9.00\ncycles | 57.50 | 57.50 | 10.00 | 10.00 | 9.00 | 57.50 | 57.50 | 9.00\n\nCycles executing div or sqrt instructions: 24.00-90.00\n",
        },
        {
          title = "Front-end and detailed OoO resources (UFS)",
          txt = "FE+BE cycles: 60.96-104.55\nStall cycles: 0.00-43.50\n",
        },
        {
          title = "Cycles summary",
          txt = "Front-end : 75.50\nDispatch  : 57.50\nDIV/SQRT  : 24.00-90.00\nOverall L1: 75.50-90.00\n",
        },
        {
          title = "Vectorization ratios",
          txt = "all     : 68%\nload    : 90%\nstore   : 100%\nmul     : NA (no mul vectorizable/vectorized instructions)\nadd-sub : 100%\nfma     : NA (no fma vectorizable/vectorized instructions)\ndiv/sqrt: 0%\nother   : 55%\n",
        },
        {
          title = "Vector efficiency ratios",
          txt = "all     : 53%\nload    : 73%\nstore   : 25%\nmul     : NA (no mul vectorizable/vectorized instructions)\nadd-sub : 67%\nfma     : NA (no fma vectorizable/vectorized instructions)\ndiv/sqrt: 12%\nother   : 47%\n",
        },
        {
          title = "Cycles and memory resources usage",
          txt = "Detected masked instructions: assuming all mask elements are active.\nAssuming all data fit into the L1 cache, each call to the function takes 90.00 cycles. At this rate:\n - 4% of peak load performance is reached (6.22 out of 128.00 bytes loaded per cycle (GB/s @ 1GHz))\n - 0% of peak store performance is reached (0.36 out of 64.00 bytes stored per cycle (GB/s @ 1GHz))\n",
        },
        {
          title = "Front-end bottlenecks",
          txt = "Found no such bottlenecks.",
        },
        {
          title = "ASM code",
          txt = "In the binary file, the address of the function is: 474950\n\nInstruction                                                | Nb FU | P0    | P1    | P2   | P3   | P4 | P5    | P6    | P7   | Latency | Recip. throughput\n----------------------------------------------------------------------------------------------------------------------------------------------------------\nPUSH %RBP                                                  | 1     | 0     | 0     | 0.33 | 0.33 | 1  | 0     | 0     | 0.33 | 3       | 1\nMOV %RSP,%RBP                                              | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nPUSH %R13                                                  | 1     | 0     | 0     | 0.33 | 0.33 | 1  | 0     | 0     | 0.33 | 3       | 1\nPUSH %R12                                                  | 1     | 0     | 0     | 0.33 | 0.33 | 1  | 0     | 0     | 0.33 | 3       | 1\nPUSH %RBX                                                  | 1     | 0     | 0     | 0.33 | 0.33 | 1  | 0     | 0     | 0.33 | 3       | 1\nMOV %RDI,%RBX                                              | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nAND $-0x40,%RSP                                            | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nSUB $0x40,%RSP                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nMOV (%RDI),%R12                                            | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 4-5     | 0.50\nCALL 40f0b0 <omp_get_num_threads@plt>                      | 2     | 0     | 0     | 0.33 | 0.33 | 1  | 0     | 1     | 0.33 | 0       | 1\nMOV %EAX,%R13D                                             | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nCALL 40f1f0 <omp_get_thread_num@plt>                       | 2     | 0     | 0     | 0.33 | 0.33 | 1  | 0     | 1     | 0.33 | 0       | 1\nMOVSXD %R13D,%RSI                                          | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nMOVSXD %EAX,%RCX                                           | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nMOV 0x8(%RBX),%RAX                                         | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 4-5     | 0.50\nCQTO                                                       | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 1       | 0.50\nIDIV %RSI                                                  | 57    | 14.25 | 14.25 | 0    | 0    | 0  | 14.25 | 14.25 | 0    | 42-95   | 24-90\nCMP %RDX,%RCX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJL 474ed0 <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x580>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nIMUL %RAX,%RCX                                             | 1     | 0     | 1     | 0    | 0    | 0  | 0     | 0     | 0    | 3       | 1\nVPXOR %XMM3,%XMM3,%XMM3                                    | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nADD %RCX,%RDX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nLEA (%RAX,%RDX,1),%RDI                                     | 1     | 0     | 0.50  | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nCMP %RDI,%RDX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJGE 474e9c <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x54c> | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nLEA -0x1(%RAX),%R9                                         | 1     | 0     | 0.50  | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nMOV %RDX,%R8                                               | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nCMP $0x6,%R9                                               | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJBE 474ee8 <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x598> | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nMOV %RAX,%RCX                                              | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nLEA (%R12,%RDX,8),%R13                                     | 1     | 0     | 0.50  | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVPXOR %XMM0,%XMM0,%XMM0                                    | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nMOV $0x1,%ESI                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nSHR $0x3,%RCX                                              | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 1       | 0.50\nVMOVDQA64 %ZMM0,%ZMM2                                      | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVPBROADCASTQ %RSI,%ZMM5                                    | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nMOV $-0x3,%R9                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nSAL $0x6,%RCX                                              | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 1       | 0.50\nVPTERNLOGD $-0x1,%ZMM3,%ZMM3,%ZMM3                         | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVPBROADCASTQ %R9,%ZMM4                                     | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nLEA (%RCX,%R13,1),%R10                                     | 1     | 0     | 0.50  | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nSUB $0x40,%RCX                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nSHR $0x6,%RCX                                              | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 1       | 0.50\nINC %RCX                                                   | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nAND $0x7,%ECX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474b99 <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x249>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nCMP $0x1,%RCX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474b5f <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x20f>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nCMP $0x2,%RCX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474b2e <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x1de>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nCMP $0x3,%RCX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474afd <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x1ad>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nCMP $0x4,%RCX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474acc <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x17c>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nCMP $0x5,%RCX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474a9b <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x14b>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nCMP $0x6,%RCX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474a6a <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x11a>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nVMOVDQU64 (%R13),%ZMM6                                     | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 5       | 0.50\nADD $0x40,%R13                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nVPCMPEQQ %ZMM5,%ZMM6,%K1                                   | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVPCMPEQQ %ZMM4,%ZMM6,%K2                                   | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVMOVDQA64 %ZMM3,%ZMM2{%K1}{z}                              | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVMOVDQA64 %ZMM3,%ZMM7{%K2}{z}                              | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVPSUBQ %ZMM2,%ZMM0,%ZMM2                                   | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVPSUBQ %ZMM7,%ZMM0,%ZMM0                                   | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVMOVDQU64 (%R13),%ZMM8                                     | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 5       | 0.50\nADD $0x40,%R13                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nVPCMPEQQ %ZMM5,%ZMM8,%K3                                   | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVPCMPEQQ %ZMM4,%ZMM8,%K4                                   | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVMOVDQA64 %ZMM3,%ZMM9{%K3}{z}                              | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVMOVDQA64 %ZMM3,%ZMM10{%K4}{z}                             | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVPSUBQ %ZMM9,%ZMM2,%ZMM2                                   | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVPSUBQ %ZMM10,%ZMM0,%ZMM0                                  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVMOVDQU64 (%R13),%ZMM11                                    | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 5       | 0.50\nADD $0x40,%R13                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nVPCMPEQQ %ZMM5,%ZMM11,%K5                                  | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVPCMPEQQ %ZMM4,%ZMM11,%K6                                  | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVMOVDQA64 %ZMM3,%ZMM12{%K5}{z}                             | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVMOVDQA64 %ZMM3,%ZMM13{%K6}{z}                             | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVPSUBQ %ZMM12,%ZMM2,%ZMM2                                  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVPSUBQ %ZMM13,%ZMM0,%ZMM0                                  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVMOVDQU64 (%R13),%ZMM14                                    | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 5       | 0.50\nADD $0x40,%R13                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nVPCMPEQQ %ZMM5,%ZMM14,%K7                                  | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVPCMPEQQ %ZMM4,%ZMM14,%K1                                  | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVMOVDQA64 %ZMM3,%ZMM15{%K7}{z}                             | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVMOVDQA64 %ZMM3,%ZMM1{%K1}{z}                              | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVPSUBQ %ZMM15,%ZMM2,%ZMM2                                  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVPSUBQ %ZMM1,%ZMM0,%ZMM0                                   | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVMOVDQU64 (%R13),%ZMM7                                     | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 5       | 0.50\nADD $0x40,%R13                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nVPCMPEQQ %ZMM5,%ZMM7,%K2                                   | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVPCMPEQQ %ZMM4,%ZMM7,%K3                                   | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVMOVDQA64 %ZMM3,%ZMM6{%K2}{z}                              | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVMOVDQA64 %ZMM3,%ZMM8{%K3}{z}                              | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVPSUBQ %ZMM6,%ZMM2,%ZMM2                                   | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVPSUBQ %ZMM8,%ZMM0,%ZMM0                                   | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVMOVDQU64 (%R13),%ZMM9                                     | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 5       | 0.50\nADD $0x40,%R13                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nVPCMPEQQ %ZMM5,%ZMM9,%K4                                   | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVPCMPEQQ %ZMM4,%ZMM9,%K5                                   | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVMOVDQA64 %ZMM3,%ZMM10{%K4}{z}                             | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVMOVDQA64 %ZMM3,%ZMM11{%K5}{z}                             | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVPSUBQ %ZMM10,%ZMM2,%ZMM2                                  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVPSUBQ %ZMM11,%ZMM0,%ZMM0                                  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVMOVDQU64 (%R13),%ZMM12                                    | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 5       | 0.50\nADD $0x40,%R13                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nVPCMPEQQ %ZMM5,%ZMM12,%K6                                  | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVPCMPEQQ %ZMM4,%ZMM12,%K7                                  | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVMOVDQA64 %ZMM3,%ZMM13{%K6}{z}                             | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVMOVDQA64 %ZMM3,%ZMM14{%K7}{z}                             | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVPSUBQ %ZMM13,%ZMM2,%ZMM2                                  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVPSUBQ %ZMM14,%ZMM0,%ZMM0                                  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nCMP %R10,%R13                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474d11 <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x3c1>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nVEXTRACTI64X4 $0x1,%ZMM0,%YMM3                             | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 3       | 1\nVEXTRACTI64X4 $0x1,%ZMM2,%YMM10                            | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 3       | 1\nMOV %RAX,%R11                                              | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVPADDQ %YMM3,%YMM0,%YMM1                                   | 1     | 0.33  | 0.33  | 0    | 0    | 0  | 0.33  | 0     | 0    | 1       | 0.33\nAND $-0x8,%R11                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nVEXTRACTI64X2 $0x1,%YMM1,%XMM15                            | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 3       | 1\nADD %R11,%RDX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nVPADDQ %XMM15,%XMM1,%XMM0                                  | 1     | 0.33  | 0.33  | 0    | 0    | 0  | 0.33  | 0     | 0    | 1       | 0.33\nVPSRLDQ $0x8,%XMM0,%XMM7                                   | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVPADDQ %XMM7,%XMM0,%XMM8                                   | 1     | 0.33  | 0.33  | 0    | 0    | 0  | 0.33  | 0     | 0    | 1       | 0.33\nVPADDQ %YMM10,%YMM2,%YMM0                                  | 1     | 0.33  | 0.33  | 0    | 0    | 0  | 0.33  | 0     | 0    | 1       | 0.33\nVEXTRACTI64X2 $0x1,%YMM0,%XMM6                             | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 3       | 1\nVMOVQ %XMM8,%RCX                                           | 1     | 1     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 2       | 1\nVPADDQ %XMM6,%XMM0,%XMM12                                  | 1     | 0.33  | 0.33  | 0    | 0    | 0  | 0.33  | 0     | 0    | 1       | 0.33\nVPSRLDQ $0x8,%XMM12,%XMM13                                 | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVPADDQ %XMM13,%XMM12,%XMM2                                 | 1     | 0.33  | 0.33  | 0    | 0    | 0  | 0.33  | 0     | 0    | 1       | 0.33\nVMOVQ %XMM2,%R10                                           | 1     | 1     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 2       | 1\nCMP %R11,%RAX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474e8e <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x53e>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nSUB %R11,%RAX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nLEA -0x1(%RAX),%R13                                        | 1     | 0     | 0.50  | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nCMP $0x3,%R13                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJBE 474df7 <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x4a7> | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nADD %R8,%R11                                               | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nMOV $0x1,%R8D                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nVMOVDQU (%R12,%R11,8),%YMM14                               | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 5       | 0.50\nVPBROADCASTQ %R8,%YMM5                                     | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nMOV $-0x3,%R11                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nVPBROADCASTQ %R11,%YMM15                                   | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVPCMPEQQ %YMM5,%YMM14,%YMM3                                | 1     | 0.50  | 0.50  | 0    | 0    | 0  | 0     | 0     | 0    | 1       | 0.50\nVPSUBQ %YMM3,%YMM0,%YMM4                                   | 1     | 0.33  | 0.33  | 0    | 0    | 0  | 0.33  | 0     | 0    | 1       | 0.33\nVPCMPEQQ %YMM15,%YMM14,%YMM0                               | 1     | 0.50  | 0.50  | 0    | 0    | 0  | 0     | 0     | 0    | 1       | 0.50\nVEXTRACTI64X2 $0x1,%YMM4,%XMM12                            | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 3       | 1\nVPADDQ %XMM12,%XMM4,%XMM13                                 | 1     | 0.33  | 0.33  | 0    | 0    | 0  | 0.33  | 0     | 0    | 1       | 0.33\nVPSUBQ %YMM0,%YMM1,%YMM1                                   | 1     | 0.33  | 0.33  | 0    | 0    | 0  | 0.33  | 0     | 0    | 1       | 0.33\nVPSRLDQ $0x8,%XMM13,%XMM2                                  | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVEXTRACTI64X2 $0x1,%YMM1,%XMM8                             | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 3       | 1\nVPADDQ %XMM2,%XMM13,%XMM14                                 | 1     | 0.33  | 0.33  | 0    | 0    | 0  | 0.33  | 0     | 0    | 1       | 0.33\nVPADDQ %XMM8,%XMM1,%XMM9                                   | 1     | 0.33  | 0.33  | 0    | 0    | 0  | 0.33  | 0     | 0    | 1       | 0.33\nVMOVQ %XMM14,%R10                                          | 1     | 1     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 2       | 1\nVPSRLDQ $0x8,%XMM9,%XMM10                                  | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVPADDQ %XMM10,%XMM9,%XMM11                                 | 1     | 0.33  | 0.33  | 0    | 0    | 0  | 0.33  | 0     | 0    | 1       | 0.33\nVMOVQ %XMM11,%RCX                                          | 1     | 1     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 2       | 1\nTEST $0x3,%AL                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474e8e <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x53e>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nAND $-0x4,%RAX                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nADD %RAX,%RDX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nMOV (%R12,%RDX,8),%RAX                                     | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 4-5     | 0.50\nLEA (,%RDX,8),%RSI                                         | 1     | 0     | 0.50  | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nCMP $0x1,%RAX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474f00 <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x5b0>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nCMP $-0x3,%RAX                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nSETE %R9B                                                  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 1       | 0.50\nMOVZX %R9B,%R13D                                           | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 1       | 0.25\nADD %R13,%RCX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nLEA 0x1(%RDX),%R8                                          | 1     | 0     | 0.50  | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nCMP %R8,%RDI                                               | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJLE 474e8e <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x53e> | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nMOV 0x8(%R12,%RSI,1),%R11                                  | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 4-5     | 0.50\nCMP $0x1,%R11                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474f10 <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x5c0>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nCMP $-0x3,%R11                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nSETE %AL                                                   | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 1       | 0.50\nMOVZX %AL,%R9D                                             | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 1       | 0.25\nADD %R9,%RCX                                               | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nLEA 0x2(%RDX),%R13                                         | 1     | 0     | 0.50  | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nCMP %R13,%RDI                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJLE 474e8e <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x53e> | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nMOV 0x10(%R12,%RSI,1),%R8                                  | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 4-5     | 0.50\nCMP $0x1,%R8                                               | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474ee0 <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x590>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nCMP $-0x3,%R8                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nSETE %R11B                                                 | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 1       | 0.50\nMOVZX %R11B,%EAX                                           | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 1       | 0.25\nADD %RAX,%RCX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nADD $0x3,%RDX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nCMP %RDX,%RDI                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJLE 474e8e <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x53e> | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nMOV 0x18(%R12,%RSI,1),%R12                                 | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 4-5     | 0.50\nCMP $0x1,%R12                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474f20 <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x5d0>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nCMP $-0x3,%R12                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nSETE %DL                                                   | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 1       | 0.50\nMOVZX %DL,%EDI                                             | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 1       | 0.25\nADD %RDI,%RCX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nVMOVQ %RCX,%XMM5                                           | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVPINSRQ $0x1,%R10,%XMM5,%XMM3                              | 2     | 0     | 0     | 0    | 0    | 0  | 2     | 0     | 0    | 3       | 2\nVZEROUPPER                                                 | 4     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 1\nVMOVDQA %XMM3,0x30(%RSP)                                   | 1     | 0     | 0     | 0.33 | 0.33 | 1  | 0     | 0     | 0.33 | 4       | 1\nCALL 40f2d0 <GOMP_atomic_start@plt>                        | 2     | 0     | 0     | 0.33 | 0.33 | 1  | 0     | 1     | 0.33 | 0       | 1\nVMOVDQA 0x30(%RSP),%XMM4                                   | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 4       | 0.50\nVPADDQ 0x10(%RBX),%XMM4,%XMM15                             | 1     | 0.33  | 0.33  | 0.50 | 0.50 | 0  | 0.33  | 0     | 0    | 1       | 0.50\nVMOVDQU %XMM15,0x10(%RBX)                                  | 1     | 0     | 0     | 0.33 | 0.33 | 1  | 0     | 0     | 0.33 | 4       | 1\nLEA -0x18(%RBP),%RSP                                       | 1     | 0     | 0.50  | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nPOP %RBX                                                   | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 2       | 0.50\nPOP %R12                                                   | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 2       | 0.50\nPOP %R13                                                   | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 2       | 0.50\nPOP %RBP                                                   | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 2       | 0.50\nJMP 40f260 <GOMP_atomic_end@plt>                           | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 1     | 0    | 0       | 1-2\nNOPW %CS:(%RAX,%RAX,1)                                     | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nINC %RAX                                                   | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nXOR %EDX,%EDX                                              | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nJMP 47498c <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x3c>  | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 1     | 0    | 0       | 1-2\nNOPW (%RAX,%RAX,1)                                         | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nINC %R10                                                   | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJMP 474e69 <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x519> | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 1     | 0    | 0       | 1-2\nNOPL (%RAX)                                                | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVPXOR %XMM1,%XMM1,%XMM1                                    | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nXOR %R11D,%R11D                                            | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nXOR %ECX,%ECX                                              | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nXOR %R10D,%R10D                                            | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVMOVDQA %YMM1,%YMM0                                        | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nJMP 474d70 <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x420> | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 1     | 0    | 0       | 1-2\nNOPL (%RAX)                                                | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nINC %R10                                                   | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJMP 474e1c <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x4cc> | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 1     | 0    | 0       | 1-2\nNOPL (%RAX,%RAX,1)                                         | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nINC %R10                                                   | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJMP 474e42 <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x4f2> | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 1     | 0    | 0       | 1-2\nNOPL (%RAX,%RAX,1)                                         | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nINC %R10                                                   | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJMP 474e8e <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x53e> | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 1     | 0    | 0       | 1-2\nNOPL (%RAX,%RAX,1)                                         | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\n",
        },
      },
      header = {
        "Warnings:\nDetected a function call instruction: ignoring called function instructions.\nRerun with --follow-calls=append to include them to analysis  or with --follow-calls=inline to simulate inlining.",
        "0% of peak computational performance is used (0.00 out of 64.00 FLOP per cycle (GFLOPS @ 1GHz))",
      },
      brief = {
      },
      gain = {
        {
          workaround = " - Try to reorganize arrays of structures to structures of arrays\n - Consider to permute loops (see vectorization gain report)\n",
          title = "Code clean check",
          txt = "Detected a slowdown caused by scalar integer instructions (typically used for address computation).\nBy removing them, you can lower the cost of an iteration from 90.00 to 31.75 cycles (2.83x speedup).",
        },
        {
          workaround = " - Try another compiler or update/tune your current one\n - Make array accesses unit-stride:\n  * If your function streams arrays of structures (AoS), try to use structures of arrays instead (SoA):\nfor(i) a[i].x = b[i].x; (slow, non stride 1) => for(i) a.x[i] = b.x[i]; (fast, stride 1)\n",
          details = "68% of SSE/AVX instructions are used in vector version (process two or more data elements in vector registers):\n - 90% of SSE/AVX loads are used in vector version.\n - 0% of SSE/AVX divide and square root instructions are used in vector version.\n - 55% of SSE/AVX instructions that are not load, store, addition, subtraction nor multiply instructions are used in vector version.\nSince your execution units are vector units, only a fully vectorized function can use their full power.\n",
          title = "Vectorization",
          txt = "Your function is partially vectorized.\nOnly 53% of vector register length is used (average across all SSE/AVX instructions).\nBy fully vectorizing your function, you can lower the cost of an iteration from 90.00 to 60.53 cycles (1.49x speedup).",
        },
        {
          workaround = "Reduce the number of division or square root instructions:\n - If denominator is constant over iterations, use reciprocal (replace x/y with x*(1/y)). Check precision impact. This will be done by your compiler with ffast-math or Ofast\n",
          title = "Execution units bottlenecks",
          txt = "Performance is limited by execution of divide and square root operations (the divide/square root unit is a bottleneck).\n\nBy removing all these bottlenecks, you can lower the cost of an iteration from 90.00 to 75.50 cycles (1.19x speedup).\n",
        },
      },
      potential = {
        {
          title = "Expensive FP math instructions/calls",
          txt = "Detected performance impact from expensive FP math instructions/calls.\nBy removing/reexpressing them, you can lower the cost of an iteration from 90.00 to 61.50 cycles (1.46x speedup).",
        },
        {
          workaround = "If your function is irregular, try to remove or hoist conditional structures out of your function. If it mixes elements of different sizes, try to uniformize them.",
          details = "Vector registers are partially exploited, which is expected if your function is irregular or mixes elements of different sizes.",
          title = "Masked instructions",
          txt = "Detected masked instructions.",
        },
      },
    },
  },
  AVG = {
      hint = {
        {
          details = "Calling (and then returning from) a function prevents many compiler optimizations (like vectorization), breaks control flow (which reduces pipeline performance) and executes extra instructions to save/restore the registers used inside it, which is very expensive (dozens of cycles). Consider to inline small functions.\n - GOMP_atomic_start@plt: 1 occurrences\n - omp_get_num_threads@plt: 1 occurrences\n - omp_get_thread_num@plt: 1 occurrences\n",
          title = "CALL instructions",
          txt = "Detected function call instructions.\n",
        },
        {
          details = "These instructions generate more than one micro-operation and only one of them can be decoded during a cycle and the extra micro-operations increase pressure on execution units.\n - IDIV: 1 occurrences\n - VPINSRQ: 1 occurrences\n - VZEROUPPER: 1 occurrences\n",
          title = "Complex instructions",
          txt = "Detected COMPLEX INSTRUCTIONS.\n",
        },
        {
          workaround = "Avoid mixing data with different types. In particular, check if the type of constants is the same as array elements.",
          details = " - CQTO: 1 occurrences\n",
          title = "Conversion instructions",
          txt = "Detected expensive conversion instructions.",
        },
        {
          title = "Type of elements and instruction set",
          txt = "",
        },
        {
          title = "Matching between your function (in the source code) and the binary function",
          txt = "The binary function does not contain any FP arithmetical operations.\nThe binary function is loading 560 bytes.\nThe binary function is storing 32 bytes.",
        },
      },
      expert = {
        {
          title = "General properties",
          txt = "nb instructions    : 239\nnb uops            : 302\nloop length        : 1128\nused x86 registers : 14\nused mmx registers : 0\nused xmm registers : 16\nused ymm registers : 9\nused zmm registers : 16\nnb stack references: 2\n",
        },
        {
          title = "Front-end",
          txt = "MACRO FUSION NOT POSSIBLE\nFIT IN UOP CACHE\nmicro-operation queue: 75.50 cycles\nfront end            : 75.50 cycles\n",
        },
        {
          title = "Back-end",
          txt = "       | P0    | P1    | P2    | P3    | P4   | P5    | P6    | P7\n--------------------------------------------------------------------\nuops   | 57.50 | 57.50 | 10.00 | 10.00 | 9.00 | 57.50 | 57.50 | 9.00\ncycles | 57.50 | 57.50 | 10.00 | 10.00 | 9.00 | 57.50 | 57.50 | 9.00\n\nCycles executing div or sqrt instructions: 24.00-90.00\n",
        },
        {
          title = "Front-end and detailed OoO resources (UFS)",
          txt = "FE+BE cycles: 60.96-104.55\nStall cycles: 0.00-43.50\n",
        },
        {
          title = "Cycles summary",
          txt = "Front-end : 75.50\nDispatch  : 57.50\nDIV/SQRT  : 24.00-90.00\nOverall L1: 75.50-90.00\n",
        },
        {
          title = "Vectorization ratios",
          txt = "all     : 68%\nload    : 90%\nstore   : 100%\nmul     : NA (no mul vectorizable/vectorized instructions)\nadd-sub : 100%\nfma     : NA (no fma vectorizable/vectorized instructions)\ndiv/sqrt: 0%\nother   : 55%\n",
        },
        {
          title = "Vector efficiency ratios",
          txt = "all     : 53%\nload    : 73%\nstore   : 25%\nmul     : NA (no mul vectorizable/vectorized instructions)\nadd-sub : 67%\nfma     : NA (no fma vectorizable/vectorized instructions)\ndiv/sqrt: 12%\nother   : 47%\n",
        },
        {
          title = "Cycles and memory resources usage",
          txt = "Detected masked instructions: assuming all mask elements are active.\nAssuming all data fit into the L1 cache, each call to the function takes 90.00 cycles. At this rate:\n - 4% of peak load performance is reached (6.22 out of 128.00 bytes loaded per cycle (GB/s @ 1GHz))\n - 0% of peak store performance is reached (0.36 out of 64.00 bytes stored per cycle (GB/s @ 1GHz))\n",
        },
        {
          title = "Front-end bottlenecks",
          txt = "Found no such bottlenecks.",
        },
        {
          title = "ASM code",
          txt = "In the binary file, the address of the function is: 474950\n\nInstruction                                                | Nb FU | P0    | P1    | P2   | P3   | P4 | P5    | P6    | P7   | Latency | Recip. throughput\n----------------------------------------------------------------------------------------------------------------------------------------------------------\nPUSH %RBP                                                  | 1     | 0     | 0     | 0.33 | 0.33 | 1  | 0     | 0     | 0.33 | 3       | 1\nMOV %RSP,%RBP                                              | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nPUSH %R13                                                  | 1     | 0     | 0     | 0.33 | 0.33 | 1  | 0     | 0     | 0.33 | 3       | 1\nPUSH %R12                                                  | 1     | 0     | 0     | 0.33 | 0.33 | 1  | 0     | 0     | 0.33 | 3       | 1\nPUSH %RBX                                                  | 1     | 0     | 0     | 0.33 | 0.33 | 1  | 0     | 0     | 0.33 | 3       | 1\nMOV %RDI,%RBX                                              | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nAND $-0x40,%RSP                                            | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nSUB $0x40,%RSP                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nMOV (%RDI),%R12                                            | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 4-5     | 0.50\nCALL 40f0b0 <omp_get_num_threads@plt>                      | 2     | 0     | 0     | 0.33 | 0.33 | 1  | 0     | 1     | 0.33 | 0       | 1\nMOV %EAX,%R13D                                             | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nCALL 40f1f0 <omp_get_thread_num@plt>                       | 2     | 0     | 0     | 0.33 | 0.33 | 1  | 0     | 1     | 0.33 | 0       | 1\nMOVSXD %R13D,%RSI                                          | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nMOVSXD %EAX,%RCX                                           | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nMOV 0x8(%RBX),%RAX                                         | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 4-5     | 0.50\nCQTO                                                       | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 1       | 0.50\nIDIV %RSI                                                  | 57    | 14.25 | 14.25 | 0    | 0    | 0  | 14.25 | 14.25 | 0    | 42-95   | 24-90\nCMP %RDX,%RCX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJL 474ed0 <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x580>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nIMUL %RAX,%RCX                                             | 1     | 0     | 1     | 0    | 0    | 0  | 0     | 0     | 0    | 3       | 1\nVPXOR %XMM3,%XMM3,%XMM3                                    | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nADD %RCX,%RDX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nLEA (%RAX,%RDX,1),%RDI                                     | 1     | 0     | 0.50  | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nCMP %RDI,%RDX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJGE 474e9c <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x54c> | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nLEA -0x1(%RAX),%R9                                         | 1     | 0     | 0.50  | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nMOV %RDX,%R8                                               | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nCMP $0x6,%R9                                               | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJBE 474ee8 <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x598> | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nMOV %RAX,%RCX                                              | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nLEA (%R12,%RDX,8),%R13                                     | 1     | 0     | 0.50  | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVPXOR %XMM0,%XMM0,%XMM0                                    | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nMOV $0x1,%ESI                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nSHR $0x3,%RCX                                              | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 1       | 0.50\nVMOVDQA64 %ZMM0,%ZMM2                                      | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVPBROADCASTQ %RSI,%ZMM5                                    | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nMOV $-0x3,%R9                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nSAL $0x6,%RCX                                              | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 1       | 0.50\nVPTERNLOGD $-0x1,%ZMM3,%ZMM3,%ZMM3                         | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVPBROADCASTQ %R9,%ZMM4                                     | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nLEA (%RCX,%R13,1),%R10                                     | 1     | 0     | 0.50  | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nSUB $0x40,%RCX                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nSHR $0x6,%RCX                                              | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 1       | 0.50\nINC %RCX                                                   | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nAND $0x7,%ECX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474b99 <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x249>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nCMP $0x1,%RCX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474b5f <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x20f>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nCMP $0x2,%RCX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474b2e <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x1de>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nCMP $0x3,%RCX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474afd <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x1ad>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nCMP $0x4,%RCX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474acc <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x17c>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nCMP $0x5,%RCX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474a9b <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x14b>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nCMP $0x6,%RCX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474a6a <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x11a>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nVMOVDQU64 (%R13),%ZMM6                                     | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 5       | 0.50\nADD $0x40,%R13                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nVPCMPEQQ %ZMM5,%ZMM6,%K1                                   | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVPCMPEQQ %ZMM4,%ZMM6,%K2                                   | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVMOVDQA64 %ZMM3,%ZMM2{%K1}{z}                              | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVMOVDQA64 %ZMM3,%ZMM7{%K2}{z}                              | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVPSUBQ %ZMM2,%ZMM0,%ZMM2                                   | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVPSUBQ %ZMM7,%ZMM0,%ZMM0                                   | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVMOVDQU64 (%R13),%ZMM8                                     | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 5       | 0.50\nADD $0x40,%R13                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nVPCMPEQQ %ZMM5,%ZMM8,%K3                                   | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVPCMPEQQ %ZMM4,%ZMM8,%K4                                   | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVMOVDQA64 %ZMM3,%ZMM9{%K3}{z}                              | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVMOVDQA64 %ZMM3,%ZMM10{%K4}{z}                             | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVPSUBQ %ZMM9,%ZMM2,%ZMM2                                   | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVPSUBQ %ZMM10,%ZMM0,%ZMM0                                  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVMOVDQU64 (%R13),%ZMM11                                    | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 5       | 0.50\nADD $0x40,%R13                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nVPCMPEQQ %ZMM5,%ZMM11,%K5                                  | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVPCMPEQQ %ZMM4,%ZMM11,%K6                                  | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVMOVDQA64 %ZMM3,%ZMM12{%K5}{z}                             | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVMOVDQA64 %ZMM3,%ZMM13{%K6}{z}                             | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVPSUBQ %ZMM12,%ZMM2,%ZMM2                                  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVPSUBQ %ZMM13,%ZMM0,%ZMM0                                  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVMOVDQU64 (%R13),%ZMM14                                    | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 5       | 0.50\nADD $0x40,%R13                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nVPCMPEQQ %ZMM5,%ZMM14,%K7                                  | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVPCMPEQQ %ZMM4,%ZMM14,%K1                                  | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVMOVDQA64 %ZMM3,%ZMM15{%K7}{z}                             | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVMOVDQA64 %ZMM3,%ZMM1{%K1}{z}                              | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVPSUBQ %ZMM15,%ZMM2,%ZMM2                                  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVPSUBQ %ZMM1,%ZMM0,%ZMM0                                   | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVMOVDQU64 (%R13),%ZMM7                                     | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 5       | 0.50\nADD $0x40,%R13                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nVPCMPEQQ %ZMM5,%ZMM7,%K2                                   | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVPCMPEQQ %ZMM4,%ZMM7,%K3                                   | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVMOVDQA64 %ZMM3,%ZMM6{%K2}{z}                              | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVMOVDQA64 %ZMM3,%ZMM8{%K3}{z}                              | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVPSUBQ %ZMM6,%ZMM2,%ZMM2                                   | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVPSUBQ %ZMM8,%ZMM0,%ZMM0                                   | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVMOVDQU64 (%R13),%ZMM9                                     | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 5       | 0.50\nADD $0x40,%R13                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nVPCMPEQQ %ZMM5,%ZMM9,%K4                                   | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVPCMPEQQ %ZMM4,%ZMM9,%K5                                   | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVMOVDQA64 %ZMM3,%ZMM10{%K4}{z}                             | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVMOVDQA64 %ZMM3,%ZMM11{%K5}{z}                             | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVPSUBQ %ZMM10,%ZMM2,%ZMM2                                  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVPSUBQ %ZMM11,%ZMM0,%ZMM0                                  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVMOVDQU64 (%R13),%ZMM12                                    | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 5       | 0.50\nADD $0x40,%R13                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nVPCMPEQQ %ZMM5,%ZMM12,%K6                                  | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVPCMPEQQ %ZMM4,%ZMM12,%K7                                  | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVMOVDQA64 %ZMM3,%ZMM13{%K6}{z}                             | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVMOVDQA64 %ZMM3,%ZMM14{%K7}{z}                             | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVPSUBQ %ZMM13,%ZMM2,%ZMM2                                  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nVPSUBQ %ZMM14,%ZMM0,%ZMM0                                  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nCMP %R10,%R13                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474d11 <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x3c1>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nVEXTRACTI64X4 $0x1,%ZMM0,%YMM3                             | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 3       | 1\nVEXTRACTI64X4 $0x1,%ZMM2,%YMM10                            | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 3       | 1\nMOV %RAX,%R11                                              | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVPADDQ %YMM3,%YMM0,%YMM1                                   | 1     | 0.33  | 0.33  | 0    | 0    | 0  | 0.33  | 0     | 0    | 1       | 0.33\nAND $-0x8,%R11                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nVEXTRACTI64X2 $0x1,%YMM1,%XMM15                            | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 3       | 1\nADD %R11,%RDX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nVPADDQ %XMM15,%XMM1,%XMM0                                  | 1     | 0.33  | 0.33  | 0    | 0    | 0  | 0.33  | 0     | 0    | 1       | 0.33\nVPSRLDQ $0x8,%XMM0,%XMM7                                   | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVPADDQ %XMM7,%XMM0,%XMM8                                   | 1     | 0.33  | 0.33  | 0    | 0    | 0  | 0.33  | 0     | 0    | 1       | 0.33\nVPADDQ %YMM10,%YMM2,%YMM0                                  | 1     | 0.33  | 0.33  | 0    | 0    | 0  | 0.33  | 0     | 0    | 1       | 0.33\nVEXTRACTI64X2 $0x1,%YMM0,%XMM6                             | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 3       | 1\nVMOVQ %XMM8,%RCX                                           | 1     | 1     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 2       | 1\nVPADDQ %XMM6,%XMM0,%XMM12                                  | 1     | 0.33  | 0.33  | 0    | 0    | 0  | 0.33  | 0     | 0    | 1       | 0.33\nVPSRLDQ $0x8,%XMM12,%XMM13                                 | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVPADDQ %XMM13,%XMM12,%XMM2                                 | 1     | 0.33  | 0.33  | 0    | 0    | 0  | 0.33  | 0     | 0    | 1       | 0.33\nVMOVQ %XMM2,%R10                                           | 1     | 1     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 2       | 1\nCMP %R11,%RAX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474e8e <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x53e>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nSUB %R11,%RAX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nLEA -0x1(%RAX),%R13                                        | 1     | 0     | 0.50  | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nCMP $0x3,%R13                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJBE 474df7 <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x4a7> | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nADD %R8,%R11                                               | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nMOV $0x1,%R8D                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nVMOVDQU (%R12,%R11,8),%YMM14                               | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 5       | 0.50\nVPBROADCASTQ %R8,%YMM5                                     | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nMOV $-0x3,%R11                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nVPBROADCASTQ %R11,%YMM15                                   | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVPCMPEQQ %YMM5,%YMM14,%YMM3                                | 1     | 0.50  | 0.50  | 0    | 0    | 0  | 0     | 0     | 0    | 1       | 0.50\nVPSUBQ %YMM3,%YMM0,%YMM4                                   | 1     | 0.33  | 0.33  | 0    | 0    | 0  | 0.33  | 0     | 0    | 1       | 0.33\nVPCMPEQQ %YMM15,%YMM14,%YMM0                               | 1     | 0.50  | 0.50  | 0    | 0    | 0  | 0     | 0     | 0    | 1       | 0.50\nVEXTRACTI64X2 $0x1,%YMM4,%XMM12                            | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 3       | 1\nVPADDQ %XMM12,%XMM4,%XMM13                                 | 1     | 0.33  | 0.33  | 0    | 0    | 0  | 0.33  | 0     | 0    | 1       | 0.33\nVPSUBQ %YMM0,%YMM1,%YMM1                                   | 1     | 0.33  | 0.33  | 0    | 0    | 0  | 0.33  | 0     | 0    | 1       | 0.33\nVPSRLDQ $0x8,%XMM13,%XMM2                                  | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVEXTRACTI64X2 $0x1,%YMM1,%XMM8                             | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 3       | 1\nVPADDQ %XMM2,%XMM13,%XMM14                                 | 1     | 0.33  | 0.33  | 0    | 0    | 0  | 0.33  | 0     | 0    | 1       | 0.33\nVPADDQ %XMM8,%XMM1,%XMM9                                   | 1     | 0.33  | 0.33  | 0    | 0    | 0  | 0.33  | 0     | 0    | 1       | 0.33\nVMOVQ %XMM14,%R10                                          | 1     | 1     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 2       | 1\nVPSRLDQ $0x8,%XMM9,%XMM10                                  | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVPADDQ %XMM10,%XMM9,%XMM11                                 | 1     | 0.33  | 0.33  | 0    | 0    | 0  | 0.33  | 0     | 0    | 1       | 0.33\nVMOVQ %XMM11,%RCX                                          | 1     | 1     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 2       | 1\nTEST $0x3,%AL                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474e8e <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x53e>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nAND $-0x4,%RAX                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nADD %RAX,%RDX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nMOV (%R12,%RDX,8),%RAX                                     | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 4-5     | 0.50\nLEA (,%RDX,8),%RSI                                         | 1     | 0     | 0.50  | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nCMP $0x1,%RAX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474f00 <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x5b0>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nCMP $-0x3,%RAX                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nSETE %R9B                                                  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 1       | 0.50\nMOVZX %R9B,%R13D                                           | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 1       | 0.25\nADD %R13,%RCX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nLEA 0x1(%RDX),%R8                                          | 1     | 0     | 0.50  | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nCMP %R8,%RDI                                               | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJLE 474e8e <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x53e> | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nMOV 0x8(%R12,%RSI,1),%R11                                  | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 4-5     | 0.50\nCMP $0x1,%R11                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474f10 <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x5c0>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nCMP $-0x3,%R11                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nSETE %AL                                                   | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 1       | 0.50\nMOVZX %AL,%R9D                                             | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 1       | 0.25\nADD %R9,%RCX                                               | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nLEA 0x2(%RDX),%R13                                         | 1     | 0     | 0.50  | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nCMP %R13,%RDI                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJLE 474e8e <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x53e> | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nMOV 0x10(%R12,%RSI,1),%R8                                  | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 4-5     | 0.50\nCMP $0x1,%R8                                               | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474ee0 <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x590>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nCMP $-0x3,%R8                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nSETE %R11B                                                 | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 1       | 0.50\nMOVZX %R11B,%EAX                                           | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 1       | 0.25\nADD %RAX,%RCX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nADD $0x3,%RDX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nCMP %RDX,%RDI                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJLE 474e8e <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x53e> | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nMOV 0x18(%R12,%RSI,1),%R12                                 | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 4-5     | 0.50\nCMP $0x1,%R12                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJE 474f20 <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x5d0>  | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 0       | 0.50-1\nCMP $-0x3,%R12                                             | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nSETE %DL                                                   | 1     | 0.50  | 0     | 0    | 0    | 0  | 0     | 0.50  | 0    | 1       | 0.50\nMOVZX %DL,%EDI                                             | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 1       | 0.25\nADD %RDI,%RCX                                              | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nVMOVQ %RCX,%XMM5                                           | 1     | 0     | 0     | 0    | 0    | 0  | 1     | 0     | 0    | 1       | 1\nVPINSRQ $0x1,%R10,%XMM5,%XMM3                              | 2     | 0     | 0     | 0    | 0    | 0  | 2     | 0     | 0    | 3       | 2\nVZEROUPPER                                                 | 4     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 1\nVMOVDQA %XMM3,0x30(%RSP)                                   | 1     | 0     | 0     | 0.33 | 0.33 | 1  | 0     | 0     | 0.33 | 4       | 1\nCALL 40f2d0 <GOMP_atomic_start@plt>                        | 2     | 0     | 0     | 0.33 | 0.33 | 1  | 0     | 1     | 0.33 | 0       | 1\nVMOVDQA 0x30(%RSP),%XMM4                                   | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 4       | 0.50\nVPADDQ 0x10(%RBX),%XMM4,%XMM15                             | 1     | 0.33  | 0.33  | 0.50 | 0.50 | 0  | 0.33  | 0     | 0    | 1       | 0.50\nVMOVDQU %XMM15,0x10(%RBX)                                  | 1     | 0     | 0     | 0.33 | 0.33 | 1  | 0     | 0     | 0.33 | 4       | 1\nLEA -0x18(%RBP),%RSP                                       | 1     | 0     | 0.50  | 0    | 0    | 0  | 0.50  | 0     | 0    | 1       | 0.50\nPOP %RBX                                                   | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 2       | 0.50\nPOP %R12                                                   | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 2       | 0.50\nPOP %R13                                                   | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 2       | 0.50\nPOP %RBP                                                   | 1     | 0     | 0     | 0.50 | 0.50 | 0  | 0     | 0     | 0    | 2       | 0.50\nJMP 40f260 <GOMP_atomic_end@plt>                           | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 1     | 0    | 0       | 1-2\nNOPW %CS:(%RAX,%RAX,1)                                     | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nINC %RAX                                                   | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nXOR %EDX,%EDX                                              | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nJMP 47498c <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x3c>  | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 1     | 0    | 0       | 1-2\nNOPW (%RAX,%RAX,1)                                         | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nINC %R10                                                   | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJMP 474e69 <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x519> | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 1     | 0    | 0       | 1-2\nNOPL (%RAX)                                                | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVPXOR %XMM1,%XMM1,%XMM1                                    | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nXOR %R11D,%R11D                                            | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nXOR %ECX,%ECX                                              | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nXOR %R10D,%R10D                                            | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nVMOVDQA %YMM1,%YMM0                                        | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nJMP 474d70 <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x420> | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 1     | 0    | 0       | 1-2\nNOPL (%RAX)                                                | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nINC %R10                                                   | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJMP 474e1c <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x4cc> | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 1     | 0    | 0       | 1-2\nNOPL (%RAX,%RAX,1)                                         | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nINC %R10                                                   | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJMP 474e42 <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x4f2> | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 1     | 0    | 0       | 1-2\nNOPL (%RAX,%RAX,1)                                         | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\nINC %R10                                                   | 1     | 0.25  | 0.25  | 0    | 0    | 0  | 0.25  | 0.25  | 0    | 1       | 0.25\nJMP 474e8e <hypre_BoomerAMGBuildMultipass._omp_fn.0+0x53e> | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 1     | 0    | 0       | 1-2\nNOPL (%RAX,%RAX,1)                                         | 1     | 0     | 0     | 0    | 0    | 0  | 0     | 0     | 0    | 0       | 0.25\n",
        },
      },
      header = {
        "Warnings:\nDetected a function call instruction: ignoring called function instructions.\nRerun with --follow-calls=append to include them to analysis  or with --follow-calls=inline to simulate inlining.",
        "0% of peak computational performance is used (0.00 out of 64.00 FLOP per cycle (GFLOPS @ 1GHz))",
      },
      brief = {
      },
      gain = {
        {
          workaround = " - Try to reorganize arrays of structures to structures of arrays\n - Consider to permute loops (see vectorization gain report)\n",
          title = "Code clean check",
          txt = "Detected a slowdown caused by scalar integer instructions (typically used for address computation).\nBy removing them, you can lower the cost of an iteration from 90.00 to 31.75 cycles (2.83x speedup).",
        },
        {
          workaround = " - Try another compiler or update/tune your current one\n - Make array accesses unit-stride:\n  * If your function streams arrays of structures (AoS), try to use structures of arrays instead (SoA):\nfor(i) a[i].x = b[i].x; (slow, non stride 1) => for(i) a.x[i] = b.x[i]; (fast, stride 1)\n",
          details = "68% of SSE/AVX instructions are used in vector version (process two or more data elements in vector registers):\n - 90% of SSE/AVX loads are used in vector version.\n - 0% of SSE/AVX divide and square root instructions are used in vector version.\n - 55% of SSE/AVX instructions that are not load, store, addition, subtraction nor multiply instructions are used in vector version.\nSince your execution units are vector units, only a fully vectorized function can use their full power.\n",
          title = "Vectorization",
          txt = "Your function is partially vectorized.\nOnly 53% of vector register length is used (average across all SSE/AVX instructions).\nBy fully vectorizing your function, you can lower the cost of an iteration from 90.00 to 60.53 cycles (1.49x speedup).",
        },
        {
          workaround = "Reduce the number of division or square root instructions:\n - If denominator is constant over iterations, use reciprocal (replace x/y with x*(1/y)). Check precision impact. This will be done by your compiler with ffast-math or Ofast\n",
          title = "Execution units bottlenecks",
          txt = "Performance is limited by execution of divide and square root operations (the divide/square root unit is a bottleneck).\n\nBy removing all these bottlenecks, you can lower the cost of an iteration from 90.00 to 75.50 cycles (1.19x speedup).\n",
        },
      },
      potential = {
        {
          title = "Expensive FP math instructions/calls",
          txt = "Detected performance impact from expensive FP math instructions/calls.\nBy removing/reexpressing them, you can lower the cost of an iteration from 90.00 to 61.50 cycles (1.46x speedup).",
        },
        {
          workaround = "If your function is irregular, try to remove or hoist conditional structures out of your function. If it mixes elements of different sizes, try to uniformize them.",
          details = "Vector registers are partially exploited, which is expected if your function is irregular or mixes elements of different sizes.",
          title = "Masked instructions",
          txt = "Detected masked instructions.",
        },
      },
    },
  common = {
    header = {
      "The function is defined in /home/kcamus/qaas_runs/169-443-9681/intel/AMG/build/AMG/AMG/parcsr_ls/par_multi_interp.c:272-276.\n",
      "Warnings:\nIgnoring paths for analysis",
    },
  },
}
