Loop Id: 2110 | Module: exec | Source: ams.c:3662-3682 [...] | Coverage: 30.83% |
---|
Loop Id: 2110 | Module: exec | Source: ams.c:3662-3682 [...] | Coverage: 30.83% |
---|
0x513608 INC %RDX |
0x51360b CMP %RDX,0x28(%RSP) |
0x513610 JE 513b8c |
0x513616 MOV 0x30(%RSP),%RCX |
0x51361b MOV (%RCX,%RDX,8),%RAX |
0x51361f LEA (,%RAX,8),%RSI |
0x513627 MOV %RAX,%R14 |
0x51362a LEA (%R11,%RSI,1),%RDI |
0x51362e VCOMISD (%RDI),%XMM3 |
0x513632 JE 513608 |
0x513634 MOV 0x20(%RSP),%R10 |
0x513639 MOV 0x30(%RSP),%R15 |
0x51363e VMOVSD (%R10,%RDX,8),%XMM6 |
0x513644 MOV 0x8(%R15,%RDX,8),%R10 |
0x513649 CMP %R10,%RAX |
0x51364c JGE 513c00 |
0x513652 SUB %RAX,%R10 |
0x513655 LEA -0x1(%R10),%R13 |
0x513659 CMP $0x2,%R13 |
0x51365d JBE 513c1a |
0x513663 MOV %R10,%R15 |
0x513666 ADD %RBX,%RSI |
0x513669 VXORPD %XMM0,%XMM0,%XMM0 |
0x51366d XOR %ECX,%ECX |
0x51366f SHR $0x2,%R15 |
0x513673 SAL $0x5,%R15 |
0x513677 LEA -0x20(%R15),%R13 |
0x51367b SHR $0x5,%R13 |
0x51367f INC %R13 |
0x513682 AND $0x7,%R13D |
0x513686 JE 51375b |
0x51368c CMP $0x1,%R13 |
0x513690 JE 513739 |
0x513696 CMP $0x2,%R13 |
0x51369a JE 513720 |
0x5136a0 CMP $0x3,%R13 |
0x5136a4 JE 513707 |
0x5136a6 CMP $0x4,%R13 |
0x5136aa JE 5136ee |
0x5136ac CMP $0x5,%R13 |
0x5136b0 JE 5136d5 |
0x5136b2 CMP $0x6,%R13 |
0x5136b6 JNE 513bb0 |
0x5136bc VMOVDQU (%RSI,%RCX,1),%YMM9 |
0x5136c1 VMOVAPD %YMM2,%YMM10 |
0x5136c5 VGATHERQPD %YMM10,(%R8,%YMM9,8),%YMM12 |
0x5136cb VFNMADD231PD (%RDI,%RCX,1),%YMM12,%YMM0 |
0x5136d1 ADD $0x20,%RCX |
0x5136d5 VMOVDQU (%RSI,%RCX,1),%YMM13 |
0x5136da VMOVAPD %YMM2,%YMM14 |
0x5136de VGATHERQPD %YMM14,(%R8,%YMM13,8),%YMM15 |
0x5136e4 VFNMADD231PD (%RDI,%RCX,1),%YMM15,%YMM0 |
0x5136ea ADD $0x20,%RCX |
0x5136ee VMOVDQU (%RSI,%RCX,1),%YMM8 |
0x5136f3 VMOVAPD %YMM2,%YMM7 |
0x5136f7 VGATHERQPD %YMM7,(%R8,%YMM8,8),%YMM11 |
0x5136fd VFNMADD231PD (%RDI,%RCX,1),%YMM11,%YMM0 |
0x513703 ADD $0x20,%RCX |
0x513707 VMOVDQU (%RSI,%RCX,1),%YMM9 |
0x51370c VMOVAPD %YMM2,%YMM10 |
0x513710 VGATHERQPD %YMM10,(%R8,%YMM9,8),%YMM1 |
0x513716 VFNMADD231PD (%RDI,%RCX,1),%YMM1,%YMM0 |
0x51371c ADD $0x20,%RCX |
0x513720 VMOVDQU (%RSI,%RCX,1),%YMM14 |
0x513725 VMOVAPD %YMM2,%YMM12 |
0x513729 VGATHERQPD %YMM12,(%R8,%YMM14,8),%YMM13 |
0x51372f VFNMADD231PD (%RDI,%RCX,1),%YMM13,%YMM0 |
0x513735 ADD $0x20,%RCX |
0x513739 VMOVDQU (%RSI,%RCX,1),%YMM7 |
0x51373e VMOVAPD %YMM2,%YMM15 |
0x513742 VGATHERQPD %YMM15,(%R8,%YMM7,8),%YMM8 |
0x513748 VFNMADD231PD (%RDI,%RCX,1),%YMM8,%YMM0 |
0x51374e ADD $0x20,%RCX |
0x513752 CMP %RCX,%R15 |
0x513755 JE 513839 |
(2112) 0x51375b VMOVDQU (%RSI,%RCX,1),%YMM10 |
(2112) 0x513760 VMOVDQU 0x20(%RSI,%RCX,1),%YMM12 |
(2112) 0x513766 VMOVAPD %YMM2,%YMM11 |
(2112) 0x51376a VMOVAPD %YMM2,%YMM1 |
(2112) 0x51376e VMOVDQU 0x40(%RSI,%RCX,1),%YMM13 |
(2112) 0x513774 VMOVDQU 0x60(%RSI,%RCX,1),%YMM8 |
(2112) 0x51377a VMOVAPD %YMM2,%YMM14 |
(2112) 0x51377e VMOVAPD %YMM2,%YMM7 |
(2112) 0x513782 VGATHERQPD %YMM11,(%R8,%YMM10,8),%YMM9 |
(2112) 0x513788 VFNMADD132PD (%RDI,%RCX,1),%YMM0,%YMM9 |
(2112) 0x51378e VGATHERQPD %YMM1,(%R8,%YMM12,8),%YMM0 |
(2112) 0x513794 VMOVDQU 0x80(%RSI,%RCX,1),%YMM10 |
(2112) 0x51379d VFNMADD132PD 0x20(%RDI,%RCX,1),%YMM9,%YMM0 |
(2112) 0x5137a4 VMOVAPD %YMM2,%YMM9 |
(2112) 0x5137a8 VMOVAPD %YMM2,%YMM12 |
(2112) 0x5137ac VGATHERQPD %YMM14,(%R8,%YMM13,8),%YMM15 |
(2112) 0x5137b2 VFNMADD132PD 0x40(%RDI,%RCX,1),%YMM0,%YMM15 |
(2112) 0x5137b9 VGATHERQPD %YMM7,(%R8,%YMM8,8),%YMM11 |
(2112) 0x5137bf VMOVDQU 0xa0(%RSI,%RCX,1),%YMM14 |
(2112) 0x5137c8 VFNMADD132PD 0x60(%RDI,%RCX,1),%YMM15,%YMM11 |
(2112) 0x5137cf VMOVAPD %YMM2,%YMM15 |
(2112) 0x5137d3 VMOVAPD %YMM2,%YMM7 |
(2112) 0x5137d7 VMOVDQU 0xc0(%RSI,%RCX,1),%YMM13 |
(2112) 0x5137e0 VGATHERQPD %YMM9,(%R8,%YMM10,8),%YMM1 |
(2112) 0x5137e6 VFNMADD132PD 0x80(%RDI,%RCX,1),%YMM11,%YMM1 |
(2112) 0x5137f0 VGATHERQPD %YMM12,(%R8,%YMM14,8),%YMM0 |
(2112) 0x5137f6 VMOVDQU 0xe0(%RSI,%RCX,1),%YMM11 |
(2112) 0x5137ff VFNMADD132PD 0xa0(%RDI,%RCX,1),%YMM1,%YMM0 |
(2112) 0x513809 VGATHERQPD %YMM15,(%R8,%YMM13,8),%YMM8 |
(2112) 0x51380f VFNMADD132PD 0xc0(%RDI,%RCX,1),%YMM0,%YMM8 |
(2112) 0x513819 VGATHERQPD %YMM7,(%R8,%YMM11,8),%YMM0 |
(2112) 0x51381f VFNMADD132PD 0xe0(%RDI,%RCX,1),%YMM8,%YMM0 |
(2112) 0x513829 ADD $0x100,%RCX |
(2112) 0x513830 CMP %RCX,%R15 |
(2112) 0x513833 JNE 51375b |
0x513839 VEXTRACTF128 $0x1,%YMM0,%XMM9 |
0x51383f VADDPD %XMM0,%XMM9,%XMM10 |
0x513843 VUNPCKHPD %XMM10,%XMM10,%XMM1 |
0x513848 VADDPD %XMM10,%XMM1,%XMM12 |
0x51384d VADDSD %XMM12,%XMM6,%XMM1 |
0x513852 TEST $0x3,%R10B |
0x513856 JE 5138b3 |
0x513858 MOV %R10,%RCX |
0x51385b VADDPD %XMM9,%XMM0,%XMM7 |
0x513860 AND $-0x4,%RCX |
0x513864 ADD %RCX,%RAX |
0x513867 SUB %RCX,%R10 |
0x51386a CMP $0x1,%R10 |
0x51386e JE 5138a3 |
0x513870 ADD %R14,%RCX |
0x513873 VMOVAPD %XMM4,%XMM14 |
0x513877 VMOVDQU (%RBX,%RCX,8),%XMM15 |
0x51387c VGATHERQPD %XMM14,(%R8,%XMM15,8),%XMM13 |
0x513882 VFNMADD132PD (%R11,%RCX,8),%XMM7,%XMM13 |
0x513888 VUNPCKHPD %XMM13,%XMM13,%XMM8 |
0x51388d VADDPD %XMM13,%XMM8,%XMM7 |
0x513892 VADDSD %XMM7,%XMM6,%XMM1 |
0x513896 TEST $0x1,%R10B |
0x51389a JE 5138b3 |
0x51389c AND $-0x2,%R10 |
0x5138a0 ADD %R10,%RAX |
0x5138a3 MOV (%RBX,%RAX,8),%R14 |
0x5138a7 VMOVSD (%R11,%RAX,8),%XMM6 |
0x5138ad VFNMADD231SD (%R8,%R14,8),%XMM6,%XMM1 |
0x5138b3 MOV 0x10(%RSP),%RAX |
0x5138b8 MOV (%RAX,%RDX,8),%RCX |
0x5138bc MOV 0x8(%RAX,%RDX,8),%RSI |
0x5138c1 CMP %RSI,%RCX |
0x5138c4 JGE 513bf0 |
0x5138ca SUB %RCX,%RSI |
0x5138cd MOV %RCX,%R15 |
0x5138d0 LEA -0x1(%RSI),%RDI |
0x5138d4 CMP $0x2,%RDI |
0x5138d8 JBE 513c09 |
0x5138de MOV 0x38(%RSP),%R14 |
0x5138e3 LEA (,%RCX,8),%RDI |
0x5138eb XOR %EAX,%EAX |
0x5138ed VXORPD %XMM6,%XMM6,%XMM6 |
0x5138f1 LEA (%R12,%RDI,1),%R13 |
0x5138f5 ADD %R14,%RDI |
0x5138f8 MOV %RSI,%R14 |
0x5138fb SHR $0x2,%R14 |
0x5138ff SAL $0x5,%R14 |
0x513903 LEA -0x20(%R14),%R10 |
0x513907 SHR $0x5,%R10 |
0x51390b INC %R10 |
0x51390e AND $0x7,%R10D |
0x513912 JE 5139f3 |
0x513918 CMP $0x1,%R10 |
0x51391c JE 5139cf |
0x513922 CMP $0x2,%R10 |
0x513926 JE 5139b4 |
0x51392c CMP $0x3,%R10 |
0x513930 JE 513999 |
0x513932 CMP $0x4,%R10 |
0x513936 JE 51397e |
0x513938 CMP $0x5,%R10 |
0x51393c JE 513963 |
0x51393e CMP $0x6,%R10 |
0x513942 JNE 513bd0 |
0x513948 VMOVDQU (%R13,%RAX,1),%YMM14 |
0x51394f VMOVAPD %YMM2,%YMM12 |
0x513953 VGATHERQPD %YMM12,(%R9,%YMM14,8),%YMM15 |
0x513959 VFNMADD231PD (%RDI,%RAX,1),%YMM15,%YMM6 |
0x51395f ADD $0x20,%RAX |
0x513963 VMOVDQU (%R13,%RAX,1),%YMM8 |
0x51396a VMOVAPD %YMM2,%YMM13 |
0x51396e VGATHERQPD %YMM13,(%R9,%YMM8,8),%YMM7 |
0x513974 VFNMADD231PD (%RDI,%RAX,1),%YMM7,%YMM6 |
0x51397a ADD $0x20,%RAX |
0x51397e VMOVDQU (%R13,%RAX,1),%YMM9 |
0x513985 VMOVAPD %YMM2,%YMM11 |
0x513989 VGATHERQPD %YMM11,(%R9,%YMM9,8),%YMM10 |
0x51398f VFNMADD231PD (%RDI,%RAX,1),%YMM10,%YMM6 |
0x513995 ADD $0x20,%RAX |
0x513999 VMOVDQU (%R13,%RAX,1),%YMM14 |
0x5139a0 VMOVAPD %YMM2,%YMM12 |
0x5139a4 VGATHERQPD %YMM12,(%R9,%YMM14,8),%YMM0 |
0x5139aa VFNMADD231PD (%RDI,%RAX,1),%YMM0,%YMM6 |
0x5139b0 ADD $0x20,%RAX |
0x5139b4 VMOVDQU (%R13,%RAX,1),%YMM13 |
0x5139bb VMOVAPD %YMM2,%YMM15 |
0x5139bf VGATHERQPD %YMM15,(%R9,%YMM13,8),%YMM8 |
0x5139c5 VFNMADD231PD (%RDI,%RAX,1),%YMM8,%YMM6 |
0x5139cb ADD $0x20,%RAX |
0x5139cf VMOVDQU (%R13,%RAX,1),%YMM11 |
0x5139d6 VMOVAPD %YMM2,%YMM7 |
0x5139da VGATHERQPD %YMM7,(%R9,%YMM11,8),%YMM9 |
0x5139e0 VFNMADD231PD (%RDI,%RAX,1),%YMM9,%YMM6 |
0x5139e6 ADD $0x20,%RAX |
0x5139ea CMP %RAX,%R14 |
0x5139ed JE 513ad9 |
(2111) 0x5139f3 VMOVDQU (%R13,%RAX,1),%YMM12 |
(2111) 0x5139fa VMOVAPD %YMM2,%YMM10 |
(2111) 0x5139fe VMOVAPD %YMM2,%YMM13 |
(2111) 0x513a02 VMOVDQU 0x20(%R13,%RAX,1),%YMM15 |
(2111) 0x513a09 VMOVDQU 0x40(%R13,%RAX,1),%YMM7 |
(2111) 0x513a10 VMOVAPD %YMM2,%YMM8 |
(2111) 0x513a14 VMOVAPD %YMM2,%YMM9 |
(2111) 0x513a18 VGATHERQPD %YMM10,(%R9,%YMM12,8),%YMM14 |
(2111) 0x513a1e VFNMADD231PD (%RDI,%RAX,1),%YMM14,%YMM6 |
(2111) 0x513a24 VGATHERQPD %YMM13,(%R9,%YMM15,8),%YMM0 |
(2111) 0x513a2a VMOVDQU 0x60(%R13,%RAX,1),%YMM10 |
(2111) 0x513a31 VFNMADD231PD 0x20(%RDI,%RAX,1),%YMM0,%YMM6 |
(2111) 0x513a38 VMOVAPD %YMM2,%YMM12 |
(2111) 0x513a3c VMOVAPD %YMM2,%YMM15 |
(2111) 0x513a40 VMOVDQU 0x80(%R13,%RAX,1),%YMM14 |
(2111) 0x513a4a VGATHERQPD %YMM8,(%R9,%YMM7,8),%YMM11 |
(2111) 0x513a50 VMOVAPD %YMM2,%YMM7 |
(2111) 0x513a54 VMOVDQU 0xa0(%R13,%RAX,1),%YMM0 |
(2111) 0x513a5e VFNMADD132PD 0x40(%RDI,%RAX,1),%YMM6,%YMM11 |
(2111) 0x513a65 VGATHERQPD %YMM9,(%R9,%YMM10,8),%YMM6 |
(2111) 0x513a6b VMOVAPD %YMM2,%YMM10 |
(2111) 0x513a6f VFNMADD132PD 0x60(%RDI,%RAX,1),%YMM11,%YMM6 |
(2111) 0x513a76 VGATHERQPD %YMM12,(%R9,%YMM14,8),%YMM13 |
(2111) 0x513a7c VMOVDQU 0xc0(%R13,%RAX,1),%YMM11 |
(2111) 0x513a86 VGATHERQPD %YMM15,(%R9,%YMM0,8),%YMM8 |
(2111) 0x513a8c VFNMADD132PD 0x80(%RDI,%RAX,1),%YMM6,%YMM13 |
(2111) 0x513a96 VMOVDQU 0xe0(%R13,%RAX,1),%YMM12 |
(2111) 0x513aa0 VFNMADD132PD 0xa0(%RDI,%RAX,1),%YMM13,%YMM8 |
(2111) 0x513aaa VGATHERQPD %YMM7,(%R9,%YMM11,8),%YMM9 |
(2111) 0x513ab0 VFNMADD132PD 0xc0(%RDI,%RAX,1),%YMM8,%YMM9 |
(2111) 0x513aba VGATHERQPD %YMM10,(%R9,%YMM12,8),%YMM6 |
(2111) 0x513ac0 VFNMADD132PD 0xe0(%RDI,%RAX,1),%YMM9,%YMM6 |
(2111) 0x513aca ADD $0x100,%RAX |
(2111) 0x513ad0 CMP %RAX,%R14 |
(2111) 0x513ad3 JNE 5139f3 |
0x513ad9 VEXTRACTF128 $0x1,%YMM6,%XMM14 |
0x513adf VADDPD %XMM6,%XMM14,%XMM13 |
0x513ae3 VUNPCKHPD %XMM13,%XMM13,%XMM15 |
0x513ae8 VADDPD %XMM13,%XMM15,%XMM0 |
0x513aed VADDSD %XMM0,%XMM1,%XMM12 |
0x513af1 TEST $0x3,%SIL |
0x513af5 JE 513b5c |
0x513af7 MOV %RSI,%R10 |
0x513afa VADDPD %XMM6,%XMM14,%XMM11 |
0x513afe AND $-0x4,%R10 |
0x513b02 ADD %R10,%RCX |
0x513b05 SUB %R10,%RSI |
0x513b08 CMP $0x1,%RSI |
0x513b0c JE 513b47 |
0x513b0e ADD %R15,%R10 |
0x513b11 MOV 0x38(%RSP),%R15 |
0x513b16 VMOVAPD %XMM4,%XMM8 |
0x513b1a VMOVDQU (%R12,%R10,8),%XMM7 |
0x513b20 VGATHERQPD %XMM8,(%R9,%XMM7,8),%XMM9 |
0x513b26 VFNMADD132PD (%R15,%R10,8),%XMM11,%XMM9 |
0x513b2c VUNPCKHPD %XMM9,%XMM9,%XMM11 |
0x513b31 VADDPD %XMM9,%XMM11,%XMM10 |
0x513b36 VADDSD %XMM1,%XMM10,%XMM12 |
0x513b3a TEST $0x1,%SIL |
0x513b3e JE 513b5c |
0x513b40 AND $-0x2,%RSI |
0x513b44 ADD %RSI,%RCX |
0x513b47 MOV (%R12,%RCX,8),%RSI |
0x513b4b MOV 0x38(%RSP),%RDI |
0x513b50 VMOVSD (%R9,%RSI,8),%XMM1 |
0x513b56 VFNMADD231SD (%RDI,%RCX,8),%XMM1,%XMM12 |
0x513b5c MOV 0x18(%RSP),%R13 |
0x513b61 MOV 0x8(%RSP),%RCX |
0x513b66 VMULSD %XMM12,%XMM5,%XMM6 |
0x513b6b VDIVSD (%RCX,%RDX,8),%XMM6,%XMM14 |
0x513b70 VADDSD (%R13,%RDX,8),%XMM14,%XMM13 |
0x513b77 VMOVSD %XMM13,(%R13,%RDX,8) |
0x513b7e INC %RDX |
0x513b81 CMP %RDX,0x28(%RSP) |
0x513b86 JNE 513616 |
0x513bb0 VMOVDQU (%RSI),%YMM11 |
0x513bb4 VMOVAPD %YMM2,%YMM8 |
0x513bb8 MOV $0x20,%ECX |
0x513bbd VGATHERQPD %YMM8,(%R8,%YMM11,8),%YMM1 |
0x513bc3 VFNMADD231PD (%RDI),%YMM1,%YMM0 |
0x513bc8 JMP 5136bc |
0x513bd0 VMOVDQU (%R13),%YMM10 |
0x513bd6 VMOVAPD %YMM2,%YMM9 |
0x513bda MOV $0x20,%EAX |
0x513bdf VGATHERQPD %YMM9,(%R9,%YMM10,8),%YMM0 |
0x513be5 VFNMADD231PD (%RDI),%YMM0,%YMM6 |
0x513bea JMP 513948 |
0x513bf0 VMOVSD %XMM1,%XMM1,%XMM12 |
0x513bf4 JMP 513b5c |
0x513c00 VMOVSD %XMM6,%XMM6,%XMM1 |
0x513c04 JMP 5138b3 |
0x513c09 VMOVSD %XMM1,%XMM1,%XMM12 |
0x513c0d VXORPD %XMM11,%XMM11,%XMM11 |
0x513c12 XOR %R10D,%R10D |
0x513c15 JMP 513b05 |
0x513c1a VMOVSD %XMM6,%XMM6,%XMM1 |
0x513c1e VXORPD %XMM7,%XMM7,%XMM7 |
0x513c22 XOR %ECX,%ECX |
0x513c24 JMP 513867 |
/scratch_na/users/xoserete/qaas_runs/171-172-8217/intel/AMG/build/AMG/AMG/parcsr_ls/ams.c: 3662 - 3682 |
-------------------------------------------------------------------------------- |
3662: #pragma omp parallel for private(i,ii,jj,res) HYPRE_SMP_SCHEDULE |
[...] |
3669: if (A_diag_data[A_diag_i[i]] != zero) |
3670: { |
3671: res = f_data[i]; |
3672: for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++) |
3673: { |
3674: ii = A_diag_j[jj]; |
3675: res -= A_diag_data[jj] * Vtemp_data[ii]; |
3676: } |
3677: for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) |
3678: { |
3679: ii = A_offd_j[jj]; |
3680: res -= A_offd_data[jj] * Vext_data[ii]; |
3681: } |
3682: u_data[i] += (relax_weight*res)/l1_norms[i]; |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.46 |
CQA speedup if FP arith vectorized | 1.55 |
CQA speedup if fully vectorized | 3.38 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.36 |
Bottlenecks | micro-operation queue |
Function | hypre_ParCSRRelaxThreads._omp_fn.1 |
Source | ams.c:3662-3662,ams.c:3669-3672,ams.c:3675-3677,ams.c:3680-3682 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 53.17 |
CQA cycles if no scalar integer | 36.50 |
CQA cycles if FP arith vectorized | 34.33 |
CQA cycles if fully vectorized | 15.71 |
Front-end cycles | 53.17 |
DIV/SQRT cycles | 4.00 |
P0 cycles | 37.67 |
P1 cycles | 37.67 |
P2 cycles | 39.00 |
P3 cycles | 39.00 |
P4 cycles | 0.50 |
P5 cycles | 37.67 |
P6 cycles | 37.00 |
P7 cycles | 0.50 |
P8 cycles | 0.50 |
P9 cycles | 0.50 |
P10 cycles | 37.00 |
P11 cycles | 39.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 99.77 - 426.89 |
Stall cycles (UFS) | 46.10 - 373.22 |
Nb insns | 236.00 |
Nb uops | 301.00 |
Nb loads | 73.00 |
Nb stores | 1.00 |
Nb stack references | 7.00 |
FLOP/cycle | 2.76 |
Nb FLOP add-sub | 21.00 |
Nb FLOP mul | 1.00 |
Nb FLOP fma | 62.00 |
Nb FLOP div | 1.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 31.00 |
Bytes prefetched | 0.00 |
Bytes loaded | 1640.00 |
Bytes stored | 8.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 64.46 |
Vectorization ratio load | 84.21 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 61.54 |
Vectorization ratio fma | 88.89 |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 55.88 |
Vector-efficiency ratio all | 32.13 |
Vector-efficiency ratio load | 41.45 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | 20.19 |
Vector-efficiency ratio fma | 43.06 |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 29.78 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.46 |
CQA speedup if FP arith vectorized | 1.55 |
CQA speedup if fully vectorized | 3.38 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.36 |
Bottlenecks | micro-operation queue |
Function | hypre_ParCSRRelaxThreads._omp_fn.1 |
Source | ams.c:3662-3662,ams.c:3669-3672,ams.c:3675-3677,ams.c:3680-3682 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 53.17 |
CQA cycles if no scalar integer | 36.50 |
CQA cycles if FP arith vectorized | 34.33 |
CQA cycles if fully vectorized | 15.71 |
Front-end cycles | 53.17 |
DIV/SQRT cycles | 4.00 |
P0 cycles | 37.67 |
P1 cycles | 37.67 |
P2 cycles | 39.00 |
P3 cycles | 39.00 |
P4 cycles | 0.50 |
P5 cycles | 37.67 |
P6 cycles | 37.00 |
P7 cycles | 0.50 |
P8 cycles | 0.50 |
P9 cycles | 0.50 |
P10 cycles | 37.00 |
P11 cycles | 39.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 99.77 - 426.89 |
Stall cycles (UFS) | 46.10 - 373.22 |
Nb insns | 236.00 |
Nb uops | 301.00 |
Nb loads | 73.00 |
Nb stores | 1.00 |
Nb stack references | 7.00 |
FLOP/cycle | 2.76 |
Nb FLOP add-sub | 21.00 |
Nb FLOP mul | 1.00 |
Nb FLOP fma | 62.00 |
Nb FLOP div | 1.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 31.00 |
Bytes prefetched | 0.00 |
Bytes loaded | 1640.00 |
Bytes stored | 8.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 64.46 |
Vectorization ratio load | 84.21 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 61.54 |
Vectorization ratio fma | 88.89 |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 55.88 |
Vector-efficiency ratio all | 32.13 |
Vector-efficiency ratio load | 41.45 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | 20.19 |
Vector-efficiency ratio fma | 43.06 |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 29.78 |
Path / |
Function | hypre_ParCSRRelaxThreads._omp_fn.1 |
Source file and lines | ams.c:3662-3682 |
Module | exec |
nb instructions | 236 |
nb uops | 301 |
loop length | 1070 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 15 |
used ymm registers | 13 |
used zmm registers | 0 |
nb stack references | 7 |
ADD-SUB / MUL ratio | 13.00 |
micro-operation queue | 53.17 cycles |
front end | 53.17 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 37.67 | 37.67 | 39.00 | 39.00 | 0.50 | 37.67 | 37.00 | 0.50 | 0.50 | 0.50 | 37.00 | 39.00 |
cycles | 37.67 | 37.67 | 39.00 | 39.00 | 0.50 | 37.67 | 37.00 | 0.50 | 0.50 | 0.50 | 37.00 | 39.00 |
Cycles executing div or sqrt instructions | 4.00 |
FE+BE cycles | 99.77-426.89 |
Stall cycles | 46.10-373.22 |
ROB full (events) | 51.88-389.06 |
Front-end | 53.17 |
Dispatch | 39.00 |
DIV/SQRT | 4.00 |
Overall L1 | 53.17 |
all | 43% |
load | 94% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 73% |
load | 80% |
store | 0% |
mul | 0% |
add-sub | 61% |
fma | 88% |
div/sqrt | 0% |
other | 80% |
all | 64% |
load | 84% |
store | 0% |
mul | 0% |
add-sub | 61% |
fma | 88% |
div/sqrt | 0% |
other | 55% |
all | 27% |
load | 44% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 34% |
load | 40% |
store | 12% |
mul | 12% |
add-sub | 20% |
fma | 43% |
div/sqrt | 12% |
other | 37% |
all | 32% |
load | 41% |
store | 12% |
mul | 12% |
add-sub | 20% |
fma | 43% |
div/sqrt | 12% |
other | 29% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDX,0x28(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 513b8c <hypre_ParCSRRelaxThreads._omp_fn.1+0x63c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x30(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RDX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RAX,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA (%R11,%RSI,1),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCOMISD (%RDI),%XMM3 | 2 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
JE 513608 <hypre_ParCSRRelaxThreads._omp_fn.1+0xb8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x20(%RSP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%R10,%RDX,8),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R15,%RDX,8),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R10,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 513c00 <hypre_ParCSRRelaxThreads._omp_fn.1+0x6b0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
SUB %RAX,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA -0x1(%R10),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP $0x2,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 513c1a <hypre_ParCSRRelaxThreads._omp_fn.1+0x6ca> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R10,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD %RBX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x2,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
SAL $0x5,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA -0x20(%R15),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x5,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
INC %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x7,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 51375b <hypre_ParCSRRelaxThreads._omp_fn.1+0x20b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x1,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 513739 <hypre_ParCSRRelaxThreads._omp_fn.1+0x1e9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x2,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 513720 <hypre_ParCSRRelaxThreads._omp_fn.1+0x1d0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x3,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 513707 <hypre_ParCSRRelaxThreads._omp_fn.1+0x1b7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x4,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 5136ee <hypre_ParCSRRelaxThreads._omp_fn.1+0x19e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x5,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 5136d5 <hypre_ParCSRRelaxThreads._omp_fn.1+0x185> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x6,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 513bb0 <hypre_ParCSRRelaxThreads._omp_fn.1+0x660> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVDQU (%RSI,%RCX,1),%YMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VGATHERQPD %YMM10,(%R8,%YMM9,8),%YMM12 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RCX,1),%YMM12,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%RSI,%RCX,1),%YMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VGATHERQPD %YMM14,(%R8,%YMM13,8),%YMM15 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RCX,1),%YMM15,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%RSI,%RCX,1),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VGATHERQPD %YMM7,(%R8,%YMM8,8),%YMM11 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RCX,1),%YMM11,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%RSI,%RCX,1),%YMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VGATHERQPD %YMM10,(%R8,%YMM9,8),%YMM1 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RCX,1),%YMM1,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%RSI,%RCX,1),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VGATHERQPD %YMM12,(%R8,%YMM14,8),%YMM13 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RCX,1),%YMM13,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%RSI,%RCX,1),%YMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VGATHERQPD %YMM15,(%R8,%YMM7,8),%YMM8 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RCX,1),%YMM8,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RCX,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 513839 <hypre_ParCSRRelaxThreads._omp_fn.1+0x2e9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VEXTRACTF128 $0x1,%YMM0,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VADDPD %XMM0,%XMM9,%XMM10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VUNPCKHPD %XMM10,%XMM10,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM10,%XMM1,%XMM12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM12,%XMM6,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
TEST $0x3,%R10B | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 5138b3 <hypre_ParCSRRelaxThreads._omp_fn.1+0x363> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VADDPD %XMM9,%XMM0,%XMM7 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RCX,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0x1,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 5138a3 <hypre_ParCSRRelaxThreads._omp_fn.1+0x353> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %R14,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVAPD %XMM4,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQU (%RBX,%RCX,8),%XMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VGATHERQPD %XMM14,(%R8,%XMM15,8),%XMM13 | 5 | 1.33 | 0.83 | 0.67 | 0.67 | 0 | 0.83 | 0 | 0 | 0 | 0 | 0 | 0.67 | 0-29 | 1.25 |
VFNMADD132PD (%R11,%RCX,8),%XMM7,%XMM13 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUNPCKHPD %XMM13,%XMM13,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM13,%XMM8,%XMM7 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM7,%XMM6,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
TEST $0x1,%R10B | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 5138b3 <hypre_ParCSRRelaxThreads._omp_fn.1+0x363> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
AND $-0x2,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %R10,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV (%RBX,%RAX,8),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%R11,%RAX,8),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFNMADD231SD (%R8,%R14,8),%XMM6,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOV 0x10(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RDX,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%RDX,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 513bf0 <hypre_ParCSRRelaxThreads._omp_fn.1+0x6a0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
SUB %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x1(%RSI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP $0x2,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 513c09 <hypre_ParCSRRelaxThreads._omp_fn.1+0x6b9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x38(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RCX,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R12,%RDI,1),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R14,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x2,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
SAL $0x5,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA -0x20(%R14),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x5,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
INC %R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x7,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 5139f3 <hypre_ParCSRRelaxThreads._omp_fn.1+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x1,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 5139cf <hypre_ParCSRRelaxThreads._omp_fn.1+0x47f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x2,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 5139b4 <hypre_ParCSRRelaxThreads._omp_fn.1+0x464> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x3,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 513999 <hypre_ParCSRRelaxThreads._omp_fn.1+0x449> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x4,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 51397e <hypre_ParCSRRelaxThreads._omp_fn.1+0x42e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x5,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 513963 <hypre_ParCSRRelaxThreads._omp_fn.1+0x413> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x6,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 513bd0 <hypre_ParCSRRelaxThreads._omp_fn.1+0x680> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVDQU (%R13,%RAX,1),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VGATHERQPD %YMM12,(%R9,%YMM14,8),%YMM15 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RAX,1),%YMM15,%YMM6 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%R13,%RAX,1),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VGATHERQPD %YMM13,(%R9,%YMM8,8),%YMM7 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RAX,1),%YMM7,%YMM6 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%R13,%RAX,1),%YMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VGATHERQPD %YMM11,(%R9,%YMM9,8),%YMM10 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RAX,1),%YMM10,%YMM6 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%R13,%RAX,1),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VGATHERQPD %YMM12,(%R9,%YMM14,8),%YMM0 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RAX,1),%YMM0,%YMM6 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%R13,%RAX,1),%YMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VGATHERQPD %YMM15,(%R9,%YMM13,8),%YMM8 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RAX,1),%YMM8,%YMM6 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%R13,%RAX,1),%YMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VGATHERQPD %YMM7,(%R9,%YMM11,8),%YMM9 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RAX,1),%YMM9,%YMM6 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RAX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 513ad9 <hypre_ParCSRRelaxThreads._omp_fn.1+0x589> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VEXTRACTF128 $0x1,%YMM6,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VADDPD %XMM6,%XMM14,%XMM13 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VUNPCKHPD %XMM13,%XMM13,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM13,%XMM15,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM0,%XMM1,%XMM12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
TEST $0x3,%SIL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 513b5c <hypre_ParCSRRelaxThreads._omp_fn.1+0x60c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VADDPD %XMM6,%XMM14,%XMM11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
AND $-0x4,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %R10,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %R10,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0x1,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 513b47 <hypre_ParCSRRelaxThreads._omp_fn.1+0x5f7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %R15,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x38(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVAPD %XMM4,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQU (%R12,%R10,8),%XMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VGATHERQPD %XMM8,(%R9,%XMM7,8),%XMM9 | 5 | 1.33 | 0.83 | 0.67 | 0.67 | 0 | 0.83 | 0 | 0 | 0 | 0 | 0 | 0.67 | 0-29 | 1.25 |
VFNMADD132PD (%R15,%R10,8),%XMM11,%XMM9 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUNPCKHPD %XMM9,%XMM9,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM9,%XMM11,%XMM10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM1,%XMM10,%XMM12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
TEST $0x1,%SIL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 513b5c <hypre_ParCSRRelaxThreads._omp_fn.1+0x60c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
AND $-0x2,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV (%R12,%RCX,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%R9,%RSI,8),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFNMADD231SD (%RDI,%RCX,8),%XMM1,%XMM12 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOV 0x18(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD %XMM12,%XMM5,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVSD (%RCX,%RDX,8),%XMM6,%XMM14 | 1 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 13-15 | 4 |
VADDSD (%R13,%RDX,8),%XMM14,%XMM13 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMOVSD %XMM13,(%R13,%RDX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDX,0x28(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 513616 <hypre_ParCSRRelaxThreads._omp_fn.1+0xc6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVDQU (%RSI),%YMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV $0x20,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VGATHERQPD %YMM8,(%R8,%YMM11,8),%YMM1 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI),%YMM1,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
JMP 5136bc <hypre_ParCSRRelaxThreads._omp_fn.1+0x16c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVDQU (%R13),%YMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV $0x20,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VGATHERQPD %YMM9,(%R9,%YMM10,8),%YMM0 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI),%YMM0,%YMM6 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
JMP 513948 <hypre_ParCSRRelaxThreads._omp_fn.1+0x3f8> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVSD %XMM1,%XMM1,%XMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JMP 513b5c <hypre_ParCSRRelaxThreads._omp_fn.1+0x60c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVSD %XMM6,%XMM6,%XMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JMP 5138b3 <hypre_ParCSRRelaxThreads._omp_fn.1+0x363> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVSD %XMM1,%XMM1,%XMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VXORPD %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 513b05 <hypre_ParCSRRelaxThreads._omp_fn.1+0x5b5> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVSD %XMM6,%XMM6,%XMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 513867 <hypre_ParCSRRelaxThreads._omp_fn.1+0x317> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Function | hypre_ParCSRRelaxThreads._omp_fn.1 |
Source file and lines | ams.c:3662-3682 |
Module | exec |
nb instructions | 236 |
nb uops | 301 |
loop length | 1070 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 15 |
used ymm registers | 13 |
used zmm registers | 0 |
nb stack references | 7 |
ADD-SUB / MUL ratio | 13.00 |
micro-operation queue | 53.17 cycles |
front end | 53.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 37.67 | 37.67 | 39.00 | 39.00 | 0.50 | 37.67 | 37.00 | 0.50 | 0.50 | 0.50 | 37.00 | 39.00 |
cycles | 37.67 | 37.67 | 39.00 | 39.00 | 0.50 | 37.67 | 37.00 | 0.50 | 0.50 | 0.50 | 37.00 | 39.00 |
Cycles executing div or sqrt instructions | 4.00 |
FE+BE cycles | 99.77-426.89 |
Stall cycles | 46.10-373.22 |
ROB full (events) | 51.88-389.06 |
Front-end | 53.17 |
Dispatch | 39.00 |
DIV/SQRT | 4.00 |
Overall L1 | 53.17 |
all | 43% |
load | 94% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 73% |
load | 80% |
store | 0% |
mul | 0% |
add-sub | 61% |
fma | 88% |
div/sqrt | 0% |
other | 80% |
all | 64% |
load | 84% |
store | 0% |
mul | 0% |
add-sub | 61% |
fma | 88% |
div/sqrt | 0% |
other | 55% |
all | 27% |
load | 44% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 34% |
load | 40% |
store | 12% |
mul | 12% |
add-sub | 20% |
fma | 43% |
div/sqrt | 12% |
other | 37% |
all | 32% |
load | 41% |
store | 12% |
mul | 12% |
add-sub | 20% |
fma | 43% |
div/sqrt | 12% |
other | 29% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDX,0x28(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 513b8c <hypre_ParCSRRelaxThreads._omp_fn.1+0x63c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x30(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RDX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RAX,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA (%R11,%RSI,1),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCOMISD (%RDI),%XMM3 | 2 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
JE 513608 <hypre_ParCSRRelaxThreads._omp_fn.1+0xb8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x20(%RSP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%R10,%RDX,8),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R15,%RDX,8),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R10,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 513c00 <hypre_ParCSRRelaxThreads._omp_fn.1+0x6b0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
SUB %RAX,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA -0x1(%R10),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP $0x2,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 513c1a <hypre_ParCSRRelaxThreads._omp_fn.1+0x6ca> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R10,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD %RBX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x2,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
SAL $0x5,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA -0x20(%R15),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x5,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
INC %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x7,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 51375b <hypre_ParCSRRelaxThreads._omp_fn.1+0x20b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x1,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 513739 <hypre_ParCSRRelaxThreads._omp_fn.1+0x1e9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x2,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 513720 <hypre_ParCSRRelaxThreads._omp_fn.1+0x1d0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x3,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 513707 <hypre_ParCSRRelaxThreads._omp_fn.1+0x1b7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x4,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 5136ee <hypre_ParCSRRelaxThreads._omp_fn.1+0x19e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x5,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 5136d5 <hypre_ParCSRRelaxThreads._omp_fn.1+0x185> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x6,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 513bb0 <hypre_ParCSRRelaxThreads._omp_fn.1+0x660> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVDQU (%RSI,%RCX,1),%YMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VGATHERQPD %YMM10,(%R8,%YMM9,8),%YMM12 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RCX,1),%YMM12,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%RSI,%RCX,1),%YMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VGATHERQPD %YMM14,(%R8,%YMM13,8),%YMM15 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RCX,1),%YMM15,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%RSI,%RCX,1),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VGATHERQPD %YMM7,(%R8,%YMM8,8),%YMM11 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RCX,1),%YMM11,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%RSI,%RCX,1),%YMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VGATHERQPD %YMM10,(%R8,%YMM9,8),%YMM1 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RCX,1),%YMM1,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%RSI,%RCX,1),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VGATHERQPD %YMM12,(%R8,%YMM14,8),%YMM13 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RCX,1),%YMM13,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%RSI,%RCX,1),%YMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VGATHERQPD %YMM15,(%R8,%YMM7,8),%YMM8 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RCX,1),%YMM8,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RCX,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 513839 <hypre_ParCSRRelaxThreads._omp_fn.1+0x2e9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VEXTRACTF128 $0x1,%YMM0,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VADDPD %XMM0,%XMM9,%XMM10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VUNPCKHPD %XMM10,%XMM10,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM10,%XMM1,%XMM12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM12,%XMM6,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
TEST $0x3,%R10B | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 5138b3 <hypre_ParCSRRelaxThreads._omp_fn.1+0x363> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VADDPD %XMM9,%XMM0,%XMM7 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RCX,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0x1,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 5138a3 <hypre_ParCSRRelaxThreads._omp_fn.1+0x353> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %R14,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVAPD %XMM4,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQU (%RBX,%RCX,8),%XMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VGATHERQPD %XMM14,(%R8,%XMM15,8),%XMM13 | 5 | 1.33 | 0.83 | 0.67 | 0.67 | 0 | 0.83 | 0 | 0 | 0 | 0 | 0 | 0.67 | 0-29 | 1.25 |
VFNMADD132PD (%R11,%RCX,8),%XMM7,%XMM13 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUNPCKHPD %XMM13,%XMM13,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM13,%XMM8,%XMM7 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM7,%XMM6,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
TEST $0x1,%R10B | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 5138b3 <hypre_ParCSRRelaxThreads._omp_fn.1+0x363> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
AND $-0x2,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %R10,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV (%RBX,%RAX,8),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%R11,%RAX,8),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFNMADD231SD (%R8,%R14,8),%XMM6,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOV 0x10(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RDX,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%RDX,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 513bf0 <hypre_ParCSRRelaxThreads._omp_fn.1+0x6a0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
SUB %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x1(%RSI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP $0x2,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 513c09 <hypre_ParCSRRelaxThreads._omp_fn.1+0x6b9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x38(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RCX,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R12,%RDI,1),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R14,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x2,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
SAL $0x5,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA -0x20(%R14),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x5,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
INC %R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x7,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 5139f3 <hypre_ParCSRRelaxThreads._omp_fn.1+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x1,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 5139cf <hypre_ParCSRRelaxThreads._omp_fn.1+0x47f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x2,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 5139b4 <hypre_ParCSRRelaxThreads._omp_fn.1+0x464> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x3,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 513999 <hypre_ParCSRRelaxThreads._omp_fn.1+0x449> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x4,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 51397e <hypre_ParCSRRelaxThreads._omp_fn.1+0x42e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x5,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 513963 <hypre_ParCSRRelaxThreads._omp_fn.1+0x413> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x6,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 513bd0 <hypre_ParCSRRelaxThreads._omp_fn.1+0x680> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVDQU (%R13,%RAX,1),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VGATHERQPD %YMM12,(%R9,%YMM14,8),%YMM15 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RAX,1),%YMM15,%YMM6 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%R13,%RAX,1),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VGATHERQPD %YMM13,(%R9,%YMM8,8),%YMM7 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RAX,1),%YMM7,%YMM6 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%R13,%RAX,1),%YMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VGATHERQPD %YMM11,(%R9,%YMM9,8),%YMM10 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RAX,1),%YMM10,%YMM6 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%R13,%RAX,1),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VGATHERQPD %YMM12,(%R9,%YMM14,8),%YMM0 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RAX,1),%YMM0,%YMM6 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%R13,%RAX,1),%YMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VGATHERQPD %YMM15,(%R9,%YMM13,8),%YMM8 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RAX,1),%YMM8,%YMM6 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%R13,%RAX,1),%YMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VGATHERQPD %YMM7,(%R9,%YMM11,8),%YMM9 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RAX,1),%YMM9,%YMM6 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RAX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 513ad9 <hypre_ParCSRRelaxThreads._omp_fn.1+0x589> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VEXTRACTF128 $0x1,%YMM6,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VADDPD %XMM6,%XMM14,%XMM13 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VUNPCKHPD %XMM13,%XMM13,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM13,%XMM15,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM0,%XMM1,%XMM12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
TEST $0x3,%SIL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 513b5c <hypre_ParCSRRelaxThreads._omp_fn.1+0x60c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VADDPD %XMM6,%XMM14,%XMM11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
AND $-0x4,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %R10,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %R10,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0x1,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 513b47 <hypre_ParCSRRelaxThreads._omp_fn.1+0x5f7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %R15,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x38(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVAPD %XMM4,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQU (%R12,%R10,8),%XMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VGATHERQPD %XMM8,(%R9,%XMM7,8),%XMM9 | 5 | 1.33 | 0.83 | 0.67 | 0.67 | 0 | 0.83 | 0 | 0 | 0 | 0 | 0 | 0.67 | 0-29 | 1.25 |
VFNMADD132PD (%R15,%R10,8),%XMM11,%XMM9 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUNPCKHPD %XMM9,%XMM9,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM9,%XMM11,%XMM10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM1,%XMM10,%XMM12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
TEST $0x1,%SIL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 513b5c <hypre_ParCSRRelaxThreads._omp_fn.1+0x60c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
AND $-0x2,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV (%R12,%RCX,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%R9,%RSI,8),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFNMADD231SD (%RDI,%RCX,8),%XMM1,%XMM12 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOV 0x18(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD %XMM12,%XMM5,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVSD (%RCX,%RDX,8),%XMM6,%XMM14 | 1 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 13-15 | 4 |
VADDSD (%R13,%RDX,8),%XMM14,%XMM13 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMOVSD %XMM13,(%R13,%RDX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDX,0x28(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 513616 <hypre_ParCSRRelaxThreads._omp_fn.1+0xc6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVDQU (%RSI),%YMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV $0x20,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VGATHERQPD %YMM8,(%R8,%YMM11,8),%YMM1 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI),%YMM1,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
JMP 5136bc <hypre_ParCSRRelaxThreads._omp_fn.1+0x16c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVDQU (%R13),%YMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM2,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV $0x20,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VGATHERQPD %YMM9,(%R9,%YMM10,8),%YMM0 | 5 | 1.33 | 1.33 | 1.33 | 1.33 | 0 | 1.33 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI),%YMM0,%YMM6 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
JMP 513948 <hypre_ParCSRRelaxThreads._omp_fn.1+0x3f8> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVSD %XMM1,%XMM1,%XMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JMP 513b5c <hypre_ParCSRRelaxThreads._omp_fn.1+0x60c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVSD %XMM6,%XMM6,%XMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JMP 5138b3 <hypre_ParCSRRelaxThreads._omp_fn.1+0x363> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVSD %XMM1,%XMM1,%XMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VXORPD %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 513b05 <hypre_ParCSRRelaxThreads._omp_fn.1+0x5b5> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVSD %XMM6,%XMM6,%XMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 513867 <hypre_ParCSRRelaxThreads._omp_fn.1+0x317> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |