Loop Id: 60 | Module: exec | Source: RandomGenerator.h:51-55 [...] | Coverage: 0.01% |
---|
Loop Id: 60 | Module: exec | Source: RandomGenerator.h:51-55 [...] | Coverage: 0.01% |
---|
0x40b290 MOV %RSP,%R12 |
0x40b293 MOV 0x1390(%R15),%RCX |
0x40b29a VXORPD %XMM0,%XMM0,%XMM0 |
0x40b29e XOR %EAX,%EAX |
0x40b2a0 VMOVAPD %XMM5,%XMM1 |
0x40b2a4 JMP 40b329 |
(64) 0x40b2b0 MOV 0x10(%R15,%RCX,8),%RDX |
(64) 0x40b2b5 MOV %RDX,%RSI |
(64) 0x40b2b8 SHR $0xb,%RSI |
(64) 0x40b2bc MOV %ESI,%ESI |
(64) 0x40b2be XOR %RDX,%RSI |
(64) 0x40b2c1 MOV %ESI,%EDX |
(64) 0x40b2c3 AND $0x13a58ad,%EDX |
(64) 0x40b2c9 SAL $0x7,%RDX |
(64) 0x40b2cd XOR %RSI,%RDX |
(64) 0x40b2d0 MOV %EDX,%ESI |
(64) 0x40b2d2 AND $0x1df8c,%ESI |
(64) 0x40b2d8 SAL $0xf,%RSI |
(64) 0x40b2dc XOR %RDX,%RSI |
(64) 0x40b2df MOV %RSI,%RDX |
(64) 0x40b2e2 SHR $0x12,%RDX |
(64) 0x40b2e6 XOR %RSI,%RDX |
(64) 0x40b2e9 VCVTUSI2SD %RDX,%XMM11,%XMM2 |
(64) 0x40b2ef INC %RCX |
(64) 0x40b2f2 VMOVSD %XMM1,-0x2d8(%RBP) |
(64) 0x40b2fa FLDL -0x2d8(%RBP) |
(64) 0x40b300 FMULS 0x8b50a(%RIP) |
(64) 0x40b306 VFMADD231SD %XMM2,%XMM1,%XMM0 |
(64) 0x40b30b FSTPL -0x2d0(%RBP) |
(64) 0x40b311 VMOVSD -0x2d0(%RBP),%XMM1 |
(64) 0x40b319 LEA 0x1(%RAX),%RDX |
(64) 0x40b31d CMP %RBX,%RAX |
(64) 0x40b320 MOV %RDX,%RAX |
(64) 0x40b323 JE 40b4f0 |
(64) 0x40b329 CMP $0x26f,%RCX |
(64) 0x40b330 JBE 40b2b0 |
(64) 0x40b336 MOV 0x10(%R15),%RDX |
(64) 0x40b33a XOR %ECX,%ECX |
(64) 0x40b33c NOPL (%RAX) |
(65) 0x40b340 VMOVDQU 0x18(%R15,%RCX,1),%YMM2 |
(65) 0x40b347 VPAND %YMM6,%YMM2,%YMM3 |
(65) 0x40b34b VPBROADCASTQ %RDX,%YMM4 |
(65) 0x40b351 VALIGNQ $0x3,%YMM4,%YMM2,%YMM4 |
(65) 0x40b358 VPTERNLOGQ $-0x14,%YMM7,%YMM3,%YMM4 |
(65) 0x40b35f VPSRLQ $0x1,%YMM4,%YMM3 |
(65) 0x40b364 VPTESTMQ %YMM8,%YMM2,%K1 |
(65) 0x40b36a VMOVDQA64 %YMM9,%YMM2{%K1}{z} |
(65) 0x40b370 VPTERNLOGQ $-0x6a,0xc78(%R15,%RCX,1),%YMM3,%YMM2 |
(65) 0x40b37c VMOVDQU %YMM2,0x10(%R15,%RCX,1) |
(65) 0x40b383 MOV 0x30(%R15,%RCX,1),%RDX |
(65) 0x40b388 ADD $0x20,%RCX |
(65) 0x40b38c CMP $0x700,%RCX |
(65) 0x40b393 JNE 40b340 |
(64) 0x40b395 MOV 0x718(%R15),%RDX |
(64) 0x40b39c MOV %EDX,%ECX |
(64) 0x40b39e AND $0x7ffffffe,%ECX |
(64) 0x40b3a4 MOV 0x710(%R15),%RSI |
(64) 0x40b3ab AND %R8,%RSI |
(64) 0x40b3ae OR %RCX,%RSI |
(64) 0x40b3b1 SHR $0x1,%RSI |
(64) 0x40b3b4 XOR 0x1378(%R15),%RSI |
(64) 0x40b3bb MOV 0x720(%R15),%RCX |
(64) 0x40b3c2 MOV %EDX,%EDI |
(64) 0x40b3c4 AND $0x1,%EDI |
(64) 0x40b3c7 NEG %EDI |
(64) 0x40b3c9 AND $-0x66f74f21,%EDI |
(64) 0x40b3cf XOR %RSI,%RDI |
(64) 0x40b3d2 MOV %RDI,0x710(%R15) |
(64) 0x40b3d9 MOV %ECX,%ESI |
(64) 0x40b3db AND $0x7ffffffe,%ESI |
(64) 0x40b3e1 AND $-0x80000000,%RDX |
(64) 0x40b3e8 OR %RSI,%RDX |
(64) 0x40b3eb SHR $0x1,%RDX |
(64) 0x40b3ee XOR 0x1380(%R15),%RDX |
(64) 0x40b3f5 MOV %ECX,%ESI |
(64) 0x40b3f7 AND $0x1,%ESI |
(64) 0x40b3fa NEG %ESI |
(64) 0x40b3fc AND $-0x66f74f21,%ESI |
(64) 0x40b402 XOR %RDX,%RSI |
(64) 0x40b405 MOV %RSI,0x718(%R15) |
(64) 0x40b40c MOV 0x728(%R15),%RDX |
(64) 0x40b413 MOV %EDX,%ESI |
(64) 0x40b415 AND $0x7ffffffe,%ESI |
(64) 0x40b41b AND $-0x80000000,%RCX |
(64) 0x40b422 OR %RSI,%RCX |
(64) 0x40b425 SHR $0x1,%RCX |
(64) 0x40b428 XOR 0x1388(%R15),%RCX |
(64) 0x40b42f AND $0x1,%EDX |
(64) 0x40b432 NEG %EDX |
(64) 0x40b434 AND $-0x66f74f21,%EDX |
(64) 0x40b43a XOR %RCX,%RDX |
(64) 0x40b43d MOV %RDX,0x720(%R15) |
(64) 0x40b444 XOR %ECX,%ECX |
(64) 0x40b446 NOPW %CS:(%RAX,%RAX,1) |
(66) 0x40b450 VMOVDQU 0x730(%R15,%RCX,1),%YMM2 |
(66) 0x40b45a VPAND %YMM6,%YMM2,%YMM3 |
(66) 0x40b45e VALIGNQ $0x3,0x728(%R15,%RCX,1){1to4},%YMM2,%YMM4 |
(66) 0x40b46a VPTERNLOGQ $-0x14,%YMM7,%YMM3,%YMM4 |
(66) 0x40b471 VPSRLQ $0x1,%YMM4,%YMM3 |
(66) 0x40b476 VPTESTMQ %YMM8,%YMM2,%K1 |
(66) 0x40b47c VMOVDQA64 %YMM9,%YMM2{%K1}{z} |
(66) 0x40b482 VPTERNLOGQ $-0x6a,0x10(%R15,%RCX,1),%YMM3,%YMM2 |
(66) 0x40b48e VMOVDQU %YMM2,0x728(%R15,%RCX,1) |
(66) 0x40b498 ADD $0x20,%RCX |
(66) 0x40b49c CMP $0xc60,%RCX |
(66) 0x40b4a3 JNE 40b450 |
(64) 0x40b4a5 MOV 0x10(%R15),%RDX |
(64) 0x40b4a9 MOV %EDX,%ECX |
(64) 0x40b4ab AND $0x7ffffffe,%ECX |
(64) 0x40b4b1 MOV 0x1388(%R15),%RSI |
(64) 0x40b4b8 AND %R8,%RSI |
(64) 0x40b4bb OR %RCX,%RSI |
(64) 0x40b4be SHR $0x1,%RSI |
(64) 0x40b4c1 XOR 0xc70(%R15),%RSI |
(64) 0x40b4c8 MOV %EDX,%ECX |
(64) 0x40b4ca AND $0x1,%ECX |
(64) 0x40b4cd NEG %ECX |
(64) 0x40b4cf AND $-0x66f74f21,%ECX |
(64) 0x40b4d5 XOR %RSI,%RCX |
(64) 0x40b4d8 MOV %RCX,0x1388(%R15) |
(64) 0x40b4df XOR %ECX,%ECX |
(64) 0x40b4e1 JMP 40b2b5 |
0x40b4f0 MOV %RCX,0x1390(%R15) |
0x40b4f7 VDIVSD %XMM1,%XMM0,%XMM0 |
0x40b4fb VUCOMISD %XMM5,%XMM0 |
0x40b4ff JAE 40b881 |
0x40b505 VMOVSD 0x1398(%R15),%XMM1 |
0x40b50e VMOVSD 0x13a0(%R15),%XMM2 |
0x40b517 VSUBSD %XMM1,%XMM2,%XMM2 |
0x40b51b VFMADD213SD %XMM1,%XMM0,%XMM2 |
0x40b520 MOV %R12,%RSP |
0x40b523 MOV %RSP,%R12 |
0x40b526 VMULSD 0x8ab2a(%RIP),%XMM2,%XMM10 |
0x40b52e MOV 0x1390(%R15),%RCX |
0x40b535 VXORPD %XMM0,%XMM0,%XMM0 |
0x40b539 VMOVAPD %XMM5,%XMM1 |
0x40b53d XOR %EAX,%EAX |
0x40b53f JMP 40b5c9 |
(61) 0x40b550 MOV 0x10(%R15,%RCX,8),%RDX |
(61) 0x40b555 MOV %RDX,%RSI |
(61) 0x40b558 SHR $0xb,%RSI |
(61) 0x40b55c MOV %ESI,%ESI |
(61) 0x40b55e XOR %RDX,%RSI |
(61) 0x40b561 MOV %ESI,%EDX |
(61) 0x40b563 AND $0x13a58ad,%EDX |
(61) 0x40b569 SAL $0x7,%RDX |
(61) 0x40b56d XOR %RSI,%RDX |
(61) 0x40b570 MOV %EDX,%ESI |
(61) 0x40b572 AND $0x1df8c,%ESI |
(61) 0x40b578 SAL $0xf,%RSI |
(61) 0x40b57c XOR %RDX,%RSI |
(61) 0x40b57f MOV %RSI,%RDX |
(61) 0x40b582 SHR $0x12,%RDX |
(61) 0x40b586 XOR %RSI,%RDX |
(61) 0x40b589 VCVTUSI2SD %RDX,%XMM11,%XMM2 |
(61) 0x40b58f INC %RCX |
(61) 0x40b592 VMOVSD %XMM1,-0x2c8(%RBP) |
(61) 0x40b59a FLDL -0x2c8(%RBP) |
(61) 0x40b5a0 FMULS 0x8b26a(%RIP) |
(61) 0x40b5a6 VFMADD231SD %XMM2,%XMM1,%XMM0 |
(61) 0x40b5ab FSTPL -0x2c0(%RBP) |
(61) 0x40b5b1 VMOVSD -0x2c0(%RBP),%XMM1 |
(61) 0x40b5b9 LEA 0x1(%RAX),%RDX |
(61) 0x40b5bd CMP %RBX,%RAX |
(61) 0x40b5c0 MOV %RDX,%RAX |
(61) 0x40b5c3 JE 40b790 |
(61) 0x40b5c9 CMP $0x26f,%RCX |
(61) 0x40b5d0 JBE 40b550 |
(61) 0x40b5d6 MOV 0x10(%R15),%RDX |
(61) 0x40b5da XOR %ECX,%ECX |
(61) 0x40b5dc NOPL (%RAX) |
(62) 0x40b5e0 VMOVDQU 0x18(%R15,%RCX,1),%YMM2 |
(62) 0x40b5e7 VPAND %YMM6,%YMM2,%YMM3 |
(62) 0x40b5eb VPBROADCASTQ %RDX,%YMM4 |
(62) 0x40b5f1 VALIGNQ $0x3,%YMM4,%YMM2,%YMM4 |
(62) 0x40b5f8 VPTERNLOGQ $-0x14,%YMM7,%YMM3,%YMM4 |
(62) 0x40b5ff VPSRLQ $0x1,%YMM4,%YMM3 |
(62) 0x40b604 VPTESTMQ %YMM8,%YMM2,%K1 |
(62) 0x40b60a VMOVDQA64 %YMM9,%YMM2{%K1}{z} |
(62) 0x40b610 VPTERNLOGQ $-0x6a,0xc78(%R15,%RCX,1),%YMM3,%YMM2 |
(62) 0x40b61c VMOVDQU %YMM2,0x10(%R15,%RCX,1) |
(62) 0x40b623 MOV 0x30(%R15,%RCX,1),%RDX |
(62) 0x40b628 ADD $0x20,%RCX |
(62) 0x40b62c CMP $0x700,%RCX |
(62) 0x40b633 JNE 40b5e0 |
(61) 0x40b635 MOV 0x718(%R15),%RDX |
(61) 0x40b63c MOV %EDX,%ECX |
(61) 0x40b63e AND $0x7ffffffe,%ECX |
(61) 0x40b644 MOV 0x710(%R15),%RSI |
(61) 0x40b64b AND %R8,%RSI |
(61) 0x40b64e OR %RCX,%RSI |
(61) 0x40b651 SHR $0x1,%RSI |
(61) 0x40b654 XOR 0x1378(%R15),%RSI |
(61) 0x40b65b MOV 0x720(%R15),%RCX |
(61) 0x40b662 MOV %EDX,%EDI |
(61) 0x40b664 AND $0x1,%EDI |
(61) 0x40b667 NEG %EDI |
(61) 0x40b669 AND $-0x66f74f21,%EDI |
(61) 0x40b66f XOR %RSI,%RDI |
(61) 0x40b672 MOV %RDI,0x710(%R15) |
(61) 0x40b679 MOV %ECX,%ESI |
(61) 0x40b67b AND $0x7ffffffe,%ESI |
(61) 0x40b681 AND $-0x80000000,%RDX |
(61) 0x40b688 OR %RSI,%RDX |
(61) 0x40b68b SHR $0x1,%RDX |
(61) 0x40b68e XOR 0x1380(%R15),%RDX |
(61) 0x40b695 MOV %ECX,%ESI |
(61) 0x40b697 AND $0x1,%ESI |
(61) 0x40b69a NEG %ESI |
(61) 0x40b69c AND $-0x66f74f21,%ESI |
(61) 0x40b6a2 XOR %RDX,%RSI |
(61) 0x40b6a5 MOV %RSI,0x718(%R15) |
(61) 0x40b6ac MOV 0x728(%R15),%RDX |
(61) 0x40b6b3 MOV %EDX,%ESI |
(61) 0x40b6b5 AND $0x7ffffffe,%ESI |
(61) 0x40b6bb AND $-0x80000000,%RCX |
(61) 0x40b6c2 OR %RSI,%RCX |
(61) 0x40b6c5 SHR $0x1,%RCX |
(61) 0x40b6c8 XOR 0x1388(%R15),%RCX |
(61) 0x40b6cf AND $0x1,%EDX |
(61) 0x40b6d2 NEG %EDX |
(61) 0x40b6d4 AND $-0x66f74f21,%EDX |
(61) 0x40b6da XOR %RCX,%RDX |
(61) 0x40b6dd MOV %RDX,0x720(%R15) |
(61) 0x40b6e4 XOR %ECX,%ECX |
(61) 0x40b6e6 NOPW %CS:(%RAX,%RAX,1) |
(63) 0x40b6f0 VMOVDQU 0x730(%R15,%RCX,1),%YMM2 |
(63) 0x40b6fa VPAND %YMM6,%YMM2,%YMM3 |
(63) 0x40b6fe VALIGNQ $0x3,0x728(%R15,%RCX,1){1to4},%YMM2,%YMM4 |
(63) 0x40b70a VPTERNLOGQ $-0x14,%YMM7,%YMM3,%YMM4 |
(63) 0x40b711 VPSRLQ $0x1,%YMM4,%YMM3 |
(63) 0x40b716 VPTESTMQ %YMM8,%YMM2,%K1 |
(63) 0x40b71c VMOVDQA64 %YMM9,%YMM2{%K1}{z} |
(63) 0x40b722 VPTERNLOGQ $-0x6a,0x10(%R15,%RCX,1),%YMM3,%YMM2 |
(63) 0x40b72e VMOVDQU %YMM2,0x728(%R15,%RCX,1) |
(63) 0x40b738 ADD $0x20,%RCX |
(63) 0x40b73c CMP $0xc60,%RCX |
(63) 0x40b743 JNE 40b6f0 |
(61) 0x40b745 MOV 0x10(%R15),%RDX |
(61) 0x40b749 MOV %EDX,%ECX |
(61) 0x40b74b AND $0x7ffffffe,%ECX |
(61) 0x40b751 MOV 0x1388(%R15),%RSI |
(61) 0x40b758 AND %R8,%RSI |
(61) 0x40b75b OR %RCX,%RSI |
(61) 0x40b75e SHR $0x1,%RSI |
(61) 0x40b761 XOR 0xc70(%R15),%RSI |
(61) 0x40b768 MOV %EDX,%ECX |
(61) 0x40b76a AND $0x1,%ECX |
(61) 0x40b76d NEG %ECX |
(61) 0x40b76f AND $-0x66f74f21,%ECX |
(61) 0x40b775 XOR %RSI,%RCX |
(61) 0x40b778 MOV %RCX,0x1388(%R15) |
(61) 0x40b77f XOR %ECX,%ECX |
(61) 0x40b781 JMP 40b555 |
0x40b790 MOV %RCX,0x1390(%R15) |
0x40b797 VDIVSD %XMM1,%XMM0,%XMM0 |
0x40b79b VUCOMISD %XMM5,%XMM0 |
0x40b79f JAE 40b8c9 |
0x40b7a5 VSUBSD %XMM10,%XMM5,%XMM1 |
0x40b7aa VMOVSD 0x1398(%R15),%XMM2 |
0x40b7b3 VMOVSD 0x13a0(%R15),%XMM3 |
0x40b7bc VSUBSD %XMM2,%XMM3,%XMM3 |
0x40b7c0 VFMADD213SD %XMM2,%XMM0,%XMM3 |
0x40b7c5 VMOVSD %XMM3,-0x38(%RBP) |
0x40b7ca MOV %R12,%RSP |
0x40b7cd VMOVAPD %XMM1,%XMM0 |
0x40b7d1 VZEROUPPER |
0x40b7d4 CALL 478e10 <log> |
0x40b7d9 VMULSD 0x8a87f(%RIP),%XMM0,%XMM0 |
0x40b7e1 VSQRTSD %XMM0,%XMM0,%XMM0 |
0x40b7e5 VMOVSD %XMM0,-0x48(%RBP) |
0x40b7ea VMOVSD -0x38(%RBP),%XMM0 |
0x40b7ef VMULSD 0x8a871(%RIP),%XMM0,%XMM0 |
0x40b7f7 LEA -0x2b8(%RBP),%RDI |
0x40b7fe LEA -0x2b0(%RBP),%RSI |
0x40b805 CALL 478e90 <sincos> |
0x40b80a VMOVSD 0x8a816(%RIP),%XMM5 |
0x40b812 VMOVSD -0x48(%RBP),%XMM1 |
0x40b817 VMULSD -0x2b0(%RBP),%XMM1,%XMM0 |
0x40b81f MOV %R14,%RAX |
0x40b822 SAL $0x4,%RAX |
0x40b826 MOV -0x90(%RBP),%RCX |
0x40b82d VMOVSD %XMM0,(%RCX,%RAX,1) |
0x40b832 VMULSD -0x2b8(%RBP),%XMM1,%XMM0 |
0x40b83a VMOVSD %XMM0,0x8(%RCX,%RAX,1) |
0x40b840 LEA 0x1(%R14),%RAX |
0x40b844 CMP -0x228(%RBP),%R14 |
0x40b84b MOV %RAX,%R14 |
0x40b84e VPBROADCASTQ 0x8a7e1(%RIP),%YMM6 |
0x40b857 VPBROADCASTQ 0x8a7e0(%RIP),%YMM7 |
0x40b860 VPBROADCASTQ 0x8a7df(%RIP),%YMM8 |
0x40b869 VPBROADCASTQ 0x8a7de(%RIP),%YMM9 |
0x40b872 MOV $-0x80000000,%R8 |
0x40b879 JNE 40b290 |
0x40b881 VXORPD %XMM1,%XMM1,%XMM1 |
0x40b885 VMOVAPD %XMM5,%XMM0 |
0x40b889 VZEROUPPER |
0x40b88c CALL 478e60 <nextafter> |
0x40b891 MOV $-0x80000000,%R8 |
0x40b898 VPBROADCASTQ 0x8a7af(%RIP),%YMM9 |
0x40b8a1 VPBROADCASTQ 0x8a79e(%RIP),%YMM8 |
0x40b8aa VPBROADCASTQ 0x8a78d(%RIP),%YMM7 |
0x40b8b3 VPBROADCASTQ 0x8a77c(%RIP),%YMM6 |
0x40b8bc VMOVSD 0x8a764(%RIP),%XMM5 |
0x40b8c4 JMP 40b505 |
0x40b8c9 VXORPD %XMM1,%XMM1,%XMM1 |
0x40b8cd VMOVAPD %XMM5,%XMM0 |
0x40b8d1 VMOVSD %XMM10,-0x38(%RBP) |
0x40b8d6 VZEROUPPER |
0x40b8d9 CALL 478e60 <nextafter> |
0x40b8de VMOVSD -0x38(%RBP),%XMM10 |
0x40b8e3 VMOVSD 0x8a73d(%RIP),%XMM5 |
0x40b8eb JMP 40b7a5 |
/usr/lib64/gcc/x86_64-pc-linux-gnu/13.1.1/../../../../include/c++/13.1.1/bits/random.tcc: 404 - 3371 |
-------------------------------------------------------------------------------- |
404: for (size_t __k = 0; __k < (__n - __m); ++__k) |
405: { |
406: _UIntType __y = ((_M_x[__k] & __upper_mask) |
407: | (_M_x[__k + 1] & __lower_mask)); |
408: _M_x[__k] = (_M_x[__k + __m] ^ (__y >> 1) |
409: ^ ((__y & 0x01) ? __a : 0)); |
410: } |
411: |
412: for (size_t __k = (__n - __m); __k < (__n - 1); ++__k) |
413: { |
414: _UIntType __y = ((_M_x[__k] & __upper_mask) |
415: | (_M_x[__k + 1] & __lower_mask)); |
416: _M_x[__k] = (_M_x[__k + (__m - __n)] ^ (__y >> 1) |
417: ^ ((__y & 0x01) ? __a : 0)); |
418: } |
419: |
420: _UIntType __y = ((_M_x[__n - 1] & __upper_mask) |
421: | (_M_x[0] & __lower_mask)); |
422: _M_x[__n - 1] = (_M_x[__m - 1] ^ (__y >> 1) |
423: ^ ((__y & 0x01) ? __a : 0)); |
[...] |
458: if (_M_p >= state_size) |
459: _M_gen_rand(); |
460: |
461: // Calculate o(x(i)). |
462: result_type __z = _M_x[_M_p++]; |
463: __z ^= (__z >> __u) & __d; |
464: __z ^= (__z << __s) & __b; |
465: __z ^= (__z << __t) & __c; |
466: __z ^= (__z >> __l); |
[...] |
3357: const size_t __m = std::max<size_t>(1UL, |
3358: (__b + __log2r - 1UL) / __log2r); |
3359: _RealType __ret; |
3360: _RealType __sum = _RealType(0); |
3361: _RealType __tmp = _RealType(1); |
3362: for (size_t __k = __m; __k != 0; --__k) |
3363: { |
3364: __sum += _RealType(__urng() - __urng.min()) * __tmp; |
3365: __tmp *= __r; |
3366: } |
3367: __ret = __sum / __tmp; |
3368: if (__builtin_expect(__ret >= _RealType(1), 0)) |
3369: { |
3370: #if _GLIBCXX_USE_C99_MATH_TR1 |
3371: __ret = std::nextafter(_RealType(1), _RealType(0)); |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Utilities/RandomGenerator.h: 51 - 55 |
-------------------------------------------------------------------------------- |
51: for (int i = 0; i + 1 < n; i += 2) |
52: { |
53: double temp1 = 1.0 - 0.9999999999 * rng(), temp2 = rng(); |
54: a[i] = sqrt(-2.0 * log(temp1)) * cos(6.283185306 * temp2); |
55: a[i + 1] = sqrt(-2.0 * log(temp1)) * sin(6.283185306 * temp2); |
/usr/lib64/gcc/x86_64-pc-linux-gnu/13.1.1/../../../../include/c++/13.1.1/bits/random.h: 1804 - 1909 |
-------------------------------------------------------------------------------- |
1804: { return _M_a; } |
1805: |
1806: result_type |
1807: b() const |
1808: { return _M_b; } |
[...] |
1900: { return this->operator()(__urng, _M_param); } |
[...] |
1909: return (__aurng() * (__p.b() - __p.a())) + __p.a(); |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | main | miniqmc.cpp:404 | exec |
○ | __libc_init_first | libc.so.6 |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.47 |
CQA speedup if FP arith vectorized | 2.00 |
CQA speedup if fully vectorized | 3.76 - 3.36 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | NA |
Bottlenecks | micro-operation queue |
Function | main.extracted.104 |
Source | random.tcc:404-3371,RandomGenerator.h:51-55,random.h:1804-1909 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 23.50 |
CQA cycles if no scalar integer | 16.00 |
CQA cycles if FP arith vectorized | 11.75 |
CQA cycles if fully vectorized | 6.25 - 7.00 |
Front-end cycles | 23.50 |
DIV/SQRT cycles | 12.50 - 14.00 |
P0 cycles | 9.50 |
P1 cycles | 8.50 |
P2 cycles | 13.50 |
P3 cycles | 13.50 |
P4 cycles | 11.00 |
P5 cycles | 6.00 |
P6 cycles | 9.00 |
P7 cycles | 11.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 20.70 - 21.49 |
Stall cycles (UFS) | 0.00 - 0.74 |
Nb insns | 81.00 |
Nb uops | 94.00 |
Nb loads | 27.00 |
Nb stores | 7.00 |
Nb stack references | 6.00 |
FLOP/cycle | 0.64 |
Nb FLOP add-sub | 3.00 |
Nb FLOP mul | 5.00 |
Nb FLOP fma | 2.00 |
Nb FLOP div | 2.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 1.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 11.57 |
Bytes prefetched | 0.00 |
Bytes loaded | 216.00 |
Bytes stored | 56.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 20.69 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | 0.00 |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 42.86 |
Vector-efficiency ratio all | 14.87 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | 12.50 |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 17.41 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.47 |
CQA speedup if FP arith vectorized | 2.00 |
CQA speedup if fully vectorized | 3.76 - 3.36 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | NA |
Bottlenecks | micro-operation queue |
Function | main.extracted.104 |
Source | random.tcc:404-3371,RandomGenerator.h:51-55,random.h:1804-1909 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 23.50 |
CQA cycles if no scalar integer | 16.00 |
CQA cycles if FP arith vectorized | 11.75 |
CQA cycles if fully vectorized | 6.25 - 7.00 |
Front-end cycles | 23.50 |
DIV/SQRT cycles | 12.50 - 14.00 |
P0 cycles | 9.50 |
P1 cycles | 8.50 |
P2 cycles | 13.50 |
P3 cycles | 13.50 |
P4 cycles | 11.00 |
P5 cycles | 6.00 |
P6 cycles | 9.00 |
P7 cycles | 11.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 20.70 - 21.49 |
Stall cycles (UFS) | 0.00 - 0.74 |
Nb insns | 81.00 |
Nb uops | 94.00 |
Nb loads | 27.00 |
Nb stores | 7.00 |
Nb stack references | 6.00 |
FLOP/cycle | 0.64 |
Nb FLOP add-sub | 3.00 |
Nb FLOP mul | 5.00 |
Nb FLOP fma | 2.00 |
Nb FLOP div | 2.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 1.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 11.57 |
Bytes prefetched | 0.00 |
Bytes loaded | 216.00 |
Bytes stored | 56.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 20.69 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | 0.00 |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 42.86 |
Vector-efficiency ratio all | 14.87 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | 12.50 |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 17.41 |
Path / |
Function | main.extracted.104 |
Source file and lines | RandomGenerator.h:51-55 |
Module | exec |
nb instructions | 81 |
nb uops | 94 |
loop length | 459 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 6 |
used ymm registers | 4 |
used zmm registers | 0 |
nb stack references | 6 |
ADD-SUB / MUL ratio | 0.60 |
micro-operation queue | 23.50 cycles |
front end | 23.50 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 9.50 | 8.50 | 13.50 | 13.50 | 11.00 | 6.00 | 9.00 | 11.00 |
cycles | 9.50 | 8.50 | 13.50 | 13.50 | 11.00 | 6.00 | 9.00 | 11.00 |
Cycles executing div or sqrt instructions | 12.50-14.00 |
FE+BE cycles | 20.70-21.49 |
Stall cycles | 0.00-0.74 |
Front-end | 23.50 |
Dispatch | 13.50 |
DIV/SQRT | 12.50-14.00 |
Overall L1 | 23.50 |
all | 15% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 17% |
all | 23% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 81% |
all | 20% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 42% |
all | 13% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 15% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | 12% |
div/sqrt | 12% |
other | 22% |
all | 14% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | 12% |
div/sqrt | 12% |
other | 17% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
MOV %RSP,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x1390(%R15),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %XMM5,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 40b329 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %RCX,0x1390(%R15) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VDIVSD %XMM1,%XMM0,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 4 |
VUCOMISD %XMM5,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JAE 40b881 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVSD 0x1398(%R15),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x13a0(%R15),%XMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VSUBSD %XMM1,%XMM2,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213SD %XMM1,%XMM0,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R12,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RSP,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULSD 0x8ab2a(%RIP),%XMM2,%XMM10 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x1390(%R15),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %XMM5,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 40b5c9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %RCX,0x1390(%R15) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VDIVSD %XMM1,%XMM0,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 4 |
VUCOMISD %XMM5,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JAE 40b8c9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VSUBSD %XMM10,%XMM5,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD 0x1398(%R15),%XMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x13a0(%R15),%XMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VSUBSD %XMM2,%XMM3,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213SD %XMM2,%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM3,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R12,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 478e10 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
VMULSD 0x8a87f(%RIP),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSQRTSD %XMM0,%XMM0,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-19 | 4.50-6 |
VMOVSD %XMM0,-0x48(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD -0x38(%RBP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMULSD 0x8a871(%RIP),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA -0x2b8(%RBP),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x2b0(%RBP),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 478e90 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
VMOVSD 0x8a816(%RIP),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD -0x48(%RBP),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMULSD -0x2b0(%RBP),%XMM1,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R14,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x4,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV -0x90(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD %XMM0,(%RCX,%RAX,1) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMULSD -0x2b8(%RBP),%XMM1,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM0,0x8(%RCX,%RAX,1) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA 0x1(%R14),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP -0x228(%RBP),%R14 | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPBROADCASTQ 0x8a7e1(%RIP),%YMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPBROADCASTQ 0x8a7e0(%RIP),%YMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPBROADCASTQ 0x8a7df(%RIP),%YMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPBROADCASTQ 0x8a7de(%RIP),%YMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOV $-0x80000000,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 40b290 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %XMM5,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 478e60 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV $-0x80000000,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPBROADCASTQ 0x8a7af(%RIP),%YMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPBROADCASTQ 0x8a79e(%RIP),%YMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPBROADCASTQ 0x8a78d(%RIP),%YMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPBROADCASTQ 0x8a77c(%RIP),%YMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD 0x8a764(%RIP),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 40b505 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %XMM5,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSD %XMM10,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 478e60 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
VMOVSD -0x38(%RBP),%XMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x8a73d(%RIP),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 40b7a5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
Function | main.extracted.104 |
Source file and lines | RandomGenerator.h:51-55 |
Module | exec |
nb instructions | 81 |
nb uops | 94 |
loop length | 459 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 6 |
used ymm registers | 4 |
used zmm registers | 0 |
nb stack references | 6 |
ADD-SUB / MUL ratio | 0.60 |
micro-operation queue | 23.50 cycles |
front end | 23.50 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 9.50 | 8.50 | 13.50 | 13.50 | 11.00 | 6.00 | 9.00 | 11.00 |
cycles | 9.50 | 8.50 | 13.50 | 13.50 | 11.00 | 6.00 | 9.00 | 11.00 |
Cycles executing div or sqrt instructions | 12.50-14.00 |
FE+BE cycles | 20.70-21.49 |
Stall cycles | 0.00-0.74 |
Front-end | 23.50 |
Dispatch | 13.50 |
DIV/SQRT | 12.50-14.00 |
Overall L1 | 23.50 |
all | 15% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 17% |
all | 23% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 81% |
all | 20% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 42% |
all | 13% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 15% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | 12% |
div/sqrt | 12% |
other | 22% |
all | 14% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | 12% |
div/sqrt | 12% |
other | 17% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
MOV %RSP,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x1390(%R15),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %XMM5,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 40b329 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %RCX,0x1390(%R15) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VDIVSD %XMM1,%XMM0,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 4 |
VUCOMISD %XMM5,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JAE 40b881 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVSD 0x1398(%R15),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x13a0(%R15),%XMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VSUBSD %XMM1,%XMM2,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213SD %XMM1,%XMM0,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R12,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RSP,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULSD 0x8ab2a(%RIP),%XMM2,%XMM10 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x1390(%R15),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %XMM5,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 40b5c9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %RCX,0x1390(%R15) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VDIVSD %XMM1,%XMM0,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 4 |
VUCOMISD %XMM5,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JAE 40b8c9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VSUBSD %XMM10,%XMM5,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD 0x1398(%R15),%XMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x13a0(%R15),%XMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VSUBSD %XMM2,%XMM3,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213SD %XMM2,%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM3,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R12,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 478e10 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
VMULSD 0x8a87f(%RIP),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSQRTSD %XMM0,%XMM0,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-19 | 4.50-6 |
VMOVSD %XMM0,-0x48(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD -0x38(%RBP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMULSD 0x8a871(%RIP),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA -0x2b8(%RBP),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x2b0(%RBP),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 478e90 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
VMOVSD 0x8a816(%RIP),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD -0x48(%RBP),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMULSD -0x2b0(%RBP),%XMM1,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R14,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x4,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV -0x90(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD %XMM0,(%RCX,%RAX,1) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMULSD -0x2b8(%RBP),%XMM1,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM0,0x8(%RCX,%RAX,1) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA 0x1(%R14),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP -0x228(%RBP),%R14 | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPBROADCASTQ 0x8a7e1(%RIP),%YMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPBROADCASTQ 0x8a7e0(%RIP),%YMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPBROADCASTQ 0x8a7df(%RIP),%YMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPBROADCASTQ 0x8a7de(%RIP),%YMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOV $-0x80000000,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 40b290 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %XMM5,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 478e60 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV $-0x80000000,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPBROADCASTQ 0x8a7af(%RIP),%YMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPBROADCASTQ 0x8a79e(%RIP),%YMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPBROADCASTQ 0x8a78d(%RIP),%YMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPBROADCASTQ 0x8a77c(%RIP),%YMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD 0x8a764(%RIP),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 40b505 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %XMM5,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSD %XMM10,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 478e60 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
VMOVSD -0x38(%RBP),%XMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x8a73d(%RIP),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 40b7a5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |