Function: miniqmcreference::TwoBodyJastrowRef<qmcplusplus::BsplineFunctor<double> >::ratioGrad(qmcpl ... | Module: exec | Source: TwoBodyJastrowRef.h:148-303 [...] | Coverage: 0.66% |
---|
Function: miniqmcreference::TwoBodyJastrowRef<qmcplusplus::BsplineFunctor<double> >::ratioGrad(qmcpl ... | Module: exec | Source: TwoBodyJastrowRef.h:148-303 [...] | Coverage: 0.66% |
---|
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/TwoBodyJastrowRef.h: 148 - 303 |
-------------------------------------------------------------------------------- |
148: for (int idim = 0; idim < OHMMS_DIM; ++idim) |
149: { |
150: const valT* restrict dX = displ.data(idim); |
151: valT s = valT(); |
152: |
153: for (int jat = 0; jat < N; ++jat) |
154: s += du[jat] * dX[jat]; |
155: grad[idim] = s; |
[...] |
259: const int jelmax = triangle ? iat : N; |
[...] |
265: const int igt = P.GroupID[iat] * NumGroups; |
266: for (int jg = 0; jg < NumGroups; ++jg) |
267: { |
268: const FuncType& f2(*F[igt + jg]); |
269: int iStart = P.first(jg); |
270: int iEnd = std::min(jelmax, P.last(jg)); |
271: f2.evaluateVGL(iat, iStart, iEnd, dist, u, du, d2u, DistCompressed.data(), DistIndice.data()); |
[...] |
296: { |
297: UpdateMode = ORB_PBYP_PARTIAL; |
298: |
299: computeU3(P, iat, P.DistTables[0]->Temp_r.data(), cur_u.data(), cur_du.data(), cur_d2u.data()); |
300: cur_Uat = std::accumulate(cur_u.begin(), cur_u.begin() + N, valT()); |
301: DiffVal = Uat[iat] - cur_Uat; |
302: grad_iat += accumulateG(cur_du.data(), P.DistTables[0]->Temp_dr); |
303: return std::exp(DiffVal); |
/usr/lib64/gcc/x86_64-pc-linux-gnu/13.1.1/../../../../include/c++/13.1.1/bits/stl_iterator.h: 1077 - 1244 |
-------------------------------------------------------------------------------- |
1077: : _M_current(__i) { } |
[...] |
1244: { return __lhs.base() != __rhs.base(); } |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 302 - 305 |
-------------------------------------------------------------------------------- |
302: inline int first(int igroup) const { return SubPtcl[igroup]; } |
303: |
304: /// return the last index of a group i |
305: inline int last(int igroup) const { return SubPtcl[igroup + 1]; } |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Numerics/PETE/OperatorTags.h: 94 - 183 |
-------------------------------------------------------------------------------- |
94: (const_cast<T1&>(a) += b); |
[...] |
183: return (const_cast<T1&>(a) = b); |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 229 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
[...] |
229: return X[i]; |
/usr/lib64/gcc/x86_64-pc-linux-gnu/13.1.1/../../../../include/c++/13.1.1/bits/stl_numeric.h: 140 - 141 |
-------------------------------------------------------------------------------- |
140: for (; __first != __last; ++__first) |
141: __init = _GLIBCXX_MOVE_IF_20(__init) + *__first; |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/VectorSoAContainer.h: 243 - 243 |
-------------------------------------------------------------------------------- |
243: const T* restrict data(size_t i) const { return myData + i * nGhosts; } |
/usr/lib64/gcc/x86_64-pc-linux-gnu/13.1.1/../../../../include/c++/13.1.1/bits/stl_algobase.h: 930 - 1123 |
-------------------------------------------------------------------------------- |
930: for (; __first != __last; ++__first) |
931: *__first = __tmp; |
[...] |
1123: if (__n <= 0) |
/usr/lib64/gcc/x86_64-pc-linux-gnu/13.1.1/../../../../include/c++/13.1.1/bits/stl_vector.h: 1126 - 1258 |
-------------------------------------------------------------------------------- |
1126: return *(this->_M_impl._M_start + __n); |
[...] |
1258: { return _M_data_ptr(this->_M_impl._M_start); } |
0x41c720 PUSH %RBP |
0x41c721 MOV %RSP,%RBP |
0x41c724 PUSH %R15 |
0x41c726 PUSH %R14 |
0x41c728 PUSH %R13 |
0x41c72a PUSH %R12 |
0x41c72c PUSH %RBX |
0x41c72d SUB $0x58,%RSP |
0x41c731 MOV %EDX,%R15D |
0x41c734 MOV %RSI,%R13 |
0x41c737 MOV %RDI,%RBX |
0x41c73a MOVL $0x2,0xc(%RDI) |
0x41c741 MOV 0xa10(%RSI),%RAX |
0x41c748 MOV (%RAX),%RAX |
0x41c74b MOV 0x68(%RAX),%R8 |
0x41c74f MOV 0x90(%RDI),%R14 |
0x41c756 MOV 0x140(%RDI),%R12 |
0x41c75d MOV 0x158(%RDI),%R10 |
0x41c764 MOV 0x170(%RDI),%R11 |
0x41c76b TEST %R14D,%R14D |
0x41c76e MOV %R10,-0x50(%RBP) |
0x41c772 MOV %R11,-0x48(%RBP) |
0x41c776 MOV %RCX,-0x58(%RBP) |
0x41c77a JLE 41c7d7 |
0x41c77c MOV $-0x1,%EAX |
0x41c781 AND %R14,%RAX |
0x41c784 JE 41c7d7 |
0x41c786 SAL $0x3,%RAX |
0x41c78a MOV %R12,%RDI |
0x41c78d XOR %ESI,%ESI |
0x41c78f MOV %RAX,%RDX |
0x41c792 MOV %RAX,-0x30(%RBP) |
0x41c796 MOV %R15D,-0x40(%RBP) |
0x41c79a MOV %R8,-0x38(%RBP) |
0x41c79e CALL 47ebf0 <_intel_fast_memset> |
0x41c7a3 MOV -0x50(%RBP),%RDI |
0x41c7a7 XOR %ESI,%ESI |
0x41c7a9 MOV -0x30(%RBP),%R15 |
0x41c7ad MOV %R15,%RDX |
0x41c7b0 CALL 47ebf0 <_intel_fast_memset> |
0x41c7b5 MOV -0x48(%RBP),%RDI |
0x41c7b9 XOR %ESI,%ESI |
0x41c7bb MOV %R15,%RDX |
0x41c7be CALL 47ebf0 <_intel_fast_memset> |
0x41c7c3 MOV -0x48(%RBP),%R11 |
0x41c7c7 MOV -0x50(%RBP),%R10 |
0x41c7cb MOV -0x38(%RBP),%R8 |
0x41c7cf MOV -0x40(%RBP),%R15D |
0x41c7d3 MOV -0x58(%RBP),%RCX |
0x41c7d7 MOVSXD %R15D,%RSI |
0x41c7da MOV 0xa0(%RBX),%RAX |
0x41c7e1 TEST %RAX,%RAX |
0x41c7e4 JE 41c890 |
0x41c7ea MOV 0x5c0(%R13),%RCX |
0x41c7f1 MOV %RSI,-0x60(%RBP) |
0x41c7f5 IMUL (%RCX,%RSI,4),%EAX |
0x41c7f9 CLTQ |
0x41c7fb SAL $0x3,%RAX |
0x41c7ff MOV %RAX,-0x40(%RBP) |
0x41c803 XOR %EAX,%EAX |
0x41c805 MOV %R14,-0x38(%RBP) |
0x41c809 MOV %R12,-0x30(%RBP) |
0x41c80d NOPL (%RAX) |
(243) 0x41c810 MOV 0x200(%RBX),%RCX |
(243) 0x41c817 ADD -0x40(%RBP),%RCX |
(243) 0x41c81b MOV (%RCX,%RAX,8),%RDI |
(243) 0x41c81f MOV 0xa98(%R13),%RCX |
(243) 0x41c826 MOV %R13,%R14 |
(243) 0x41c829 LEA 0x1(%RAX),%R13 |
(243) 0x41c82d MOV (%RCX,%RAX,4),%EDX |
(243) 0x41c830 MOV 0x4(%RCX,%RAX,4),%ECX |
(243) 0x41c834 MOV -0x38(%RBP),%RAX |
(243) 0x41c838 CMP %EAX,%ECX |
(243) 0x41c83a CMOVGE %EAX,%ECX |
(243) 0x41c83d MOV %R15D,%ESI |
(243) 0x41c840 MOV %R8,%R12 |
(243) 0x41c843 MOV -0x30(%RBP),%R9 |
(243) 0x41c847 PUSHQ 0x1e8(%RBX) |
(243) 0x41c84d PUSHQ 0x1d0(%RBX) |
(243) 0x41c853 PUSH %R11 |
(243) 0x41c855 PUSH %R10 |
(243) 0x41c857 CALL 41aa40 <_ZNK11qmcplusplus14BsplineFunctorIdE11evaluateVGLEiiiPKdPdS4_S4_S4_Pi> |
(243) 0x41c85c MOV -0x48(%RBP),%R11 |
(243) 0x41c860 MOV -0x50(%RBP),%R10 |
(243) 0x41c864 MOV %R12,%R8 |
(243) 0x41c867 ADD $0x20,%RSP |
(243) 0x41c86b MOV %R13,%RAX |
(243) 0x41c86e CMP %R13,0xa0(%RBX) |
(243) 0x41c875 MOV %R14,%R13 |
(243) 0x41c878 JA 41c810 |
0x41c87a MOV 0x90(%RBX),%R14 |
0x41c881 MOV 0x140(%RBX),%R12 |
0x41c888 MOV -0x58(%RBP),%RCX |
0x41c88c MOV -0x60(%RBP),%RSI |
0x41c890 TEST %R14,%R14 |
0x41c893 JE 41c906 |
0x41c895 LEA -0x1(%R14),%RAX |
0x41c899 MOV $0x3d,%DL |
0x41c89b BZHI %RDX,%RAX,%RDI |
0x41c8a0 INC %RDI |
0x41c8a3 MOV $0x3ffffffffffffff0,%RAX |
0x41c8ad AND %RDI,%RAX |
0x41c8b0 JE 41c90c |
0x41c8b2 VXORPD %XMM0,%XMM0,%XMM0 |
0x41c8b6 XOR %EDX,%EDX |
0x41c8b8 VXORPD %XMM1,%XMM1,%XMM1 |
0x41c8bc NOPL (%RAX) |
(242) 0x41c8c0 VADDPD (%R12,%RDX,8),%ZMM0,%ZMM0 |
(242) 0x41c8c7 VADDPD 0x40(%R12,%RDX,8),%ZMM1,%ZMM1 |
(242) 0x41c8cf ADD $0x10,%RDX |
(242) 0x41c8d3 CMP %RAX,%RDX |
(242) 0x41c8d6 JB 41c8c0 |
0x41c8d8 VADDPD %ZMM1,%ZMM0,%ZMM0 |
0x41c8de VEXTRACTF64X4 $0x1,%ZMM0,%YMM1 |
0x41c8e5 VADDPD %ZMM1,%ZMM0,%ZMM0 |
0x41c8eb VEXTRACTF128 $0x1,%YMM0,%XMM1 |
0x41c8f1 VADDPD %XMM1,%XMM0,%XMM0 |
0x41c8f5 VPERMILPD $0x1,%XMM0,%XMM1 |
0x41c8fb VADDSD %XMM1,%XMM0,%XMM0 |
0x41c8ff CMP %RAX,%RDI |
0x41c902 JNE 41c912 |
0x41c904 JMP 41c97a |
0x41c906 VXORPD %XMM0,%XMM0,%XMM0 |
0x41c90a JMP 41c97a |
0x41c90c XOR %EAX,%EAX |
0x41c90e VXORPD %XMM0,%XMM0,%XMM0 |
0x41c912 VPBROADCASTQ %RDI,%ZMM1 |
0x41c918 VPBROADCASTQ %RAX,%ZMM2 |
0x41c91e VPORQ 0x72f98(%RIP),%ZMM2,%ZMM3 |
0x41c928 VPORQ 0x72fce(%RIP),%ZMM2,%ZMM2 |
0x41c932 VPCMPLTUQ %ZMM1,%ZMM2,%K1 |
0x41c939 VPCMPLTUQ %ZMM1,%ZMM3,%K2 |
0x41c940 VMOVUPD 0x40(%R12,%RAX,8),%ZMM1{%K2}{z} |
0x41c948 VMOVUPD (%R12,%RAX,8),%ZMM2{%K1}{z} |
0x41c94f VADDPD %ZMM1,%ZMM2,%ZMM1 |
0x41c955 VEXTRACTF64X4 $0x1,%ZMM1,%YMM2 |
0x41c95c VADDPD %ZMM2,%ZMM1,%ZMM1 |
0x41c962 VEXTRACTF128 $0x1,%YMM1,%XMM2 |
0x41c968 VADDPD %XMM2,%XMM1,%XMM1 |
0x41c96c VPERMILPD $0x1,%XMM1,%XMM2 |
0x41c972 VADDSD %XMM2,%XMM1,%XMM1 |
0x41c976 VADDSD %XMM1,%XMM0,%XMM0 |
0x41c97a VMOVSD %XMM0,0x138(%RBX) |
0x41c982 MOV 0xd8(%RBX),%RAX |
0x41c989 VMOVSD (%RAX,%RSI,8),%XMM1 |
0x41c98e VSUBSD %XMM0,%XMM1,%XMM0 |
0x41c992 VMOVSD %XMM0,0xb0(%RBX) |
0x41c99a MOV 0x158(%RBX),%RAX |
0x41c9a1 MOV 0xa10(%R13),%RDX |
0x41c9a8 MOV (%RDX),%RDX |
0x41c9ab VXORPD %XMM1,%XMM1,%XMM1 |
0x41c9af VMOVUPD %XMM1,-0x80(%RBP) |
0x41c9b4 MOVQ $0,-0x70(%RBP) |
0x41c9bc MOV 0x88(%RDX),%R9 |
0x41c9c3 MOV 0x98(%RDX),%R8 |
0x41c9ca MOV %R14,%R11 |
0x41c9cd AND $-0x8,%R11 |
0x41c9d1 LEA -0x1(%R11),%RDI |
0x41c9d5 VPBROADCASTQ %R14,%ZMM1 |
0x41c9db LEA (,%R9,8),%R10 |
0x41c9e3 XOR %EDX,%EDX |
0x41c9e5 VMOVDQU64 0x72f11(%RIP),%ZMM2 |
0x41c9ef MOV %R8,%RBX |
0x41c9f2 JMP 41ca1e |
0x41c9f4 NOPW %CS:(%RAX,%RAX,1) |
(240) 0x41ca00 VXORPD %XMM5,%XMM5,%XMM5 |
(240) 0x41ca04 VMOVSD %XMM5,-0x80(%RBP,%RDX,8) |
(240) 0x41ca0a LEA 0x1(%RDX),%RSI |
(240) 0x41ca0e ADD %R10,%RBX |
(240) 0x41ca11 CMP $0x2,%RDX |
(240) 0x41ca15 MOV %RSI,%RDX |
(240) 0x41ca18 JE 41caf6 |
(240) 0x41ca1e TEST %R14,%R14 |
(240) 0x41ca21 JE 41ca00 |
(240) 0x41ca23 MOV %R9,%R15 |
(240) 0x41ca26 IMUL %RDX,%R15 |
(240) 0x41ca2a TEST %R11,%R11 |
(240) 0x41ca2d JE 41ca90 |
(240) 0x41ca2f VXORPD %XMM5,%XMM5,%XMM5 |
(240) 0x41ca33 XOR %ESI,%ESI |
(240) 0x41ca35 NOPW %CS:(%RAX,%RAX,1) |
(241) 0x41ca40 VMOVUPD (%RBX,%RSI,8),%ZMM6 |
(241) 0x41ca47 VFMADD231PD (%RAX,%RSI,8),%ZMM6,%ZMM5 |
(241) 0x41ca4e ADD $0x8,%RSI |
(241) 0x41ca52 CMP %RDI,%RSI |
(241) 0x41ca55 JLE 41ca40 |
(240) 0x41ca57 VEXTRACTF64X4 $0x1,%ZMM5,%YMM6 |
(240) 0x41ca5e VADDPD %ZMM6,%ZMM5,%ZMM5 |
(240) 0x41ca64 VEXTRACTF128 $0x1,%YMM5,%XMM6 |
(240) 0x41ca6a VADDPD %XMM6,%XMM5,%XMM5 |
(240) 0x41ca6e VPERMILPD $0x1,%XMM5,%XMM6 |
(240) 0x41ca74 VADDSD %XMM6,%XMM5,%XMM5 |
(240) 0x41ca78 MOV %R11,%RSI |
(240) 0x41ca7b CMP %R11,%R14 |
(240) 0x41ca7e JE 41ca04 |
(240) 0x41ca80 JMP 41ca96 |
0x41ca82 NOPW %CS:(%RAX,%RAX,1) |
(240) 0x41ca90 VXORPD %XMM5,%XMM5,%XMM5 |
(240) 0x41ca94 XOR %ESI,%ESI |
(240) 0x41ca96 VPBROADCASTQ %RSI,%ZMM6 |
(240) 0x41ca9c VPORQ %ZMM2,%ZMM6,%ZMM6 |
(240) 0x41caa2 VPCMPLTUQ %ZMM1,%ZMM6,%K1 |
(240) 0x41caa9 ADD %RSI,%R15 |
(240) 0x41caac VMOVUPD (%R8,%R15,8),%ZMM6{%K1}{z} |
(240) 0x41cab3 VMOVAPD %ZMM6,%ZMM4{%K1} |
(240) 0x41cab9 VMOVUPD (%RAX,%RSI,8),%ZMM7{%K1}{z} |
(240) 0x41cac0 VMOVAPD %ZMM7,%ZMM3{%K1} |
(240) 0x41cac6 VMULPD %ZMM7,%ZMM6,%ZMM6{%K1}{z} |
(240) 0x41cacc VEXTRACTF64X4 $0x1,%ZMM6,%YMM7 |
(240) 0x41cad3 VADDPD %ZMM7,%ZMM6,%ZMM6 |
(240) 0x41cad9 VEXTRACTF128 $0x1,%YMM6,%XMM7 |
(240) 0x41cadf VADDPD %XMM7,%XMM6,%XMM6 |
(240) 0x41cae3 VPERMILPD $0x1,%XMM6,%XMM7 |
(240) 0x41cae9 VADDSD %XMM7,%XMM6,%XMM6 |
(240) 0x41caed VADDSD %XMM6,%XMM5,%XMM5 |
(240) 0x41caf1 JMP 41ca04 |
0x41caf6 VMOVUPD (%RCX),%XMM1 |
0x41cafa VADDPD -0x80(%RBP),%XMM1,%XMM1 |
0x41caff VMOVUPD %XMM1,(%RCX) |
0x41cb03 VMOVSD 0x10(%RCX),%XMM1 |
0x41cb08 VADDSD -0x70(%RBP),%XMM1,%XMM1 |
0x41cb0d VMOVSD %XMM1,0x10(%RCX) |
0x41cb12 ADD $0x58,%RSP |
0x41cb16 POP %RBX |
0x41cb17 POP %R12 |
0x41cb19 POP %R13 |
0x41cb1b POP %R14 |
0x41cb1d POP %R15 |
0x41cb1f POP %RBP |
0x41cb20 VZEROUPPER |
0x41cb23 JMP 4703c0 |
0x41cb28 NOPL (%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►98.73+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:207 | exec |
○ | main.extracted.104 | stl_vector.h:1126 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | main | miniqmc.cpp:404 | exec |
○ | __libc_init_first | libc.so.6 | |
►1.27+ | main.extracted.104 | stl_vector.h:1126 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | main | miniqmc.cpp:404 | exec |
○ | __libc_init_first | libc.so.6 |
Path / |
Source file and lines | TwoBodyJastrowRef.h:148-303 |
Module | exec |
nb instructions | 150 |
nb uops | 154 |
loop length | 678 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 3 |
used zmm registers | 4 |
nb stack references | 9 |
micro-operation queue | 38.50 cycles |
front end | 38.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 14.50 | 14.50 | 23.33 | 23.33 | 26.00 | 14.50 | 14.50 | 23.33 |
cycles | 14.50 | 14.50 | 23.33 | 23.33 | 26.00 | 14.50 | 14.50 | 23.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 37.40 |
Stall cycles | 0.00 |
Front-end | 38.50 |
Dispatch | 26.00 |
Overall L1 | 38.50 |
all | 17% |
load | 30% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 35% |
all | 69% |
load | 57% |
store | 40% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 58% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 43% |
load | 41% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 58% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 64% |
all | 24% |
load | 38% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 37% |
all | 36% |
load | 41% |
store | 17% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 44% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 29% |
all | 30% |
load | 39% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 44% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 34% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SUB $0x58,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %EDX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOVL $0x2,0xc(%RDI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 |
MOV 0xa10(%RSI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x68(%RAX),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x90(%RDI),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x140(%RDI),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x158(%RDI),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x170(%RDI),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %R14D,%R14D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R10,-0x50(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R11,-0x48(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RCX,-0x58(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JLE 41c7d7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV $-0x1,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND %R14,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 41c7d7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV %R12,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RAX,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R15D,-0x40(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R8,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 47ebf0 <_intel_fast_memset> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV -0x50(%RBP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV -0x30(%RBP),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R15,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 47ebf0 <_intel_fast_memset> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV -0x48(%RBP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R15,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 47ebf0 <_intel_fast_memset> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x50(%RBP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x40(%RBP),%R15D | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x58(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOVSXD %R15D,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0xa0(%RBX),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %RAX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 41c890 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x5c0(%R13),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RSI,-0x60(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
IMUL (%RCX,%RSI,4),%EAX | 1 | 0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
CLTQ | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R14,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R12,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x90(%RBX),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x140(%RBX),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x58(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x60(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %R14,%R14 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 41c906 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA -0x1(%R14),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x3d,%DL | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
BZHI %RDX,%RAX,%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
INC %RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x3ffffffffffffff0,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND %RDI,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 41c90c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VADDPD %ZMM1,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VEXTRACTF64X4 $0x1,%ZMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPD %ZMM1,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPD %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMILPD $0x1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSD %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %RAX,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 41c912 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
JMP 41c97a | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 41c97a | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPBROADCASTQ %RDI,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RAX,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPORQ 0x72f98(%RIP),%ZMM2,%ZMM3 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPORQ 0x72fce(%RIP),%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPLTUQ %ZMM1,%ZMM2,%K1 | |||||||||||
VPCMPLTUQ %ZMM1,%ZMM3,%K2 | |||||||||||
VMOVUPD 0x40(%R12,%RAX,8),%ZMM1{%K2}{z} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD (%R12,%RAX,8),%ZMM2{%K1}{z} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %ZMM1,%ZMM2,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VEXTRACTF64X4 $0x1,%ZMM1,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPD %ZMM2,%ZMM1,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPD %XMM2,%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMILPD $0x1,%XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSD %XMM2,%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM0,0x138(%RBX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xd8(%RBX),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD (%RAX,%RSI,8),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VSUBSD %XMM0,%XMM1,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM0,0xb0(%RBX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x158(%RBX),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0xa10(%R13),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVUPD %XMM1,-0x80(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOVQ $0,-0x70(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 |
MOV 0x88(%RDX),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x98(%RDX),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R14,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x8,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA -0x1(%R11),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R14,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
LEA (,%R9,8),%R10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVDQU64 0x72f11(%RIP),%ZMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
MOV %R8,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 41ca1e | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVUPD (%RCX),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VADDPD -0x80(%RBP),%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %XMM1,(%RCX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD 0x10(%RCX),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VADDSD -0x70(%RBP),%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM1,0x10(%RCX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD $0x58,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 4703c0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | TwoBodyJastrowRef.h:148-303 |
Module | exec |
nb instructions | 150 |
nb uops | 154 |
loop length | 678 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 3 |
used zmm registers | 4 |
nb stack references | 9 |
micro-operation queue | 38.50 cycles |
front end | 38.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 14.50 | 14.50 | 23.33 | 23.33 | 26.00 | 14.50 | 14.50 | 23.33 |
cycles | 14.50 | 14.50 | 23.33 | 23.33 | 26.00 | 14.50 | 14.50 | 23.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 37.40 |
Stall cycles | 0.00 |
Front-end | 38.50 |
Dispatch | 26.00 |
Overall L1 | 38.50 |
all | 17% |
load | 30% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 35% |
all | 69% |
load | 57% |
store | 40% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 58% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 43% |
load | 41% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 58% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 64% |
all | 24% |
load | 38% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 37% |
all | 36% |
load | 41% |
store | 17% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 44% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 29% |
all | 30% |
load | 39% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 44% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 34% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SUB $0x58,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %EDX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOVL $0x2,0xc(%RDI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 |
MOV 0xa10(%RSI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x68(%RAX),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x90(%RDI),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x140(%RDI),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x158(%RDI),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x170(%RDI),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %R14D,%R14D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R10,-0x50(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R11,-0x48(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RCX,-0x58(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JLE 41c7d7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV $-0x1,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND %R14,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 41c7d7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV %R12,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RAX,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R15D,-0x40(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R8,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 47ebf0 <_intel_fast_memset> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV -0x50(%RBP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV -0x30(%RBP),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R15,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 47ebf0 <_intel_fast_memset> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV -0x48(%RBP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R15,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 47ebf0 <_intel_fast_memset> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x50(%RBP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x40(%RBP),%R15D | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x58(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOVSXD %R15D,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0xa0(%RBX),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %RAX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 41c890 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x5c0(%R13),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RSI,-0x60(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
IMUL (%RCX,%RSI,4),%EAX | 1 | 0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
CLTQ | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R14,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R12,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x90(%RBX),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x140(%RBX),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x58(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x60(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %R14,%R14 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 41c906 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA -0x1(%R14),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x3d,%DL | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
BZHI %RDX,%RAX,%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
INC %RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x3ffffffffffffff0,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND %RDI,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 41c90c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VADDPD %ZMM1,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VEXTRACTF64X4 $0x1,%ZMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPD %ZMM1,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPD %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMILPD $0x1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSD %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %RAX,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 41c912 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
JMP 41c97a | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 41c97a | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPBROADCASTQ %RDI,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RAX,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPORQ 0x72f98(%RIP),%ZMM2,%ZMM3 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPORQ 0x72fce(%RIP),%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPLTUQ %ZMM1,%ZMM2,%K1 | |||||||||||
VPCMPLTUQ %ZMM1,%ZMM3,%K2 | |||||||||||
VMOVUPD 0x40(%R12,%RAX,8),%ZMM1{%K2}{z} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD (%R12,%RAX,8),%ZMM2{%K1}{z} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %ZMM1,%ZMM2,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VEXTRACTF64X4 $0x1,%ZMM1,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPD %ZMM2,%ZMM1,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPD %XMM2,%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMILPD $0x1,%XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSD %XMM2,%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM0,0x138(%RBX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xd8(%RBX),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD (%RAX,%RSI,8),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VSUBSD %XMM0,%XMM1,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM0,0xb0(%RBX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x158(%RBX),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0xa10(%R13),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVUPD %XMM1,-0x80(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOVQ $0,-0x70(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 |
MOV 0x88(%RDX),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x98(%RDX),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R14,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x8,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA -0x1(%R11),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R14,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
LEA (,%R9,8),%R10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVDQU64 0x72f11(%RIP),%ZMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
MOV %R8,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 41ca1e | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVUPD (%RCX),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VADDPD -0x80(%RBP),%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %XMM1,(%RCX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD 0x10(%RCX),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VADDSD -0x70(%RBP),%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM1,0x10(%RCX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD $0x58,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 4703c0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼miniqmcreference::TwoBodyJastrowRef | 0.66 | 0.39 |
○Loop 242 - stl_numeric.h:140-141 - exec | 0.06 | 0.04 |
○Loop 243 - TwoBodyJastrowRef.h:266-271 - exec | 0.02 | 0.01 |
▼Loop 240 - TwoBodyJastrowRef.h:148-155 - exec– | 0.01 | 0 |
○Loop 241 - TwoBodyJastrowRef.h:153-154 - exec | 0.52 | 0.31 |