Function: miniqmcreference::OneBodyJastrowRef<qmcplusplus::BsplineFunctor<double> >::evaluateGL(qmcp ... | Module: exec | Source: OneBodyJastrowRef.h:112-198 [...] | Coverage: 0.03% |
---|
Function: miniqmcreference::OneBodyJastrowRef<qmcplusplus::BsplineFunctor<double> >::evaluateGL(qmcp ... | Module: exec | Source: OneBodyJastrowRef.h:112-198 [...] | Coverage: 0.03% |
---|
/usr/lib64/gcc/x86_64-pc-linux-gnu/13.1.1/../../../../include/c++/13.1.1/bits/stl_iterator.h: 1077 - 1244 |
-------------------------------------------------------------------------------- |
1077: : _M_current(__i) { } |
[...] |
1244: { return __lhs.base() != __rhs.base(); } |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVectorOps.h: 49 - 49 |
-------------------------------------------------------------------------------- |
49: for (unsigned d = 0; d < D; ++d) |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsMatrix.h: 214 - 214 |
-------------------------------------------------------------------------------- |
214: inline const Type_t* operator[](size_type i) const { return X.data() + i * D2; } |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Numerics/PETE/OperatorTags.h: 94 - 94 |
-------------------------------------------------------------------------------- |
94: (const_cast<T1&>(a) += b); |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 242 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
[...] |
242: inline iterator begin() { return X; } |
/usr/lib64/gcc/x86_64-pc-linux-gnu/13.1.1/../../../../include/c++/13.1.1/bits/stl_numeric.h: 140 - 141 |
-------------------------------------------------------------------------------- |
140: for (; __first != __last; ++__first) |
141: __init = _GLIBCXX_MOVE_IF_20(__init) + *__first; |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/OneBodyJastrowRef.h: 112 - 198 |
-------------------------------------------------------------------------------- |
112: const DistanceTableData& d_ie(*(P.DistTables[myTableID])); |
113: for (int iat = 0; iat < Nelec; ++iat) |
114: { |
115: computeU3(P, iat, d_ie.Distances[iat]); |
116: Vat[iat] = std::accumulate(U.begin(), U.begin() + Nions, valT()); |
117: Lap[iat] = accumulateGL(dU.data(), d2U.data(), d_ie.Displacements[iat], Grad[iat]); |
[...] |
169: { |
170: if (fromscratch) |
171: recompute(P); |
172: |
173: for (size_t iat = 0; iat < Nelec; ++iat) |
174: G[iat] += Grad[iat]; |
175: for (size_t iat = 0; iat < Nelec; ++iat) |
176: L[iat] -= Lap[iat]; |
177: LogValue = -std::accumulate(Vat.begin(), Vat.begin() + Nelec, valT()); |
178: } |
[...] |
190: for (int jat = 0; jat < Nions; ++jat) |
191: lap += d2u[jat] + lapfac * du[jat]; |
192: for (int idim = 0; idim < OHMMS_DIM; ++idim) |
193: { |
194: const valT* restrict dX = displ.data(idim); |
195: valT s = valT(); |
196: for (int jat = 0; jat < Nions; ++jat) |
197: s += du[jat] * dX[jat]; |
198: grad[idim] = s; |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVector.h: 176 - 177 |
-------------------------------------------------------------------------------- |
176: inline Type_t& operator[](unsigned int i) { return X[i]; } |
177: inline const Type_t& operator[](unsigned int i) const { return X[i]; } |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/VectorSoAContainer.h: 243 - 243 |
-------------------------------------------------------------------------------- |
243: const T* restrict data(size_t i) const { return myData + i * nGhosts; } |
/usr/lib64/gcc/x86_64-pc-linux-gnu/13.1.1/../../../../include/c++/13.1.1/bits/stl_vector.h: 1126 - 1258 |
-------------------------------------------------------------------------------- |
1126: return *(this->_M_impl._M_start + __n); |
[...] |
1145: return *(this->_M_impl._M_start + __n); |
[...] |
1258: { return _M_data_ptr(this->_M_impl._M_start); } |
0x418c50 PUSH %RBP |
0x418c51 MOV %RSP,%RBP |
0x418c54 PUSH %R15 |
0x418c56 PUSH %R14 |
0x418c58 PUSH %R13 |
0x418c5a PUSH %R12 |
0x418c5c PUSH %RBX |
0x418c5d AND $-0x40,%RSP |
0x418c61 SUB $0x200,%RSP |
0x418c68 MOV %RSI,0x20(%RSP) |
0x418c6d MOV %RDI,%R13 |
0x418c70 MOV $0x3ffffffffffffff0,%R10 |
0x418c7a MOV 0x98(%RDI),%EAX |
0x418c80 TEST %R8B,%R8B |
0x418c83 JE 41910b |
0x418c89 TEST %EAX,%EAX |
0x418c8b JLE 41910b |
0x418c91 MOV %RDX,0x28(%RSP) |
0x418c96 MOV %RCX,0x30(%RSP) |
0x418c9b MOV 0x20(%RSP),%RAX |
0x418ca0 MOV 0xa10(%RAX),%RAX |
0x418ca7 MOVSXD 0x90(%R13),%RCX |
0x418cae MOV (%RAX,%RCX,8),%RBX |
0x418cb2 XOR %R14D,%R14D |
0x418cb5 MOV %RBX,0x38(%RSP) |
0x418cba JMP 418cf5 |
0x418cbc NOPL (%RAX) |
(205) 0x418cc0 VMOVUPD %ZMM7,0x40(%RSP) |
(205) 0x418cc8 VMOVUPD %ZMM6,0x80(%RSP) |
(205) 0x418cd0 MOV 0x1b0(%R13),%RAX |
(205) 0x418cd7 VMOVSD %XMM0,(%RAX,%R14,8) |
(205) 0x418cdd INC %R14 |
(205) 0x418ce0 MOVSXD 0x98(%R13),%RAX |
(205) 0x418ce7 CMP %RAX,%R14 |
(205) 0x418cea MOV 0x38(%RSP),%RBX |
(205) 0x418cef JGE 4190f7 |
(205) 0x418cf5 MOV 0x18(%RBX),%RCX |
(205) 0x418cf9 IMUL %R14,%RCX |
(205) 0x418cfd SAL $0x3,%RCX |
(205) 0x418d01 ADD 0x40(%RBX),%RCX |
(205) 0x418d05 MOV %R13,%RDI |
(205) 0x418d08 MOV 0x20(%RSP),%RSI |
(205) 0x418d0d MOV %R14D,%EDX |
(205) 0x418d10 VZEROUPPER |
(205) 0x418d13 CALL 41a8a0 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9computeU3ERNS1_11ParticleSetEiPKd> |
(205) 0x418d18 MOVSXD 0x94(%R13),%R10 |
(205) 0x418d1f VXORPD %XMM0,%XMM0,%XMM0 |
(205) 0x418d23 TEST %R10,%R10 |
(205) 0x418d26 JE 418de0 |
(205) 0x418d2c MOV 0xf8(%R13),%RAX |
(205) 0x418d33 MOV $0x1fffffffffffffff,%RDX |
(205) 0x418d3d LEA (%R10,%RDX,1),%RCX |
(205) 0x418d41 AND %RDX,%RCX |
(205) 0x418d44 INC %RCX |
(205) 0x418d47 MOV %RCX,%RDX |
(205) 0x418d4a MOV $0x3ffffffffffffff0,%RSI |
(205) 0x418d54 AND %RSI,%RDX |
(205) 0x418d57 VMOVDQU64 0x7691f(%RIP),%ZMM5 |
(205) 0x418d61 VMOVUPD 0x80(%RSP),%ZMM6 |
(205) 0x418d69 VMOVUPD 0x40(%RSP),%ZMM7 |
(205) 0x418d71 JE 418e10 |
(205) 0x418d77 VXORPD %XMM1,%XMM1,%XMM1 |
(205) 0x418d7b XOR %ESI,%ESI |
(205) 0x418d7d VXORPD %XMM2,%XMM2,%XMM2 |
(205) 0x418d81 NOPW %CS:(%RAX,%RAX,1) |
(209) 0x418d90 VADDPD (%RAX,%RSI,8),%ZMM1,%ZMM1 |
(209) 0x418d97 VADDPD 0x40(%RAX,%RSI,8),%ZMM2,%ZMM2 |
(209) 0x418d9f ADD $0x10,%RSI |
(209) 0x418da3 CMP %RDX,%RSI |
(209) 0x418da6 JB 418d90 |
(205) 0x418da8 VADDPD %ZMM2,%ZMM1,%ZMM1 |
(205) 0x418dae VEXTRACTF64X4 $0x1,%ZMM1,%YMM2 |
(205) 0x418db5 VADDPD %ZMM2,%ZMM1,%ZMM1 |
(205) 0x418dbb VEXTRACTF128 $0x1,%YMM1,%XMM2 |
(205) 0x418dc1 VADDPD %XMM2,%XMM1,%XMM1 |
(205) 0x418dc5 VPERMILPD $0x1,%XMM1,%XMM2 |
(205) 0x418dcb VADDSD %XMM2,%XMM1,%XMM1 |
(205) 0x418dcf CMP %RDX,%RCX |
(205) 0x418dd2 JNE 418e16 |
(205) 0x418dd4 JMP 418ea6 |
0x418dd9 NOPL (%RAX) |
(205) 0x418de0 VXORPD %XMM1,%XMM1,%XMM1 |
(205) 0x418de4 VMOVDQU64 0x76892(%RIP),%ZMM5 |
(205) 0x418dee VMOVUPD 0x80(%RSP),%ZMM6 |
(205) 0x418df6 VMOVUPD 0x40(%RSP),%ZMM7 |
(205) 0x418dfe JMP 418ea6 |
0x418e03 NOPW %CS:(%RAX,%RAX,1) |
(205) 0x418e10 VXORPD %XMM1,%XMM1,%XMM1 |
(205) 0x418e14 XOR %EDX,%EDX |
(205) 0x418e16 VPBROADCASTQ %RCX,%ZMM2 |
(205) 0x418e1c VPBROADCASTQ %RDX,%ZMM3 |
(205) 0x418e22 VPORQ %ZMM5,%ZMM3,%ZMM4 |
(205) 0x418e28 VPORQ 0x7688e(%RIP),%ZMM3,%ZMM3 |
(205) 0x418e32 VPCMPLTUQ %ZMM2,%ZMM3,%K1 |
(205) 0x418e39 VPCMPLTUQ %ZMM2,%ZMM4,%K2 |
(205) 0x418e40 VMOVUPD (%RAX,%RDX,8),%ZMM2{%K2}{z} |
(205) 0x418e47 VMOVUPD 0x40(%RAX,%RDX,8),%ZMM3{%K1}{z} |
(205) 0x418e4f VMOVUPD 0xc0(%RSP),%ZMM4 |
(205) 0x418e57 VMOVAPD %ZMM3,%ZMM4{%K1} |
(205) 0x418e5d VMOVUPD %ZMM4,0xc0(%RSP) |
(205) 0x418e65 VMOVUPD 0x100(%RSP),%ZMM4 |
(205) 0x418e6d VMOVAPD %ZMM2,%ZMM4{%K2} |
(205) 0x418e73 VMOVUPD %ZMM4,0x100(%RSP) |
(205) 0x418e7b VADDPD %ZMM3,%ZMM2,%ZMM2 |
(205) 0x418e81 VEXTRACTF64X4 $0x1,%ZMM2,%YMM3 |
(205) 0x418e88 VADDPD %ZMM3,%ZMM2,%ZMM2 |
(205) 0x418e8e VEXTRACTF128 $0x1,%YMM2,%XMM3 |
(205) 0x418e94 VADDPD %XMM3,%XMM2,%XMM2 |
(205) 0x418e98 VPERMILPD $0x1,%XMM2,%XMM3 |
(205) 0x418e9e VADDSD %XMM3,%XMM2,%XMM2 |
(205) 0x418ea2 VADDSD %XMM2,%XMM1,%XMM1 |
(205) 0x418ea6 MOV 0xe8(%R13),%RAX |
(205) 0x418ead VMOVSD %XMM1,(%RAX,%R14,8) |
(205) 0x418eb3 MOV 0x110(%R13),%RCX |
(205) 0x418eba MOV 0x50(%RBX),%RAX |
(205) 0x418ebe MOV 0x188(%R13),%R9 |
(205) 0x418ec5 VPBROADCASTQ %R10,%ZMM1 |
(205) 0x418ecb TEST %R10D,%R10D |
(205) 0x418ece JLE 418fd2 |
(205) 0x418ed4 MOV 0x128(%R13),%RDX |
(205) 0x418edb MOV %R10,%RSI |
(205) 0x418ede AND $-0x8,%RSI |
(205) 0x418ee2 JE 418f50 |
(205) 0x418ee4 VXORPD %XMM0,%XMM0,%XMM0 |
(205) 0x418ee8 XOR %EDI,%EDI |
(205) 0x418eea VBROADCASTSD 0x78074(%RIP),%ZMM2 |
(205) 0x418ef4 NOPW %CS:(%RAX,%RAX,1) |
(208) 0x418f00 VADDPD (%RDX,%RDI,8),%ZMM0,%ZMM0 |
(208) 0x418f07 VFMADD231PD (%RCX,%RDI,8),%ZMM2,%ZMM0 |
(208) 0x418f0e ADD $0x8,%RDI |
(208) 0x418f12 CMP %RSI,%RDI |
(208) 0x418f15 JL 418f00 |
(205) 0x418f17 VEXTRACTF64X4 $0x1,%ZMM0,%YMM2 |
(205) 0x418f1e VADDPD %ZMM2,%ZMM0,%ZMM0 |
(205) 0x418f24 VEXTRACTF128 $0x1,%YMM0,%XMM2 |
(205) 0x418f2a VADDPD %XMM2,%XMM0,%XMM0 |
(205) 0x418f2e VPERMILPD $0x1,%XMM0,%XMM2 |
(205) 0x418f34 VADDSD %XMM2,%XMM0,%XMM0 |
(205) 0x418f38 CMP %R10,%RSI |
(205) 0x418f3b JNE 418f56 |
(205) 0x418f3d JMP 418fd2 |
0x418f42 NOPW %CS:(%RAX,%RAX,1) |
(205) 0x418f50 VXORPD %XMM0,%XMM0,%XMM0 |
(205) 0x418f54 XOR %ESI,%ESI |
(205) 0x418f56 VPBROADCASTQ %RSI,%ZMM2 |
(205) 0x418f5c VPORQ %ZMM5,%ZMM2,%ZMM2 |
(205) 0x418f62 VPCMPLTUQ %ZMM1,%ZMM2,%K1 |
(205) 0x418f69 VMOVUPD (%RCX,%RSI,8),%ZMM2{%K1}{z} |
(205) 0x418f70 VMOVUPD 0x140(%RSP),%ZMM4 |
(205) 0x418f78 VMOVAPD %ZMM2,%ZMM4{%K1} |
(205) 0x418f7e VMOVUPD (%RDX,%RSI,8),%ZMM2{%K1}{z} |
(205) 0x418f85 VMOVUPD 0x180(%RSP),%ZMM3 |
(205) 0x418f8d VMOVAPD %ZMM2,%ZMM3{%K1} |
(205) 0x418f93 VMOVUPD %ZMM3,0x180(%RSP) |
(205) 0x418f9b VMOVUPD %ZMM4,0x140(%RSP) |
(205) 0x418fa3 VFMADD231PD 0x77fbb(%RIP){1to8},%ZMM4,%ZMM2{%K1}{z} |
(205) 0x418fad VEXTRACTF64X4 $0x1,%ZMM2,%YMM3 |
(205) 0x418fb4 VADDPD %ZMM3,%ZMM2,%ZMM2 |
(205) 0x418fba VEXTRACTF128 $0x1,%YMM2,%XMM3 |
(205) 0x418fc0 VADDPD %XMM3,%XMM2,%XMM2 |
(205) 0x418fc4 VPERMILPD $0x1,%XMM2,%XMM3 |
(205) 0x418fca VADDSD %XMM3,%XMM2,%XMM2 |
(205) 0x418fce VADDSD %XMM2,%XMM0,%XMM0 |
(205) 0x418fd2 LEA (%R14,%R14,4),%RDX |
(205) 0x418fd6 MOV 0x8(%RAX,%RDX,8),%R11 |
(205) 0x418fdb MOV 0x18(%RAX,%RDX,8),%R8 |
(205) 0x418fe0 MOV %R10,%RAX |
(205) 0x418fe3 AND $-0x8,%RAX |
(205) 0x418fe7 LEA (,%R11,8),%R15 |
(205) 0x418fef MOV %R8,%RDX |
(205) 0x418ff2 XOR %EDI,%EDI |
(205) 0x418ff4 JMP 419025 |
0x418ff6 NOPW %CS:(%RAX,%RAX,1) |
(206) 0x419000 VXORPD %XMM2,%XMM2,%XMM2 |
(206) 0x419004 LEA (%R14,%R14,2),%RSI |
(206) 0x419008 LEA (%R9,%RSI,8),%RSI |
(206) 0x41900c VMOVSD %XMM2,(%RSI,%RDI,8) |
(206) 0x419011 LEA 0x1(%RDI),%RSI |
(206) 0x419015 ADD %R15,%RDX |
(206) 0x419018 CMP $0x2,%RDI |
(206) 0x41901c MOV %RSI,%RDI |
(206) 0x41901f JE 418cc0 |
(206) 0x419025 TEST %R10D,%R10D |
(206) 0x419028 JLE 419000 |
(206) 0x41902a MOV %R11,%RSI |
(206) 0x41902d IMUL %RDI,%RSI |
(206) 0x419031 TEST %RAX,%RAX |
(206) 0x419034 JE 419090 |
(206) 0x419036 VXORPD %XMM2,%XMM2,%XMM2 |
(206) 0x41903a XOR %EBX,%EBX |
(206) 0x41903c NOPL (%RAX) |
(207) 0x419040 VMOVUPD (%RDX,%RBX,8),%ZMM3 |
(207) 0x419047 VFMADD231PD (%RCX,%RBX,8),%ZMM3,%ZMM2 |
(207) 0x41904e ADD $0x8,%RBX |
(207) 0x419052 CMP %RAX,%RBX |
(207) 0x419055 JL 419040 |
(206) 0x419057 VEXTRACTF64X4 $0x1,%ZMM2,%YMM3 |
(206) 0x41905e VADDPD %ZMM3,%ZMM2,%ZMM2 |
(206) 0x419064 VEXTRACTF128 $0x1,%YMM2,%XMM3 |
(206) 0x41906a VADDPD %XMM3,%XMM2,%XMM2 |
(206) 0x41906e VPERMILPD $0x1,%XMM2,%XMM3 |
(206) 0x419074 VADDSD %XMM3,%XMM2,%XMM2 |
(206) 0x419078 MOV %RAX,%R12 |
(206) 0x41907b CMP %R10,%RAX |
(206) 0x41907e JE 419004 |
(206) 0x419080 JMP 419097 |
0x419082 NOPW %CS:(%RAX,%RAX,1) |
(206) 0x419090 VXORPD %XMM2,%XMM2,%XMM2 |
(206) 0x419094 XOR %R12D,%R12D |
(206) 0x419097 VPBROADCASTQ %R12,%ZMM3 |
(206) 0x41909d VPORQ %ZMM5,%ZMM3,%ZMM3 |
(206) 0x4190a3 VPCMPLTUQ %ZMM1,%ZMM3,%K1 |
(206) 0x4190aa ADD %R12,%RSI |
(206) 0x4190ad VMOVUPD (%R8,%RSI,8),%ZMM3{%K1}{z} |
(206) 0x4190b4 VMOVAPD %ZMM3,%ZMM7{%K1} |
(206) 0x4190ba VMOVUPD (%RCX,%R12,8),%ZMM4{%K1}{z} |
(206) 0x4190c1 VMOVAPD %ZMM4,%ZMM6{%K1} |
(206) 0x4190c7 VMULPD %ZMM4,%ZMM3,%ZMM3{%K1}{z} |
(206) 0x4190cd VEXTRACTF64X4 $0x1,%ZMM3,%YMM4 |
(206) 0x4190d4 VADDPD %ZMM4,%ZMM3,%ZMM3 |
(206) 0x4190da VEXTRACTF128 $0x1,%YMM3,%XMM4 |
(206) 0x4190e0 VADDPD %XMM4,%XMM3,%XMM3 |
(206) 0x4190e4 VPERMILPD $0x1,%XMM3,%XMM4 |
(206) 0x4190ea VADDSD %XMM4,%XMM3,%XMM3 |
(206) 0x4190ee VADDSD %XMM3,%XMM2,%XMM2 |
(206) 0x4190f2 JMP 419004 |
0x4190f7 MOV $0x3ffffffffffffff0,%R10 |
0x419101 MOV 0x30(%RSP),%RCX |
0x419106 MOV 0x28(%RSP),%RDX |
0x41910b TEST %EAX,%EAX |
0x41910d JE 4191a1 |
0x419113 MOVSXD %EAX,%R8 |
0x419116 MOV 0x188(%R13),%RSI |
0x41911d MOV 0x18(%RDX),%RDI |
0x419121 CMP $0x2,%R8 |
0x419125 MOV $0x1,%R9D |
0x41912b CMOVAE %R8,%R9 |
0x41912f LEA -0x1(%R9),%R11 |
0x419133 LEA -0x8(,%R9,8),%RAX |
0x41913b LEA (%RAX,%RAX,2),%RAX |
0x41913f LEA (%RDI,%RAX,1),%RDX |
0x419143 ADD $0x10,%RDX |
0x419147 CMP %RSI,%RDX |
0x41914a JB 4191aa |
0x41914c ADD %RSI,%RAX |
0x41914f ADD $0x10,%RAX |
0x419153 CMP %RDI,%RAX |
0x419156 JB 4191aa |
0x419158 XOR %EDX,%EDX |
0x41915a NOPW (%RAX,%RAX,1) |
(204) 0x419160 XOR %EAX,%EAX |
(204) 0x419162 NOPW %CS:(%RAX,%RAX,1) |
(203) 0x419170 VMOVSD (%RDI,%RAX,8),%XMM0 |
(203) 0x419175 VADDSD (%RSI,%RAX,8),%XMM0,%XMM0 |
(203) 0x41917a VMOVSD %XMM0,(%RDI,%RAX,8) |
(203) 0x41917f INC %RAX |
(203) 0x419182 CMP $0x3,%RAX |
(203) 0x419186 JNE 419170 |
(204) 0x419188 LEA 0x1(%RDX),%RAX |
(204) 0x41918c ADD $0x18,%RSI |
(204) 0x419190 ADD $0x18,%RDI |
(204) 0x419194 CMP %R11,%RDX |
(204) 0x419197 MOV %RAX,%RDX |
(204) 0x41919a JNE 419160 |
0x41919c JMP 419251 |
0x4191a1 VXORPD %XMM0,%XMM0,%XMM0 |
0x4191a5 JMP 4193f5 |
0x4191aa TEST %R11,%R11 |
0x4191ad JE 4192cb |
0x4191b3 MOV %R9,%RDX |
0x4191b6 AND $-0x2,%RDX |
0x4191ba MOV $0x28,%EAX |
0x4191bf XOR %EBX,%EBX |
0x4191c1 NOPW %CS:(%RAX,%RAX,1) |
(202) 0x4191d0 VMOVUPD -0x28(%RDI,%RAX,1),%XMM0 |
(202) 0x4191d6 VADDPD -0x28(%RSI,%RAX,1),%XMM0,%XMM0 |
(202) 0x4191dc VMOVUPD %XMM0,-0x28(%RDI,%RAX,1) |
(202) 0x4191e2 VMOVSD -0x18(%RDI,%RAX,1),%XMM0 |
(202) 0x4191e8 VADDSD -0x18(%RSI,%RAX,1),%XMM0,%XMM0 |
(202) 0x4191ee VMOVSD %XMM0,-0x18(%RDI,%RAX,1) |
(202) 0x4191f4 VMOVUPD -0x10(%RDI,%RAX,1),%XMM0 |
(202) 0x4191fa VADDPD -0x10(%RSI,%RAX,1),%XMM0,%XMM0 |
(202) 0x419200 VMOVUPD %XMM0,-0x10(%RDI,%RAX,1) |
(202) 0x419206 VMOVSD (%RDI,%RAX,1),%XMM0 |
(202) 0x41920b VADDSD (%RSI,%RAX,1),%XMM0,%XMM0 |
(202) 0x419210 VMOVSD %XMM0,(%RDI,%RAX,1) |
(202) 0x419215 ADD $0x2,%RBX |
(202) 0x419219 ADD $0x30,%RAX |
(202) 0x41921d CMP %RBX,%RDX |
(202) 0x419220 JNE 4191d0 |
0x419222 TEST $0x1,%R9B |
0x419226 JE 419251 |
0x419228 SAL $0x3,%RBX |
0x41922c LEA (%RBX,%RBX,2),%RAX |
0x419230 VMOVUPD (%RDI,%RAX,1),%XMM0 |
0x419235 VADDPD (%RSI,%RAX,1),%XMM0,%XMM0 |
0x41923a VMOVUPD %XMM0,(%RDI,%RAX,1) |
0x41923f VMOVSD 0x10(%RDI,%RAX,1),%XMM0 |
0x419245 VADDSD 0x10(%RSI,%RAX,1),%XMM0,%XMM0 |
0x41924b VMOVSD %XMM0,0x10(%RDI,%RAX,1) |
0x419251 MOV 0x1b0(%R13),%RDI |
0x419258 MOV 0x18(%RCX),%RSI |
0x41925c LEA (%RSI,%R11,8),%RAX |
0x419260 CMP %RDI,%RAX |
0x419263 JB 41928c |
0x419265 LEA (%RDI,%R11,8),%RAX |
0x419269 CMP %RSI,%RAX |
0x41926c JB 41928c |
0x41926e XOR %EAX,%EAX |
(201) 0x419270 VMOVSD (%RSI,%RAX,8),%XMM0 |
(201) 0x419275 VSUBSD (%RDI,%RAX,8),%XMM0,%XMM0 |
(201) 0x41927a VMOVSD %XMM0,(%RSI,%RAX,8) |
(201) 0x41927f INC %RAX |
(201) 0x419282 CMP %RAX,%R9 |
(201) 0x419285 JNE 419270 |
0x419287 JMP 419316 |
0x41928c MOV %R9,%RDX |
0x41928f AND $-0x8,%RDX |
0x419293 JE 4192dc |
0x419295 LEA -0x1(%RDX),%RAX |
0x419299 XOR %ECX,%ECX |
0x41929b NOPL (%RAX,%RAX,1) |
(200) 0x4192a0 VMOVUPD (%RSI,%RCX,8),%ZMM0 |
(200) 0x4192a7 VSUBPD (%RDI,%RCX,8),%ZMM0,%ZMM0 |
(200) 0x4192ae VMOVUPD %ZMM0,(%RSI,%RCX,8) |
(200) 0x4192b5 ADD $0x8,%RCX |
(200) 0x4192b9 CMP %RAX,%RCX |
(200) 0x4192bc JBE 4192a0 |
0x4192be CMP %RDX,%R9 |
0x4192c1 JE 419316 |
0x4192c3 VPBROADCASTQ %R9,%ZMM0 |
0x4192c9 JMP 4192e4 |
0x4192cb XOR %EBX,%EBX |
0x4192cd TEST $0x1,%R9B |
0x4192d1 JNE 419228 |
0x4192d7 JMP 419251 |
0x4192dc VPBROADCASTQ %R9,%ZMM0 |
0x4192e2 XOR %EDX,%EDX |
0x4192e4 VPBROADCASTQ %RDX,%ZMM1 |
0x4192ea VPORQ 0x7638c(%RIP),%ZMM1,%ZMM1 |
0x4192f4 VPCMPLTUQ %ZMM0,%ZMM1,%K1 |
0x4192fb VMOVUPD (%RSI,%RDX,8),%ZMM0{%K1}{z} |
0x419302 VMOVUPD (%RDI,%RDX,8),%ZMM1{%K1}{z} |
0x419309 VSUBPD %ZMM1,%ZMM0,%ZMM0 |
0x41930f VMOVUPD %ZMM0,(%RSI,%RDX,8){%K1} |
0x419316 MOV 0xe8(%R13),%RCX |
0x41931d MOV $0x1fffffffffffffff,%RAX |
0x419327 ADD %RAX,%R8 |
0x41932a AND %RAX,%R8 |
0x41932d INC %R8 |
0x419330 AND %R8,%R10 |
0x419333 JE 419386 |
0x419335 VXORPD %XMM0,%XMM0,%XMM0 |
0x419339 XOR %EAX,%EAX |
0x41933b VPXOR %XMM1,%XMM1,%XMM1 |
0x41933f NOP |
(199) 0x419340 VADDPD (%RCX,%RAX,8),%ZMM0,%ZMM0 |
(199) 0x419347 VADDPD 0x40(%RCX,%RAX,8),%ZMM1,%ZMM1 |
(199) 0x41934f ADD $0x10,%RAX |
(199) 0x419353 CMP %R10,%RAX |
(199) 0x419356 JB 419340 |
0x419358 VADDPD %ZMM1,%ZMM0,%ZMM0 |
0x41935e VEXTRACTF64X4 $0x1,%ZMM0,%YMM1 |
0x419365 VADDPD %ZMM1,%ZMM0,%ZMM0 |
0x41936b VEXTRACTF128 $0x1,%YMM0,%XMM1 |
0x419371 VADDPD %XMM1,%XMM0,%XMM0 |
0x419375 VPERMILPD $0x1,%XMM0,%XMM1 |
0x41937b VADDSD %XMM1,%XMM0,%XMM0 |
0x41937f CMP %R10,%R8 |
0x419382 JNE 41938d |
0x419384 JMP 4193f5 |
0x419386 XOR %R10D,%R10D |
0x419389 VXORPD %XMM0,%XMM0,%XMM0 |
0x41938d VPBROADCASTQ %R8,%ZMM1 |
0x419393 VPBROADCASTQ %R10,%ZMM2 |
0x419399 VPORQ 0x7631d(%RIP),%ZMM2,%ZMM3 |
0x4193a3 VPORQ 0x762d3(%RIP),%ZMM2,%ZMM2 |
0x4193ad VPCMPLTUQ %ZMM1,%ZMM2,%K1 |
0x4193b4 VPCMPLTUQ %ZMM1,%ZMM3,%K2 |
0x4193bb VMOVUPD 0x40(%RCX,%R10,8),%ZMM1{%K2}{z} |
0x4193c3 VMOVUPD (%RCX,%R10,8),%ZMM2{%K1}{z} |
0x4193ca VADDPD %ZMM1,%ZMM2,%ZMM1 |
0x4193d0 VEXTRACTF64X4 $0x1,%ZMM1,%YMM2 |
0x4193d7 VADDPD %ZMM2,%ZMM1,%ZMM1 |
0x4193dd VEXTRACTF128 $0x1,%YMM1,%XMM2 |
0x4193e3 VADDPD %XMM2,%XMM1,%XMM1 |
0x4193e7 VPERMILPD $0x1,%XMM1,%XMM2 |
0x4193ed VADDSD %XMM2,%XMM1,%XMM1 |
0x4193f1 VADDSD %XMM1,%XMM0,%XMM0 |
0x4193f5 VXORPD 0x74c81(%RIP){1to2},%XMM0,%XMM0 |
0x4193ff VMOVLPD %XMM0,0x10(%R13) |
0x419405 LEA -0x28(%RBP),%RSP |
0x419409 POP %RBX |
0x41940a POP %R12 |
0x41940c POP %R13 |
0x41940e POP %R14 |
0x419410 POP %R15 |
0x419412 POP %RBP |
0x419413 VZEROUPPER |
0x419416 RET |
0x419417 NOPW (%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | miniqmcreference::OneBodyJastr[...] | OneBodyJastrowRef.h:126 | exec |
○ | qmcplusplus::WaveFunction::eva[...] | WaveFunction.cpp:181 | exec |
○ | main.extracted.107 | miniqmc.cpp:375 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | main | miniqmc.cpp:374 | exec |
○ | __libc_init_first | libc.so.6 |
Path / |
Source file and lines | OneBodyJastrowRef.h:112-198 |
Module | exec |
nb instructions | 160 |
nb uops | 160 |
loop length | 760 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 3 |
used zmm registers | 4 |
nb stack references | 5 |
micro-operation queue | 40.50 cycles |
front end | 40.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 22.42 | 22.58 | 15.00 | 15.00 | 14.00 | 22.50 | 22.50 | 15.00 |
cycles | 22.42 | 22.58 | 15.00 | 15.00 | 14.00 | 22.50 | 22.50 | 15.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 40.13 |
Stall cycles | 0.00 |
Front-end | 40.50 |
Dispatch | 22.58 |
Overall L1 | 40.50 |
all | 29% |
load | 100% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 34% |
all | 78% |
load | 77% |
store | 50% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 66% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 55% |
load | 83% |
store | 25% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 66% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 54% |
all | 32% |
load | 100% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 35% |
all | 47% |
load | 55% |
store | 37% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 52% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 30% |
all | 40% |
load | 66% |
store | 25% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 52% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 33% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SUB $0x200,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RSI,0x20(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $0x3ffffffffffffff0,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x98(%RDI),%EAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %R8B,%R8B | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 41910b | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 41910b | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RDX,0x28(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RCX,0x30(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0xa10(%RAX),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOVSXD 0x90(%R13),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RAX,%RCX,8),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RBX,0x38(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JMP 418cf5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $0x3ffffffffffffff0,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x30(%RSP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x28(%RSP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 4191a1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD %EAX,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x188(%R13),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x18(%RDX),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP $0x2,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1,%R9D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVAE %R8,%R9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA -0x1(%R9),%R11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x8(,%R9,8),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RAX,%RAX,2),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RDI,%RAX,1),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x10,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %RSI,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JB 4191aa | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD %RSI,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD $0x10,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %RDI,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JB 4191aa | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 419251 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4193f5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
TEST %R11,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 4192cb | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R9,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x2,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x28,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST $0x1,%R9B | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 419251 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
SAL $0x3,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA (%RBX,%RBX,2),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVUPD (%RDI,%RAX,1),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VADDPD (%RSI,%RAX,1),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %XMM0,(%RDI,%RAX,1) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD 0x10(%RDI,%RAX,1),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VADDSD 0x10(%RSI,%RAX,1),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM0,0x10(%RDI,%RAX,1) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x1b0(%R13),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x18(%RCX),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%RSI,%R11,8),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RDI,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JB 41928c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA (%RDI,%R11,8),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RSI,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JB 41928c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 419316 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %R9,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x8,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 4192dc | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA -0x1(%RDX),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP %RDX,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 419316 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VPBROADCASTQ %R9,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
JMP 4192e4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST $0x1,%R9B | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 419228 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
JMP 419251 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
VPBROADCASTQ %R9,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPBROADCASTQ %RDX,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPORQ 0x7638c(%RIP),%ZMM1,%ZMM1 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPLTUQ %ZMM0,%ZMM1,%K1 | |||||||||||
VMOVUPD (%RSI,%RDX,8),%ZMM0{%K1}{z} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD (%RDI,%RDX,8),%ZMM1{%K1}{z} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VSUBPD %ZMM1,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVUPD %ZMM0,(%RSI,%RDX,8){%K1} | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xe8(%R13),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV $0x1fffffffffffffff,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD %RAX,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND %RAX,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND %R8,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 419386 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPXOR %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VADDPD %ZMM1,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VEXTRACTF64X4 $0x1,%ZMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPD %ZMM1,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPD %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMILPD $0x1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSD %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R10,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 41938d | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
JMP 4193f5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPBROADCASTQ %R8,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R10,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPORQ 0x7631d(%RIP),%ZMM2,%ZMM3 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPORQ 0x762d3(%RIP),%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPLTUQ %ZMM1,%ZMM2,%K1 | |||||||||||
VPCMPLTUQ %ZMM1,%ZMM3,%K2 | |||||||||||
VMOVUPD 0x40(%RCX,%R10,8),%ZMM1{%K2}{z} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD (%RCX,%R10,8),%ZMM2{%K1}{z} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %ZMM1,%ZMM2,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VEXTRACTF64X4 $0x1,%ZMM1,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPD %ZMM2,%ZMM1,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPD %XMM2,%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMILPD $0x1,%XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSD %XMM2,%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VXORPD 0x74c81(%RIP){1to2},%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VMOVLPD %XMM0,0x10(%R13) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | OneBodyJastrowRef.h:112-198 |
Module | exec |
nb instructions | 160 |
nb uops | 160 |
loop length | 760 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 3 |
used zmm registers | 4 |
nb stack references | 5 |
micro-operation queue | 40.50 cycles |
front end | 40.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 22.42 | 22.58 | 15.00 | 15.00 | 14.00 | 22.50 | 22.50 | 15.00 |
cycles | 22.42 | 22.58 | 15.00 | 15.00 | 14.00 | 22.50 | 22.50 | 15.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 40.13 |
Stall cycles | 0.00 |
Front-end | 40.50 |
Dispatch | 22.58 |
Overall L1 | 40.50 |
all | 29% |
load | 100% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 34% |
all | 78% |
load | 77% |
store | 50% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 66% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 55% |
load | 83% |
store | 25% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 66% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 54% |
all | 32% |
load | 100% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 35% |
all | 47% |
load | 55% |
store | 37% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 52% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 30% |
all | 40% |
load | 66% |
store | 25% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 52% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 33% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SUB $0x200,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RSI,0x20(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $0x3ffffffffffffff0,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x98(%RDI),%EAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %R8B,%R8B | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 41910b | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 41910b | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RDX,0x28(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RCX,0x30(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0xa10(%RAX),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOVSXD 0x90(%R13),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RAX,%RCX,8),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RBX,0x38(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JMP 418cf5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $0x3ffffffffffffff0,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x30(%RSP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x28(%RSP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 4191a1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD %EAX,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x188(%R13),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x18(%RDX),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP $0x2,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1,%R9D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVAE %R8,%R9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA -0x1(%R9),%R11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x8(,%R9,8),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RAX,%RAX,2),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RDI,%RAX,1),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x10,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %RSI,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JB 4191aa | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD %RSI,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD $0x10,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %RDI,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JB 4191aa | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 419251 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4193f5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
TEST %R11,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 4192cb | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R9,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x2,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x28,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST $0x1,%R9B | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 419251 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
SAL $0x3,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA (%RBX,%RBX,2),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVUPD (%RDI,%RAX,1),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VADDPD (%RSI,%RAX,1),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %XMM0,(%RDI,%RAX,1) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD 0x10(%RDI,%RAX,1),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VADDSD 0x10(%RSI,%RAX,1),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM0,0x10(%RDI,%RAX,1) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x1b0(%R13),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x18(%RCX),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%RSI,%R11,8),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RDI,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JB 41928c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA (%RDI,%R11,8),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RSI,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JB 41928c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 419316 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %R9,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x8,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 4192dc | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA -0x1(%RDX),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP %RDX,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 419316 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VPBROADCASTQ %R9,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
JMP 4192e4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST $0x1,%R9B | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 419228 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
JMP 419251 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
VPBROADCASTQ %R9,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPBROADCASTQ %RDX,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPORQ 0x7638c(%RIP),%ZMM1,%ZMM1 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPLTUQ %ZMM0,%ZMM1,%K1 | |||||||||||
VMOVUPD (%RSI,%RDX,8),%ZMM0{%K1}{z} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD (%RDI,%RDX,8),%ZMM1{%K1}{z} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VSUBPD %ZMM1,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVUPD %ZMM0,(%RSI,%RDX,8){%K1} | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xe8(%R13),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV $0x1fffffffffffffff,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD %RAX,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND %RAX,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND %R8,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 419386 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPXOR %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VADDPD %ZMM1,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VEXTRACTF64X4 $0x1,%ZMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPD %ZMM1,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPD %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMILPD $0x1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSD %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R10,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 41938d | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
JMP 4193f5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPBROADCASTQ %R8,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R10,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPORQ 0x7631d(%RIP),%ZMM2,%ZMM3 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPORQ 0x762d3(%RIP),%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPLTUQ %ZMM1,%ZMM2,%K1 | |||||||||||
VPCMPLTUQ %ZMM1,%ZMM3,%K2 | |||||||||||
VMOVUPD 0x40(%RCX,%R10,8),%ZMM1{%K2}{z} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD (%RCX,%R10,8),%ZMM2{%K1}{z} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %ZMM1,%ZMM2,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VEXTRACTF64X4 $0x1,%ZMM1,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPD %ZMM2,%ZMM1,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPD %XMM2,%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMILPD $0x1,%XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSD %XMM2,%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VXORPD 0x74c81(%RIP){1to2},%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VMOVLPD %XMM0,0x10(%R13) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼miniqmcreference::OneBodyJastrowRef | 0.03 | 0.02 |
▼Loop 205 - OneBodyJastrowRef.h:112-198 - exec– | 0 | 0 |
○Loop 209 - stl_numeric.h:140-141 - exec | 0 | 0 |
▼Loop 206 - OneBodyJastrowRef.h:170-198 - exec– | 0 | 0 |
○Loop 207 - OneBodyJastrowRef.h:196-197 - exec | 0.03 | 0.01 |
○Loop 208 - OneBodyJastrowRef.h:190-191 - exec | 0 | 0 |
▼Loop 204 - OneBodyJastrowRef.h:173-173 - exec– | 0 | 0 |
○Loop 203 - TinyVectorOps.h:49-49 - exec | 0 | 0 |
○Loop 201 - OneBodyJastrowRef.h:175-176 - exec | 0 | 0 |
○Loop 202 - OneBodyJastrowRef.h:173-173 - exec | 0 | 0 |
○Loop 200 - OneBodyJastrowRef.h:175-176 - exec | 0 | 0 |
○Loop 199 - stl_numeric.h:140-141 - exec | 0 | 0 |