Function: miniqmcreference::DiracDeterminantRef<qmcplusplus::DelayedUpdate<double, double> >::evaluateGL | Module: exec | Source: DiracDeterminantRef.cpp:156-181 [...] | Coverage: 0.53% |
---|
Function: miniqmcreference::DiracDeterminantRef<qmcplusplus::DelayedUpdate<double, double> >::evaluateGL | Module: exec | Source: DiracDeterminantRef.cpp:156-181 [...] | Coverage: 0.53% |
---|
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/Numerics/PETE/OperatorTags.h: 63 - 94 |
-------------------------------------------------------------------------------- |
63: return (a * b); |
[...] |
94: (const_cast<T1&>(a) += b); |
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/Platforms/CPU/SIMD/inner_product.hpp: 82 - 155 |
-------------------------------------------------------------------------------- |
82: for (int i = 0; i < n; i++) |
83: res += a[i] * b[i]; |
[...] |
155: for (int i = 0; i < n; i++) |
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVectorOps.h: 129 - 131 |
-------------------------------------------------------------------------------- |
129: Type_t res = lhs[0] * rhs[0]; |
130: for (unsigned d = 1; d < D; ++d) |
131: res += lhs[d] * rhs[d]; |
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 223 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
/usr/lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h: 173 - 173 |
-------------------------------------------------------------------------------- |
173: pointer _M_ptr() const { return std::get<0>(_M_t); } |
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/DiracDeterminantRef.cpp: 156 - 181 |
-------------------------------------------------------------------------------- |
156: { |
157: if (UpdateMode == ORB_PBYP_RATIO) |
158: { //need to compute dpsiM and d2psiM. Do not touch psiM! |
159: SPOVGLTimer->start(); |
160: Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_temp, dpsiM, d2psiM); |
161: SPOVGLTimer->stop(); |
162: } |
163: |
164: if (NumPtcls == 1) |
165: { |
166: ValueType y = psiM(0, 0); |
167: GradType rv = y * dpsiM(0, 0); |
168: G[FirstIndex] += rv; |
169: L[FirstIndex] += y * d2psiM(0, 0) - dot(rv, rv); |
170: } |
171: else |
172: { |
173: for (size_t i = 0, iat = FirstIndex; i < NumPtcls; ++i, ++iat) |
174: { |
175: mValueType dot_temp = simd::dot(psiM[i], d2psiM[i], NumOrbitals); |
176: mGradType rv = simd::dot(psiM[i], dpsiM[i], NumOrbitals); |
177: G[iat] += rv; |
178: L[iat] += dot_temp - dot(rv, rv); |
179: } |
180: } |
181: } |
0x273db0 PUSH %RBP |
0x273db1 MOV %RSP,%RBP |
0x273db4 PUSH %R15 |
0x273db6 PUSH %R14 |
0x273db8 PUSH %R13 |
0x273dba PUSH %R12 |
0x273dbc PUSH %RBX |
0x273dbd SUB $0x38,%RSP |
0x273dc1 CMPL $0,0xc(%RDI) |
0x273dc5 MOV %RCX,%RBX |
0x273dc8 MOV %RDX,%R15 |
0x273dcb MOV %RDI,%R14 |
0x273dce JNE 273e23 |
0x273dd0 MOV 0x468(%R14),%RDI |
0x273dd7 MOV %RSI,%R12 |
0x273dda CALL 2a07a0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> |
0x273ddf MOV 0x470(%R14),%RDI |
0x273de6 MOV 0x478(%R14),%EDX |
0x273ded MOV 0x47c(%R14),%ECX |
0x273df4 LEA 0x90(%R14),%R8 |
0x273dfb LEA 0x110(%R14),%R9 |
0x273e02 LEA 0x150(%R14),%RAX |
0x273e09 MOV %R12,%RSI |
0x273e0c MOV (%RDI),%R10 |
0x273e0f MOV %RAX,(%RSP) |
0x273e13 CALLQ 0x28(%R10) |
0x273e17 MOV 0x468(%R14),%RDI |
0x273e1e CALL 2a0890 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> |
0x273e23 MOVSXD 0x484(%R14),%RAX |
0x273e2a TEST %RAX,%RAX |
0x273e2d JE 274555 |
0x273e33 CMP $0x1,%EAX |
0x273e36 JNE 273eaa |
0x273e38 MOV 0x100(%R14),%RAX |
0x273e3f MOV 0x140(%R14),%RCX |
0x273e46 MOV 0x18(%R15),%RDX |
0x273e4a VMOVDDUP (%RAX),%XMM0 |
0x273e4e MOVSXD 0x478(%R14),%RAX |
0x273e55 VMULPD (%RCX),%XMM0,%XMM2 |
0x273e59 VMULSD 0x10(%RCX),%XMM0,%XMM1 |
0x273e5e LEA (%RAX,%RAX,2),%RSI |
0x273e62 VADDPD (%RDX,%RSI,8),%XMM2,%XMM3 |
0x273e67 VMULPD %XMM2,%XMM2,%XMM2 |
0x273e6b VMOVUPD %XMM3,(%RDX,%RSI,8) |
0x273e70 VADDSD 0x10(%RDX,%RSI,8),%XMM1,%XMM3 |
0x273e76 VMOVSD %XMM3,0x10(%RDX,%RSI,8) |
0x273e7c VPERMILPD $0x1,%XMM2,%XMM3 |
0x273e82 MOV 0x180(%R14),%RCX |
0x273e89 VADDSD %XMM2,%XMM3,%XMM2 |
0x273e8d MOV 0x18(%RBX),%RDX |
0x273e91 VFMADD231SD %XMM1,%XMM1,%XMM2 |
0x273e96 VFMSUB231SD (%RCX),%XMM0,%XMM2 |
0x273e9b VADDSD (%RDX,%RAX,8),%XMM2,%XMM0 |
0x273ea0 VMOVSD %XMM0,(%RDX,%RAX,8) |
0x273ea5 JMP 274555 |
0x273eaa MOV 0x480(%R14),%ECX |
0x273eb1 TEST %ECX,%ECX |
0x273eb3 JLE 274550 |
0x273eb9 MOV 0x18(%RBX),%RBX |
0x273ebd MOV 0x118(%R14),%RDX |
0x273ec4 VMOVAPD -0x6f84c(%RIP),%YMM1 |
0x273ecc VMOVAPD -0x6f8f4(%RIP),%YMM2 |
0x273ed4 VMOVAPD -0x6f8dc(%RIP),%YMM3 |
0x273edc VMOVAPD -0x69326(%RIP),%ZMM4 |
0x273ee6 VMOVAPD -0x692f0(%RIP),%ZMM5 |
0x273ef0 VMOVAPD -0x692ba(%RIP),%ZMM6 |
0x273efa VMOVAPD -0x69284(%RIP),%ZMM7 |
0x273f04 VMOVAPD -0x6924e(%RIP),%ZMM8 |
0x273f0e VMOVAPD -0x69218(%RIP),%ZMM9 |
0x273f18 MOV 0x158(%R14),%R11 |
0x273f1f MOV 0xd8(%R14),%R9 |
0x273f26 MOV 0x100(%R14),%RSI |
0x273f2d MOV 0x180(%R14),%RDI |
0x273f34 MOV 0x18(%R15),%R10 |
0x273f38 MOV 0x140(%R14),%R8 |
0x273f3f MOV %RCX,%R13 |
0x273f42 SHR $0x5,%R13 |
0x273f46 MOV %ECX,%R15D |
0x273f49 AND $-0x4,%R15D |
0x273f4d VXORPD %XMM0,%XMM0,%XMM0 |
0x273f51 SAL $0x8,%R13 |
0x273f55 MOV %RBX,-0x40(%RBP) |
0x273f59 MOVSXD 0x478(%R14),%RBX |
0x273f60 SAL $0x3,%RDX |
0x273f64 SAL $0x3,%R11 |
0x273f68 MOV %ECX,%R14D |
0x273f6b AND $-0x20,%R14D |
0x273f6f SAL $0x3,%R9 |
0x273f73 LEA 0xc0(%RSI),%R12 |
0x273f7a LEA (%RDX,%RDX,2),%RDX |
0x273f7e MOV %R11,-0x48(%RBP) |
0x273f82 LEA 0xc0(%RDI),%R11 |
0x273f89 MOV %R14,-0x30(%RBP) |
0x273f8d XOR %R14D,%R14D |
0x273f90 MOV %R9,-0x50(%RBP) |
0x273f94 MOV %RDX,-0x38(%RBP) |
0x273f98 JMP 27400f |
0x273f9a NOPW (%RAX,%RAX,1) |
(1060) 0x273fa0 LEA (%RBX,%RBX,2),%RDX |
(1060) 0x273fa4 MOV -0x48(%RBP),%R9 |
(1060) 0x273fa8 ADD -0x38(%RBP),%R8 |
(1060) 0x273fac INC %R14 |
(1060) 0x273faf VADDPD (%R10,%RDX,8),%XMM11,%XMM12 |
(1060) 0x273fb5 VMULPD %XMM11,%XMM11,%XMM11 |
(1060) 0x273fba ADD %R9,%R11 |
(1060) 0x273fbd ADD %R9,%RDI |
(1060) 0x273fc0 VMOVUPD %XMM12,(%R10,%RDX,8) |
(1060) 0x273fc6 VADDSD 0x10(%R10,%RDX,8),%XMM19,%XMM12 |
(1060) 0x273fce VMOVSD %XMM12,0x10(%R10,%RDX,8) |
(1060) 0x273fd5 VPERMILPD $0x1,%XMM11,%XMM12 |
(1060) 0x273fdb MOV -0x40(%RBP),%RDX |
(1060) 0x273fdf VADDSD %XMM11,%XMM12,%XMM11 |
(1060) 0x273fe4 VFNMSUB231SD %XMM19,%XMM19,%XMM11 |
(1060) 0x273fea VADDSD %XMM11,%XMM10,%XMM10 |
(1060) 0x273fef VADDSD (%RDX,%RBX,8),%XMM10,%XMM10 |
(1060) 0x273ff4 VMOVSD %XMM10,(%RDX,%RBX,8) |
(1060) 0x273ff9 MOV -0x50(%RBP),%RDX |
(1060) 0x273ffd INC %RBX |
(1060) 0x274000 ADD %RDX,%R12 |
(1060) 0x274003 ADD %RDX,%RSI |
(1060) 0x274006 CMP %RAX,%R14 |
(1060) 0x274009 JE 274555 |
(1060) 0x27400f CMP $0x4,%ECX |
(1060) 0x274012 JAE 274020 |
(1060) 0x274014 VXORPD %XMM10,%XMM10,%XMM10 |
(1060) 0x274019 XOR %EDX,%EDX |
(1060) 0x27401b JMP 274140 |
(1060) 0x274020 CMP $0x20,%ECX |
(1060) 0x274023 JAE 274040 |
(1060) 0x274025 VXORPD %XMM10,%XMM10,%XMM10 |
(1060) 0x27402a XOR %R9D,%R9D |
(1060) 0x27402d JMP 2740f0 |
0x274032 NOPW %CS:(%RAX,%RAX,1) |
(1060) 0x274040 VXORPD %XMM10,%XMM10,%XMM10 |
(1060) 0x274045 VXORPD %XMM11,%XMM11,%XMM11 |
(1060) 0x27404a VXORPD %XMM12,%XMM12,%XMM12 |
(1060) 0x27404f VXORPD %XMM13,%XMM13,%XMM13 |
(1060) 0x274054 XOR %EDX,%EDX |
(1060) 0x274056 NOPW %CS:(%RAX,%RAX,1) |
(1058) 0x274060 VMOVUPD -0xc0(%R11,%RDX,1),%ZMM14 |
(1058) 0x274068 VMOVUPD -0x80(%R11,%RDX,1),%ZMM15 |
(1058) 0x274070 VMOVUPD -0x40(%R11,%RDX,1),%ZMM16 |
(1058) 0x274078 VMOVUPD (%R11,%RDX,1),%ZMM17 |
(1058) 0x27407f VFMADD231PD -0xc0(%R12,%RDX,1),%ZMM14,%ZMM10 |
(1058) 0x274087 VFMADD231PD -0x80(%R12,%RDX,1),%ZMM15,%ZMM11 |
(1058) 0x27408f VFMADD231PD -0x40(%R12,%RDX,1),%ZMM16,%ZMM12 |
(1058) 0x274097 VFMADD231PD (%R12,%RDX,1),%ZMM17,%ZMM13 |
(1058) 0x27409e ADD $0x100,%RDX |
(1058) 0x2740a5 CMP %RDX,%R13 |
(1058) 0x2740a8 JNE 274060 |
(1060) 0x2740aa VADDPD %ZMM10,%ZMM11,%ZMM10 |
(1060) 0x2740b0 VADDPD %ZMM12,%ZMM13,%ZMM12 |
(1060) 0x2740b6 MOV -0x30(%RBP),%RDX |
(1060) 0x2740ba VADDPD %ZMM10,%ZMM12,%ZMM10 |
(1060) 0x2740c0 VEXTRACTF64X4 $0x1,%ZMM10,%YMM11 |
(1060) 0x2740c7 VADDPD %ZMM11,%ZMM10,%ZMM10 |
(1060) 0x2740cd VEXTRACTF128 $0x1,%YMM10,%XMM11 |
(1060) 0x2740d3 VADDPD %XMM11,%XMM10,%XMM10 |
(1060) 0x2740d8 VPERMILPD $0x1,%XMM10,%XMM11 |
(1060) 0x2740de VADDSD %XMM11,%XMM10,%XMM10 |
(1060) 0x2740e3 CMP %RCX,%RDX |
(1060) 0x2740e6 JE 274153 |
(1060) 0x2740e8 MOV %RDX,%R9 |
(1060) 0x2740eb TEST $0x1c,%CL |
(1060) 0x2740ee JE 274140 |
(1060) 0x2740f0 VMOVQ %XMM10,%XMM10 |
(1060) 0x2740f5 NOPW %CS:(%RAX,%RAX,1) |
(1064) 0x274100 VMOVUPD (%RDI,%R9,8),%YMM11 |
(1064) 0x274106 VFMADD231PD (%RSI,%R9,8),%YMM11,%YMM10 |
(1064) 0x27410c ADD $0x4,%R9 |
(1064) 0x274110 CMP %R9,%R15 |
(1064) 0x274113 JNE 274100 |
(1060) 0x274115 VEXTRACTF128 $0x1,%YMM10,%XMM11 |
(1060) 0x27411b MOV %R15,%RDX |
(1060) 0x27411e VADDPD %XMM11,%XMM10,%XMM10 |
(1060) 0x274123 VPERMILPD $0x1,%XMM10,%XMM11 |
(1060) 0x274129 VADDSD %XMM11,%XMM10,%XMM10 |
(1060) 0x27412e CMP %RCX,%R15 |
(1060) 0x274131 JE 274153 |
(1060) 0x274133 NOPW %CS:(%RAX,%RAX,1) |
(1063) 0x274140 VMOVSD (%RDI,%RDX,8),%XMM11 |
(1063) 0x274145 VFMADD231SD (%RSI,%RDX,8),%XMM11,%XMM10 |
(1063) 0x27414b INC %RDX |
(1063) 0x27414e CMP %RDX,%RCX |
(1063) 0x274151 JNE 274140 |
(1060) 0x274153 CMP $0x4,%ECX |
(1060) 0x274156 JAE 274170 |
(1060) 0x274158 VXORPD %XMM11,%XMM11,%XMM11 |
(1060) 0x27415d XOR %EDX,%EDX |
(1060) 0x27415f JMP 274510 |
0x274164 NOPW %CS:(%RAX,%RAX,1) |
(1060) 0x274170 CMP $0x20,%ECX |
(1060) 0x274173 JAE 274190 |
(1060) 0x274175 VXORPD %XMM11,%XMM11,%XMM11 |
(1060) 0x27417a VXORPD %XMM19,%XMM19,%XMM19 |
(1060) 0x274180 XOR %EDX,%EDX |
(1060) 0x274182 JMP 274433 |
0x274187 NOPW (%RAX,%RAX,1) |
(1060) 0x274190 VXORPD %XMM11,%XMM11,%XMM11 |
(1060) 0x274195 VXORPD %XMM12,%XMM12,%XMM12 |
(1060) 0x27419a VXORPD %XMM13,%XMM13,%XMM13 |
(1060) 0x27419f VXORPD %XMM14,%XMM14,%XMM14 |
(1060) 0x2741a4 VXORPD %XMM15,%XMM15,%XMM15 |
(1060) 0x2741a9 VXORPD %XMM16,%XMM16,%XMM16 |
(1060) 0x2741af VXORPD %XMM17,%XMM17,%XMM17 |
(1060) 0x2741b5 VXORPD %XMM18,%XMM18,%XMM18 |
(1060) 0x2741bb VXORPD %XMM19,%XMM19,%XMM19 |
(1060) 0x2741c1 VXORPD %XMM20,%XMM20,%XMM20 |
(1060) 0x2741c7 VXORPD %XMM21,%XMM21,%XMM21 |
(1060) 0x2741cd VXORPD %XMM22,%XMM22,%XMM22 |
(1060) 0x2741d3 XOR %EDX,%EDX |
(1060) 0x2741d5 MOV %R8,%R9 |
(1060) 0x2741d8 NOPL (%RAX,%RAX,1) |
(1059) 0x2741e0 VMOVUPD (%R9),%ZMM24 |
(1059) 0x2741e6 VMOVUPD 0x40(%R9),%ZMM23 |
(1059) 0x2741ed VMOVUPD 0x80(%R9),%ZMM28 |
(1059) 0x2741f4 VMOVUPD -0xc0(%R12,%RDX,1),%ZMM27 |
(1059) 0x2741fc VMOVUPD -0x80(%R12,%RDX,1),%ZMM29 |
(1059) 0x274204 VMOVAPD %ZMM24,%ZMM25 |
(1059) 0x27420a VPERMT2PD %ZMM23,%ZMM4,%ZMM25 |
(1059) 0x274210 VMOVAPD %ZMM24,%ZMM26 |
(1059) 0x274216 VPERMT2PD %ZMM23,%ZMM6,%ZMM26 |
(1059) 0x27421c VPERMT2PD %ZMM24,%ZMM8,%ZMM23 |
(1059) 0x274222 VPERMT2PD %ZMM28,%ZMM5,%ZMM25 |
(1059) 0x274228 VPERMT2PD %ZMM28,%ZMM7,%ZMM26 |
(1059) 0x27422e VPERMT2PD %ZMM28,%ZMM9,%ZMM23 |
(1059) 0x274234 VMOVUPD 0xc0(%R9),%ZMM28 |
(1059) 0x27423b VFMADD231PD %ZMM25,%ZMM27,%ZMM11 |
(1059) 0x274241 VMOVUPD 0x100(%R9),%ZMM25 |
(1059) 0x274248 VFMADD231PD %ZMM23,%ZMM27,%ZMM19 |
(1059) 0x27424e VFMADD231PD %ZMM26,%ZMM27,%ZMM15 |
(1059) 0x274254 VMOVUPD 0x140(%R9),%ZMM27 |
(1059) 0x27425b VMOVAPD %ZMM28,%ZMM23 |
(1059) 0x274261 VMOVAPD %ZMM28,%ZMM24 |
(1059) 0x274267 VPERMT2PD %ZMM25,%ZMM4,%ZMM23 |
(1059) 0x27426d VPERMT2PD %ZMM25,%ZMM6,%ZMM24 |
(1059) 0x274273 VPERMT2PD %ZMM28,%ZMM8,%ZMM25 |
(1059) 0x274279 VMOVUPD 0x180(%R9),%ZMM28 |
(1059) 0x274280 VPERMT2PD %ZMM27,%ZMM5,%ZMM23 |
(1059) 0x274286 VPERMT2PD %ZMM27,%ZMM9,%ZMM25 |
(1059) 0x27428c VPERMT2PD %ZMM27,%ZMM7,%ZMM24 |
(1059) 0x274292 VMOVUPD 0x200(%R9),%ZMM27 |
(1059) 0x274299 VMOVAPD %ZMM28,%ZMM26 |
(1059) 0x27429f VFMADD231PD %ZMM23,%ZMM29,%ZMM12 |
(1059) 0x2742a5 VMOVUPD 0x1c0(%R9),%ZMM23 |
(1059) 0x2742ac VFMADD231PD %ZMM25,%ZMM29,%ZMM20 |
(1059) 0x2742b2 VMOVAPD %ZMM28,%ZMM25 |
(1059) 0x2742b8 VFMADD231PD %ZMM24,%ZMM29,%ZMM16 |
(1059) 0x2742be VPERMT2PD %ZMM23,%ZMM4,%ZMM25 |
(1059) 0x2742c4 VPERMT2PD %ZMM23,%ZMM6,%ZMM26 |
(1059) 0x2742ca VPERMT2PD %ZMM28,%ZMM8,%ZMM23 |
(1059) 0x2742d0 VMOVUPD -0x40(%R12,%RDX,1),%ZMM28 |
(1059) 0x2742d8 VPERMT2PD %ZMM27,%ZMM5,%ZMM25 |
(1059) 0x2742de VPERMT2PD %ZMM27,%ZMM7,%ZMM26 |
(1059) 0x2742e4 VPERMT2PD %ZMM27,%ZMM9,%ZMM23 |
(1059) 0x2742ea VMOVUPD 0x240(%R9),%ZMM27 |
(1059) 0x2742f1 VFMADD231PD %ZMM25,%ZMM28,%ZMM13 |
(1059) 0x2742f7 VMOVUPD 0x280(%R9),%ZMM25 |
(1059) 0x2742fe VFMADD231PD %ZMM23,%ZMM28,%ZMM21 |
(1059) 0x274304 VFMADD231PD %ZMM26,%ZMM28,%ZMM17 |
(1059) 0x27430a VMOVUPD 0x2c0(%R9),%ZMM28 |
(1059) 0x274311 ADD $0x300,%R9 |
(1059) 0x274318 VMOVAPD %ZMM27,%ZMM23 |
(1059) 0x27431e VMOVAPD %ZMM27,%ZMM24 |
(1059) 0x274324 VPERMT2PD %ZMM25,%ZMM4,%ZMM23 |
(1059) 0x27432a VPERMT2PD %ZMM25,%ZMM6,%ZMM24 |
(1059) 0x274330 VPERMT2PD %ZMM27,%ZMM8,%ZMM25 |
(1059) 0x274336 VMOVUPD (%R12,%RDX,1),%ZMM27 |
(1059) 0x27433d ADD $0x100,%RDX |
(1059) 0x274344 VPERMT2PD %ZMM28,%ZMM5,%ZMM23 |
(1059) 0x27434a VPERMT2PD %ZMM28,%ZMM7,%ZMM24 |
(1059) 0x274350 VPERMT2PD %ZMM28,%ZMM9,%ZMM25 |
(1059) 0x274356 VFMADD231PD %ZMM23,%ZMM27,%ZMM14 |
(1059) 0x27435c VFMADD231PD %ZMM24,%ZMM27,%ZMM18 |
(1059) 0x274362 VFMADD231PD %ZMM25,%ZMM27,%ZMM22 |
(1059) 0x274368 CMP %RDX,%R13 |
(1059) 0x27436b JNE 2741e0 |
(1060) 0x274371 VADDPD %ZMM15,%ZMM16,%ZMM15 |
(1060) 0x274377 VADDPD %ZMM17,%ZMM18,%ZMM16 |
(1060) 0x27437d VADDPD %ZMM11,%ZMM12,%ZMM11 |
(1060) 0x274383 VADDPD %ZMM13,%ZMM14,%ZMM13 |
(1060) 0x274389 VADDPD %ZMM19,%ZMM20,%ZMM19 |
(1060) 0x27438f VADDPD %ZMM21,%ZMM22,%ZMM20 |
(1060) 0x274395 MOV -0x30(%RBP),%RDX |
(1060) 0x274399 VADDPD %ZMM15,%ZMM16,%ZMM15 |
(1060) 0x27439f VADDPD %ZMM11,%ZMM13,%ZMM11 |
(1060) 0x2743a5 VADDPD %ZMM19,%ZMM20,%ZMM19 |
(1060) 0x2743ab VEXTRACTF64X4 $0x1,%ZMM15,%YMM16 |
(1060) 0x2743b2 VEXTRACTF64X4 $0x1,%ZMM11,%YMM12 |
(1060) 0x2743b9 VEXTRACTF64X4 $0x1,%ZMM19,%YMM20 |
(1060) 0x2743c0 VADDPD %ZMM16,%ZMM15,%ZMM15 |
(1060) 0x2743c6 VADDPD %ZMM12,%ZMM11,%ZMM11 |
(1060) 0x2743cc VADDPD %ZMM20,%ZMM19,%ZMM19 |
(1060) 0x2743d2 VEXTRACTF32X4 $0x1,%YMM15,%XMM16 |
(1060) 0x2743d9 VEXTRACTF128 $0x1,%YMM11,%XMM12 |
(1060) 0x2743df VEXTRACTF32X4 $0x1,%YMM19,%XMM20 |
(1060) 0x2743e6 VADDPD %XMM16,%XMM15,%XMM15 |
(1060) 0x2743ec VADDPD %XMM12,%XMM11,%XMM11 |
(1060) 0x2743f1 VADDPD %XMM20,%XMM19,%XMM19 |
(1060) 0x2743f7 VPERMILPD $0x1,%XMM15,%XMM16 |
(1060) 0x2743fe VPERMILPD $0x1,%XMM11,%XMM12 |
(1060) 0x274404 VPERMILPD $0x1,%XMM19,%XMM20 |
(1060) 0x27440b VADDSD %XMM16,%XMM15,%XMM15 |
(1060) 0x274411 VADDSD %XMM12,%XMM11,%XMM12 |
(1060) 0x274416 VADDSD %XMM20,%XMM19,%XMM19 |
(1060) 0x27441c VUNPCKLPD %XMM15,%XMM12,%XMM11 |
(1060) 0x274421 CMP %RCX,%RDX |
(1060) 0x274424 JE 273fa0 |
(1060) 0x27442a TEST $0x1c,%CL |
(1060) 0x27442d JE 2744f4 |
(1060) 0x274433 LEA (%RDX,%RDX,2),%R9 |
(1060) 0x274437 VMOVQ %XMM11,%XMM12 |
(1060) 0x27443c VUNPCKHPD %XMM0,%XMM11,%XMM11 |
(1060) 0x274440 VMOVQ %XMM19,%XMM13 |
(1060) 0x274446 LEA (%R8,%R9,8),%R9 |
(1060) 0x27444a NOPW (%RAX,%RAX,1) |
(1062) 0x274450 VMOVUPD (%R9),%ZMM15 |
(1062) 0x274456 VMOVUPD 0x40(%R9),%YMM16 |
(1062) 0x27445d VMOVUPD (%RSI,%RDX,8),%YMM14 |
(1062) 0x274462 ADD $0x4,%RDX |
(1062) 0x274466 ADD $0x60,%R9 |
(1062) 0x27446a VMOVAPD %ZMM15,%ZMM17 |
(1062) 0x274470 VMOVAPD %ZMM15,%ZMM18 |
(1062) 0x274476 VPERMT2PD %ZMM16,%ZMM1,%ZMM17 |
(1062) 0x27447c VPERMT2PD %ZMM16,%ZMM2,%ZMM18 |
(1062) 0x274482 VPERMT2PD %ZMM16,%ZMM3,%ZMM15 |
(1062) 0x274488 VFMADD231PD %YMM17,%YMM14,%YMM12 |
(1062) 0x27448e VFMADD231PD %YMM18,%YMM14,%YMM11 |
(1062) 0x274494 VFMADD231PD %YMM15,%YMM14,%YMM13 |
(1062) 0x274499 CMP %RDX,%R15 |
(1062) 0x27449c JNE 274450 |
(1060) 0x27449e VEXTRACTF128 $0x1,%YMM13,%XMM14 |
(1060) 0x2744a4 VADDPD %YMM14,%YMM13,%YMM13 |
(1060) 0x2744a9 VPERMILPD $0x1,%XMM13,%XMM14 |
(1060) 0x2744af VADDPD %YMM14,%YMM13,%YMM19 |
(1060) 0x2744b5 VEXTRACTF128 $0x1,%YMM11,%XMM13 |
(1060) 0x2744bb VADDPD %XMM13,%XMM11,%XMM11 |
(1060) 0x2744c0 VEXTRACTF128 $0x1,%YMM12,%XMM13 |
(1060) 0x2744c6 VADDPD %XMM13,%XMM12,%XMM12 |
(1060) 0x2744cb VUNPCKHPD %XMM11,%XMM12,%XMM13 |
(1060) 0x2744d0 VUNPCKLPD %XMM11,%XMM12,%XMM11 |
(1060) 0x2744d5 VADDPD %XMM13,%XMM11,%XMM11 |
(1060) 0x2744da CMP %RCX,%R15 |
(1060) 0x2744dd JE 273fa0 |
(1060) 0x2744e3 VBROADCASTSD %XMM19,%YMM12 |
(1060) 0x2744e9 MOV %R15,%RDX |
(1060) 0x2744ec VBLENDPD $0xc,%YMM12,%YMM11,%YMM11 |
(1060) 0x2744f2 JMP 274510 |
(1060) 0x2744f4 VUNPCKLPD %XMM15,%XMM12,%XMM11 |
(1060) 0x2744f9 VINSERTF32X4 $0x1,%XMM19,%YMM11,%YMM11 |
(1060) 0x274500 VPERMILPD $0x2,%YMM11,%YMM11 |
(1060) 0x274506 NOPW %CS:(%RAX,%RAX,1) |
(1060) 0x274510 LEA (%RDX,%RDX,2),%R9 |
(1060) 0x274514 LEA (%R8,%R9,8),%R9 |
(1060) 0x274518 NOPL (%RAX,%RAX,1) |
(1061) 0x274520 VPERMILPD $0x2,(%R9),%YMM12 |
(1061) 0x274526 ADD $0x18,%R9 |
(1061) 0x27452a VFMADD231PD (%RSI,%RDX,8){1to4},%YMM12,%YMM11 |
(1061) 0x274531 INC %RDX |
(1061) 0x274534 CMP %RDX,%RCX |
(1061) 0x274537 JNE 274520 |
(1060) 0x274539 VEXTRACTF32X4 $0x1,%YMM11,%XMM19 |
(1060) 0x274540 JMP 273fa0 |
0x274545 NOPW %CS:(%RAX,%RAX,1) |
(1057) 0x274550 DEC %RAX |
(1057) 0x274553 JNE 274550 |
0x274555 ADD $0x38,%RSP |
0x274559 POP %RBX |
0x27455a POP %R12 |
0x27455c POP %R13 |
0x27455e POP %R14 |
0x274560 POP %R15 |
0x274562 POP %RBP |
0x274563 VZEROUPPER |
0x274566 RET |
0x274567 INT $0x3 |
0x274568 INT $0x3 |
0x274569 INT $0x3 |
0x27456a INT $0x3 |
0x27456b INT $0x3 |
0x27456c INT $0x3 |
0x27456d INT $0x3 |
0x27456e INT $0x3 |
0x27456f INT $0x3 |
Path / |
Source file and lines | DiracDeterminantRef.cpp:156-181 |
Module | exec |
nb instructions | 120 |
nb uops | 111 |
loop length | 569 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 3 |
used zmm registers | 6 |
nb stack references | 6 |
ADD-SUB / MUL ratio | 1.33 |
micro-operation queue | 18.50 cycles |
front end | 18.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 7.00 | 7.00 | 5.00 | 17.67 | 17.67 | 17.67 | 2.50 | 2.50 | 2.67 | 2.33 | 1.50 | 1.50 |
cycles | 7.00 | 7.00 | 7.00 | 7.00 | 5.00 | 19.67 | 19.67 | 19.67 | 2.50 | 2.50 | 2.67 | 2.33 | 1.50 | 1.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 18.50 |
Dispatch | 19.67 |
Overall L1 | 19.67 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 62% |
load | 68% |
store | 33% |
mul | 66% |
add-sub | 25% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 36% |
load | 55% |
store | 11% |
mul | 66% |
add-sub | 25% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 23% |
all | 12% |
load | 10% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 42% |
load | 53% |
store | 16% |
mul | 20% |
add-sub | 15% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 28% |
load | 45% |
store | 13% |
mul | 20% |
add-sub | 15% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMPL $0,0xc(%RDI) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JNE 273e23 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x73> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x468(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 2a07a0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x470(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x478(%R14),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x47c(%R14),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x90(%R14),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x110(%R14),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x150(%R14),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R12,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RDI),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RAX,(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALLQ 0x28(%R10) | 4 | 0.50 | 0 | 0 | 0 | 0.50 | 0.67 | 0.67 | 0.67 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.20 |
MOV 0x468(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CALL 2a0890 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x484(%R14),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
TEST %RAX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 274555 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x7a5> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
CMP $0x1,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JNE 273eaa <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0xfa> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x100(%R14),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x140(%R14),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R15),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVDDUP (%RAX),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x478(%R14),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULPD (%RCX),%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULSD 0x10(%RCX),%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
LEA (%RAX,%RAX,2),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VADDPD (%RDX,%RSI,8),%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMULPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVUPD %XMM3,(%RDX,%RSI,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VADDSD 0x10(%RDX,%RSI,8),%XMM1,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM3,0x10(%RDX,%RSI,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VPERMILPD $0x1,%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOV 0x180(%R14),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VADDSD %XMM2,%XMM3,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOV 0x18(%RBX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VFMADD231SD %XMM1,%XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMSUB231SD (%RCX),%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD (%RDX,%RAX,8),%XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM0,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
JMP 274555 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x7a5> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV 0x480(%R14),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
TEST %ECX,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 274550 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x7a0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x18(%RBX),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x118(%R14),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVAPD -0x6f84c(%RIP),%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x6f8f4(%RIP),%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x6f8dc(%RIP),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x69326(%RIP),%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x692f0(%RIP),%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x692ba(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x69284(%RIP),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x6924e(%RIP),%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x69218(%RIP),%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x158(%R14),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0xd8(%R14),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x100(%R14),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x180(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R15),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x140(%R14),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RCX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x5,%R13 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %ECX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x4,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x8,%R13 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RBX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD 0x478(%R14),%RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
SAL $0x3,%RDX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SAL $0x3,%R11 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %ECX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x20,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x3,%R9 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA 0xc0(%RSI),%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RDX,%RDX,2),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA 0xc0(%RDI),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R14,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R9,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JMP 27400f <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x25f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
ADD $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 |
Source file and lines | DiracDeterminantRef.cpp:156-181 |
Module | exec |
nb instructions | 120 |
nb uops | 111 |
loop length | 569 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 3 |
used zmm registers | 6 |
nb stack references | 6 |
ADD-SUB / MUL ratio | 1.33 |
micro-operation queue | 18.50 cycles |
front end | 18.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 7.00 | 7.00 | 5.00 | 17.67 | 17.67 | 17.67 | 2.50 | 2.50 | 2.67 | 2.33 | 1.50 | 1.50 |
cycles | 7.00 | 7.00 | 7.00 | 7.00 | 5.00 | 19.67 | 19.67 | 19.67 | 2.50 | 2.50 | 2.67 | 2.33 | 1.50 | 1.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 18.50 |
Dispatch | 19.67 |
Overall L1 | 19.67 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 62% |
load | 68% |
store | 33% |
mul | 66% |
add-sub | 25% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 36% |
load | 55% |
store | 11% |
mul | 66% |
add-sub | 25% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 23% |
all | 12% |
load | 10% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 42% |
load | 53% |
store | 16% |
mul | 20% |
add-sub | 15% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 28% |
load | 45% |
store | 13% |
mul | 20% |
add-sub | 15% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMPL $0,0xc(%RDI) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JNE 273e23 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x73> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x468(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 2a07a0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x470(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x478(%R14),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x47c(%R14),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x90(%R14),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x110(%R14),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x150(%R14),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R12,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RDI),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RAX,(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALLQ 0x28(%R10) | 4 | 0.50 | 0 | 0 | 0 | 0.50 | 0.67 | 0.67 | 0.67 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.20 |
MOV 0x468(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CALL 2a0890 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x484(%R14),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
TEST %RAX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 274555 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x7a5> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
CMP $0x1,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JNE 273eaa <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0xfa> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x100(%R14),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x140(%R14),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R15),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVDDUP (%RAX),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x478(%R14),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULPD (%RCX),%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULSD 0x10(%RCX),%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
LEA (%RAX,%RAX,2),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VADDPD (%RDX,%RSI,8),%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMULPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVUPD %XMM3,(%RDX,%RSI,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VADDSD 0x10(%RDX,%RSI,8),%XMM1,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM3,0x10(%RDX,%RSI,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VPERMILPD $0x1,%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOV 0x180(%R14),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VADDSD %XMM2,%XMM3,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOV 0x18(%RBX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VFMADD231SD %XMM1,%XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMSUB231SD (%RCX),%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD (%RDX,%RAX,8),%XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM0,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
JMP 274555 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x7a5> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV 0x480(%R14),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
TEST %ECX,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 274550 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x7a0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x18(%RBX),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x118(%R14),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVAPD -0x6f84c(%RIP),%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x6f8f4(%RIP),%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x6f8dc(%RIP),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x69326(%RIP),%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x692f0(%RIP),%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x692ba(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x69284(%RIP),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x6924e(%RIP),%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x69218(%RIP),%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x158(%R14),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0xd8(%R14),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x100(%R14),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x180(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R15),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x140(%R14),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RCX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x5,%R13 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %ECX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x4,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x8,%R13 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RBX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD 0x478(%R14),%RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
SAL $0x3,%RDX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SAL $0x3,%R11 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %ECX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x20,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x3,%R9 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA 0xc0(%RSI),%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RDX,%RDX,2),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA 0xc0(%RDI),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R14,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R9,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JMP 27400f <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x25f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
ADD $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼miniqmcreference::DiracDeterminantRef | 0.53 | 0.99 |
▼Loop 1060 - inner_product.hpp:82-155 - exec– | 0 | 0.01 |
○Loop 1059 - inner_product.hpp:155-155 - exec | 0.33 | 0.6 |
○Loop 1058 - inner_product.hpp:82-83 - exec | 0.2 | 0.37 |
○Loop 1064 - inner_product.hpp:82-83 - exec | 0 | 0 |
○Loop 1063 - inner_product.hpp:82-83 - exec | 0 | 0 |
○Loop 1062 - inner_product.hpp:155-155 - exec | 0 | 0 |
○Loop 1061 - inner_product.hpp:155-155 - exec | 0 | 0 |
○Loop 1057 - DiracDeterminantRef.cpp:173-173 - exec | 0 | 0 |