Function: _ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGL ... | Module: exec | Source: DiracDeterminantRef.cpp:156-181 [...] | Coverage: 0.48% |
---|
Function: _ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGL ... | Module: exec | Source: DiracDeterminantRef.cpp:156-181 [...] | Coverage: 0.48% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Platforms/CPU/SIMD/inner_product.hpp: 82 - 155 |
-------------------------------------------------------------------------------- |
82: for (int i = 0; i < n; i++) |
83: res += a[i] * b[i]; |
[...] |
155: for (int i = 0; i < n; i++) |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVectorOps.h: 129 - 131 |
-------------------------------------------------------------------------------- |
129: Type_t res = lhs[0] * rhs[0]; |
130: for (unsigned d = 1; d < D; ++d) |
131: res += lhs[d] * rhs[d]; |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 223 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Numerics/PETE/OperatorTags.h: 63 - 94 |
-------------------------------------------------------------------------------- |
63: return (a * b); |
[...] |
94: (const_cast<T1&>(a) += b); |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/unique_ptr.h: 173 - 173 |
-------------------------------------------------------------------------------- |
173: pointer _M_ptr() const { return std::get<0>(_M_t); } |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/DiracDeterminantRef.cpp: 156 - 181 |
-------------------------------------------------------------------------------- |
156: { |
157: if (UpdateMode == ORB_PBYP_RATIO) |
158: { //need to compute dpsiM and d2psiM. Do not touch psiM! |
159: SPOVGLTimer->start(); |
160: Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_temp, dpsiM, d2psiM); |
161: SPOVGLTimer->stop(); |
162: } |
163: |
164: if (NumPtcls == 1) |
165: { |
166: ValueType y = psiM(0, 0); |
167: GradType rv = y * dpsiM(0, 0); |
168: G[FirstIndex] += rv; |
169: L[FirstIndex] += y * d2psiM(0, 0) - dot(rv, rv); |
170: } |
171: else |
172: { |
173: for (size_t i = 0, iat = FirstIndex; i < NumPtcls; ++i, ++iat) |
174: { |
175: mValueType dot_temp = simd::dot(psiM[i], d2psiM[i], NumOrbitals); |
176: mGradType rv = simd::dot(psiM[i], dpsiM[i], NumOrbitals); |
177: G[iat] += rv; |
178: L[iat] += dot_temp - dot(rv, rv); |
179: } |
180: } |
181: } |
0x274070 PUSH %RBP |
0x274071 MOV %RSP,%RBP |
0x274074 PUSH %R15 |
0x274076 PUSH %R14 |
0x274078 PUSH %R13 |
0x27407a PUSH %R12 |
0x27407c PUSH %RBX |
0x27407d SUB $0x38,%RSP |
0x274081 CMPL $0,0xc(%RDI) |
0x274085 MOV %RCX,%RBX |
0x274088 MOV %RDX,%R15 |
0x27408b MOV %RDI,%R14 |
0x27408e JNE 2740e3 |
0x274090 MOV 0x468(%R14),%RDI |
0x274097 MOV %RSI,%R12 |
0x27409a CALL 2a0a60 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> |
0x27409f MOV 0x470(%R14),%RDI |
0x2740a6 MOV 0x478(%R14),%EDX |
0x2740ad MOV 0x47c(%R14),%ECX |
0x2740b4 LEA 0x90(%R14),%R8 |
0x2740bb LEA 0x110(%R14),%R9 |
0x2740c2 LEA 0x150(%R14),%RAX |
0x2740c9 MOV %R12,%RSI |
0x2740cc MOV (%RDI),%R10 |
0x2740cf MOV %RAX,(%RSP) |
0x2740d3 CALLQ 0x28(%R10) |
0x2740d7 MOV 0x468(%R14),%RDI |
0x2740de CALL 2a0b50 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> |
0x2740e3 MOVSXD 0x484(%R14),%RAX |
0x2740ea TEST %RAX,%RAX |
0x2740ed JE 274815 |
0x2740f3 CMP $0x1,%EAX |
0x2740f6 JNE 27416a |
0x2740f8 MOV 0x100(%R14),%RAX |
0x2740ff MOV 0x140(%R14),%RCX |
0x274106 MOV 0x18(%R15),%RDX |
0x27410a VMOVDDUP (%RAX),%XMM0 |
0x27410e MOVSXD 0x478(%R14),%RAX |
0x274115 VMULPD (%RCX),%XMM0,%XMM2 |
0x274119 VMULSD 0x10(%RCX),%XMM0,%XMM1 |
0x27411e LEA (%RAX,%RAX,2),%RSI |
0x274122 VADDPD (%RDX,%RSI,8),%XMM2,%XMM3 |
0x274127 VMULPD %XMM2,%XMM2,%XMM2 |
0x27412b VMOVUPD %XMM3,(%RDX,%RSI,8) |
0x274130 VADDSD 0x10(%RDX,%RSI,8),%XMM1,%XMM3 |
0x274136 VMOVSD %XMM3,0x10(%RDX,%RSI,8) |
0x27413c VPERMILPD $0x1,%XMM2,%XMM3 |
0x274142 MOV 0x180(%R14),%RCX |
0x274149 VADDSD %XMM2,%XMM3,%XMM2 |
0x27414d MOV 0x18(%RBX),%RDX |
0x274151 VFMADD231SD %XMM1,%XMM1,%XMM2 |
0x274156 VFMSUB231SD (%RCX),%XMM0,%XMM2 |
0x27415b VADDSD (%RDX,%RAX,8),%XMM2,%XMM0 |
0x274160 VMOVSD %XMM0,(%RDX,%RAX,8) |
0x274165 JMP 274815 |
0x27416a MOV 0x480(%R14),%ECX |
0x274171 TEST %ECX,%ECX |
0x274173 JLE 274810 |
0x274179 MOV 0x18(%RBX),%RBX |
0x27417d MOV 0x118(%R14),%RDX |
0x274184 VMOVAPD -0x6fb6c(%RIP),%YMM1 |
0x27418c VMOVAPD -0x6fc14(%RIP),%YMM2 |
0x274194 VMOVAPD -0x6fbfc(%RIP),%YMM3 |
0x27419c VMOVAPD -0x69326(%RIP),%ZMM4 |
0x2741a6 VMOVAPD -0x692f0(%RIP),%ZMM5 |
0x2741b0 VMOVAPD -0x692ba(%RIP),%ZMM6 |
0x2741ba VMOVAPD -0x69284(%RIP),%ZMM7 |
0x2741c4 VMOVAPD -0x6924e(%RIP),%ZMM8 |
0x2741ce VMOVAPD -0x69218(%RIP),%ZMM9 |
0x2741d8 MOV 0x158(%R14),%R11 |
0x2741df MOV 0xd8(%R14),%R9 |
0x2741e6 MOV 0x100(%R14),%RSI |
0x2741ed MOV 0x180(%R14),%RDI |
0x2741f4 MOV 0x18(%R15),%R10 |
0x2741f8 MOV 0x140(%R14),%R8 |
0x2741ff MOV %RCX,%R13 |
0x274202 SHR $0x5,%R13 |
0x274206 MOV %ECX,%R15D |
0x274209 AND $-0x4,%R15D |
0x27420d VXORPD %XMM0,%XMM0,%XMM0 |
0x274211 SAL $0x8,%R13 |
0x274215 MOV %RBX,-0x40(%RBP) |
0x274219 MOVSXD 0x478(%R14),%RBX |
0x274220 SAL $0x3,%RDX |
0x274224 SAL $0x3,%R11 |
0x274228 MOV %ECX,%R14D |
0x27422b AND $-0x20,%R14D |
0x27422f SAL $0x3,%R9 |
0x274233 LEA 0xc0(%RSI),%R12 |
0x27423a LEA (%RDX,%RDX,2),%RDX |
0x27423e MOV %R11,-0x48(%RBP) |
0x274242 LEA 0xc0(%RDI),%R11 |
0x274249 MOV %R14,-0x30(%RBP) |
0x27424d XOR %R14D,%R14D |
0x274250 MOV %R9,-0x50(%RBP) |
0x274254 MOV %RDX,-0x38(%RBP) |
0x274258 JMP 2742cf |
0x27425a NOPW (%RAX,%RAX,1) |
(1060) 0x274260 LEA (%RBX,%RBX,2),%RDX |
(1060) 0x274264 MOV -0x48(%RBP),%R9 |
(1060) 0x274268 ADD -0x38(%RBP),%R8 |
(1060) 0x27426c INC %R14 |
(1060) 0x27426f VADDPD (%R10,%RDX,8),%XMM11,%XMM12 |
(1060) 0x274275 VMULPD %XMM11,%XMM11,%XMM11 |
(1060) 0x27427a ADD %R9,%R11 |
(1060) 0x27427d ADD %R9,%RDI |
(1060) 0x274280 VMOVUPD %XMM12,(%R10,%RDX,8) |
(1060) 0x274286 VADDSD 0x10(%R10,%RDX,8),%XMM19,%XMM12 |
(1060) 0x27428e VMOVSD %XMM12,0x10(%R10,%RDX,8) |
(1060) 0x274295 VPERMILPD $0x1,%XMM11,%XMM12 |
(1060) 0x27429b MOV -0x40(%RBP),%RDX |
(1060) 0x27429f VADDSD %XMM11,%XMM12,%XMM11 |
(1060) 0x2742a4 VFNMSUB231SD %XMM19,%XMM19,%XMM11 |
(1060) 0x2742aa VADDSD %XMM11,%XMM10,%XMM10 |
(1060) 0x2742af VADDSD (%RDX,%RBX,8),%XMM10,%XMM10 |
(1060) 0x2742b4 VMOVSD %XMM10,(%RDX,%RBX,8) |
(1060) 0x2742b9 MOV -0x50(%RBP),%RDX |
(1060) 0x2742bd INC %RBX |
(1060) 0x2742c0 ADD %RDX,%R12 |
(1060) 0x2742c3 ADD %RDX,%RSI |
(1060) 0x2742c6 CMP %RAX,%R14 |
(1060) 0x2742c9 JE 274815 |
(1060) 0x2742cf CMP $0x4,%ECX |
(1060) 0x2742d2 JAE 2742e0 |
(1060) 0x2742d4 VXORPD %XMM10,%XMM10,%XMM10 |
(1060) 0x2742d9 XOR %EDX,%EDX |
(1060) 0x2742db JMP 274400 |
(1060) 0x2742e0 CMP $0x20,%ECX |
(1060) 0x2742e3 JAE 274300 |
(1060) 0x2742e5 VXORPD %XMM10,%XMM10,%XMM10 |
(1060) 0x2742ea XOR %R9D,%R9D |
(1060) 0x2742ed JMP 2743b0 |
0x2742f2 NOPW %CS:(%RAX,%RAX,1) |
(1060) 0x274300 VXORPD %XMM10,%XMM10,%XMM10 |
(1060) 0x274305 VXORPD %XMM11,%XMM11,%XMM11 |
(1060) 0x27430a VXORPD %XMM12,%XMM12,%XMM12 |
(1060) 0x27430f VXORPD %XMM13,%XMM13,%XMM13 |
(1060) 0x274314 XOR %EDX,%EDX |
(1060) 0x274316 NOPW %CS:(%RAX,%RAX,1) |
(1058) 0x274320 VMOVUPD -0xc0(%R11,%RDX,1),%ZMM14 |
(1058) 0x274328 VMOVUPD -0x80(%R11,%RDX,1),%ZMM15 |
(1058) 0x274330 VMOVUPD -0x40(%R11,%RDX,1),%ZMM16 |
(1058) 0x274338 VMOVUPD (%R11,%RDX,1),%ZMM17 |
(1058) 0x27433f VFMADD231PD -0xc0(%R12,%RDX,1),%ZMM14,%ZMM10 |
(1058) 0x274347 VFMADD231PD -0x80(%R12,%RDX,1),%ZMM15,%ZMM11 |
(1058) 0x27434f VFMADD231PD -0x40(%R12,%RDX,1),%ZMM16,%ZMM12 |
(1058) 0x274357 VFMADD231PD (%R12,%RDX,1),%ZMM17,%ZMM13 |
(1058) 0x27435e ADD $0x100,%RDX |
(1058) 0x274365 CMP %RDX,%R13 |
(1058) 0x274368 JNE 274320 |
(1060) 0x27436a VADDPD %ZMM10,%ZMM11,%ZMM10 |
(1060) 0x274370 VADDPD %ZMM12,%ZMM13,%ZMM12 |
(1060) 0x274376 MOV -0x30(%RBP),%RDX |
(1060) 0x27437a VADDPD %ZMM10,%ZMM12,%ZMM10 |
(1060) 0x274380 VEXTRACTF64X4 $0x1,%ZMM10,%YMM11 |
(1060) 0x274387 VADDPD %ZMM11,%ZMM10,%ZMM10 |
(1060) 0x27438d VEXTRACTF128 $0x1,%YMM10,%XMM11 |
(1060) 0x274393 VADDPD %XMM11,%XMM10,%XMM10 |
(1060) 0x274398 VPERMILPD $0x1,%XMM10,%XMM11 |
(1060) 0x27439e VADDSD %XMM11,%XMM10,%XMM10 |
(1060) 0x2743a3 CMP %RCX,%RDX |
(1060) 0x2743a6 JE 274413 |
(1060) 0x2743a8 MOV %RDX,%R9 |
(1060) 0x2743ab TEST $0x1c,%CL |
(1060) 0x2743ae JE 274400 |
(1060) 0x2743b0 VMOVQ %XMM10,%XMM10 |
(1060) 0x2743b5 NOPW %CS:(%RAX,%RAX,1) |
(1064) 0x2743c0 VMOVUPD (%RDI,%R9,8),%YMM11 |
(1064) 0x2743c6 VFMADD231PD (%RSI,%R9,8),%YMM11,%YMM10 |
(1064) 0x2743cc ADD $0x4,%R9 |
(1064) 0x2743d0 CMP %R9,%R15 |
(1064) 0x2743d3 JNE 2743c0 |
(1060) 0x2743d5 VEXTRACTF128 $0x1,%YMM10,%XMM11 |
(1060) 0x2743db MOV %R15,%RDX |
(1060) 0x2743de VADDPD %XMM11,%XMM10,%XMM10 |
(1060) 0x2743e3 VPERMILPD $0x1,%XMM10,%XMM11 |
(1060) 0x2743e9 VADDSD %XMM11,%XMM10,%XMM10 |
(1060) 0x2743ee CMP %RCX,%R15 |
(1060) 0x2743f1 JE 274413 |
(1060) 0x2743f3 NOPW %CS:(%RAX,%RAX,1) |
(1063) 0x274400 VMOVSD (%RDI,%RDX,8),%XMM11 |
(1063) 0x274405 VFMADD231SD (%RSI,%RDX,8),%XMM11,%XMM10 |
(1063) 0x27440b INC %RDX |
(1063) 0x27440e CMP %RDX,%RCX |
(1063) 0x274411 JNE 274400 |
(1060) 0x274413 CMP $0x4,%ECX |
(1060) 0x274416 JAE 274430 |
(1060) 0x274418 VXORPD %XMM11,%XMM11,%XMM11 |
(1060) 0x27441d XOR %EDX,%EDX |
(1060) 0x27441f JMP 2747d0 |
0x274424 NOPW %CS:(%RAX,%RAX,1) |
(1060) 0x274430 CMP $0x20,%ECX |
(1060) 0x274433 JAE 274450 |
(1060) 0x274435 VXORPD %XMM11,%XMM11,%XMM11 |
(1060) 0x27443a VXORPD %XMM19,%XMM19,%XMM19 |
(1060) 0x274440 XOR %EDX,%EDX |
(1060) 0x274442 JMP 2746f3 |
0x274447 NOPW (%RAX,%RAX,1) |
(1060) 0x274450 VXORPD %XMM11,%XMM11,%XMM11 |
(1060) 0x274455 VXORPD %XMM12,%XMM12,%XMM12 |
(1060) 0x27445a VXORPD %XMM13,%XMM13,%XMM13 |
(1060) 0x27445f VXORPD %XMM14,%XMM14,%XMM14 |
(1060) 0x274464 VXORPD %XMM15,%XMM15,%XMM15 |
(1060) 0x274469 VXORPD %XMM16,%XMM16,%XMM16 |
(1060) 0x27446f VXORPD %XMM17,%XMM17,%XMM17 |
(1060) 0x274475 VXORPD %XMM18,%XMM18,%XMM18 |
(1060) 0x27447b VXORPD %XMM19,%XMM19,%XMM19 |
(1060) 0x274481 VXORPD %XMM20,%XMM20,%XMM20 |
(1060) 0x274487 VXORPD %XMM21,%XMM21,%XMM21 |
(1060) 0x27448d VXORPD %XMM22,%XMM22,%XMM22 |
(1060) 0x274493 XOR %EDX,%EDX |
(1060) 0x274495 MOV %R8,%R9 |
(1060) 0x274498 NOPL (%RAX,%RAX,1) |
(1059) 0x2744a0 VMOVUPD (%R9),%ZMM24 |
(1059) 0x2744a6 VMOVUPD 0x40(%R9),%ZMM23 |
(1059) 0x2744ad VMOVUPD 0x80(%R9),%ZMM28 |
(1059) 0x2744b4 VMOVUPD -0xc0(%R12,%RDX,1),%ZMM27 |
(1059) 0x2744bc VMOVUPD -0x80(%R12,%RDX,1),%ZMM29 |
(1059) 0x2744c4 VMOVAPD %ZMM24,%ZMM25 |
(1059) 0x2744ca VPERMT2PD %ZMM23,%ZMM4,%ZMM25 |
(1059) 0x2744d0 VMOVAPD %ZMM24,%ZMM26 |
(1059) 0x2744d6 VPERMT2PD %ZMM23,%ZMM6,%ZMM26 |
(1059) 0x2744dc VPERMT2PD %ZMM24,%ZMM8,%ZMM23 |
(1059) 0x2744e2 VPERMT2PD %ZMM28,%ZMM5,%ZMM25 |
(1059) 0x2744e8 VPERMT2PD %ZMM28,%ZMM7,%ZMM26 |
(1059) 0x2744ee VPERMT2PD %ZMM28,%ZMM9,%ZMM23 |
(1059) 0x2744f4 VMOVUPD 0xc0(%R9),%ZMM28 |
(1059) 0x2744fb VFMADD231PD %ZMM25,%ZMM27,%ZMM11 |
(1059) 0x274501 VMOVUPD 0x100(%R9),%ZMM25 |
(1059) 0x274508 VFMADD231PD %ZMM23,%ZMM27,%ZMM19 |
(1059) 0x27450e VFMADD231PD %ZMM26,%ZMM27,%ZMM15 |
(1059) 0x274514 VMOVUPD 0x140(%R9),%ZMM27 |
(1059) 0x27451b VMOVAPD %ZMM28,%ZMM23 |
(1059) 0x274521 VMOVAPD %ZMM28,%ZMM24 |
(1059) 0x274527 VPERMT2PD %ZMM25,%ZMM4,%ZMM23 |
(1059) 0x27452d VPERMT2PD %ZMM25,%ZMM6,%ZMM24 |
(1059) 0x274533 VPERMT2PD %ZMM28,%ZMM8,%ZMM25 |
(1059) 0x274539 VMOVUPD 0x180(%R9),%ZMM28 |
(1059) 0x274540 VPERMT2PD %ZMM27,%ZMM5,%ZMM23 |
(1059) 0x274546 VPERMT2PD %ZMM27,%ZMM9,%ZMM25 |
(1059) 0x27454c VPERMT2PD %ZMM27,%ZMM7,%ZMM24 |
(1059) 0x274552 VMOVUPD 0x200(%R9),%ZMM27 |
(1059) 0x274559 VMOVAPD %ZMM28,%ZMM26 |
(1059) 0x27455f VFMADD231PD %ZMM23,%ZMM29,%ZMM12 |
(1059) 0x274565 VMOVUPD 0x1c0(%R9),%ZMM23 |
(1059) 0x27456c VFMADD231PD %ZMM25,%ZMM29,%ZMM20 |
(1059) 0x274572 VMOVAPD %ZMM28,%ZMM25 |
(1059) 0x274578 VFMADD231PD %ZMM24,%ZMM29,%ZMM16 |
(1059) 0x27457e VPERMT2PD %ZMM23,%ZMM4,%ZMM25 |
(1059) 0x274584 VPERMT2PD %ZMM23,%ZMM6,%ZMM26 |
(1059) 0x27458a VPERMT2PD %ZMM28,%ZMM8,%ZMM23 |
(1059) 0x274590 VMOVUPD -0x40(%R12,%RDX,1),%ZMM28 |
(1059) 0x274598 VPERMT2PD %ZMM27,%ZMM5,%ZMM25 |
(1059) 0x27459e VPERMT2PD %ZMM27,%ZMM7,%ZMM26 |
(1059) 0x2745a4 VPERMT2PD %ZMM27,%ZMM9,%ZMM23 |
(1059) 0x2745aa VMOVUPD 0x240(%R9),%ZMM27 |
(1059) 0x2745b1 VFMADD231PD %ZMM25,%ZMM28,%ZMM13 |
(1059) 0x2745b7 VMOVUPD 0x280(%R9),%ZMM25 |
(1059) 0x2745be VFMADD231PD %ZMM23,%ZMM28,%ZMM21 |
(1059) 0x2745c4 VFMADD231PD %ZMM26,%ZMM28,%ZMM17 |
(1059) 0x2745ca VMOVUPD 0x2c0(%R9),%ZMM28 |
(1059) 0x2745d1 ADD $0x300,%R9 |
(1059) 0x2745d8 VMOVAPD %ZMM27,%ZMM23 |
(1059) 0x2745de VMOVAPD %ZMM27,%ZMM24 |
(1059) 0x2745e4 VPERMT2PD %ZMM25,%ZMM4,%ZMM23 |
(1059) 0x2745ea VPERMT2PD %ZMM25,%ZMM6,%ZMM24 |
(1059) 0x2745f0 VPERMT2PD %ZMM27,%ZMM8,%ZMM25 |
(1059) 0x2745f6 VMOVUPD (%R12,%RDX,1),%ZMM27 |
(1059) 0x2745fd ADD $0x100,%RDX |
(1059) 0x274604 VPERMT2PD %ZMM28,%ZMM5,%ZMM23 |
(1059) 0x27460a VPERMT2PD %ZMM28,%ZMM7,%ZMM24 |
(1059) 0x274610 VPERMT2PD %ZMM28,%ZMM9,%ZMM25 |
(1059) 0x274616 VFMADD231PD %ZMM23,%ZMM27,%ZMM14 |
(1059) 0x27461c VFMADD231PD %ZMM24,%ZMM27,%ZMM18 |
(1059) 0x274622 VFMADD231PD %ZMM25,%ZMM27,%ZMM22 |
(1059) 0x274628 CMP %RDX,%R13 |
(1059) 0x27462b JNE 2744a0 |
(1060) 0x274631 VADDPD %ZMM15,%ZMM16,%ZMM15 |
(1060) 0x274637 VADDPD %ZMM17,%ZMM18,%ZMM16 |
(1060) 0x27463d VADDPD %ZMM11,%ZMM12,%ZMM11 |
(1060) 0x274643 VADDPD %ZMM13,%ZMM14,%ZMM13 |
(1060) 0x274649 VADDPD %ZMM19,%ZMM20,%ZMM19 |
(1060) 0x27464f VADDPD %ZMM21,%ZMM22,%ZMM20 |
(1060) 0x274655 MOV -0x30(%RBP),%RDX |
(1060) 0x274659 VADDPD %ZMM15,%ZMM16,%ZMM15 |
(1060) 0x27465f VADDPD %ZMM11,%ZMM13,%ZMM11 |
(1060) 0x274665 VADDPD %ZMM19,%ZMM20,%ZMM19 |
(1060) 0x27466b VEXTRACTF64X4 $0x1,%ZMM15,%YMM16 |
(1060) 0x274672 VEXTRACTF64X4 $0x1,%ZMM11,%YMM12 |
(1060) 0x274679 VEXTRACTF64X4 $0x1,%ZMM19,%YMM20 |
(1060) 0x274680 VADDPD %ZMM16,%ZMM15,%ZMM15 |
(1060) 0x274686 VADDPD %ZMM12,%ZMM11,%ZMM11 |
(1060) 0x27468c VADDPD %ZMM20,%ZMM19,%ZMM19 |
(1060) 0x274692 VEXTRACTF32X4 $0x1,%YMM15,%XMM16 |
(1060) 0x274699 VEXTRACTF128 $0x1,%YMM11,%XMM12 |
(1060) 0x27469f VEXTRACTF32X4 $0x1,%YMM19,%XMM20 |
(1060) 0x2746a6 VADDPD %XMM16,%XMM15,%XMM15 |
(1060) 0x2746ac VADDPD %XMM12,%XMM11,%XMM11 |
(1060) 0x2746b1 VADDPD %XMM20,%XMM19,%XMM19 |
(1060) 0x2746b7 VPERMILPD $0x1,%XMM15,%XMM16 |
(1060) 0x2746be VPERMILPD $0x1,%XMM11,%XMM12 |
(1060) 0x2746c4 VPERMILPD $0x1,%XMM19,%XMM20 |
(1060) 0x2746cb VADDSD %XMM16,%XMM15,%XMM15 |
(1060) 0x2746d1 VADDSD %XMM12,%XMM11,%XMM12 |
(1060) 0x2746d6 VADDSD %XMM20,%XMM19,%XMM19 |
(1060) 0x2746dc VUNPCKLPD %XMM15,%XMM12,%XMM11 |
(1060) 0x2746e1 CMP %RCX,%RDX |
(1060) 0x2746e4 JE 274260 |
(1060) 0x2746ea TEST $0x1c,%CL |
(1060) 0x2746ed JE 2747b4 |
(1060) 0x2746f3 LEA (%RDX,%RDX,2),%R9 |
(1060) 0x2746f7 VMOVQ %XMM11,%XMM12 |
(1060) 0x2746fc VUNPCKHPD %XMM0,%XMM11,%XMM11 |
(1060) 0x274700 VMOVQ %XMM19,%XMM13 |
(1060) 0x274706 LEA (%R8,%R9,8),%R9 |
(1060) 0x27470a NOPW (%RAX,%RAX,1) |
(1062) 0x274710 VMOVUPD (%R9),%ZMM15 |
(1062) 0x274716 VMOVUPD 0x40(%R9),%YMM16 |
(1062) 0x27471d VMOVUPD (%RSI,%RDX,8),%YMM14 |
(1062) 0x274722 ADD $0x4,%RDX |
(1062) 0x274726 ADD $0x60,%R9 |
(1062) 0x27472a VMOVAPD %ZMM15,%ZMM17 |
(1062) 0x274730 VMOVAPD %ZMM15,%ZMM18 |
(1062) 0x274736 VPERMT2PD %ZMM16,%ZMM1,%ZMM17 |
(1062) 0x27473c VPERMT2PD %ZMM16,%ZMM2,%ZMM18 |
(1062) 0x274742 VPERMT2PD %ZMM16,%ZMM3,%ZMM15 |
(1062) 0x274748 VFMADD231PD %YMM17,%YMM14,%YMM12 |
(1062) 0x27474e VFMADD231PD %YMM18,%YMM14,%YMM11 |
(1062) 0x274754 VFMADD231PD %YMM15,%YMM14,%YMM13 |
(1062) 0x274759 CMP %RDX,%R15 |
(1062) 0x27475c JNE 274710 |
(1060) 0x27475e VEXTRACTF128 $0x1,%YMM13,%XMM14 |
(1060) 0x274764 VADDPD %YMM14,%YMM13,%YMM13 |
(1060) 0x274769 VPERMILPD $0x1,%XMM13,%XMM14 |
(1060) 0x27476f VADDPD %YMM14,%YMM13,%YMM19 |
(1060) 0x274775 VEXTRACTF128 $0x1,%YMM11,%XMM13 |
(1060) 0x27477b VADDPD %XMM13,%XMM11,%XMM11 |
(1060) 0x274780 VEXTRACTF128 $0x1,%YMM12,%XMM13 |
(1060) 0x274786 VADDPD %XMM13,%XMM12,%XMM12 |
(1060) 0x27478b VUNPCKHPD %XMM11,%XMM12,%XMM13 |
(1060) 0x274790 VUNPCKLPD %XMM11,%XMM12,%XMM11 |
(1060) 0x274795 VADDPD %XMM13,%XMM11,%XMM11 |
(1060) 0x27479a CMP %RCX,%R15 |
(1060) 0x27479d JE 274260 |
(1060) 0x2747a3 VBROADCASTSD %XMM19,%YMM12 |
(1060) 0x2747a9 MOV %R15,%RDX |
(1060) 0x2747ac VBLENDPD $0xc,%YMM12,%YMM11,%YMM11 |
(1060) 0x2747b2 JMP 2747d0 |
(1060) 0x2747b4 VUNPCKLPD %XMM15,%XMM12,%XMM11 |
(1060) 0x2747b9 VINSERTF32X4 $0x1,%XMM19,%YMM11,%YMM11 |
(1060) 0x2747c0 VPERMILPD $0x2,%YMM11,%YMM11 |
(1060) 0x2747c6 NOPW %CS:(%RAX,%RAX,1) |
(1060) 0x2747d0 LEA (%RDX,%RDX,2),%R9 |
(1060) 0x2747d4 LEA (%R8,%R9,8),%R9 |
(1060) 0x2747d8 NOPL (%RAX,%RAX,1) |
(1061) 0x2747e0 VPERMILPD $0x2,(%R9),%YMM12 |
(1061) 0x2747e6 ADD $0x18,%R9 |
(1061) 0x2747ea VFMADD231PD (%RSI,%RDX,8){1to4},%YMM12,%YMM11 |
(1061) 0x2747f1 INC %RDX |
(1061) 0x2747f4 CMP %RDX,%RCX |
(1061) 0x2747f7 JNE 2747e0 |
(1060) 0x2747f9 VEXTRACTF32X4 $0x1,%YMM11,%XMM19 |
(1060) 0x274800 JMP 274260 |
0x274805 NOPW %CS:(%RAX,%RAX,1) |
(1057) 0x274810 DEC %RAX |
(1057) 0x274813 JNE 274810 |
0x274815 ADD $0x38,%RSP |
0x274819 POP %RBX |
0x27481a POP %R12 |
0x27481c POP %R13 |
0x27481e POP %R14 |
0x274820 POP %R15 |
0x274822 POP %RBP |
0x274823 VZEROUPPER |
0x274826 RET |
0x274827 INT $0x3 |
0x274828 INT $0x3 |
0x274829 INT $0x3 |
0x27482a INT $0x3 |
0x27482b INT $0x3 |
0x27482c INT $0x3 |
0x27482d INT $0x3 |
0x27482e INT $0x3 |
0x27482f INT $0x3 |
Path / |
Source file and lines | DiracDeterminantRef.cpp:156-181 |
Module | exec |
nb instructions | 120 |
nb uops | 111 |
loop length | 569 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 3 |
used zmm registers | 6 |
nb stack references | 6 |
ADD-SUB / MUL ratio | 1.33 |
micro-operation queue | 18.50 cycles |
front end | 18.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 7.00 | 7.00 | 5.00 | 17.67 | 17.67 | 17.67 | 2.50 | 2.50 | 2.67 | 2.33 | 1.50 | 1.50 |
cycles | 7.00 | 7.00 | 7.00 | 7.00 | 5.00 | 19.67 | 19.67 | 19.67 | 2.50 | 2.50 | 2.67 | 2.33 | 1.50 | 1.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 18.50 |
Dispatch | 19.67 |
Overall L1 | 19.67 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 62% |
load | 68% |
store | 33% |
mul | 66% |
add-sub | 25% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 36% |
load | 55% |
store | 11% |
mul | 66% |
add-sub | 25% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 23% |
all | 12% |
load | 10% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 42% |
load | 53% |
store | 16% |
mul | 20% |
add-sub | 15% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 28% |
load | 45% |
store | 13% |
mul | 20% |
add-sub | 15% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMPL $0,0xc(%RDI) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JNE 2740e3 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x73> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x468(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 2a0a60 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x470(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x478(%R14),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x47c(%R14),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x90(%R14),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x110(%R14),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x150(%R14),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R12,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RDI),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RAX,(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALLQ 0x28(%R10) | 4 | 0.50 | 0 | 0 | 0 | 0.50 | 0.67 | 0.67 | 0.67 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.20 |
MOV 0x468(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CALL 2a0b50 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x484(%R14),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
TEST %RAX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 274815 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x7a5> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
CMP $0x1,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JNE 27416a <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0xfa> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x100(%R14),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x140(%R14),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R15),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVDDUP (%RAX),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x478(%R14),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULPD (%RCX),%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULSD 0x10(%RCX),%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
LEA (%RAX,%RAX,2),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VADDPD (%RDX,%RSI,8),%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMULPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVUPD %XMM3,(%RDX,%RSI,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VADDSD 0x10(%RDX,%RSI,8),%XMM1,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM3,0x10(%RDX,%RSI,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VPERMILPD $0x1,%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOV 0x180(%R14),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VADDSD %XMM2,%XMM3,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOV 0x18(%RBX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VFMADD231SD %XMM1,%XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMSUB231SD (%RCX),%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD (%RDX,%RAX,8),%XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM0,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
JMP 274815 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x7a5> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV 0x480(%R14),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
TEST %ECX,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 274810 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x7a0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x18(%RBX),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x118(%R14),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVAPD -0x6fb6c(%RIP),%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x6fc14(%RIP),%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x6fbfc(%RIP),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x69326(%RIP),%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x692f0(%RIP),%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x692ba(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x69284(%RIP),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x6924e(%RIP),%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x69218(%RIP),%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x158(%R14),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0xd8(%R14),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x100(%R14),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x180(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R15),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x140(%R14),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RCX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x5,%R13 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %ECX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x4,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x8,%R13 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RBX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD 0x478(%R14),%RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
SAL $0x3,%RDX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SAL $0x3,%R11 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %ECX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x20,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x3,%R9 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA 0xc0(%RSI),%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RDX,%RDX,2),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA 0xc0(%RDI),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R14,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R9,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JMP 2742cf <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x25f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
ADD $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 |
Source file and lines | DiracDeterminantRef.cpp:156-181 |
Module | exec |
nb instructions | 120 |
nb uops | 111 |
loop length | 569 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 3 |
used zmm registers | 6 |
nb stack references | 6 |
ADD-SUB / MUL ratio | 1.33 |
micro-operation queue | 18.50 cycles |
front end | 18.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 7.00 | 7.00 | 5.00 | 17.67 | 17.67 | 17.67 | 2.50 | 2.50 | 2.67 | 2.33 | 1.50 | 1.50 |
cycles | 7.00 | 7.00 | 7.00 | 7.00 | 5.00 | 19.67 | 19.67 | 19.67 | 2.50 | 2.50 | 2.67 | 2.33 | 1.50 | 1.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 18.50 |
Dispatch | 19.67 |
Overall L1 | 19.67 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 62% |
load | 68% |
store | 33% |
mul | 66% |
add-sub | 25% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 36% |
load | 55% |
store | 11% |
mul | 66% |
add-sub | 25% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 23% |
all | 12% |
load | 10% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 42% |
load | 53% |
store | 16% |
mul | 20% |
add-sub | 15% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 28% |
load | 45% |
store | 13% |
mul | 20% |
add-sub | 15% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMPL $0,0xc(%RDI) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JNE 2740e3 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x73> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x468(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 2a0a60 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x470(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x478(%R14),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x47c(%R14),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x90(%R14),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x110(%R14),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x150(%R14),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R12,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RDI),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RAX,(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALLQ 0x28(%R10) | 4 | 0.50 | 0 | 0 | 0 | 0.50 | 0.67 | 0.67 | 0.67 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.20 |
MOV 0x468(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CALL 2a0b50 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x484(%R14),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
TEST %RAX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 274815 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x7a5> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
CMP $0x1,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JNE 27416a <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0xfa> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x100(%R14),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x140(%R14),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R15),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVDDUP (%RAX),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x478(%R14),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULPD (%RCX),%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULSD 0x10(%RCX),%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
LEA (%RAX,%RAX,2),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VADDPD (%RDX,%RSI,8),%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMULPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVUPD %XMM3,(%RDX,%RSI,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VADDSD 0x10(%RDX,%RSI,8),%XMM1,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM3,0x10(%RDX,%RSI,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VPERMILPD $0x1,%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOV 0x180(%R14),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VADDSD %XMM2,%XMM3,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOV 0x18(%RBX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VFMADD231SD %XMM1,%XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMSUB231SD (%RCX),%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD (%RDX,%RAX,8),%XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM0,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
JMP 274815 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x7a5> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV 0x480(%R14),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
TEST %ECX,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 274810 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x7a0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x18(%RBX),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x118(%R14),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVAPD -0x6fb6c(%RIP),%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x6fc14(%RIP),%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x6fbfc(%RIP),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x69326(%RIP),%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x692f0(%RIP),%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x692ba(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x69284(%RIP),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x6924e(%RIP),%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x69218(%RIP),%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x158(%R14),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0xd8(%R14),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x100(%R14),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x180(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R15),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x140(%R14),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RCX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x5,%R13 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %ECX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x4,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x8,%R13 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RBX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD 0x478(%R14),%RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
SAL $0x3,%RDX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SAL $0x3,%R11 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %ECX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x20,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x3,%R9 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA 0xc0(%RSI),%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RDX,%RDX,2),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA 0xc0(%RDI),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R14,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R9,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JMP 2742cf <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x25f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
ADD $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb– | 0.48 | 1 |
▼Loop 1060 - inner_product.hpp:82-155 - exec– | 0 | 0 |
○Loop 1059 - inner_product.hpp:155-155 - exec | 0.3 | 0.57 |
○Loop 1058 - inner_product.hpp:82-83 - exec | 0.18 | 0.33 |
○Loop 1064 - inner_product.hpp:82-83 - exec | 0 | 0 |
○Loop 1063 - inner_product.hpp:82-83 - exec | 0 | 0 |
○Loop 1062 - inner_product.hpp:155-155 - exec | 0 | 0 |
○Loop 1061 - inner_product.hpp:155-155 - exec | 0 | 0 |
○Loop 1057 - DiracDeterminantRef.cpp:173-173 - exec | 0 | 0 |