Function: miniqmcreference::DiracDeterminantRef<qmcplusplus::DelayedUpdate<double, double> >::evalua ... | Module: libqmcwfs.so | Source: DiracDeterminantRef.cpp:156-181 [...] | Coverage: 0.53% |
---|
Function: miniqmcreference::DiracDeterminantRef<qmcplusplus::DelayedUpdate<double, double> >::evalua ... | Module: libqmcwfs.so | Source: DiracDeterminantRef.cpp:156-181 [...] | Coverage: 0.53% |
---|
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/Numerics/PETE/OperatorTags.h: 63 - 94 |
-------------------------------------------------------------------------------- |
63: return (a * b); |
[...] |
94: (const_cast<T1&>(a) += b); |
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/Platforms/CPU/SIMD/inner_product.hpp: 82 - 155 |
-------------------------------------------------------------------------------- |
82: for (int i = 0; i < n; i++) |
83: res += a[i] * b[i]; |
[...] |
155: for (int i = 0; i < n; i++) |
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVectorOps.h: 129 - 131 |
-------------------------------------------------------------------------------- |
129: Type_t res = lhs[0] * rhs[0]; |
130: for (unsigned d = 1; d < D; ++d) |
131: res += lhs[d] * rhs[d]; |
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 223 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
/usr/lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h: 173 - 173 |
-------------------------------------------------------------------------------- |
173: pointer _M_ptr() const { return std::get<0>(_M_t); } |
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/DiracDeterminantRef.cpp: 156 - 181 |
-------------------------------------------------------------------------------- |
156: { |
157: if (UpdateMode == ORB_PBYP_RATIO) |
158: { //need to compute dpsiM and d2psiM. Do not touch psiM! |
159: SPOVGLTimer->start(); |
160: Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_temp, dpsiM, d2psiM); |
161: SPOVGLTimer->stop(); |
162: } |
163: |
164: if (NumPtcls == 1) |
165: { |
166: ValueType y = psiM(0, 0); |
167: GradType rv = y * dpsiM(0, 0); |
168: G[FirstIndex] += rv; |
169: L[FirstIndex] += y * d2psiM(0, 0) - dot(rv, rv); |
170: } |
171: else |
172: { |
173: for (size_t i = 0, iat = FirstIndex; i < NumPtcls; ++i, ++iat) |
174: { |
175: mValueType dot_temp = simd::dot(psiM[i], d2psiM[i], NumOrbitals); |
176: mGradType rv = simd::dot(psiM[i], dpsiM[i], NumOrbitals); |
177: G[iat] += rv; |
178: L[iat] += dot_temp - dot(rv, rv); |
179: } |
180: } |
181: } |
0x4d2d0 PUSH %RBP |
0x4d2d1 MOV %RSP,%RBP |
0x4d2d4 PUSH %R15 |
0x4d2d6 PUSH %R14 |
0x4d2d8 PUSH %R13 |
0x4d2da PUSH %R12 |
0x4d2dc PUSH %RBX |
0x4d2dd SUB $0x38,%RSP |
0x4d2e1 CMPL $0,0xc(%RDI) |
0x4d2e5 MOV %RCX,%R13 |
0x4d2e8 MOV %RDX,%R15 |
0x4d2eb MOV %RDI,%R14 |
0x4d2ee JNE 4d343 |
0x4d2f0 MOV 0x468(%R14),%RDI |
0x4d2f7 MOV %RSI,%R12 |
0x4d2fa CALL 4e900 <@plt_start@+0x250> |
0x4d2ff MOV 0x470(%R14),%RDI |
0x4d306 MOV 0x478(%R14),%EDX |
0x4d30d MOV 0x47c(%R14),%ECX |
0x4d314 LEA 0x90(%R14),%R8 |
0x4d31b LEA 0x110(%R14),%R9 |
0x4d322 LEA 0x150(%R14),%RAX |
0x4d329 MOV %R12,%RSI |
0x4d32c MOV (%RDI),%R10 |
0x4d32f MOV %RAX,(%RSP) |
0x4d333 CALLQ 0x28(%R10) |
0x4d337 MOV 0x468(%R14),%RDI |
0x4d33e CALL 4e910 <@plt_start@+0x260> |
0x4d343 MOVSXD 0x484(%R14),%R10 |
0x4d34a TEST %R10,%R10 |
0x4d34d JE 4d3c2 |
0x4d34f CMP $0x1,%R10D |
0x4d353 JNE 4d3d4 |
0x4d355 MOV 0x100(%R14),%RAX |
0x4d35c MOV 0x140(%R14),%RCX |
0x4d363 MOV 0x18(%R15),%RDX |
0x4d367 VMOVDDUP (%RAX),%XMM0 |
0x4d36b MOVSXD 0x478(%R14),%RAX |
0x4d372 VMULPD (%RCX),%XMM0,%XMM2 |
0x4d376 VMULSD 0x10(%RCX),%XMM0,%XMM1 |
0x4d37b LEA (%RAX,%RAX,2),%RSI |
0x4d37f VADDPD (%RDX,%RSI,8),%XMM2,%XMM3 |
0x4d384 VMULPD %XMM2,%XMM2,%XMM2 |
0x4d388 VMOVUPD %XMM3,(%RDX,%RSI,8) |
0x4d38d VADDSD 0x10(%RDX,%RSI,8),%XMM1,%XMM3 |
0x4d393 VMOVSD %XMM3,0x10(%RDX,%RSI,8) |
0x4d399 VPERMILPD $0x1,%XMM2,%XMM3 |
0x4d39f MOV 0x180(%R14),%RCX |
0x4d3a6 VADDSD %XMM2,%XMM3,%XMM2 |
0x4d3aa MOV 0x18(%R13),%RDX |
0x4d3ae VFMADD231SD %XMM1,%XMM1,%XMM2 |
0x4d3b3 VFMSUB231SD (%RCX),%XMM0,%XMM2 |
0x4d3b8 VADDSD (%RDX,%RAX,8),%XMM2,%XMM0 |
0x4d3bd VMOVSD %XMM0,(%RDX,%RAX,8) |
0x4d3c2 ADD $0x38,%RSP |
0x4d3c6 POP %RBX |
0x4d3c7 POP %R12 |
0x4d3c9 POP %R13 |
0x4d3cb POP %R14 |
0x4d3cd POP %R15 |
0x4d3cf POP %RBP |
0x4d3d0 VZEROUPPER |
0x4d3d3 RET |
0x4d3d4 MOV 0x118(%R14),%RAX |
0x4d3db VMOVAPD -0x44fe3(%RIP),%YMM1 |
0x4d3e3 VMOVAPD -0x4506b(%RIP),%YMM2 |
0x4d3eb VMOVAPD -0x45053(%RIP),%YMM3 |
0x4d3f3 VMOVAPD -0x43e3d(%RIP),%ZMM4 |
0x4d3fd VMOVAPD -0x43e07(%RIP),%ZMM5 |
0x4d407 VMOVAPD -0x43dd1(%RIP),%ZMM6 |
0x4d411 VMOVAPD -0x43d9b(%RIP),%ZMM7 |
0x4d41b VMOVAPD -0x43d65(%RIP),%ZMM8 |
0x4d425 VMOVAPD -0x43d2f(%RIP),%ZMM9 |
0x4d42f MOV 0x480(%R14),%R9D |
0x4d436 MOV 0xd8(%R14),%RDX |
0x4d43d MOV 0x158(%R14),%R8 |
0x4d444 MOVSXD 0x478(%R14),%RCX |
0x4d44b MOV 0x100(%R14),%RSI |
0x4d452 MOV 0x180(%R14),%RDI |
0x4d459 MOV 0x140(%R14),%RBX |
0x4d460 MOV 0x18(%R13),%R14 |
0x4d464 MOV 0x18(%R15),%R11 |
0x4d468 VXORPD %XMM0,%XMM0,%XMM0 |
0x4d46c MOV %R10,-0x30(%RBP) |
0x4d470 SAL $0x3,%RAX |
0x4d474 MOV %R9,%R12 |
0x4d477 SHR $0x5,%R12 |
0x4d47b MOV %R9D,%R15D |
0x4d47e MOV %R14,-0x48(%RBP) |
0x4d482 MOV %R9D,%R14D |
0x4d485 AND $-0x20,%R14D |
0x4d489 SAL $0x3,%RDX |
0x4d48d AND $-0x4,%R15D |
0x4d491 SAL $0x3,%R8 |
0x4d495 LEA 0xc0(%RDI),%R13 |
0x4d49c LEA (%RAX,%RAX,2),%RAX |
0x4d4a0 SAL $0x8,%R12 |
0x4d4a4 MOV %R14,-0x38(%RBP) |
0x4d4a8 MOV %RDX,-0x50(%RBP) |
0x4d4ac XOR %R14D,%R14D |
0x4d4af MOV %RAX,-0x40(%RBP) |
0x4d4b3 LEA 0xc0(%RSI),%RAX |
0x4d4ba JMP 4d53b |
0x4d4bc NOPL (%RAX) |
(913) 0x4d4c0 VXORPD %XMM19,%XMM19,%XMM19 |
(913) 0x4d4c6 VXORPD %XMM10,%XMM10,%XMM10 |
(913) 0x4d4cb VXORPD %XMM11,%XMM11,%XMM11 |
(913) 0x4d4d0 LEA (%RCX,%RCX,2),%RDX |
(913) 0x4d4d4 ADD -0x40(%RBP),%RBX |
(913) 0x4d4d8 INC %R14 |
(913) 0x4d4db ADD %R8,%R13 |
(913) 0x4d4de ADD %R8,%RDI |
(913) 0x4d4e1 VADDPD (%R11,%RDX,8),%XMM11,%XMM12 |
(913) 0x4d4e7 VMULPD %XMM11,%XMM11,%XMM11 |
(913) 0x4d4ec VMOVUPD %XMM12,(%R11,%RDX,8) |
(913) 0x4d4f2 VADDSD 0x10(%R11,%RDX,8),%XMM19,%XMM12 |
(913) 0x4d4fa VMOVSD %XMM12,0x10(%R11,%RDX,8) |
(913) 0x4d501 VPERMILPD $0x1,%XMM11,%XMM12 |
(913) 0x4d507 MOV -0x48(%RBP),%RDX |
(913) 0x4d50b VADDSD %XMM11,%XMM12,%XMM11 |
(913) 0x4d510 VFNMSUB231SD %XMM19,%XMM19,%XMM11 |
(913) 0x4d516 VADDSD %XMM11,%XMM10,%XMM10 |
(913) 0x4d51b VADDSD (%RDX,%RCX,8),%XMM10,%XMM10 |
(913) 0x4d520 VMOVSD %XMM10,(%RDX,%RCX,8) |
(913) 0x4d525 MOV -0x50(%RBP),%RDX |
(913) 0x4d529 INC %RCX |
(913) 0x4d52c ADD %RDX,%RAX |
(913) 0x4d52f ADD %RDX,%RSI |
(913) 0x4d532 CMP %R10,%R14 |
(913) 0x4d535 JE 4d3c2 |
(913) 0x4d53b TEST %R9D,%R9D |
(913) 0x4d53e JLE 4d4c0 |
(913) 0x4d540 CMP $0x4,%R9D |
(913) 0x4d544 JAE 4d560 |
(913) 0x4d546 VXORPD %XMM10,%XMM10,%XMM10 |
(913) 0x4d54b XOR %EDX,%EDX |
(913) 0x4d54d JMP 4d670 |
0x4d552 NOPW %CS:(%RAX,%RAX,1) |
(913) 0x4d560 CMP $0x20,%R9D |
(913) 0x4d564 JAE 4d573 |
(913) 0x4d566 VXORPD %XMM10,%XMM10,%XMM10 |
(913) 0x4d56b XOR %R10D,%R10D |
(913) 0x4d56e JMP 4d622 |
(913) 0x4d573 VXORPD %XMM10,%XMM10,%XMM10 |
(913) 0x4d578 VXORPD %XMM11,%XMM11,%XMM11 |
(913) 0x4d57d VXORPD %XMM12,%XMM12,%XMM12 |
(913) 0x4d582 VXORPD %XMM13,%XMM13,%XMM13 |
(913) 0x4d587 XOR %EDX,%EDX |
(913) 0x4d589 NOPL (%RAX) |
(914) 0x4d590 VMOVUPD -0xc0(%R13,%RDX,1),%ZMM14 |
(914) 0x4d598 VMOVUPD -0x80(%R13,%RDX,1),%ZMM15 |
(914) 0x4d5a0 VMOVUPD -0x40(%R13,%RDX,1),%ZMM16 |
(914) 0x4d5a8 VMOVUPD (%R13,%RDX,1),%ZMM17 |
(914) 0x4d5b0 VFMADD231PD -0xc0(%RAX,%RDX,1),%ZMM14,%ZMM10 |
(914) 0x4d5b8 VFMADD231PD -0x80(%RAX,%RDX,1),%ZMM15,%ZMM11 |
(914) 0x4d5c0 VFMADD231PD -0x40(%RAX,%RDX,1),%ZMM16,%ZMM12 |
(914) 0x4d5c8 VFMADD231PD (%RAX,%RDX,1),%ZMM17,%ZMM13 |
(914) 0x4d5cf ADD $0x100,%RDX |
(914) 0x4d5d6 CMP %RDX,%R12 |
(914) 0x4d5d9 JNE 4d590 |
(913) 0x4d5db VADDPD %ZMM10,%ZMM11,%ZMM10 |
(913) 0x4d5e1 VADDPD %ZMM12,%ZMM13,%ZMM12 |
(913) 0x4d5e7 MOV -0x38(%RBP),%RDX |
(913) 0x4d5eb VADDPD %ZMM10,%ZMM12,%ZMM10 |
(913) 0x4d5f1 VEXTRACTF64X4 $0x1,%ZMM10,%YMM11 |
(913) 0x4d5f8 VADDPD %ZMM11,%ZMM10,%ZMM10 |
(913) 0x4d5fe VEXTRACTF128 $0x1,%YMM10,%XMM11 |
(913) 0x4d604 VADDPD %XMM11,%XMM10,%XMM10 |
(913) 0x4d609 VPERMILPD $0x1,%XMM10,%XMM11 |
(913) 0x4d60f VADDSD %XMM11,%XMM10,%XMM10 |
(913) 0x4d614 CMP %R9,%RDX |
(913) 0x4d617 JE 4d683 |
(913) 0x4d619 MOV %RDX,%R10 |
(913) 0x4d61c TEST $0x1c,%R9B |
(913) 0x4d620 JE 4d670 |
(913) 0x4d622 VMOVQ %XMM10,%XMM10 |
(913) 0x4d627 NOPW (%RAX,%RAX,1) |
(919) 0x4d630 VMOVUPD (%RDI,%R10,8),%YMM11 |
(919) 0x4d636 VFMADD231PD (%RSI,%R10,8),%YMM11,%YMM10 |
(919) 0x4d63c ADD $0x4,%R10 |
(919) 0x4d640 CMP %R10,%R15 |
(919) 0x4d643 JNE 4d630 |
(913) 0x4d645 VEXTRACTF128 $0x1,%YMM10,%XMM11 |
(913) 0x4d64b MOV %R15,%RDX |
(913) 0x4d64e VADDPD %XMM11,%XMM10,%XMM10 |
(913) 0x4d653 VPERMILPD $0x1,%XMM10,%XMM11 |
(913) 0x4d659 VADDSD %XMM11,%XMM10,%XMM10 |
(913) 0x4d65e CMP %R9,%R15 |
(913) 0x4d661 JE 4d683 |
(913) 0x4d663 NOPW %CS:(%RAX,%RAX,1) |
(918) 0x4d670 VMOVSD (%RDI,%RDX,8),%XMM11 |
(918) 0x4d675 VFMADD231SD (%RSI,%RDX,8),%XMM11,%XMM10 |
(918) 0x4d67b INC %RDX |
(918) 0x4d67e CMP %RDX,%R9 |
(918) 0x4d681 JNE 4d670 |
(913) 0x4d683 CMP $0x4,%R9D |
(913) 0x4d687 JAE 4d6a0 |
(913) 0x4d689 VXORPD %XMM11,%XMM11,%XMM11 |
(913) 0x4d68e XOR %EDX,%EDX |
(913) 0x4d690 JMP 4da30 |
0x4d695 NOPW %CS:(%RAX,%RAX,1) |
(913) 0x4d6a0 CMP $0x20,%R9D |
(913) 0x4d6a4 JAE 4d6b8 |
(913) 0x4d6a6 VXORPD %XMM11,%XMM11,%XMM11 |
(913) 0x4d6ab VXORPD %XMM19,%XMM19,%XMM19 |
(913) 0x4d6b1 XOR %EDX,%EDX |
(913) 0x4d6b3 JMP 4d954 |
(913) 0x4d6b8 VXORPD %XMM11,%XMM11,%XMM11 |
(913) 0x4d6bd VXORPD %XMM12,%XMM12,%XMM12 |
(913) 0x4d6c2 VXORPD %XMM13,%XMM13,%XMM13 |
(913) 0x4d6c7 VXORPD %XMM14,%XMM14,%XMM14 |
(913) 0x4d6cc VXORPD %XMM15,%XMM15,%XMM15 |
(913) 0x4d6d1 VXORPD %XMM16,%XMM16,%XMM16 |
(913) 0x4d6d7 VXORPD %XMM17,%XMM17,%XMM17 |
(913) 0x4d6dd VXORPD %XMM18,%XMM18,%XMM18 |
(913) 0x4d6e3 VXORPD %XMM19,%XMM19,%XMM19 |
(913) 0x4d6e9 VXORPD %XMM20,%XMM20,%XMM20 |
(913) 0x4d6ef VXORPD %XMM21,%XMM21,%XMM21 |
(913) 0x4d6f5 VXORPD %XMM22,%XMM22,%XMM22 |
(913) 0x4d6fb MOV %RBX,%R10 |
(913) 0x4d6fe XOR %EDX,%EDX |
(915) 0x4d700 VMOVUPD (%R10),%ZMM24 |
(915) 0x4d706 VMOVUPD 0x40(%R10),%ZMM23 |
(915) 0x4d70d VMOVUPD 0x80(%R10),%ZMM28 |
(915) 0x4d714 VMOVUPD -0xc0(%RAX,%RDX,1),%ZMM27 |
(915) 0x4d71c VMOVUPD -0x80(%RAX,%RDX,1),%ZMM29 |
(915) 0x4d724 VMOVAPD %ZMM24,%ZMM25 |
(915) 0x4d72a VPERMT2PD %ZMM23,%ZMM4,%ZMM25 |
(915) 0x4d730 VMOVAPD %ZMM24,%ZMM26 |
(915) 0x4d736 VPERMT2PD %ZMM23,%ZMM6,%ZMM26 |
(915) 0x4d73c VPERMT2PD %ZMM24,%ZMM8,%ZMM23 |
(915) 0x4d742 VPERMT2PD %ZMM28,%ZMM5,%ZMM25 |
(915) 0x4d748 VPERMT2PD %ZMM28,%ZMM7,%ZMM26 |
(915) 0x4d74e VPERMT2PD %ZMM28,%ZMM9,%ZMM23 |
(915) 0x4d754 VMOVUPD 0xc0(%R10),%ZMM28 |
(915) 0x4d75b VFMADD231PD %ZMM25,%ZMM27,%ZMM11 |
(915) 0x4d761 VMOVUPD 0x100(%R10),%ZMM25 |
(915) 0x4d768 VFMADD231PD %ZMM23,%ZMM27,%ZMM19 |
(915) 0x4d76e VFMADD231PD %ZMM26,%ZMM27,%ZMM15 |
(915) 0x4d774 VMOVUPD 0x140(%R10),%ZMM27 |
(915) 0x4d77b VMOVAPD %ZMM28,%ZMM23 |
(915) 0x4d781 VMOVAPD %ZMM28,%ZMM24 |
(915) 0x4d787 VPERMT2PD %ZMM25,%ZMM4,%ZMM23 |
(915) 0x4d78d VPERMT2PD %ZMM25,%ZMM6,%ZMM24 |
(915) 0x4d793 VPERMT2PD %ZMM28,%ZMM8,%ZMM25 |
(915) 0x4d799 VMOVUPD 0x180(%R10),%ZMM28 |
(915) 0x4d7a0 VPERMT2PD %ZMM27,%ZMM5,%ZMM23 |
(915) 0x4d7a6 VPERMT2PD %ZMM27,%ZMM9,%ZMM25 |
(915) 0x4d7ac VPERMT2PD %ZMM27,%ZMM7,%ZMM24 |
(915) 0x4d7b2 VMOVUPD 0x200(%R10),%ZMM27 |
(915) 0x4d7b9 VMOVAPD %ZMM28,%ZMM26 |
(915) 0x4d7bf VFMADD231PD %ZMM23,%ZMM29,%ZMM12 |
(915) 0x4d7c5 VMOVUPD 0x1c0(%R10),%ZMM23 |
(915) 0x4d7cc VFMADD231PD %ZMM25,%ZMM29,%ZMM20 |
(915) 0x4d7d2 VMOVAPD %ZMM28,%ZMM25 |
(915) 0x4d7d8 VFMADD231PD %ZMM24,%ZMM29,%ZMM16 |
(915) 0x4d7de VPERMT2PD %ZMM23,%ZMM4,%ZMM25 |
(915) 0x4d7e4 VPERMT2PD %ZMM23,%ZMM6,%ZMM26 |
(915) 0x4d7ea VPERMT2PD %ZMM28,%ZMM8,%ZMM23 |
(915) 0x4d7f0 VMOVUPD -0x40(%RAX,%RDX,1),%ZMM28 |
(915) 0x4d7f8 VPERMT2PD %ZMM27,%ZMM5,%ZMM25 |
(915) 0x4d7fe VPERMT2PD %ZMM27,%ZMM7,%ZMM26 |
(915) 0x4d804 VPERMT2PD %ZMM27,%ZMM9,%ZMM23 |
(915) 0x4d80a VMOVUPD 0x240(%R10),%ZMM27 |
(915) 0x4d811 VFMADD231PD %ZMM25,%ZMM28,%ZMM13 |
(915) 0x4d817 VMOVUPD 0x280(%R10),%ZMM25 |
(915) 0x4d81e VFMADD231PD %ZMM23,%ZMM28,%ZMM21 |
(915) 0x4d824 VFMADD231PD %ZMM26,%ZMM28,%ZMM17 |
(915) 0x4d82a VMOVUPD 0x2c0(%R10),%ZMM28 |
(915) 0x4d831 ADD $0x300,%R10 |
(915) 0x4d838 VMOVAPD %ZMM27,%ZMM23 |
(915) 0x4d83e VMOVAPD %ZMM27,%ZMM24 |
(915) 0x4d844 VPERMT2PD %ZMM25,%ZMM4,%ZMM23 |
(915) 0x4d84a VPERMT2PD %ZMM25,%ZMM6,%ZMM24 |
(915) 0x4d850 VPERMT2PD %ZMM27,%ZMM8,%ZMM25 |
(915) 0x4d856 VMOVUPD (%RAX,%RDX,1),%ZMM27 |
(915) 0x4d85d ADD $0x100,%RDX |
(915) 0x4d864 VPERMT2PD %ZMM28,%ZMM5,%ZMM23 |
(915) 0x4d86a VPERMT2PD %ZMM28,%ZMM7,%ZMM24 |
(915) 0x4d870 VPERMT2PD %ZMM28,%ZMM9,%ZMM25 |
(915) 0x4d876 VFMADD231PD %ZMM23,%ZMM27,%ZMM14 |
(915) 0x4d87c VFMADD231PD %ZMM24,%ZMM27,%ZMM18 |
(915) 0x4d882 VFMADD231PD %ZMM25,%ZMM27,%ZMM22 |
(915) 0x4d888 CMP %RDX,%R12 |
(915) 0x4d88b JNE 4d700 |
(913) 0x4d891 VADDPD %ZMM15,%ZMM16,%ZMM15 |
(913) 0x4d897 VADDPD %ZMM17,%ZMM18,%ZMM16 |
(913) 0x4d89d VADDPD %ZMM11,%ZMM12,%ZMM11 |
(913) 0x4d8a3 VADDPD %ZMM13,%ZMM14,%ZMM13 |
(913) 0x4d8a9 VADDPD %ZMM19,%ZMM20,%ZMM19 |
(913) 0x4d8af VADDPD %ZMM21,%ZMM22,%ZMM20 |
(913) 0x4d8b5 MOV -0x38(%RBP),%RDX |
(913) 0x4d8b9 VADDPD %ZMM15,%ZMM16,%ZMM15 |
(913) 0x4d8bf VADDPD %ZMM11,%ZMM13,%ZMM11 |
(913) 0x4d8c5 VADDPD %ZMM19,%ZMM20,%ZMM19 |
(913) 0x4d8cb VEXTRACTF64X4 $0x1,%ZMM15,%YMM16 |
(913) 0x4d8d2 VEXTRACTF64X4 $0x1,%ZMM11,%YMM12 |
(913) 0x4d8d9 VEXTRACTF64X4 $0x1,%ZMM19,%YMM20 |
(913) 0x4d8e0 VADDPD %ZMM16,%ZMM15,%ZMM15 |
(913) 0x4d8e6 VADDPD %ZMM12,%ZMM11,%ZMM11 |
(913) 0x4d8ec VADDPD %ZMM20,%ZMM19,%ZMM19 |
(913) 0x4d8f2 VEXTRACTF32X4 $0x1,%YMM15,%XMM16 |
(913) 0x4d8f9 VEXTRACTF128 $0x1,%YMM11,%XMM12 |
(913) 0x4d8ff VEXTRACTF32X4 $0x1,%YMM19,%XMM20 |
(913) 0x4d906 VADDPD %XMM16,%XMM15,%XMM15 |
(913) 0x4d90c VADDPD %XMM12,%XMM11,%XMM11 |
(913) 0x4d911 VADDPD %XMM20,%XMM19,%XMM19 |
(913) 0x4d917 VPERMILPD $0x1,%XMM15,%XMM16 |
(913) 0x4d91e VPERMILPD $0x1,%XMM11,%XMM12 |
(913) 0x4d924 VPERMILPD $0x1,%XMM19,%XMM20 |
(913) 0x4d92b VADDSD %XMM16,%XMM15,%XMM15 |
(913) 0x4d931 VADDSD %XMM12,%XMM11,%XMM12 |
(913) 0x4d936 VADDSD %XMM20,%XMM19,%XMM19 |
(913) 0x4d93c VUNPCKLPD %XMM15,%XMM12,%XMM11 |
(913) 0x4d941 CMP %R9,%RDX |
(913) 0x4d944 JE 4da60 |
(913) 0x4d94a TEST $0x1c,%R9B |
(913) 0x4d94e JE 4da10 |
(913) 0x4d954 LEA (%RDX,%RDX,2),%R10 |
(913) 0x4d958 VMOVQ %XMM11,%XMM12 |
(913) 0x4d95d VUNPCKHPD %XMM0,%XMM11,%XMM11 |
(913) 0x4d961 VMOVQ %XMM19,%XMM13 |
(913) 0x4d967 LEA (%RBX,%R10,8),%R10 |
(913) 0x4d96b NOPL (%RAX,%RAX,1) |
(917) 0x4d970 VMOVUPD (%R10),%ZMM15 |
(917) 0x4d976 VMOVUPD 0x40(%R10),%YMM16 |
(917) 0x4d97d VMOVUPD (%RSI,%RDX,8),%YMM14 |
(917) 0x4d982 ADD $0x4,%RDX |
(917) 0x4d986 ADD $0x60,%R10 |
(917) 0x4d98a VMOVAPD %ZMM15,%ZMM17 |
(917) 0x4d990 VMOVAPD %ZMM15,%ZMM18 |
(917) 0x4d996 VPERMT2PD %ZMM16,%ZMM1,%ZMM17 |
(917) 0x4d99c VPERMT2PD %ZMM16,%ZMM2,%ZMM18 |
(917) 0x4d9a2 VPERMT2PD %ZMM16,%ZMM3,%ZMM15 |
(917) 0x4d9a8 VFMADD231PD %YMM17,%YMM14,%YMM12 |
(917) 0x4d9ae VFMADD231PD %YMM18,%YMM14,%YMM11 |
(917) 0x4d9b4 VFMADD231PD %YMM15,%YMM14,%YMM13 |
(917) 0x4d9b9 CMP %RDX,%R15 |
(917) 0x4d9bc JNE 4d970 |
(913) 0x4d9be VEXTRACTF128 $0x1,%YMM13,%XMM14 |
(913) 0x4d9c4 VADDPD %YMM14,%YMM13,%YMM13 |
(913) 0x4d9c9 VPERMILPD $0x1,%XMM13,%XMM14 |
(913) 0x4d9cf VADDPD %YMM14,%YMM13,%YMM19 |
(913) 0x4d9d5 VEXTRACTF128 $0x1,%YMM11,%XMM13 |
(913) 0x4d9db VADDPD %XMM13,%XMM11,%XMM11 |
(913) 0x4d9e0 VEXTRACTF128 $0x1,%YMM12,%XMM13 |
(913) 0x4d9e6 VADDPD %XMM13,%XMM12,%XMM12 |
(913) 0x4d9eb VUNPCKHPD %XMM11,%XMM12,%XMM13 |
(913) 0x4d9f0 VUNPCKLPD %XMM11,%XMM12,%XMM11 |
(913) 0x4d9f5 VADDPD %XMM13,%XMM11,%XMM11 |
(913) 0x4d9fa CMP %R9,%R15 |
(913) 0x4d9fd JE 4da60 |
(913) 0x4d9ff VBROADCASTSD %XMM19,%YMM12 |
(913) 0x4da05 MOV %R15,%RDX |
(913) 0x4da08 VBLENDPD $0xc,%YMM12,%YMM11,%YMM11 |
(913) 0x4da0e JMP 4da30 |
(913) 0x4da10 VUNPCKLPD %XMM15,%XMM12,%XMM11 |
(913) 0x4da15 VINSERTF32X4 $0x1,%XMM19,%YMM11,%YMM11 |
(913) 0x4da1c VPERMILPD $0x2,%YMM11,%YMM11 |
(913) 0x4da22 NOPW %CS:(%RAX,%RAX,1) |
(913) 0x4da30 LEA (%RDX,%RDX,2),%R10 |
(913) 0x4da34 LEA (%RBX,%R10,8),%R10 |
(913) 0x4da38 NOPL (%RAX,%RAX,1) |
(916) 0x4da40 VPERMILPD $0x2,(%R10),%YMM12 |
(916) 0x4da46 ADD $0x18,%R10 |
(916) 0x4da4a VFMADD231PD (%RSI,%RDX,8){1to4},%YMM12,%YMM11 |
(916) 0x4da51 INC %RDX |
(916) 0x4da54 CMP %RDX,%R9 |
(916) 0x4da57 JNE 4da40 |
(913) 0x4da59 VEXTRACTF32X4 $0x1,%YMM11,%XMM19 |
(913) 0x4da60 MOV -0x30(%RBP),%R10 |
(913) 0x4da64 JMP 4d4d0 |
0x4da69 INT $0x3 |
0x4da6a INT $0x3 |
0x4da6b INT $0x3 |
0x4da6c INT $0x3 |
0x4da6d INT $0x3 |
0x4da6e INT $0x3 |
0x4da6f INT $0x3 |
Path / |
Source file and lines | DiracDeterminantRef.cpp:156-181 |
Module | libqmcwfs.so |
nb instructions | 113 |
nb uops | 108 |
loop length | 528 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 3 |
used zmm registers | 6 |
nb stack references | 6 |
ADD-SUB / MUL ratio | 1.33 |
micro-operation queue | 18.00 cycles |
front end | 18.00 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.50 | 6.50 | 6.50 | 6.50 | 4.00 | 17.67 | 17.67 | 17.67 | 2.50 | 2.50 | 2.67 | 2.33 | 1.50 | 1.50 |
cycles | 6.50 | 6.50 | 6.50 | 6.50 | 4.00 | 19.67 | 19.67 | 19.67 | 2.50 | 2.50 | 2.67 | 2.33 | 1.50 | 1.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 18.00 |
Dispatch | 19.67 |
Overall L1 | 19.67 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 62% |
load | 68% |
store | 33% |
mul | 66% |
add-sub | 25% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 37% |
load | 52% |
store | 11% |
mul | 66% |
add-sub | 25% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 27% |
all | 12% |
load | 10% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 42% |
load | 53% |
store | 16% |
mul | 20% |
add-sub | 15% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 28% |
load | 43% |
store | 13% |
mul | 20% |
add-sub | 15% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMPL $0,0xc(%RDI) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %RCX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JNE 4d343 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x73> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x468(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 4e900 <@plt_start@+0x250> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x470(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x478(%R14),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x47c(%R14),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x90(%R14),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x110(%R14),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x150(%R14),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R12,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RDI),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RAX,(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALLQ 0x28(%R10) | 4 | 0.50 | 0 | 0 | 0 | 0.50 | 0.67 | 0.67 | 0.67 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.20 |
MOV 0x468(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CALL 4e910 <@plt_start@+0x260> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x484(%R14),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
TEST %R10,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 4d3c2 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0xf2> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
CMP $0x1,%R10D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JNE 4d3d4 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x104> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x100(%R14),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x140(%R14),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R15),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVDDUP (%RAX),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x478(%R14),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULPD (%RCX),%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULSD 0x10(%RCX),%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
LEA (%RAX,%RAX,2),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VADDPD (%RDX,%RSI,8),%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMULPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVUPD %XMM3,(%RDX,%RSI,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VADDSD 0x10(%RDX,%RSI,8),%XMM1,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM3,0x10(%RDX,%RSI,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VPERMILPD $0x1,%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOV 0x180(%R14),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VADDSD %XMM2,%XMM3,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOV 0x18(%R13),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VFMADD231SD %XMM1,%XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMSUB231SD (%RCX),%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD (%RDX,%RAX,8),%XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM0,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
ADD $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x118(%R14),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVAPD -0x44fe3(%RIP),%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x4506b(%RIP),%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x45053(%RIP),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x43e3d(%RIP),%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x43e07(%RIP),%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x43dd1(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x43d9b(%RIP),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x43d65(%RIP),%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x43d2f(%RIP),%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x480(%R14),%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0xd8(%R14),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x158(%R14),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVSXD 0x478(%R14),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV 0x100(%R14),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x180(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x140(%R14),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R13),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R15),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SAL $0x3,%RAX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R9,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x5,%R12 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R9D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R14,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R9D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x20,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x3,%RDX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
AND $-0x4,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x3,%R8 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA 0xc0(%RDI),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%RAX,2),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x8,%R12 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R14,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA 0xc0(%RSI),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JMP 4d53b <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x26b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 |
Source file and lines | DiracDeterminantRef.cpp:156-181 |
Module | libqmcwfs.so |
nb instructions | 113 |
nb uops | 108 |
loop length | 528 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 3 |
used zmm registers | 6 |
nb stack references | 6 |
ADD-SUB / MUL ratio | 1.33 |
micro-operation queue | 18.00 cycles |
front end | 18.00 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.50 | 6.50 | 6.50 | 6.50 | 4.00 | 17.67 | 17.67 | 17.67 | 2.50 | 2.50 | 2.67 | 2.33 | 1.50 | 1.50 |
cycles | 6.50 | 6.50 | 6.50 | 6.50 | 4.00 | 19.67 | 19.67 | 19.67 | 2.50 | 2.50 | 2.67 | 2.33 | 1.50 | 1.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 18.00 |
Dispatch | 19.67 |
Overall L1 | 19.67 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 62% |
load | 68% |
store | 33% |
mul | 66% |
add-sub | 25% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 37% |
load | 52% |
store | 11% |
mul | 66% |
add-sub | 25% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 27% |
all | 12% |
load | 10% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 42% |
load | 53% |
store | 16% |
mul | 20% |
add-sub | 15% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 28% |
load | 43% |
store | 13% |
mul | 20% |
add-sub | 15% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMPL $0,0xc(%RDI) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %RCX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JNE 4d343 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x73> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x468(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 4e900 <@plt_start@+0x250> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x470(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x478(%R14),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x47c(%R14),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x90(%R14),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x110(%R14),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x150(%R14),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R12,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RDI),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RAX,(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALLQ 0x28(%R10) | 4 | 0.50 | 0 | 0 | 0 | 0.50 | 0.67 | 0.67 | 0.67 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.20 |
MOV 0x468(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CALL 4e910 <@plt_start@+0x260> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x484(%R14),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
TEST %R10,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 4d3c2 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0xf2> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
CMP $0x1,%R10D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JNE 4d3d4 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x104> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x100(%R14),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x140(%R14),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R15),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVDDUP (%RAX),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x478(%R14),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULPD (%RCX),%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULSD 0x10(%RCX),%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
LEA (%RAX,%RAX,2),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VADDPD (%RDX,%RSI,8),%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMULPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVUPD %XMM3,(%RDX,%RSI,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VADDSD 0x10(%RDX,%RSI,8),%XMM1,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM3,0x10(%RDX,%RSI,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VPERMILPD $0x1,%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOV 0x180(%R14),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VADDSD %XMM2,%XMM3,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOV 0x18(%R13),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VFMADD231SD %XMM1,%XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMSUB231SD (%RCX),%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD (%RDX,%RAX,8),%XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM0,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
ADD $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x118(%R14),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVAPD -0x44fe3(%RIP),%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x4506b(%RIP),%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x45053(%RIP),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x43e3d(%RIP),%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x43e07(%RIP),%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x43dd1(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x43d9b(%RIP),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x43d65(%RIP),%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x43d2f(%RIP),%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x480(%R14),%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0xd8(%R14),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x158(%R14),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVSXD 0x478(%R14),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV 0x100(%R14),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x180(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x140(%R14),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R13),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R15),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SAL $0x3,%RAX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R9,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x5,%R12 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R9D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R14,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R9D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x20,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x3,%RDX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
AND $-0x4,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x3,%R8 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA 0xc0(%RDI),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%RAX,2),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x8,%R12 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R14,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA 0xc0(%RSI),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JMP 4d53b <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x26b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼miniqmcreference::DiracDeterminantRef | 0.53 | 1 |
▼Loop 913 - OhmmsVector.h:223-223 - libqmcwfs.so– | 0 | 0.01 |
○Loop 915 - inner_product.hpp:155-155 - libqmcwfs.so | 0.33 | 0.6 |
○Loop 914 - inner_product.hpp:82-83 - libqmcwfs.so | 0.21 | 0.38 |
○Loop 916 - inner_product.hpp:155-155 - libqmcwfs.so | 0 | 0 |
○Loop 918 - inner_product.hpp:82-83 - libqmcwfs.so | 0 | 0 |
○Loop 919 - inner_product.hpp:82-83 - libqmcwfs.so | 0 | 0 |
○Loop 917 - inner_product.hpp:155-155 - libqmcwfs.so | 0 | 0 |