Function: _ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGL ... | Module: libqmcwfs.so | Source: DiracDeterminantRef.cpp:156-181 [...] | Coverage: 0.48% |
---|
Function: _ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGL ... | Module: libqmcwfs.so | Source: DiracDeterminantRef.cpp:156-181 [...] | Coverage: 0.48% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Platforms/CPU/SIMD/inner_product.hpp: 82 - 155 |
-------------------------------------------------------------------------------- |
82: for (int i = 0; i < n; i++) |
83: res += a[i] * b[i]; |
[...] |
155: for (int i = 0; i < n; i++) |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVectorOps.h: 129 - 131 |
-------------------------------------------------------------------------------- |
129: Type_t res = lhs[0] * rhs[0]; |
130: for (unsigned d = 1; d < D; ++d) |
131: res += lhs[d] * rhs[d]; |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 223 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Numerics/PETE/OperatorTags.h: 63 - 94 |
-------------------------------------------------------------------------------- |
63: return (a * b); |
[...] |
94: (const_cast<T1&>(a) += b); |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/unique_ptr.h: 173 - 173 |
-------------------------------------------------------------------------------- |
173: pointer _M_ptr() const { return std::get<0>(_M_t); } |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/DiracDeterminantRef.cpp: 156 - 181 |
-------------------------------------------------------------------------------- |
156: { |
157: if (UpdateMode == ORB_PBYP_RATIO) |
158: { //need to compute dpsiM and d2psiM. Do not touch psiM! |
159: SPOVGLTimer->start(); |
160: Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_temp, dpsiM, d2psiM); |
161: SPOVGLTimer->stop(); |
162: } |
163: |
164: if (NumPtcls == 1) |
165: { |
166: ValueType y = psiM(0, 0); |
167: GradType rv = y * dpsiM(0, 0); |
168: G[FirstIndex] += rv; |
169: L[FirstIndex] += y * d2psiM(0, 0) - dot(rv, rv); |
170: } |
171: else |
172: { |
173: for (size_t i = 0, iat = FirstIndex; i < NumPtcls; ++i, ++iat) |
174: { |
175: mValueType dot_temp = simd::dot(psiM[i], d2psiM[i], NumOrbitals); |
176: mGradType rv = simd::dot(psiM[i], dpsiM[i], NumOrbitals); |
177: G[iat] += rv; |
178: L[iat] += dot_temp - dot(rv, rv); |
179: } |
180: } |
181: } |
0x4d5c0 PUSH %RBP |
0x4d5c1 MOV %RSP,%RBP |
0x4d5c4 PUSH %R15 |
0x4d5c6 PUSH %R14 |
0x4d5c8 PUSH %R13 |
0x4d5ca PUSH %R12 |
0x4d5cc PUSH %RBX |
0x4d5cd SUB $0x38,%RSP |
0x4d5d1 CMPL $0,0xc(%RDI) |
0x4d5d5 MOV %RCX,%R13 |
0x4d5d8 MOV %RDX,%R15 |
0x4d5db MOV %RDI,%R14 |
0x4d5de JNE 4d633 |
0x4d5e0 MOV 0x468(%R14),%RDI |
0x4d5e7 MOV %RSI,%R12 |
0x4d5ea CALL 4ebf0 <@plt_start@+0x250> |
0x4d5ef MOV 0x470(%R14),%RDI |
0x4d5f6 MOV 0x478(%R14),%EDX |
0x4d5fd MOV 0x47c(%R14),%ECX |
0x4d604 LEA 0x90(%R14),%R8 |
0x4d60b LEA 0x110(%R14),%R9 |
0x4d612 LEA 0x150(%R14),%RAX |
0x4d619 MOV %R12,%RSI |
0x4d61c MOV (%RDI),%R10 |
0x4d61f MOV %RAX,(%RSP) |
0x4d623 CALLQ 0x28(%R10) |
0x4d627 MOV 0x468(%R14),%RDI |
0x4d62e CALL 4ec00 <@plt_start@+0x260> |
0x4d633 MOVSXD 0x484(%R14),%R10 |
0x4d63a TEST %R10,%R10 |
0x4d63d JE 4d6b2 |
0x4d63f CMP $0x1,%R10D |
0x4d643 JNE 4d6c4 |
0x4d645 MOV 0x100(%R14),%RAX |
0x4d64c MOV 0x140(%R14),%RCX |
0x4d653 MOV 0x18(%R15),%RDX |
0x4d657 VMOVDDUP (%RAX),%XMM0 |
0x4d65b MOVSXD 0x478(%R14),%RAX |
0x4d662 VMULPD (%RCX),%XMM0,%XMM2 |
0x4d666 VMULSD 0x10(%RCX),%XMM0,%XMM1 |
0x4d66b LEA (%RAX,%RAX,2),%RSI |
0x4d66f VADDPD (%RDX,%RSI,8),%XMM2,%XMM3 |
0x4d674 VMULPD %XMM2,%XMM2,%XMM2 |
0x4d678 VMOVUPD %XMM3,(%RDX,%RSI,8) |
0x4d67d VADDSD 0x10(%RDX,%RSI,8),%XMM1,%XMM3 |
0x4d683 VMOVSD %XMM3,0x10(%RDX,%RSI,8) |
0x4d689 VPERMILPD $0x1,%XMM2,%XMM3 |
0x4d68f MOV 0x180(%R14),%RCX |
0x4d696 VADDSD %XMM2,%XMM3,%XMM2 |
0x4d69a MOV 0x18(%R13),%RDX |
0x4d69e VFMADD231SD %XMM1,%XMM1,%XMM2 |
0x4d6a3 VFMSUB231SD (%RCX),%XMM0,%XMM2 |
0x4d6a8 VADDSD (%RDX,%RAX,8),%XMM2,%XMM0 |
0x4d6ad VMOVSD %XMM0,(%RDX,%RAX,8) |
0x4d6b2 ADD $0x38,%RSP |
0x4d6b6 POP %RBX |
0x4d6b7 POP %R12 |
0x4d6b9 POP %R13 |
0x4d6bb POP %R14 |
0x4d6bd POP %R15 |
0x4d6bf POP %RBP |
0x4d6c0 VZEROUPPER |
0x4d6c3 RET |
0x4d6c4 MOV 0x118(%R14),%RAX |
0x4d6cb VMOVAPD -0x451d3(%RIP),%YMM1 |
0x4d6d3 VMOVAPD -0x4525b(%RIP),%YMM2 |
0x4d6db VMOVAPD -0x45243(%RIP),%YMM3 |
0x4d6e3 VMOVAPD -0x4402d(%RIP),%ZMM4 |
0x4d6ed VMOVAPD -0x43ff7(%RIP),%ZMM5 |
0x4d6f7 VMOVAPD -0x43fc1(%RIP),%ZMM6 |
0x4d701 VMOVAPD -0x43f8b(%RIP),%ZMM7 |
0x4d70b VMOVAPD -0x43f55(%RIP),%ZMM8 |
0x4d715 VMOVAPD -0x43f1f(%RIP),%ZMM9 |
0x4d71f MOV 0x480(%R14),%R9D |
0x4d726 MOV 0xd8(%R14),%RDX |
0x4d72d MOV 0x158(%R14),%R8 |
0x4d734 MOVSXD 0x478(%R14),%RCX |
0x4d73b MOV 0x100(%R14),%RSI |
0x4d742 MOV 0x180(%R14),%RDI |
0x4d749 MOV 0x140(%R14),%RBX |
0x4d750 MOV 0x18(%R13),%R14 |
0x4d754 MOV 0x18(%R15),%R11 |
0x4d758 VXORPD %XMM0,%XMM0,%XMM0 |
0x4d75c MOV %R10,-0x30(%RBP) |
0x4d760 SAL $0x3,%RAX |
0x4d764 MOV %R9,%R12 |
0x4d767 SHR $0x5,%R12 |
0x4d76b MOV %R9D,%R15D |
0x4d76e MOV %R14,-0x48(%RBP) |
0x4d772 MOV %R9D,%R14D |
0x4d775 AND $-0x20,%R14D |
0x4d779 SAL $0x3,%RDX |
0x4d77d AND $-0x4,%R15D |
0x4d781 SAL $0x3,%R8 |
0x4d785 LEA 0xc0(%RDI),%R13 |
0x4d78c LEA (%RAX,%RAX,2),%RAX |
0x4d790 SAL $0x8,%R12 |
0x4d794 MOV %R14,-0x38(%RBP) |
0x4d798 MOV %RDX,-0x50(%RBP) |
0x4d79c XOR %R14D,%R14D |
0x4d79f MOV %RAX,-0x40(%RBP) |
0x4d7a3 LEA 0xc0(%RSI),%RAX |
0x4d7aa JMP 4d82b |
0x4d7ac NOPL (%RAX) |
(913) 0x4d7b0 VXORPD %XMM19,%XMM19,%XMM19 |
(913) 0x4d7b6 VXORPD %XMM10,%XMM10,%XMM10 |
(913) 0x4d7bb VXORPD %XMM11,%XMM11,%XMM11 |
(913) 0x4d7c0 LEA (%RCX,%RCX,2),%RDX |
(913) 0x4d7c4 ADD -0x40(%RBP),%RBX |
(913) 0x4d7c8 INC %R14 |
(913) 0x4d7cb ADD %R8,%R13 |
(913) 0x4d7ce ADD %R8,%RDI |
(913) 0x4d7d1 VADDPD (%R11,%RDX,8),%XMM11,%XMM12 |
(913) 0x4d7d7 VMULPD %XMM11,%XMM11,%XMM11 |
(913) 0x4d7dc VMOVUPD %XMM12,(%R11,%RDX,8) |
(913) 0x4d7e2 VADDSD 0x10(%R11,%RDX,8),%XMM19,%XMM12 |
(913) 0x4d7ea VMOVSD %XMM12,0x10(%R11,%RDX,8) |
(913) 0x4d7f1 VPERMILPD $0x1,%XMM11,%XMM12 |
(913) 0x4d7f7 MOV -0x48(%RBP),%RDX |
(913) 0x4d7fb VADDSD %XMM11,%XMM12,%XMM11 |
(913) 0x4d800 VFNMSUB231SD %XMM19,%XMM19,%XMM11 |
(913) 0x4d806 VADDSD %XMM11,%XMM10,%XMM10 |
(913) 0x4d80b VADDSD (%RDX,%RCX,8),%XMM10,%XMM10 |
(913) 0x4d810 VMOVSD %XMM10,(%RDX,%RCX,8) |
(913) 0x4d815 MOV -0x50(%RBP),%RDX |
(913) 0x4d819 INC %RCX |
(913) 0x4d81c ADD %RDX,%RAX |
(913) 0x4d81f ADD %RDX,%RSI |
(913) 0x4d822 CMP %R10,%R14 |
(913) 0x4d825 JE 4d6b2 |
(913) 0x4d82b TEST %R9D,%R9D |
(913) 0x4d82e JLE 4d7b0 |
(913) 0x4d830 CMP $0x4,%R9D |
(913) 0x4d834 JAE 4d850 |
(913) 0x4d836 VXORPD %XMM10,%XMM10,%XMM10 |
(913) 0x4d83b XOR %EDX,%EDX |
(913) 0x4d83d JMP 4d960 |
0x4d842 NOPW %CS:(%RAX,%RAX,1) |
(913) 0x4d850 CMP $0x20,%R9D |
(913) 0x4d854 JAE 4d863 |
(913) 0x4d856 VXORPD %XMM10,%XMM10,%XMM10 |
(913) 0x4d85b XOR %R10D,%R10D |
(913) 0x4d85e JMP 4d912 |
(913) 0x4d863 VXORPD %XMM10,%XMM10,%XMM10 |
(913) 0x4d868 VXORPD %XMM11,%XMM11,%XMM11 |
(913) 0x4d86d VXORPD %XMM12,%XMM12,%XMM12 |
(913) 0x4d872 VXORPD %XMM13,%XMM13,%XMM13 |
(913) 0x4d877 XOR %EDX,%EDX |
(913) 0x4d879 NOPL (%RAX) |
(914) 0x4d880 VMOVUPD -0xc0(%R13,%RDX,1),%ZMM14 |
(914) 0x4d888 VMOVUPD -0x80(%R13,%RDX,1),%ZMM15 |
(914) 0x4d890 VMOVUPD -0x40(%R13,%RDX,1),%ZMM16 |
(914) 0x4d898 VMOVUPD (%R13,%RDX,1),%ZMM17 |
(914) 0x4d8a0 VFMADD231PD -0xc0(%RAX,%RDX,1),%ZMM14,%ZMM10 |
(914) 0x4d8a8 VFMADD231PD -0x80(%RAX,%RDX,1),%ZMM15,%ZMM11 |
(914) 0x4d8b0 VFMADD231PD -0x40(%RAX,%RDX,1),%ZMM16,%ZMM12 |
(914) 0x4d8b8 VFMADD231PD (%RAX,%RDX,1),%ZMM17,%ZMM13 |
(914) 0x4d8bf ADD $0x100,%RDX |
(914) 0x4d8c6 CMP %RDX,%R12 |
(914) 0x4d8c9 JNE 4d880 |
(913) 0x4d8cb VADDPD %ZMM10,%ZMM11,%ZMM10 |
(913) 0x4d8d1 VADDPD %ZMM12,%ZMM13,%ZMM12 |
(913) 0x4d8d7 MOV -0x38(%RBP),%RDX |
(913) 0x4d8db VADDPD %ZMM10,%ZMM12,%ZMM10 |
(913) 0x4d8e1 VEXTRACTF64X4 $0x1,%ZMM10,%YMM11 |
(913) 0x4d8e8 VADDPD %ZMM11,%ZMM10,%ZMM10 |
(913) 0x4d8ee VEXTRACTF128 $0x1,%YMM10,%XMM11 |
(913) 0x4d8f4 VADDPD %XMM11,%XMM10,%XMM10 |
(913) 0x4d8f9 VPERMILPD $0x1,%XMM10,%XMM11 |
(913) 0x4d8ff VADDSD %XMM11,%XMM10,%XMM10 |
(913) 0x4d904 CMP %R9,%RDX |
(913) 0x4d907 JE 4d973 |
(913) 0x4d909 MOV %RDX,%R10 |
(913) 0x4d90c TEST $0x1c,%R9B |
(913) 0x4d910 JE 4d960 |
(913) 0x4d912 VMOVQ %XMM10,%XMM10 |
(913) 0x4d917 NOPW (%RAX,%RAX,1) |
(919) 0x4d920 VMOVUPD (%RDI,%R10,8),%YMM11 |
(919) 0x4d926 VFMADD231PD (%RSI,%R10,8),%YMM11,%YMM10 |
(919) 0x4d92c ADD $0x4,%R10 |
(919) 0x4d930 CMP %R10,%R15 |
(919) 0x4d933 JNE 4d920 |
(913) 0x4d935 VEXTRACTF128 $0x1,%YMM10,%XMM11 |
(913) 0x4d93b MOV %R15,%RDX |
(913) 0x4d93e VADDPD %XMM11,%XMM10,%XMM10 |
(913) 0x4d943 VPERMILPD $0x1,%XMM10,%XMM11 |
(913) 0x4d949 VADDSD %XMM11,%XMM10,%XMM10 |
(913) 0x4d94e CMP %R9,%R15 |
(913) 0x4d951 JE 4d973 |
(913) 0x4d953 NOPW %CS:(%RAX,%RAX,1) |
(918) 0x4d960 VMOVSD (%RDI,%RDX,8),%XMM11 |
(918) 0x4d965 VFMADD231SD (%RSI,%RDX,8),%XMM11,%XMM10 |
(918) 0x4d96b INC %RDX |
(918) 0x4d96e CMP %RDX,%R9 |
(918) 0x4d971 JNE 4d960 |
(913) 0x4d973 CMP $0x4,%R9D |
(913) 0x4d977 JAE 4d990 |
(913) 0x4d979 VXORPD %XMM11,%XMM11,%XMM11 |
(913) 0x4d97e XOR %EDX,%EDX |
(913) 0x4d980 JMP 4dd20 |
0x4d985 NOPW %CS:(%RAX,%RAX,1) |
(913) 0x4d990 CMP $0x20,%R9D |
(913) 0x4d994 JAE 4d9a8 |
(913) 0x4d996 VXORPD %XMM11,%XMM11,%XMM11 |
(913) 0x4d99b VXORPD %XMM19,%XMM19,%XMM19 |
(913) 0x4d9a1 XOR %EDX,%EDX |
(913) 0x4d9a3 JMP 4dc44 |
(913) 0x4d9a8 VXORPD %XMM11,%XMM11,%XMM11 |
(913) 0x4d9ad VXORPD %XMM12,%XMM12,%XMM12 |
(913) 0x4d9b2 VXORPD %XMM13,%XMM13,%XMM13 |
(913) 0x4d9b7 VXORPD %XMM14,%XMM14,%XMM14 |
(913) 0x4d9bc VXORPD %XMM15,%XMM15,%XMM15 |
(913) 0x4d9c1 VXORPD %XMM16,%XMM16,%XMM16 |
(913) 0x4d9c7 VXORPD %XMM17,%XMM17,%XMM17 |
(913) 0x4d9cd VXORPD %XMM18,%XMM18,%XMM18 |
(913) 0x4d9d3 VXORPD %XMM19,%XMM19,%XMM19 |
(913) 0x4d9d9 VXORPD %XMM20,%XMM20,%XMM20 |
(913) 0x4d9df VXORPD %XMM21,%XMM21,%XMM21 |
(913) 0x4d9e5 VXORPD %XMM22,%XMM22,%XMM22 |
(913) 0x4d9eb MOV %RBX,%R10 |
(913) 0x4d9ee XOR %EDX,%EDX |
(915) 0x4d9f0 VMOVUPD (%R10),%ZMM24 |
(915) 0x4d9f6 VMOVUPD 0x40(%R10),%ZMM23 |
(915) 0x4d9fd VMOVUPD 0x80(%R10),%ZMM28 |
(915) 0x4da04 VMOVUPD -0xc0(%RAX,%RDX,1),%ZMM27 |
(915) 0x4da0c VMOVUPD -0x80(%RAX,%RDX,1),%ZMM29 |
(915) 0x4da14 VMOVAPD %ZMM24,%ZMM25 |
(915) 0x4da1a VPERMT2PD %ZMM23,%ZMM4,%ZMM25 |
(915) 0x4da20 VMOVAPD %ZMM24,%ZMM26 |
(915) 0x4da26 VPERMT2PD %ZMM23,%ZMM6,%ZMM26 |
(915) 0x4da2c VPERMT2PD %ZMM24,%ZMM8,%ZMM23 |
(915) 0x4da32 VPERMT2PD %ZMM28,%ZMM5,%ZMM25 |
(915) 0x4da38 VPERMT2PD %ZMM28,%ZMM7,%ZMM26 |
(915) 0x4da3e VPERMT2PD %ZMM28,%ZMM9,%ZMM23 |
(915) 0x4da44 VMOVUPD 0xc0(%R10),%ZMM28 |
(915) 0x4da4b VFMADD231PD %ZMM25,%ZMM27,%ZMM11 |
(915) 0x4da51 VMOVUPD 0x100(%R10),%ZMM25 |
(915) 0x4da58 VFMADD231PD %ZMM23,%ZMM27,%ZMM19 |
(915) 0x4da5e VFMADD231PD %ZMM26,%ZMM27,%ZMM15 |
(915) 0x4da64 VMOVUPD 0x140(%R10),%ZMM27 |
(915) 0x4da6b VMOVAPD %ZMM28,%ZMM23 |
(915) 0x4da71 VMOVAPD %ZMM28,%ZMM24 |
(915) 0x4da77 VPERMT2PD %ZMM25,%ZMM4,%ZMM23 |
(915) 0x4da7d VPERMT2PD %ZMM25,%ZMM6,%ZMM24 |
(915) 0x4da83 VPERMT2PD %ZMM28,%ZMM8,%ZMM25 |
(915) 0x4da89 VMOVUPD 0x180(%R10),%ZMM28 |
(915) 0x4da90 VPERMT2PD %ZMM27,%ZMM5,%ZMM23 |
(915) 0x4da96 VPERMT2PD %ZMM27,%ZMM9,%ZMM25 |
(915) 0x4da9c VPERMT2PD %ZMM27,%ZMM7,%ZMM24 |
(915) 0x4daa2 VMOVUPD 0x200(%R10),%ZMM27 |
(915) 0x4daa9 VMOVAPD %ZMM28,%ZMM26 |
(915) 0x4daaf VFMADD231PD %ZMM23,%ZMM29,%ZMM12 |
(915) 0x4dab5 VMOVUPD 0x1c0(%R10),%ZMM23 |
(915) 0x4dabc VFMADD231PD %ZMM25,%ZMM29,%ZMM20 |
(915) 0x4dac2 VMOVAPD %ZMM28,%ZMM25 |
(915) 0x4dac8 VFMADD231PD %ZMM24,%ZMM29,%ZMM16 |
(915) 0x4dace VPERMT2PD %ZMM23,%ZMM4,%ZMM25 |
(915) 0x4dad4 VPERMT2PD %ZMM23,%ZMM6,%ZMM26 |
(915) 0x4dada VPERMT2PD %ZMM28,%ZMM8,%ZMM23 |
(915) 0x4dae0 VMOVUPD -0x40(%RAX,%RDX,1),%ZMM28 |
(915) 0x4dae8 VPERMT2PD %ZMM27,%ZMM5,%ZMM25 |
(915) 0x4daee VPERMT2PD %ZMM27,%ZMM7,%ZMM26 |
(915) 0x4daf4 VPERMT2PD %ZMM27,%ZMM9,%ZMM23 |
(915) 0x4dafa VMOVUPD 0x240(%R10),%ZMM27 |
(915) 0x4db01 VFMADD231PD %ZMM25,%ZMM28,%ZMM13 |
(915) 0x4db07 VMOVUPD 0x280(%R10),%ZMM25 |
(915) 0x4db0e VFMADD231PD %ZMM23,%ZMM28,%ZMM21 |
(915) 0x4db14 VFMADD231PD %ZMM26,%ZMM28,%ZMM17 |
(915) 0x4db1a VMOVUPD 0x2c0(%R10),%ZMM28 |
(915) 0x4db21 ADD $0x300,%R10 |
(915) 0x4db28 VMOVAPD %ZMM27,%ZMM23 |
(915) 0x4db2e VMOVAPD %ZMM27,%ZMM24 |
(915) 0x4db34 VPERMT2PD %ZMM25,%ZMM4,%ZMM23 |
(915) 0x4db3a VPERMT2PD %ZMM25,%ZMM6,%ZMM24 |
(915) 0x4db40 VPERMT2PD %ZMM27,%ZMM8,%ZMM25 |
(915) 0x4db46 VMOVUPD (%RAX,%RDX,1),%ZMM27 |
(915) 0x4db4d ADD $0x100,%RDX |
(915) 0x4db54 VPERMT2PD %ZMM28,%ZMM5,%ZMM23 |
(915) 0x4db5a VPERMT2PD %ZMM28,%ZMM7,%ZMM24 |
(915) 0x4db60 VPERMT2PD %ZMM28,%ZMM9,%ZMM25 |
(915) 0x4db66 VFMADD231PD %ZMM23,%ZMM27,%ZMM14 |
(915) 0x4db6c VFMADD231PD %ZMM24,%ZMM27,%ZMM18 |
(915) 0x4db72 VFMADD231PD %ZMM25,%ZMM27,%ZMM22 |
(915) 0x4db78 CMP %RDX,%R12 |
(915) 0x4db7b JNE 4d9f0 |
(913) 0x4db81 VADDPD %ZMM15,%ZMM16,%ZMM15 |
(913) 0x4db87 VADDPD %ZMM17,%ZMM18,%ZMM16 |
(913) 0x4db8d VADDPD %ZMM11,%ZMM12,%ZMM11 |
(913) 0x4db93 VADDPD %ZMM13,%ZMM14,%ZMM13 |
(913) 0x4db99 VADDPD %ZMM19,%ZMM20,%ZMM19 |
(913) 0x4db9f VADDPD %ZMM21,%ZMM22,%ZMM20 |
(913) 0x4dba5 MOV -0x38(%RBP),%RDX |
(913) 0x4dba9 VADDPD %ZMM15,%ZMM16,%ZMM15 |
(913) 0x4dbaf VADDPD %ZMM11,%ZMM13,%ZMM11 |
(913) 0x4dbb5 VADDPD %ZMM19,%ZMM20,%ZMM19 |
(913) 0x4dbbb VEXTRACTF64X4 $0x1,%ZMM15,%YMM16 |
(913) 0x4dbc2 VEXTRACTF64X4 $0x1,%ZMM11,%YMM12 |
(913) 0x4dbc9 VEXTRACTF64X4 $0x1,%ZMM19,%YMM20 |
(913) 0x4dbd0 VADDPD %ZMM16,%ZMM15,%ZMM15 |
(913) 0x4dbd6 VADDPD %ZMM12,%ZMM11,%ZMM11 |
(913) 0x4dbdc VADDPD %ZMM20,%ZMM19,%ZMM19 |
(913) 0x4dbe2 VEXTRACTF32X4 $0x1,%YMM15,%XMM16 |
(913) 0x4dbe9 VEXTRACTF128 $0x1,%YMM11,%XMM12 |
(913) 0x4dbef VEXTRACTF32X4 $0x1,%YMM19,%XMM20 |
(913) 0x4dbf6 VADDPD %XMM16,%XMM15,%XMM15 |
(913) 0x4dbfc VADDPD %XMM12,%XMM11,%XMM11 |
(913) 0x4dc01 VADDPD %XMM20,%XMM19,%XMM19 |
(913) 0x4dc07 VPERMILPD $0x1,%XMM15,%XMM16 |
(913) 0x4dc0e VPERMILPD $0x1,%XMM11,%XMM12 |
(913) 0x4dc14 VPERMILPD $0x1,%XMM19,%XMM20 |
(913) 0x4dc1b VADDSD %XMM16,%XMM15,%XMM15 |
(913) 0x4dc21 VADDSD %XMM12,%XMM11,%XMM12 |
(913) 0x4dc26 VADDSD %XMM20,%XMM19,%XMM19 |
(913) 0x4dc2c VUNPCKLPD %XMM15,%XMM12,%XMM11 |
(913) 0x4dc31 CMP %R9,%RDX |
(913) 0x4dc34 JE 4dd50 |
(913) 0x4dc3a TEST $0x1c,%R9B |
(913) 0x4dc3e JE 4dd00 |
(913) 0x4dc44 LEA (%RDX,%RDX,2),%R10 |
(913) 0x4dc48 VMOVQ %XMM11,%XMM12 |
(913) 0x4dc4d VUNPCKHPD %XMM0,%XMM11,%XMM11 |
(913) 0x4dc51 VMOVQ %XMM19,%XMM13 |
(913) 0x4dc57 LEA (%RBX,%R10,8),%R10 |
(913) 0x4dc5b NOPL (%RAX,%RAX,1) |
(917) 0x4dc60 VMOVUPD (%R10),%ZMM15 |
(917) 0x4dc66 VMOVUPD 0x40(%R10),%YMM16 |
(917) 0x4dc6d VMOVUPD (%RSI,%RDX,8),%YMM14 |
(917) 0x4dc72 ADD $0x4,%RDX |
(917) 0x4dc76 ADD $0x60,%R10 |
(917) 0x4dc7a VMOVAPD %ZMM15,%ZMM17 |
(917) 0x4dc80 VMOVAPD %ZMM15,%ZMM18 |
(917) 0x4dc86 VPERMT2PD %ZMM16,%ZMM1,%ZMM17 |
(917) 0x4dc8c VPERMT2PD %ZMM16,%ZMM2,%ZMM18 |
(917) 0x4dc92 VPERMT2PD %ZMM16,%ZMM3,%ZMM15 |
(917) 0x4dc98 VFMADD231PD %YMM17,%YMM14,%YMM12 |
(917) 0x4dc9e VFMADD231PD %YMM18,%YMM14,%YMM11 |
(917) 0x4dca4 VFMADD231PD %YMM15,%YMM14,%YMM13 |
(917) 0x4dca9 CMP %RDX,%R15 |
(917) 0x4dcac JNE 4dc60 |
(913) 0x4dcae VEXTRACTF128 $0x1,%YMM13,%XMM14 |
(913) 0x4dcb4 VADDPD %YMM14,%YMM13,%YMM13 |
(913) 0x4dcb9 VPERMILPD $0x1,%XMM13,%XMM14 |
(913) 0x4dcbf VADDPD %YMM14,%YMM13,%YMM19 |
(913) 0x4dcc5 VEXTRACTF128 $0x1,%YMM11,%XMM13 |
(913) 0x4dccb VADDPD %XMM13,%XMM11,%XMM11 |
(913) 0x4dcd0 VEXTRACTF128 $0x1,%YMM12,%XMM13 |
(913) 0x4dcd6 VADDPD %XMM13,%XMM12,%XMM12 |
(913) 0x4dcdb VUNPCKHPD %XMM11,%XMM12,%XMM13 |
(913) 0x4dce0 VUNPCKLPD %XMM11,%XMM12,%XMM11 |
(913) 0x4dce5 VADDPD %XMM13,%XMM11,%XMM11 |
(913) 0x4dcea CMP %R9,%R15 |
(913) 0x4dced JE 4dd50 |
(913) 0x4dcef VBROADCASTSD %XMM19,%YMM12 |
(913) 0x4dcf5 MOV %R15,%RDX |
(913) 0x4dcf8 VBLENDPD $0xc,%YMM12,%YMM11,%YMM11 |
(913) 0x4dcfe JMP 4dd20 |
(913) 0x4dd00 VUNPCKLPD %XMM15,%XMM12,%XMM11 |
(913) 0x4dd05 VINSERTF32X4 $0x1,%XMM19,%YMM11,%YMM11 |
(913) 0x4dd0c VPERMILPD $0x2,%YMM11,%YMM11 |
(913) 0x4dd12 NOPW %CS:(%RAX,%RAX,1) |
(913) 0x4dd20 LEA (%RDX,%RDX,2),%R10 |
(913) 0x4dd24 LEA (%RBX,%R10,8),%R10 |
(913) 0x4dd28 NOPL (%RAX,%RAX,1) |
(916) 0x4dd30 VPERMILPD $0x2,(%R10),%YMM12 |
(916) 0x4dd36 ADD $0x18,%R10 |
(916) 0x4dd3a VFMADD231PD (%RSI,%RDX,8){1to4},%YMM12,%YMM11 |
(916) 0x4dd41 INC %RDX |
(916) 0x4dd44 CMP %RDX,%R9 |
(916) 0x4dd47 JNE 4dd30 |
(913) 0x4dd49 VEXTRACTF32X4 $0x1,%YMM11,%XMM19 |
(913) 0x4dd50 MOV -0x30(%RBP),%R10 |
(913) 0x4dd54 JMP 4d7c0 |
0x4dd59 INT $0x3 |
0x4dd5a INT $0x3 |
0x4dd5b INT $0x3 |
0x4dd5c INT $0x3 |
0x4dd5d INT $0x3 |
0x4dd5e INT $0x3 |
0x4dd5f INT $0x3 |
Path / |
Source file and lines | DiracDeterminantRef.cpp:156-181 |
Module | libqmcwfs.so |
nb instructions | 113 |
nb uops | 108 |
loop length | 528 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 3 |
used zmm registers | 6 |
nb stack references | 6 |
ADD-SUB / MUL ratio | 1.33 |
micro-operation queue | 18.00 cycles |
front end | 18.00 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.50 | 6.50 | 6.50 | 6.50 | 4.00 | 17.67 | 17.67 | 17.67 | 2.50 | 2.50 | 2.67 | 2.33 | 1.50 | 1.50 |
cycles | 6.50 | 6.50 | 6.50 | 6.50 | 4.00 | 19.67 | 19.67 | 19.67 | 2.50 | 2.50 | 2.67 | 2.33 | 1.50 | 1.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 18.00 |
Dispatch | 19.67 |
Overall L1 | 19.67 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 62% |
load | 68% |
store | 33% |
mul | 66% |
add-sub | 25% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 37% |
load | 52% |
store | 11% |
mul | 66% |
add-sub | 25% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 27% |
all | 12% |
load | 10% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 42% |
load | 53% |
store | 16% |
mul | 20% |
add-sub | 15% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 28% |
load | 43% |
store | 13% |
mul | 20% |
add-sub | 15% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMPL $0,0xc(%RDI) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %RCX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JNE 4d633 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x73> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x468(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 4ebf0 <@plt_start@+0x250> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x470(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x478(%R14),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x47c(%R14),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x90(%R14),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x110(%R14),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x150(%R14),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R12,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RDI),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RAX,(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALLQ 0x28(%R10) | 4 | 0.50 | 0 | 0 | 0 | 0.50 | 0.67 | 0.67 | 0.67 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.20 |
MOV 0x468(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CALL 4ec00 <@plt_start@+0x260> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x484(%R14),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
TEST %R10,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 4d6b2 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0xf2> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
CMP $0x1,%R10D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JNE 4d6c4 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x104> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x100(%R14),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x140(%R14),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R15),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVDDUP (%RAX),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x478(%R14),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULPD (%RCX),%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULSD 0x10(%RCX),%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
LEA (%RAX,%RAX,2),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VADDPD (%RDX,%RSI,8),%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMULPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVUPD %XMM3,(%RDX,%RSI,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VADDSD 0x10(%RDX,%RSI,8),%XMM1,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM3,0x10(%RDX,%RSI,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VPERMILPD $0x1,%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOV 0x180(%R14),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VADDSD %XMM2,%XMM3,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOV 0x18(%R13),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VFMADD231SD %XMM1,%XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMSUB231SD (%RCX),%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD (%RDX,%RAX,8),%XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM0,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
ADD $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x118(%R14),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVAPD -0x451d3(%RIP),%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x4525b(%RIP),%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x45243(%RIP),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x4402d(%RIP),%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x43ff7(%RIP),%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x43fc1(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x43f8b(%RIP),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x43f55(%RIP),%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x43f1f(%RIP),%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x480(%R14),%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0xd8(%R14),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x158(%R14),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVSXD 0x478(%R14),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV 0x100(%R14),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x180(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x140(%R14),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R13),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R15),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SAL $0x3,%RAX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R9,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x5,%R12 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R9D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R14,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R9D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x20,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x3,%RDX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
AND $-0x4,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x3,%R8 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA 0xc0(%RDI),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%RAX,2),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x8,%R12 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R14,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA 0xc0(%RSI),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JMP 4d82b <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x26b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 |
Source file and lines | DiracDeterminantRef.cpp:156-181 |
Module | libqmcwfs.so |
nb instructions | 113 |
nb uops | 108 |
loop length | 528 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 3 |
used zmm registers | 6 |
nb stack references | 6 |
ADD-SUB / MUL ratio | 1.33 |
micro-operation queue | 18.00 cycles |
front end | 18.00 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.50 | 6.50 | 6.50 | 6.50 | 4.00 | 17.67 | 17.67 | 17.67 | 2.50 | 2.50 | 2.67 | 2.33 | 1.50 | 1.50 |
cycles | 6.50 | 6.50 | 6.50 | 6.50 | 4.00 | 19.67 | 19.67 | 19.67 | 2.50 | 2.50 | 2.67 | 2.33 | 1.50 | 1.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 18.00 |
Dispatch | 19.67 |
Overall L1 | 19.67 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 62% |
load | 68% |
store | 33% |
mul | 66% |
add-sub | 25% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 37% |
load | 52% |
store | 11% |
mul | 66% |
add-sub | 25% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 27% |
all | 12% |
load | 10% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 42% |
load | 53% |
store | 16% |
mul | 20% |
add-sub | 15% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 28% |
load | 43% |
store | 13% |
mul | 20% |
add-sub | 15% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMPL $0,0xc(%RDI) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %RCX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JNE 4d633 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x73> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x468(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 4ebf0 <@plt_start@+0x250> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x470(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x478(%R14),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x47c(%R14),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x90(%R14),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x110(%R14),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x150(%R14),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R12,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RDI),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RAX,(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALLQ 0x28(%R10) | 4 | 0.50 | 0 | 0 | 0 | 0.50 | 0.67 | 0.67 | 0.67 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.20 |
MOV 0x468(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CALL 4ec00 <@plt_start@+0x260> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x484(%R14),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
TEST %R10,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 4d6b2 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0xf2> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
CMP $0x1,%R10D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JNE 4d6c4 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x104> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x100(%R14),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x140(%R14),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R15),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVDDUP (%RAX),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x478(%R14),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULPD (%RCX),%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULSD 0x10(%RCX),%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
LEA (%RAX,%RAX,2),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VADDPD (%RDX,%RSI,8),%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMULPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVUPD %XMM3,(%RDX,%RSI,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VADDSD 0x10(%RDX,%RSI,8),%XMM1,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM3,0x10(%RDX,%RSI,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VPERMILPD $0x1,%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOV 0x180(%R14),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VADDSD %XMM2,%XMM3,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOV 0x18(%R13),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VFMADD231SD %XMM1,%XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMSUB231SD (%RCX),%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD (%RDX,%RAX,8),%XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM0,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
ADD $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x118(%R14),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVAPD -0x451d3(%RIP),%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x4525b(%RIP),%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x45243(%RIP),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x4402d(%RIP),%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x43ff7(%RIP),%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x43fc1(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x43f8b(%RIP),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x43f55(%RIP),%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x43f1f(%RIP),%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x480(%R14),%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0xd8(%R14),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x158(%R14),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVSXD 0x478(%R14),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV 0x100(%R14),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x180(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x140(%R14),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R13),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R15),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SAL $0x3,%RAX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R9,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x5,%R12 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R9D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R14,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R9D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x20,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x3,%RDX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
AND $-0x4,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x3,%R8 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA 0xc0(%RDI),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%RAX,2),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x8,%R12 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R14,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA 0xc0(%RSI),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JMP 4d82b <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x26b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb– | 0.48 | 0.99 |
▼Loop 913 - OhmmsVector.h:223-223 - libqmcwfs.so– | 0 | 0 |
○Loop 915 - inner_product.hpp:155-155 - libqmcwfs.so | 0.31 | 0.57 |
○Loop 914 - inner_product.hpp:82-83 - libqmcwfs.so | 0.18 | 0.33 |
○Loop 916 - inner_product.hpp:155-155 - libqmcwfs.so | 0 | 0 |
○Loop 918 - inner_product.hpp:82-83 - libqmcwfs.so | 0 | 0 |
○Loop 919 - inner_product.hpp:82-83 - libqmcwfs.so | 0 | 0 |
○Loop 917 - inner_product.hpp:155-155 - libqmcwfs.so | 0 | 0 |