Function: miniqmcreference::DiracDeterminantRef<qmcplusplus::DelayedUpdate<double, double> >::evalua ... | Module: libqmcwfs.so | Source: DiracDeterminantRef.cpp:152-181 [...] | Coverage: 0.73% |
---|
Function: miniqmcreference::DiracDeterminantRef<qmcplusplus::DelayedUpdate<double, double> >::evalua ... | Module: libqmcwfs.so | Source: DiracDeterminantRef.cpp:152-181 [...] | Coverage: 0.73% |
---|
/home/kcamus/qaas_runs/169-390-4082/intel/miniqmc/build/miniqmc/src/Numerics/PETE/OperatorTags.h: 63 - 183 |
-------------------------------------------------------------------------------- |
63: return (a * b); |
[...] |
94: (const_cast<T1&>(a) += b); |
[...] |
181: inline typename BinaryReturn<T1, T2, OpAssign>::Type_t operator()(const T1& a, const T2& b) const |
182: { |
183: return (const_cast<T1&>(a) = b); |
/home/kcamus/qaas_runs/169-390-4082/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVectorOps.h: 130 - 131 |
-------------------------------------------------------------------------------- |
130: for (unsigned d = 1; d < D; ++d) |
131: res += lhs[d] * rhs[d]; |
/home/kcamus/qaas_runs/169-390-4082/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 223 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
/usr/include/c++/13.1.1/bits/unique_ptr.h: 199 - 199 |
-------------------------------------------------------------------------------- |
199: pointer _M_ptr() const noexcept { return std::get<0>(_M_t); } |
/home/kcamus/qaas_runs/169-390-4082/intel/miniqmc/build/miniqmc/src/Platforms/CPU/SIMD/inner_product.hpp: 81 - 154 |
-------------------------------------------------------------------------------- |
81: for (int i = 0; i < n; i++) |
82: res += a[i] * b[i]; |
[...] |
154: for (int i = 0; i < n; i++) |
/home/kcamus/qaas_runs/169-390-4082/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/DiracDeterminantRef.cpp: 152 - 181 |
-------------------------------------------------------------------------------- |
152: void DiracDeterminantRef<DU_TYPE>::evaluateGL(ParticleSet& P, |
153: ParticleSet::ParticleGradient_t& G, |
154: ParticleSet::ParticleLaplacian_t& L, |
155: bool fromscratch) |
156: { |
157: if (UpdateMode == ORB_PBYP_RATIO) |
158: { //need to compute dpsiM and d2psiM. Do not touch psiM! |
159: SPOVGLTimer->start(); |
160: Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_temp, dpsiM, d2psiM); |
161: SPOVGLTimer->stop(); |
162: } |
163: |
164: if (NumPtcls == 1) |
165: { |
166: ValueType y = psiM(0, 0); |
167: GradType rv = y * dpsiM(0, 0); |
168: G[FirstIndex] += rv; |
169: L[FirstIndex] += y * d2psiM(0, 0) - dot(rv, rv); |
170: } |
171: else |
172: { |
173: for (size_t i = 0, iat = FirstIndex; i < NumPtcls; ++i, ++iat) |
174: { |
175: mValueType dot_temp = simd::dot(psiM[i], d2psiM[i], NumOrbitals); |
176: mGradType rv = simd::dot(psiM[i], dpsiM[i], NumOrbitals); |
177: G[iat] += rv; |
178: L[iat] += dot_temp - dot(rv, rv); |
179: } |
180: } |
181: } |
0x58760 PUSH %RBP |
0x58761 MOV %RSP,%RBP |
0x58764 PUSH %R15 |
0x58766 PUSH %R14 |
0x58768 PUSH %R13 |
0x5876a MOV %RCX,%R13 |
0x5876d PUSH %R12 |
0x5876f MOV %RDX,%R12 |
0x58772 PUSH %RBX |
0x58773 MOV %RDI,%RBX |
0x58776 SUB $0x38,%RSP |
0x5877a MOV %FS:0x28,%RAX |
0x58783 MOV %RAX,-0x38(%RBP) |
0x58787 XOR %EAX,%EAX |
0x58789 MOV 0xc(%RDI),%EDI |
0x5878c TEST %EDI,%EDI |
0x5878e JE 58bd5 |
0x58794 MOV 0x484(%RBX),%EDX |
0x5879a MOVSXD 0x478(%RBX),%R11 |
0x587a1 CMP $0x1,%EDX |
0x587a4 JE 58b5c |
0x587aa MOVSXD %EDX,%R15 |
0x587ad TEST %EDX,%EDX |
0x587af JE 58b12 |
0x587b5 MOV 0x158(%RBX),%RAX |
0x587bc MOV 0x18(%R12),%RSI |
0x587c1 LEA (%R11,%R11,2),%RCX |
0x587c5 XOR %R8D,%R8D |
0x587c8 MOVSXD 0x480(%RBX),%R10 |
0x587cf MOV 0xd8(%RBX),%R9 |
0x587d6 VXORPD %XMM4,%XMM4,%XMM4 |
0x587da SAL $0x3,%RAX |
0x587de LEA (%RSI,%RCX,8),%RDI |
0x587e2 MOV 0x18(%R13),%R13 |
0x587e6 MOV 0x100(%RBX),%RCX |
0x587ed MOV %RAX,-0x60(%RBP) |
0x587f1 MOV %R10,%R14 |
0x587f4 MOV 0x180(%RBX),%RSI |
0x587fb SAL $0x3,%R10 |
0x587ff LEA (,%R9,8),%R12 |
0x58807 LEA (%R13,%R11,8),%R13 |
0x5880c MOV %R14D,%R11D |
0x5880f LEA (%RCX,%R10,1),%R9 |
0x58813 NOPL (%RAX,%RAX,1) |
(608) 0x58818 TEST %R11D,%R11D |
(608) 0x5881b JLE 58b41 |
(608) 0x58821 LEA -0x8(%R10),%RDX |
(608) 0x58825 XOR %R14D,%R14D |
(608) 0x58828 VXORPD %XMM1,%XMM1,%XMM1 |
(608) 0x5882c SHR $0x3,%RDX |
(608) 0x58830 INC %RDX |
(608) 0x58833 AND $0x7,%EDX |
(608) 0x58836 JE 588d4 |
(608) 0x5883c CMP $0x1,%RDX |
(608) 0x58840 JE 588bf |
(608) 0x58842 CMP $0x2,%RDX |
(608) 0x58846 JE 588af |
(608) 0x58848 CMP $0x3,%RDX |
(608) 0x5884c JE 5889f |
(608) 0x5884e CMP $0x4,%RDX |
(608) 0x58852 JE 5888f |
(608) 0x58854 CMP $0x5,%RDX |
(608) 0x58858 JE 5887f |
(608) 0x5885a CMP $0x6,%RDX |
(608) 0x5885e JE 5886f |
(608) 0x58860 VMOVSD (%RCX),%XMM7 |
(608) 0x58864 MOV $0x8,%R14D |
(608) 0x5886a VFMADD231SD (%RSI),%XMM7,%XMM1 |
(608) 0x5886f VMOVSD (%RCX,%R14,1),%XMM6 |
(608) 0x58875 VFMADD231SD (%RSI,%R14,1),%XMM6,%XMM1 |
(608) 0x5887b ADD $0x8,%R14 |
(608) 0x5887f VMOVSD (%RCX,%R14,1),%XMM5 |
(608) 0x58885 VFMADD231SD (%RSI,%R14,1),%XMM5,%XMM1 |
(608) 0x5888b ADD $0x8,%R14 |
(608) 0x5888f VMOVSD (%RCX,%R14,1),%XMM0 |
(608) 0x58895 VFMADD231SD (%RSI,%R14,1),%XMM0,%XMM1 |
(608) 0x5889b ADD $0x8,%R14 |
(608) 0x5889f VMOVSD (%RCX,%R14,1),%XMM2 |
(608) 0x588a5 VFMADD231SD (%RSI,%R14,1),%XMM2,%XMM1 |
(608) 0x588ab ADD $0x8,%R14 |
(608) 0x588af VMOVSD (%RCX,%R14,1),%XMM3 |
(608) 0x588b5 VFMADD231SD (%RSI,%R14,1),%XMM3,%XMM1 |
(608) 0x588bb ADD $0x8,%R14 |
(608) 0x588bf VMOVSD (%RCX,%R14,1),%XMM8 |
(608) 0x588c5 VFMADD231SD (%RSI,%R14,1),%XMM8,%XMM1 |
(608) 0x588cb ADD $0x8,%R14 |
(608) 0x588cf CMP %R10,%R14 |
(608) 0x588d2 JE 5894b |
(609) 0x588d4 VMOVSD (%RCX,%R14,1),%XMM9 |
(609) 0x588da VMOVSD 0x8(%RCX,%R14,1),%XMM10 |
(609) 0x588e1 VMOVSD 0x10(%RCX,%R14,1),%XMM11 |
(609) 0x588e8 VMOVSD 0x18(%RCX,%R14,1),%XMM12 |
(609) 0x588ef VFMADD231SD (%RSI,%R14,1),%XMM9,%XMM1 |
(609) 0x588f5 VMOVSD 0x20(%RCX,%R14,1),%XMM13 |
(609) 0x588fc VMOVSD 0x28(%RCX,%R14,1),%XMM14 |
(609) 0x58903 VMOVSD 0x30(%RCX,%R14,1),%XMM15 |
(609) 0x5890a VMOVSD 0x38(%RCX,%R14,1),%XMM7 |
(609) 0x58911 VFMADD231SD 0x8(%RSI,%R14,1),%XMM10,%XMM1 |
(609) 0x58918 VFMADD231SD 0x10(%RSI,%R14,1),%XMM11,%XMM1 |
(609) 0x5891f VFMADD231SD 0x18(%RSI,%R14,1),%XMM12,%XMM1 |
(609) 0x58926 VFMADD231SD 0x20(%RSI,%R14,1),%XMM13,%XMM1 |
(609) 0x5892d VFMADD231SD 0x28(%RSI,%R14,1),%XMM14,%XMM1 |
(609) 0x58934 VFMADD231SD 0x30(%RSI,%R14,1),%XMM15,%XMM1 |
(609) 0x5893b VFMADD231SD 0x38(%RSI,%R14,1),%XMM7,%XMM1 |
(609) 0x58942 ADD $0x40,%R14 |
(609) 0x58946 CMP %R10,%R14 |
(609) 0x58949 JNE 588d4 |
(608) 0x5894b MOV 0x118(%RBX),%RAX |
(608) 0x58952 MOV 0x140(%RBX),%RDX |
(608) 0x58959 VMOVAPD %XMM4,-0x50(%RBP) |
(608) 0x5895e VXORPD %XMM0,%XMM0,%XMM0 |
(608) 0x58962 IMUL %R8,%RAX |
(608) 0x58966 LEA (%RAX,%RAX,2),%R14 |
(608) 0x5896a LEA (%RDX,%R14,8),%RAX |
(608) 0x5896e MOV %R9,%R14 |
(608) 0x58971 MOV %RCX,%RDX |
(608) 0x58974 SUB %RCX,%R14 |
(608) 0x58977 SUB $0x8,%R14 |
(608) 0x5897b SHR $0x3,%R14 |
(608) 0x5897f AND $0x3,%R14D |
(608) 0x58983 JE 58b38 |
(608) 0x58989 VMOVAPD -0x50(%RBP),%XMM5 |
(608) 0x5898e VMOVDDUP (%RCX),%XMM6 |
(608) 0x58992 VMOVSD (%RCX),%XMM2 |
(608) 0x58996 LEA 0x8(%RCX),%RDX |
(608) 0x5899a ADD $0x18,%RAX |
(608) 0x5899e VFMADD132PD -0x18(%RAX),%XMM5,%XMM6 |
(608) 0x589a4 VFMADD231SD -0x8(%RAX),%XMM2,%XMM0 |
(608) 0x589aa VMOVAPD %XMM6,-0x50(%RBP) |
(608) 0x589af CMP $0x1,%R14 |
(608) 0x589b3 JE 58b38 |
(608) 0x589b9 CMP $0x2,%R14 |
(608) 0x589bd JE 589e5 |
(608) 0x589bf VMOVAPD -0x50(%RBP),%XMM8 |
(608) 0x589c4 VMOVDDUP (%RDX),%XMM3 |
(608) 0x589c8 VMOVSD (%RDX),%XMM9 |
(608) 0x589cc ADD $0x18,%RAX |
(608) 0x589d0 LEA 0x10(%RCX),%RDX |
(608) 0x589d4 VFMADD132PD -0x18(%RAX),%XMM8,%XMM3 |
(608) 0x589da VFMADD231SD -0x8(%RAX),%XMM9,%XMM0 |
(608) 0x589e0 VMOVAPD %XMM3,-0x50(%RBP) |
(608) 0x589e5 VMOVAPD -0x50(%RBP),%XMM11 |
(608) 0x589ea VMOVDDUP (%RDX),%XMM10 |
(608) 0x589ee VMOVSD (%RDX),%XMM12 |
(608) 0x589f2 MOV %R11D,-0x58(%RBP) |
(608) 0x589f6 ADD $0x8,%RDX |
(608) 0x589fa ADD $0x18,%RAX |
(608) 0x589fe VFMADD132PD -0x18(%RAX),%XMM11,%XMM10 |
(608) 0x58a04 VFMADD231SD -0x8(%RAX),%XMM12,%XMM0 |
(608) 0x58a0a VMOVAPD %XMM10,-0x50(%RBP) |
(608) 0x58a0f JMP 58a6c |
0x58a11 NOPL (%RAX) |
(610) 0x58a18 VMOVAPD -0x50(%RBP),%XMM12 |
(610) 0x58a1d VMOVDDUP 0x8(%RDX),%XMM11 |
(610) 0x58a22 VMOVSD 0x8(%RDX),%XMM13 |
(610) 0x58a27 VMOVDDUP 0x10(%RDX),%XMM14 |
(610) 0x58a2c VMOVSD 0x10(%RDX),%XMM15 |
(610) 0x58a31 VMOVDDUP 0x18(%RDX),%XMM7 |
(610) 0x58a36 VMOVSD 0x18(%RDX),%XMM6 |
(610) 0x58a3b ADD $0x60,%RAX |
(610) 0x58a3f VFMADD132PD -0x48(%RAX),%XMM12,%XMM11 |
(610) 0x58a45 VFMADD231SD 0x10(%R14),%XMM13,%XMM0 |
(610) 0x58a4b ADD $0x20,%RDX |
(610) 0x58a4f VFMADD231PD -0x30(%RAX),%XMM14,%XMM11 |
(610) 0x58a55 VFMADD231SD -0x20(%RAX),%XMM15,%XMM0 |
(610) 0x58a5b VFMADD231PD -0x18(%RAX),%XMM7,%XMM11 |
(610) 0x58a61 VFMADD231SD -0x8(%RAX),%XMM6,%XMM0 |
(610) 0x58a67 VMOVAPD %XMM11,-0x50(%RBP) |
(610) 0x58a6c VMOVSD (%RDX),%XMM14 |
(610) 0x58a70 VMOVAPD -0x50(%RBP),%XMM7 |
(610) 0x58a75 LEA 0x8(%RDX),%R11 |
(610) 0x58a79 LEA 0x18(%RAX),%R14 |
(610) 0x58a7d VMOVSD -0x50(%RBP),%XMM6 |
(610) 0x58a82 VMOVSD (%RAX),%XMM13 |
(610) 0x58a86 VMOVDDUP %XMM14,%XMM15 |
(610) 0x58a8b VFMADD231SD 0x10(%RAX),%XMM14,%XMM0 |
(610) 0x58a91 VFMADD132PD (%RAX),%XMM7,%XMM15 |
(610) 0x58a96 VMOVAPD %XMM15,-0x50(%RBP) |
(610) 0x58a9b CMP %R11,%R9 |
(610) 0x58a9e JNE 58a18 |
(608) 0x58aa4 VMOVSD %XMM0,-0x40(%RBP) |
(608) 0x58aa9 VMULSD %XMM0,%XMM0,%XMM0 |
(608) 0x58aad VUNPCKHPD %XMM15,%XMM15,%XMM5 |
(608) 0x58ab2 MOV -0x58(%RBP),%R11D |
(608) 0x58ab6 VFMADD132SD %XMM13,%XMM6,%XMM14 |
(608) 0x58abb VFMADD132SD %XMM5,%XMM0,%XMM5 |
(608) 0x58ac0 VFMADD132SD %XMM14,%XMM5,%XMM14 |
(608) 0x58ac5 VMOVUPD (%RDI),%XMM2 |
(608) 0x58ac9 VMOVSD 0x10(%RDI),%XMM8 |
(608) 0x58ace ADD %R12,%RCX |
(608) 0x58ad1 ADD $0x18,%RDI |
(608) 0x58ad5 MOV -0x60(%RBP),%RAX |
(608) 0x58ad9 ADD %R12,%R9 |
(608) 0x58adc VADDPD -0x50(%RBP),%XMM2,%XMM3 |
(608) 0x58ae1 VADDSD -0x40(%RBP),%XMM8,%XMM9 |
(608) 0x58ae6 ADD %RAX,%RSI |
(608) 0x58ae9 VMOVUPD %XMM3,-0x18(%RDI) |
(608) 0x58aee VMOVSD %XMM9,-0x8(%RDI) |
(608) 0x58af3 VADDSD (%R13,%R8,8),%XMM1,%XMM1 |
(608) 0x58afa VSUBSD %XMM14,%XMM1,%XMM10 |
(608) 0x58aff VMOVSD %XMM10,(%R13,%R8,8) |
(608) 0x58b06 INC %R8 |
(608) 0x58b09 CMP %R15,%R8 |
(608) 0x58b0c JNE 58818 |
0x58b12 MOV -0x38(%RBP),%RAX |
0x58b16 SUB %FS:0x28,%RAX |
0x58b1f JNE 58c30 |
0x58b25 LEA -0x28(%RBP),%RSP |
0x58b29 POP %RBX |
0x58b2a POP %R12 |
0x58b2c POP %R13 |
0x58b2e POP %R14 |
0x58b30 POP %R15 |
0x58b32 POP %RBP |
0x58b33 RET |
0x58b34 NOPL (%RAX) |
(608) 0x58b38 MOV %R11D,-0x58(%RBP) |
(608) 0x58b3c JMP 58a6c |
(608) 0x58b41 VXORPD %XMM14,%XMM14,%XMM14 |
(608) 0x58b46 MOVQ $0,-0x40(%RBP) |
(608) 0x58b4e VMOVAPD %XMM4,-0x50(%RBP) |
(608) 0x58b53 VMOVSD %XMM14,%XMM14,%XMM1 |
(608) 0x58b57 JMP 58ac5 |
0x58b5c MOV 0x100(%RBX),%R15 |
0x58b63 MOV 0x140(%RBX),%R10 |
0x58b6a LEA (%R11,%R11,2),%RDI |
0x58b6e MOV 0x18(%R12),%R12 |
0x58b73 MOV 0x18(%R13),%R9 |
0x58b77 VMOVSD (%R15),%XMM5 |
0x58b7c MOV 0x180(%RBX),%RBX |
0x58b83 LEA (%R12,%RDI,8),%RCX |
0x58b87 LEA (%R9,%R11,8),%RSI |
0x58b8b VMULSD 0x10(%R10),%XMM5,%XMM0 |
0x58b91 VMOVDDUP %XMM5,%XMM4 |
0x58b95 VMULPD (%R10),%XMM4,%XMM2 |
0x58b9a VADDSD 0x10(%RCX),%XMM0,%XMM1 |
0x58b9f VUNPCKHPD %XMM2,%XMM2,%XMM8 |
0x58ba3 VADDPD (%RCX),%XMM2,%XMM9 |
0x58ba7 VMOVSD %XMM2,%XMM2,%XMM3 |
0x58bab VMULSD %XMM8,%XMM8,%XMM10 |
0x58bb0 VMOVSD %XMM1,0x10(%RCX) |
0x58bb5 VMOVUPD %XMM9,(%RCX) |
0x58bb9 VFNMADD213SD (%RSI),%XMM0,%XMM0 |
0x58bbe VFMADD132SD %XMM2,%XMM10,%XMM3 |
0x58bc3 VSUBSD %XMM3,%XMM0,%XMM11 |
0x58bc7 VFMADD132SD (%RBX),%XMM11,%XMM5 |
0x58bcc VMOVSD %XMM5,(%RSI) |
0x58bd0 JMP 58b12 |
0x58bd5 MOV 0x468(%RBX),%RDI |
0x58bdc MOV %RSI,-0x58(%RBP) |
0x58be0 CALL 8460 <_ZN11qmcplusplus9TimerTypeINS_8CPUClockEE5startEv@plt> |
0x58be5 MOV 0x470(%RBX),%RDI |
0x58bec SUB $0x8,%RSP |
0x58bf0 LEA 0x150(%RBX),%R8 |
0x58bf7 MOV 0x47c(%RBX),%ECX |
0x58bfd MOV 0x478(%RBX),%EDX |
0x58c03 LEA 0x110(%RBX),%R9 |
0x58c0a MOV (%RDI),%RAX |
0x58c0d MOV -0x58(%RBP),%RSI |
0x58c11 PUSH %R8 |
0x58c13 LEA 0x90(%RBX),%R8 |
0x58c1a CALLQ 0x28(%RAX) |
0x58c1d POP %RCX |
0x58c1e MOV 0x468(%RBX),%RDI |
0x58c25 POP %RSI |
0x58c26 CALL 8420 <_ZN11qmcplusplus9TimerTypeINS_8CPUClockEE4stopEv@plt> |
0x58c2b JMP 58794 |
0x58c30 CALL 80d0 <__stack_chk_fail@plt> |
0x58c35 NOP |
0x58c36 NOPW %CS:(%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | qmcplusplus::WaveFunction::eva[...] | WaveFunction.cpp:256 | libqmcwfs.so |
○ | main._omp_fn.1 | stl_vector.h:1123 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | DiracDeterminantRef.cpp:152-181 |
Module | libqmcwfs.so |
nb instructions | 104 |
nb uops | 109 |
loop length | 457 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 10 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 4 |
ADD-SUB / MUL ratio | 1.00 |
micro-operation queue | 27.25 cycles |
front end | 27.25 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 11.00 | 11.00 | 20.50 | 20.50 | 17.00 | 11.00 | 11.00 | 18.00 |
cycles | 11.00 | 11.00 | 20.50 | 20.50 | 17.00 | 11.00 | 11.00 | 18.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 26.60 |
Stall cycles | 0.00 |
Front-end | 27.25 |
Dispatch | 20.50 |
Overall L1 | 27.25 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 23% |
load | 28% |
store | 33% |
mul | 33% |
add-sub | 33% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 14% |
load | 22% |
store | 16% |
mul | 33% |
add-sub | 33% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
all | 8% |
load | 6% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 15% |
load | 16% |
store | 16% |
mul | 16% |
add-sub | 16% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
all | 12% |
load | 13% |
store | 14% |
mul | 16% |
add-sub | 16% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RCX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB $0x38,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %FS:0x28,%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0xc(%RDI),%EDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %EDI,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 58bd5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x484(%RBX),%EDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOVSXD 0x478(%RBX),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP $0x1,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 58b5c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD %EDX,%R15 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
TEST %EDX,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 58b12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x158(%RBX),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x18(%R12),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%R11,%R11,2),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOVSXD 0x480(%RBX),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0xd8(%RBX),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA (%RSI,%RCX,8),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%R13),%R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x100(%RBX),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R10,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x180(%RBX),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
SAL $0x3,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA (,%R9,8),%R12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R13,%R11,8),%R13 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA (%RCX,%R10,1),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
SUB %FS:0x28,%RAX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JNE 58c30 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x100(%RBX),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x140(%RBX),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%R11,%R11,2),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%R12),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x18(%R13),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD (%R15),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x180(%RBX),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%R12,%RDI,8),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R9,%R11,8),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD 0x10(%R10),%XMM5,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDDUP %XMM5,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMULPD (%R10),%XMM4,%XMM2 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD 0x10(%RCX),%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VUNPCKHPD %XMM2,%XMM2,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPD (%RCX),%XMM2,%XMM9 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM2,%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMULSD %XMM8,%XMM8,%XMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM1,0x10(%RCX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %XMM9,(%RCX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VFNMADD213SD (%RSI),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM2,%XMM10,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSD %XMM3,%XMM0,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD (%RBX),%XMM11,%XMM5 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM5,(%RSI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JMP 58b12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV 0x468(%RBX),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RSI,-0x58(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 8460 <_ZN11qmcplusplus9TimerTypeINS_8CPUClockEE5startEv@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV 0x470(%RBX),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x150(%RBX),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x47c(%RBX),%ECX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x478(%RBX),%EDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA 0x110(%RBX),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x58(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
PUSH %R8 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA 0x90(%RBX),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CALLQ 0x28(%RAX) | 3 | 0 | 0 | 0.83 | 0.83 | 1 | 0 | 1 | 0.33 | 0 | 4 |
POP %RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MOV 0x468(%RBX),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
POP %RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
CALL 8420 <_ZN11qmcplusplus9TimerTypeINS_8CPUClockEE4stopEv@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
JMP 58794 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
CALL 80d0 <__stack_chk_fail@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | DiracDeterminantRef.cpp:152-181 |
Module | libqmcwfs.so |
nb instructions | 104 |
nb uops | 109 |
loop length | 457 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 10 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 4 |
ADD-SUB / MUL ratio | 1.00 |
micro-operation queue | 27.25 cycles |
front end | 27.25 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 11.00 | 11.00 | 20.50 | 20.50 | 17.00 | 11.00 | 11.00 | 18.00 |
cycles | 11.00 | 11.00 | 20.50 | 20.50 | 17.00 | 11.00 | 11.00 | 18.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 26.60 |
Stall cycles | 0.00 |
Front-end | 27.25 |
Dispatch | 20.50 |
Overall L1 | 27.25 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 23% |
load | 28% |
store | 33% |
mul | 33% |
add-sub | 33% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 14% |
load | 22% |
store | 16% |
mul | 33% |
add-sub | 33% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
all | 8% |
load | 6% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 15% |
load | 16% |
store | 16% |
mul | 16% |
add-sub | 16% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
all | 12% |
load | 13% |
store | 14% |
mul | 16% |
add-sub | 16% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RCX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB $0x38,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %FS:0x28,%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0xc(%RDI),%EDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %EDI,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 58bd5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x484(%RBX),%EDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOVSXD 0x478(%RBX),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP $0x1,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 58b5c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD %EDX,%R15 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
TEST %EDX,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 58b12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x158(%RBX),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x18(%R12),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%R11,%R11,2),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOVSXD 0x480(%RBX),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0xd8(%RBX),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA (%RSI,%RCX,8),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%R13),%R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x100(%RBX),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R10,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x180(%RBX),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
SAL $0x3,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA (,%R9,8),%R12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R13,%R11,8),%R13 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA (%RCX,%R10,1),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
SUB %FS:0x28,%RAX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JNE 58c30 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x100(%RBX),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x140(%RBX),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%R11,%R11,2),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%R12),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x18(%R13),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD (%R15),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x180(%RBX),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%R12,%RDI,8),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R9,%R11,8),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD 0x10(%R10),%XMM5,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDDUP %XMM5,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMULPD (%R10),%XMM4,%XMM2 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD 0x10(%RCX),%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VUNPCKHPD %XMM2,%XMM2,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPD (%RCX),%XMM2,%XMM9 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM2,%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMULSD %XMM8,%XMM8,%XMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM1,0x10(%RCX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %XMM9,(%RCX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VFNMADD213SD (%RSI),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM2,%XMM10,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSD %XMM3,%XMM0,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD (%RBX),%XMM11,%XMM5 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM5,(%RSI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JMP 58b12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV 0x468(%RBX),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RSI,-0x58(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 8460 <_ZN11qmcplusplus9TimerTypeINS_8CPUClockEE5startEv@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV 0x470(%RBX),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x150(%RBX),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x47c(%RBX),%ECX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x478(%RBX),%EDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA 0x110(%RBX),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x58(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
PUSH %R8 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA 0x90(%RBX),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CALLQ 0x28(%RAX) | 3 | 0 | 0 | 0.83 | 0.83 | 1 | 0 | 1 | 0.33 | 0 | 4 |
POP %RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MOV 0x468(%RBX),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
POP %RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
CALL 8420 <_ZN11qmcplusplus9TimerTypeINS_8CPUClockEE4stopEv@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
JMP 58794 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
CALL 80d0 <__stack_chk_fail@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼miniqmcreference::DiracDeterminantRef | 0.73 | 0.01 |
▼Loop 608 - DiracDeterminantRef.cpp:173-181 - libqmcwfs.so– | 0 | 0 |
○Loop 609 - inner_product.hpp:81-82 - libqmcwfs.so | 0.37 | 0 |
○Loop 610 - inner_product.hpp:154-154 - libqmcwfs.so | 0.37 | 0 |