Function: miniqmcreference::DiracDeterminantRef<qmcplusplus::DelayedUpdate<double, double> >::evalua ... | Module: exec | Source: DiracDeterminantRef.cpp:231-273 [...] | Coverage: 0.11% |
---|
Function: miniqmcreference::DiracDeterminantRef<qmcplusplus::DelayedUpdate<double, double> >::evalua ... | Module: exec | Source: DiracDeterminantRef.cpp:231-273 [...] | Coverage: 0.11% |
---|
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVectorOps.h: 130 - 131 |
-------------------------------------------------------------------------------- |
130: for (unsigned d = 1; d < D; ++d) |
131: res += lhs[d] * rhs[d]; |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsMatrix.h: 217 - 217 |
-------------------------------------------------------------------------------- |
217: inline Type_t* operator[](size_type i) { return X.data() + i * D2; } |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Numerics/PETE/OperatorTags.h: 63 - 183 |
-------------------------------------------------------------------------------- |
63: return (a * b); |
[...] |
94: (const_cast<T1&>(a) += b); |
[...] |
183: return (const_cast<T1&>(a) = b); |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 56 - 248 |
-------------------------------------------------------------------------------- |
56: explicit inline Vector(T* ref, size_t n) : nLocal(n), X(ref) {} |
[...] |
144: virtual ~Vector() { free(); } |
[...] |
210: if (nAllocated) |
[...] |
221: inline Type_t& operator[](size_t i) |
222: { |
223: return X[i]; |
[...] |
248: inline pointer data() { return X; } |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/SPOSet.h: 106 - 112 |
-------------------------------------------------------------------------------- |
106: for (int iat = first, i = 0; iat < last; ++iat, ++i) |
107: { |
108: ValueVector_t v(logdet[i], OrbitalSetSize); |
109: GradVector_t g(dlogdet[i], OrbitalSetSize); |
110: ValueVector_t l(d2logdet[i], OrbitalSetSize); |
111: evaluate(P, iat, v, g, l); |
112: } |
/usr/include/c++/13.1.1/bits/new_allocator.h: 168 - 168 |
-------------------------------------------------------------------------------- |
168: _GLIBCXX_OPERATOR_DELETE(_GLIBCXX_SIZED_DEALLOC(__p, __n)); |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Platforms/CPU/SIMD/inner_product.hpp: 81 - 154 |
-------------------------------------------------------------------------------- |
81: for (int i = 0; i < n; i++) |
82: res += a[i] * b[i]; |
[...] |
154: for (int i = 0; i < n; i++) |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/DiracDeterminantRef.cpp: 231 - 273 |
-------------------------------------------------------------------------------- |
231: typename DiracDeterminantRef<DU_TYPE>::RealType DiracDeterminantRef<DU_TYPE>::evaluateLog( |
[...] |
238: if (NumPtcls == 1) |
239: { |
240: ValueType y = psiM(0, 0); |
241: GradType rv = y * dpsiM(0, 0); |
242: G[FirstIndex] += rv; |
243: L[FirstIndex] += y * d2psiM(0, 0) - dot(rv, rv); |
244: } |
245: else |
246: { |
247: for (int i = 0, iat = FirstIndex; i < NumPtcls; i++, iat++) |
248: { |
249: mGradType rv = simd::dot(psiM[i], dpsiM[i], NumOrbitals); |
250: mValueType lap = simd::dot(psiM[i], d2psiM[i], NumOrbitals); |
251: G[iat] += rv; |
252: L[iat] += lap - dot(rv, rv); |
253: } |
254: } |
255: return LogValue; |
256: } |
257: |
258: template<typename DU_TYPE> |
259: void DiracDeterminantRef<DU_TYPE>::recompute(ParticleSet& P) |
260: { |
261: SPOVGLTimer->start(); |
262: Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_temp, dpsiM, d2psiM); |
263: SPOVGLTimer->stop(); |
264: if (NumPtcls == 1) |
265: { |
266: //CurrentDet=psiM(0,0); |
267: ValueType det = psiM_temp(0, 0); |
268: psiM(0, 0) = RealType(1) / det; |
269: LogValue = evaluateLogAndPhase(det, PhaseValue); |
270: } |
271: else |
272: { |
273: invertPsiM(psiM_temp, psiM); |
/usr/include/c++/13.1.1/bits/unique_ptr.h: 199 - 199 |
-------------------------------------------------------------------------------- |
199: pointer _M_ptr() const noexcept { return std::get<0>(_M_t); } |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVector.h: 150 - 150 |
-------------------------------------------------------------------------------- |
150: ~TinyVector() {} |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/DeterminantHelper.h: 40 - 48 |
-------------------------------------------------------------------------------- |
40: if (psi < 0.0) |
41: { |
42: phase = M_PI; |
43: return std::log(-psi); |
44: } |
45: else |
46: { |
47: phase = 0.0; |
48: return std::log(psi); |
0x470420 PUSH %RBP |
0x470421 MOV %RSP,%RBP |
0x470424 PUSH %R15 |
0x470426 PUSH %R14 |
0x470428 PUSH %R13 |
0x47042a PUSH %R12 |
0x47042c PUSH %RBX |
0x47042d MOV %RDI,%RBX |
0x470430 SUB $0x108,%RSP |
0x470437 MOV %RSI,-0xf0(%RBP) |
0x47043e MOV %RDX,-0x118(%RBP) |
0x470445 MOV 0x468(%RDI),%RDI |
0x47044c MOV %RCX,-0x120(%RBP) |
0x470453 MOV %FS:0x28,%RAX |
0x47045c MOV %RAX,-0x38(%RBP) |
0x470460 XOR %EAX,%EAX |
0x470462 CALL 4a40e0 <_ZN11qmcplusplus9TimerTypeINS_8CPUClockEE5startEv> |
0x470467 MOV 0x470(%RBX),%R14 |
0x47046e MOV 0x478(%RBX),%ESI |
0x470474 LEA 0x90(%RBX),%RDI |
0x47047b LEA -0x1f3f2(%RIP),%RDX |
0x470482 MOV %RDI,-0x128(%RBP) |
0x470489 MOV 0x47c(%RBX),%ECX |
0x47048f MOV (%R14),%RAX |
0x470492 MOV %ESI,-0xe4(%RBP) |
0x470498 MOV 0x28(%RAX),%R10 |
0x47049c CMP %RDX,%R10 |
0x47049f JNE 470b4c |
0x4704a5 CMP %ESI,%ECX |
0x4704a7 JLE 47064c |
0x4704ad SUB %ESI,%ECX |
0x4704af LEA -0x80(%RBP),%R11 |
0x4704b3 LEA -0xb0(%RBP),%RAX |
0x4704ba XOR %R12D,%R12D |
0x4704bd LEA -0xe0(%RBP),%R10 |
0x4704c4 MOV %RCX,-0x110(%RBP) |
0x4704cb LEA 0x6e15e(%RIP),%R13 |
0x4704d2 MOV %R11,-0x100(%RBP) |
0x4704d9 LEA 0x6e110(%RIP),%R15 |
0x4704e0 MOV %RAX,-0xf8(%RBP) |
0x4704e7 MOV %R10,-0x108(%RBP) |
0x4704ee JMP 47052e |
(862) 0x4704f0 MOV -0xa0(%RBP),%R9 |
(862) 0x4704f7 MOV %R15,-0xb0(%RBP) |
(862) 0x4704fe TEST %R9,%R9 |
(862) 0x470501 JNE 470a40 |
(862) 0x470507 MOV -0xd0(%RBP),%RCX |
(862) 0x47050e MOV %R13,-0xe0(%RBP) |
(862) 0x470515 TEST %RCX,%RCX |
(862) 0x470518 JNE 470628 |
(862) 0x47051e INC %R12 |
(862) 0x470521 CMP %R12,-0x110(%RBP) |
(862) 0x470528 JE 47064c |
(862) 0x47052e MOV 0x98(%RBX),%RSI |
(862) 0x470535 MOV 0x118(%RBX),%R9 |
(862) 0x47053c MOV %R13,-0xe0(%RBP) |
(862) 0x470543 MOV 0x158(%RBX),%R10 |
(862) 0x47054a MOV 0xc0(%RBX),%RDX |
(862) 0x470551 MOV %R15,-0xb0(%RBP) |
(862) 0x470558 IMUL %R12,%RSI |
(862) 0x47055c MOV 0x140(%RBX),%R11 |
(862) 0x470563 MOVSXD 0x8(%R14),%RDI |
(862) 0x470567 MOV %R13,-0x80(%RBP) |
(862) 0x47056b IMUL %R12,%R9 |
(862) 0x47056f MOVQ $0,-0xd0(%RBP) |
(862) 0x47057a IMUL %R12,%R10 |
(862) 0x47057e MOV %RDI,-0xd8(%RBP) |
(862) 0x470585 LEA (%RDX,%RSI,8),%R8 |
(862) 0x470589 MOV 0x180(%RBX),%RSI |
(862) 0x470590 MOV %RDI,-0xa8(%RBP) |
(862) 0x470597 LEA (%R9,%R9,2),%RCX |
(862) 0x47059b MOV %R8,-0xc8(%RBP) |
(862) 0x4705a2 MOV -0x100(%RBP),%R9 |
(862) 0x4705a9 LEA (%R11,%RCX,8),%RAX |
(862) 0x4705ad LEA (%RSI,%R10,8),%RDX |
(862) 0x4705b1 MOV %RDI,-0x78(%RBP) |
(862) 0x4705b5 MOV -0xe4(%RBP),%EDI |
(862) 0x4705bb MOV %RDX,-0x68(%RBP) |
(862) 0x4705bf MOV (%R14),%R11 |
(862) 0x4705c2 MOV -0xf8(%RBP),%R8 |
(862) 0x4705c9 MOV -0x108(%RBP),%RCX |
(862) 0x4705d0 MOVQ $0,-0xa0(%RBP) |
(862) 0x4705db LEA (%RDI,%R12,1),%EDX |
(862) 0x4705df MOV %RAX,-0x98(%RBP) |
(862) 0x4705e6 MOV -0xf0(%RBP),%RSI |
(862) 0x4705ed MOV %R14,%RDI |
(862) 0x4705f0 MOVQ $0,-0x70(%RBP) |
(862) 0x4705f8 CALLQ 0x18(%R11) |
(862) 0x4705fc MOV -0x70(%RBP),%R8 |
(862) 0x470600 MOV %R13,-0x80(%RBP) |
(862) 0x470604 TEST %R8,%R8 |
(862) 0x470607 JE 4704f0 |
(862) 0x47060d MOV -0x68(%RBP),%RDI |
(862) 0x470611 LEA (,%R8,8),%RSI |
(862) 0x470619 CALL 404070 <_ZdlPvm@plt> |
(862) 0x47061e JMP 4704f0 |
0x470623 NOPL (%RAX,%RAX,1) |
(862) 0x470628 MOV -0xc8(%RBP),%RDI |
(862) 0x47062f LEA (,%RCX,8),%RSI |
(862) 0x470637 INC %R12 |
(862) 0x47063a CALL 404070 <_ZdlPvm@plt> |
(862) 0x47063f CMP %R12,-0x110(%RBP) |
(862) 0x470646 JNE 47052e |
0x47064c MOV 0x468(%RBX),%RDI |
0x470653 CALL 4a4310 <_ZN11qmcplusplus9TimerTypeINS_8CPUClockEE4stopEv> |
0x470658 CMPL $0x1,0x484(%RBX) |
0x47065f JE 470a79 |
0x470665 MOV -0x128(%RBP),%RSI |
0x47066c LEA 0xd0(%RBX),%RDX |
0x470673 MOV %RBX,%RDI |
0x470676 CALL 46ed80 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10invertPsiMERKNS1_6MatrixIdSaIdEEERS7_> |
0x47067b MOV 0x484(%RBX),%R13D |
0x470682 CMP $0x1,%R13D |
0x470686 JE 470b9b |
0x47068c MOVSXD 0x478(%RBX),%RAX |
0x470693 TEST %R13D,%R13D |
0x470696 JLE 470a10 |
0x47069c MOVSXD %R13D,%R9 |
0x47069f MOV 0x140(%RBX),%R14 |
0x4706a6 MOV -0x118(%RBP),%RSI |
0x4706ad LEA (%RAX,%RAX,2),%R12 |
0x4706b1 MOV -0x120(%RBP),%R13 |
0x4706b8 MOVSXD 0x480(%RBX),%R11 |
0x4706bf MOV %R9,-0xf0(%RBP) |
0x4706c6 VXORPD %XMM4,%XMM4,%XMM4 |
0x4706ca MOV 0xd8(%RBX),%RDI |
0x4706d1 MOV 0x18(%RSI),%RDX |
0x4706d5 MOV %R14,-0xf8(%RBP) |
0x4706dc XOR %R9D,%R9D |
0x4706df MOV 0x100(%RBX),%RCX |
0x4706e6 MOV 0x18(%R13),%R14 |
0x4706ea MOV %R11,%R10 |
0x4706ed SAL $0x3,%R11 |
0x4706f1 MOV 0x118(%RBX),%R15 |
0x4706f8 LEA (%RDX,%R12,8),%RSI |
0x4706fc LEA (,%RDI,8),%R12 |
0x470704 XOR %EDI,%EDI |
0x470706 LEA (%R11,%RCX,1),%R8 |
0x47070a LEA (%R14,%RAX,8),%R13 |
0x47070e XCHG %AX,%AX |
(859) 0x470710 MOVQ $0,-0x40(%RBP) |
(859) 0x470718 VMOVAPD %XMM4,-0x50(%RBP) |
(859) 0x47071d TEST %R10D,%R10D |
(859) 0x470720 JLE 470a6c |
(859) 0x470726 MOV %R8,%R14 |
(859) 0x470729 MOV -0xf8(%RBP),%RDX |
(859) 0x470730 LEA (%R9,%R9,2),%RAX |
(859) 0x470734 VXORPD %XMM0,%XMM0,%XMM0 |
(859) 0x470738 SUB %RCX,%R14 |
(859) 0x47073b SUB $0x8,%R14 |
(859) 0x47073f LEA (%RDX,%RAX,8),%RAX |
(859) 0x470743 MOV %RCX,%RDX |
(859) 0x470746 SHR $0x3,%R14 |
(859) 0x47074a AND $0x3,%R14D |
(859) 0x47074e JE 470a60 |
(859) 0x470754 VMOVAPD -0x50(%RBP),%XMM5 |
(859) 0x470759 VMOVDDUP (%RCX),%XMM3 |
(859) 0x47075d VMOVSD (%RCX),%XMM6 |
(859) 0x470761 LEA 0x8(%RCX),%RDX |
(859) 0x470765 ADD $0x18,%RAX |
(859) 0x470769 VFMADD132PD -0x18(%RAX),%XMM5,%XMM3 |
(859) 0x47076f VFMADD231SD -0x8(%RAX),%XMM6,%XMM0 |
(859) 0x470775 VMOVAPD %XMM3,-0x50(%RBP) |
(859) 0x47077a CMP $0x1,%R14 |
(859) 0x47077e JE 470a60 |
(859) 0x470784 CMP $0x2,%R14 |
(859) 0x470788 JE 4707b0 |
(859) 0x47078a VMOVAPD -0x50(%RBP),%XMM8 |
(859) 0x47078f VMOVDDUP (%RDX),%XMM7 |
(859) 0x470793 VMOVSD (%RDX),%XMM9 |
(859) 0x470797 ADD $0x18,%RAX |
(859) 0x47079b LEA 0x10(%RCX),%RDX |
(859) 0x47079f VFMADD132PD -0x18(%RAX),%XMM8,%XMM7 |
(859) 0x4707a5 VFMADD231SD -0x8(%RAX),%XMM9,%XMM0 |
(859) 0x4707ab VMOVAPD %XMM7,-0x50(%RBP) |
(859) 0x4707b0 VMOVAPD -0x50(%RBP),%XMM11 |
(859) 0x4707b5 VMOVDDUP (%RDX),%XMM10 |
(859) 0x4707b9 VMOVSD (%RDX),%XMM12 |
(859) 0x4707bd MOV %R10D,-0xe4(%RBP) |
(859) 0x4707c4 ADD $0x8,%RDX |
(859) 0x4707c8 ADD $0x18,%RAX |
(859) 0x4707cc VFMADD132PD -0x18(%RAX),%XMM11,%XMM10 |
(859) 0x4707d2 VFMADD231SD -0x8(%RAX),%XMM12,%XMM0 |
(859) 0x4707d8 VMOVAPD %XMM10,-0x50(%RBP) |
(859) 0x4707dd JMP 470834 |
0x4707df NOP |
(860) 0x4707e0 VMOVAPD -0x50(%RBP),%XMM5 |
(860) 0x4707e5 VMOVDDUP 0x8(%RDX),%XMM15 |
(860) 0x4707ea VMOVSD 0x8(%RDX),%XMM6 |
(860) 0x4707ef VMOVDDUP 0x10(%RDX),%XMM7 |
(860) 0x4707f4 VMOVSD 0x10(%RDX),%XMM8 |
(860) 0x4707f9 VMOVDDUP 0x18(%RDX),%XMM9 |
(860) 0x4707fe VMOVSD 0x18(%RDX),%XMM10 |
(860) 0x470803 ADD $0x60,%RAX |
(860) 0x470807 VFMADD132PD -0x48(%RAX),%XMM5,%XMM15 |
(860) 0x47080d VFMADD231SD 0x10(%R14),%XMM6,%XMM0 |
(860) 0x470813 ADD $0x20,%RDX |
(860) 0x470817 VFMADD231PD -0x30(%RAX),%XMM7,%XMM15 |
(860) 0x47081d VFMADD231SD -0x20(%RAX),%XMM8,%XMM0 |
(860) 0x470823 VFMADD231PD -0x18(%RAX),%XMM9,%XMM15 |
(860) 0x470829 VFMADD231SD -0x8(%RAX),%XMM10,%XMM0 |
(860) 0x47082f VMOVAPD %XMM15,-0x50(%RBP) |
(860) 0x470834 VMOVSD (%RDX),%XMM2 |
(860) 0x470838 VMOVAPD -0x50(%RBP),%XMM15 |
(860) 0x47083d LEA 0x8(%RDX),%R10 |
(860) 0x470841 LEA 0x18(%RAX),%R14 |
(860) 0x470845 VMOVSD -0x50(%RBP),%XMM3 |
(860) 0x47084a VMOVSD (%RAX),%XMM13 |
(860) 0x47084e VMOVDDUP %XMM2,%XMM14 |
(860) 0x470852 VFMADD231SD 0x10(%RAX),%XMM2,%XMM0 |
(860) 0x470858 VFMADD132PD (%RAX),%XMM15,%XMM14 |
(860) 0x47085d VMOVAPD %XMM14,-0x50(%RBP) |
(860) 0x470862 CMP %R8,%R10 |
(860) 0x470865 JNE 4707e0 |
(859) 0x47086b MOV 0x158(%RBX),%RDX |
(859) 0x470872 LEA -0x8(%R11),%R14 |
(859) 0x470876 MOV 0x180(%RBX),%RAX |
(859) 0x47087d VMOVSD %XMM0,-0x40(%RBP) |
(859) 0x470882 SHR $0x3,%R14 |
(859) 0x470886 MOV -0xe4(%RBP),%R10D |
(859) 0x47088d VXORPD %XMM1,%XMM1,%XMM1 |
(859) 0x470891 IMUL %RDI,%RDX |
(859) 0x470895 INC %R14 |
(859) 0x470898 LEA (%RAX,%RDX,8),%RDX |
(859) 0x47089c XOR %EAX,%EAX |
(859) 0x47089e AND $0x7,%R14D |
(859) 0x4708a2 JE 470939 |
(859) 0x4708a8 CMP $0x1,%R14 |
(859) 0x4708ac JE 470925 |
(859) 0x4708ae CMP $0x2,%R14 |
(859) 0x4708b2 JE 470916 |
(859) 0x4708b4 CMP $0x3,%R14 |
(859) 0x4708b8 JE 470907 |
(859) 0x4708ba CMP $0x4,%R14 |
(859) 0x4708be JE 4708f8 |
(859) 0x4708c0 CMP $0x5,%R14 |
(859) 0x4708c4 JE 4708e9 |
(859) 0x4708c6 CMP $0x6,%R14 |
(859) 0x4708ca JE 4708da |
(859) 0x4708cc VMOVSD (%RCX),%XMM5 |
(859) 0x4708d0 MOV $0x8,%EAX |
(859) 0x4708d5 VFMADD231SD (%RDX),%XMM5,%XMM1 |
(859) 0x4708da VMOVSD (%RCX,%RAX,1),%XMM6 |
(859) 0x4708df VFMADD231SD (%RDX,%RAX,1),%XMM6,%XMM1 |
(859) 0x4708e5 ADD $0x8,%RAX |
(859) 0x4708e9 VMOVSD (%RCX,%RAX,1),%XMM7 |
(859) 0x4708ee VFMADD231SD (%RDX,%RAX,1),%XMM7,%XMM1 |
(859) 0x4708f4 ADD $0x8,%RAX |
(859) 0x4708f8 VMOVSD (%RCX,%RAX,1),%XMM8 |
(859) 0x4708fd VFMADD231SD (%RDX,%RAX,1),%XMM8,%XMM1 |
(859) 0x470903 ADD $0x8,%RAX |
(859) 0x470907 VMOVSD (%RCX,%RAX,1),%XMM9 |
(859) 0x47090c VFMADD231SD (%RDX,%RAX,1),%XMM9,%XMM1 |
(859) 0x470912 ADD $0x8,%RAX |
(859) 0x470916 VMOVSD (%RCX,%RAX,1),%XMM10 |
(859) 0x47091b VFMADD231SD (%RDX,%RAX,1),%XMM10,%XMM1 |
(859) 0x470921 ADD $0x8,%RAX |
(859) 0x470925 VMOVSD (%RCX,%RAX,1),%XMM11 |
(859) 0x47092a VFMADD231SD (%RDX,%RAX,1),%XMM11,%XMM1 |
(859) 0x470930 ADD $0x8,%RAX |
(859) 0x470934 CMP %RAX,%R11 |
(859) 0x470937 JE 4709a8 |
(861) 0x470939 VMOVSD (%RCX,%RAX,1),%XMM12 |
(861) 0x47093e VMOVSD 0x8(%RCX,%RAX,1),%XMM15 |
(861) 0x470944 VMOVSD 0x10(%RCX,%RAX,1),%XMM5 |
(861) 0x47094a VMOVSD 0x18(%RCX,%RAX,1),%XMM6 |
(861) 0x470950 VFMADD231SD (%RDX,%RAX,1),%XMM12,%XMM1 |
(861) 0x470956 VMOVSD 0x20(%RCX,%RAX,1),%XMM7 |
(861) 0x47095c VMOVSD 0x28(%RCX,%RAX,1),%XMM8 |
(861) 0x470962 VMOVSD 0x30(%RCX,%RAX,1),%XMM9 |
(861) 0x470968 VMOVSD 0x38(%RCX,%RAX,1),%XMM10 |
(861) 0x47096e VFMADD231SD 0x8(%RDX,%RAX,1),%XMM15,%XMM1 |
(861) 0x470975 VFMADD231SD 0x10(%RDX,%RAX,1),%XMM5,%XMM1 |
(861) 0x47097c VFMADD231SD 0x18(%RDX,%RAX,1),%XMM6,%XMM1 |
(861) 0x470983 VFMADD231SD 0x20(%RDX,%RAX,1),%XMM7,%XMM1 |
(861) 0x47098a VFMADD231SD 0x28(%RDX,%RAX,1),%XMM8,%XMM1 |
(861) 0x470991 VFMADD231SD 0x30(%RDX,%RAX,1),%XMM9,%XMM1 |
(861) 0x470998 VFMADD231SD 0x38(%RDX,%RAX,1),%XMM10,%XMM1 |
(861) 0x47099f ADD $0x40,%RAX |
(861) 0x4709a3 CMP %RAX,%R11 |
(861) 0x4709a6 JNE 470939 |
(859) 0x4709a8 VFMADD132SD %XMM13,%XMM3,%XMM2 |
(859) 0x4709ad VUNPCKHPD %XMM14,%XMM14,%XMM13 |
(859) 0x4709b2 VMULSD %XMM13,%XMM13,%XMM14 |
(859) 0x4709b7 VFMADD132SD %XMM0,%XMM14,%XMM0 |
(859) 0x4709bc VFMADD132SD %XMM2,%XMM0,%XMM2 |
(859) 0x4709c1 VMOVUPD (%RSI),%XMM0 |
(859) 0x4709c5 VMOVSD 0x10(%RSI),%XMM11 |
(859) 0x4709ca ADD %R12,%RCX |
(859) 0x4709cd ADD $0x18,%RSI |
(859) 0x4709d1 MOV -0xf0(%RBP),%RDX |
(859) 0x4709d8 ADD %R15,%R9 |
(859) 0x4709db ADD %R12,%R8 |
(859) 0x4709de VADDPD -0x50(%RBP),%XMM0,%XMM3 |
(859) 0x4709e3 VADDSD -0x40(%RBP),%XMM11,%XMM12 |
(859) 0x4709e8 VMOVUPD %XMM3,-0x18(%RSI) |
(859) 0x4709ed VMOVSD %XMM12,-0x8(%RSI) |
(859) 0x4709f2 VADDSD (%R13,%RDI,8),%XMM1,%XMM1 |
(859) 0x4709f9 VSUBSD %XMM2,%XMM1,%XMM2 |
(859) 0x4709fd VMOVSD %XMM2,(%R13,%RDI,8) |
(859) 0x470a04 INC %RDI |
(859) 0x470a07 CMP %RDX,%RDI |
(859) 0x470a0a JNE 470710 |
0x470a10 VMOVSD 0x10(%RBX),%XMM0 |
0x470a15 MOV -0x38(%RBP),%RAX |
0x470a19 SUB %FS:0x28,%RAX |
0x470a22 JNE 470bdd |
0x470a28 LEA -0x28(%RBP),%RSP |
0x470a2c POP %RBX |
0x470a2d POP %R12 |
0x470a2f POP %R13 |
0x470a31 POP %R14 |
0x470a33 POP %R15 |
0x470a35 POP %RBP |
0x470a36 RET |
0x470a37 NOPW (%RAX,%RAX,1) |
(862) 0x470a40 LEA (%R9,%R9,2),%RSI |
(862) 0x470a44 MOV -0x98(%RBP),%RDI |
(862) 0x470a4b SAL $0x3,%RSI |
(862) 0x470a4f CALL 404070 <_ZdlPvm@plt> |
(862) 0x470a54 JMP 470507 |
0x470a59 NOPL (%RAX) |
(859) 0x470a60 MOV %R10D,-0xe4(%RBP) |
(859) 0x470a67 JMP 470834 |
(859) 0x470a6c VXORPD %XMM2,%XMM2,%XMM2 |
(859) 0x470a70 VMOVSD %XMM2,%XMM2,%XMM1 |
(859) 0x470a74 JMP 4709c1 |
0x470a79 MOV 0xc0(%RBX),%R11 |
0x470a80 VMOVSD 0x5f558(%RIP),%XMM4 |
0x470a88 VXORPD %XMM14,%XMM14,%XMM14 |
0x470a8d MOV 0x100(%RBX),%R15 |
0x470a94 VMOVSD (%R11),%XMM0 |
0x470a99 VDIVSD %XMM0,%XMM4,%XMM13 |
0x470a9d VCOMISD %XMM0,%XMM14 |
0x470aa1 VMOVSD %XMM13,(%R15) |
0x470aa6 JA 470b81 |
0x470aac CALL 4040b0 <log@plt> |
0x470ab1 VXORPD %XMM3,%XMM3,%XMM3 |
0x470ab5 VUNPCKLPD %XMM3,%XMM0,%XMM0 |
0x470ab9 VMOVUPD %XMM0,0x10(%RBX) |
0x470abe VMOVSD (%R15),%XMM11 |
0x470ac3 MOV 0x140(%RBX),%RSI |
0x470aca MOV -0x118(%RBP),%R8 |
0x470ad1 MOVSXD 0x478(%RBX),%R12 |
0x470ad8 VMOVDDUP %XMM11,%XMM12 |
0x470add MOV -0x120(%RBP),%R10 |
0x470ae4 MOV 0x180(%RBX),%RDI |
0x470aeb VMULPD (%RSI),%XMM12,%XMM1 |
0x470aef MOV 0x18(%R8),%R13 |
0x470af3 LEA (%R12,%R12,2),%RCX |
0x470af7 VMULSD 0x10(%RSI),%XMM11,%XMM2 |
0x470afc MOV 0x18(%R10),%R14 |
0x470b00 LEA (%R13,%RCX,8),%R9 |
0x470b05 LEA (%R14,%R12,8),%RAX |
0x470b09 VADDPD (%R9),%XMM1,%XMM6 |
0x470b0e VUNPCKHPD %XMM1,%XMM1,%XMM15 |
0x470b12 VMOVSD %XMM1,%XMM1,%XMM5 |
0x470b16 VADDSD 0x10(%R9),%XMM2,%XMM7 |
0x470b1c VMULSD %XMM15,%XMM15,%XMM8 |
0x470b21 VMOVUPD %XMM6,(%R9) |
0x470b26 VMOVSD %XMM7,0x10(%R9) |
0x470b2c VMOVSD (%RAX),%XMM9 |
0x470b30 VFMADD132SD %XMM1,%XMM8,%XMM5 |
0x470b35 VFMADD132SD (%RDI),%XMM9,%XMM11 |
0x470b3a VSUBSD %XMM5,%XMM11,%XMM10 |
0x470b3e VFNMADD132SD %XMM2,%XMM10,%XMM2 |
0x470b43 VMOVSD %XMM2,(%RAX) |
0x470b47 JMP 470a10 |
0x470b4c SUB $0x8,%RSP |
0x470b50 LEA 0x150(%RBX),%R8 |
0x470b57 MOV -0xe4(%RBP),%EDX |
0x470b5d MOV -0xf0(%RBP),%RSI |
0x470b64 PUSH %R8 |
0x470b66 MOV -0x128(%RBP),%R8 |
0x470b6d LEA 0x110(%RBX),%R9 |
0x470b74 MOV %R14,%RDI |
0x470b77 CALL %R10 |
0x470b7a POP %RAX |
0x470b7b POP %RDX |
0x470b7c JMP 47064c |
0x470b81 VXORPD 0x5e127(%RIP),%XMM0,%XMM0 |
0x470b89 CALL 4040b0 <log@plt> |
0x470b8e VMOVSD 0x5f452(%RIP),%XMM3 |
0x470b96 JMP 470ab5 |
0x470b9b MOV 0x100(%RBX),%R15 |
0x470ba2 JMP 470abe |
(858) 0x470ba7 MOV -0x100(%RBP),%RDI |
(858) 0x470bae VZEROUPPER |
(858) 0x470bb1 CALL 422020 <_ZN11qmcplusplus6VectorIdSaIdEED1Ev> |
(858) 0x470bb6 MOV -0xf8(%RBP),%RDI |
(858) 0x470bbd CALL 40f950 <_ZN11qmcplusplus6VectorINS_10TinyVectorIdLj3EEESaIS2_EED1Ev> |
(858) 0x470bc2 MOV -0x108(%RBP),%RDI |
(858) 0x470bc9 CALL 422020 <_ZN11qmcplusplus6VectorIdSaIdEED1Ev> |
(858) 0x470bce MOV -0x38(%RBP),%RAX |
(858) 0x470bd2 SUB %FS:0x28,%RAX |
(858) 0x470bdb JE 470be7 |
(858) 0x470bdd CALL 404140 <__stack_chk_fail@plt> |
(858) 0x470be2 MOV %RAX,%RBX |
(858) 0x470be5 JMP 470ba7 |
0x470be7 MOV %RBX,%RDI |
0x470bea CALL 404230 <_Unwind_Resume@plt> |
0x470bef NOP |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►50.00+ | qmcplusplus::WaveFunction::eva[...] | WaveFunction.cpp:177 | exec |
○ | main._omp_fn.0 | miniqmc.cpp:390 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
►50.00+ | qmcplusplus::WaveFunction::eva[...] | WaveFunction.cpp:178 | exec |
○ | main._omp_fn.0 | miniqmc.cpp:390 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | DiracDeterminantRef.cpp:231-273 |
Module | exec |
nb instructions | 157 |
nb uops | 164 |
loop length | 776 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 16 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 14 |
ADD-SUB / MUL ratio | 1.00 |
micro-operation queue | 41.00 cycles |
front end | 41.00 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 16.50 | 16.50 | 29.33 | 29.33 | 31.00 | 16.50 | 16.50 | 29.33 |
cycles | 16.50 | 16.50 | 29.33 | 29.33 | 31.00 | 16.50 | 16.50 | 29.33 |
Cycles executing div or sqrt instructions | 4.00 |
FE+BE cycles | 39.57 |
Stall cycles | 0.00 |
Front-end | 41.00 |
Dispatch | 31.00 |
DIV/SQRT | 4.00 |
Overall L1 | 41.00 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 26% |
load | 25% |
store | 40% |
mul | 33% |
add-sub | 33% |
fma | 0% |
div/sqrt | 0% |
other | 44% |
all | 17% |
load | 23% |
store | 11% |
mul | 33% |
add-sub | 33% |
fma | 0% |
div/sqrt | 0% |
other | 28% |
all | 10% |
load | 6% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 15% |
load | 15% |
store | 17% |
mul | 16% |
add-sub | 16% |
fma | 12% |
div/sqrt | 12% |
other | 18% |
all | 13% |
load | 14% |
store | 13% |
mul | 16% |
add-sub | 16% |
fma | 12% |
div/sqrt | 12% |
other | 14% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB $0x108,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RSI,-0xf0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDX,-0x118(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x468(%RDI),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RCX,-0x120(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %FS:0x28,%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4a40e0 <_ZN11qmcplusplus9TimerTypeINS_8CPUClockEE5startEv> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV 0x470(%RBX),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x478(%RBX),%ESI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA 0x90(%RBX),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x1f3f2(%RIP),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,-0x128(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x47c(%RBX),%ECX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%R14),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %ESI,-0xe4(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x28(%RAX),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %RDX,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 470b4c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP %ESI,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 47064c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
SUB %ESI,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA -0x80(%RBP),%R11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0xb0(%RBP),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA -0xe0(%RBP),%R10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x110(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA 0x6e15e(%RIP),%R13 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,-0x100(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA 0x6e110(%RIP),%R15 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0xf8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R10,-0x108(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JMP 47052e | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x468(%RBX),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CALL 4a4310 <_ZN11qmcplusplus9TimerTypeINS_8CPUClockEE4stopEv> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
CMPL $0x1,0x484(%RBX) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JE 470a79 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x128(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA 0xd0(%RBX),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 46ed80 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10invertPsiMERKNS1_6MatrixIdSaIdEEERS7_> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV 0x484(%RBX),%R13D | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP $0x1,%R13D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 470b9b | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD 0x478(%RBX),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %R13D,%R13D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 470a10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD %R13D,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x140(%RBX),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x118(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%RAX,%RAX,2),%R12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x120(%RBP),%R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOVSXD 0x480(%RBX),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R9,-0xf0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0xd8(%RBX),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x18(%RSI),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R14,-0xf8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x100(%RBX),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x18(%R13),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R11,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x3,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV 0x118(%RBX),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%RDX,%R12,8),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%RDI,8),%R12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA (%R11,%RCX,1),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R14,%RAX,8),%R13 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSD 0x10(%RBX),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
SUB %FS:0x28,%RAX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JNE 470bdd | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0xc0(%RBX),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x5f558(%RIP),%XMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VXORPD %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x100(%RBX),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD (%R11),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VDIVSD %XMM0,%XMM4,%XMM13 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 4 |
VCOMISD %XMM0,%XMM14 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVSD %XMM13,(%R15) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JA 470b81 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CALL 4040b0 <log@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VUNPCKLPD %XMM3,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVUPD %XMM0,0x10(%RBX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD (%R15),%XMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x140(%RBX),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x118(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOVSXD 0x478(%RBX),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVDDUP %XMM11,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
MOV -0x120(%RBP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x180(%RBX),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMULPD (%RSI),%XMM12,%XMM1 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x18(%R8),%R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%R12,%R12,2),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD 0x10(%RSI),%XMM11,%XMM2 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x18(%R10),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%R13,%RCX,8),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R14,%R12,8),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VADDPD (%R9),%XMM1,%XMM6 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VUNPCKHPD %XMM1,%XMM1,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVSD %XMM1,%XMM1,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSD 0x10(%R9),%XMM2,%XMM7 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM15,%XMM15,%XMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %XMM6,(%R9) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD %XMM7,0x10(%R9) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD (%RAX),%XMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VFMADD132SD %XMM1,%XMM8,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD (%RDI),%XMM9,%XMM11 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSD %XMM5,%XMM11,%XMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMADD132SD %XMM2,%XMM10,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM2,(%RAX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JMP 470a10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x150(%RBX),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0xe4(%RBP),%EDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0xf0(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
PUSH %R8 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV -0x128(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA 0x110(%RBX),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL %R10 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 3 |
POP %RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
JMP 47064c | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
VXORPD 0x5e127(%RIP),%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
CALL 4040b0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
VMOVSD 0x5f452(%RIP),%XMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 470ab5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV 0x100(%RBX),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 470abe | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 404230 <_Unwind_Resume@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | DiracDeterminantRef.cpp:231-273 |
Module | exec |
nb instructions | 157 |
nb uops | 164 |
loop length | 776 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 16 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 14 |
ADD-SUB / MUL ratio | 1.00 |
micro-operation queue | 41.00 cycles |
front end | 41.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 16.50 | 16.50 | 29.33 | 29.33 | 31.00 | 16.50 | 16.50 | 29.33 |
cycles | 16.50 | 16.50 | 29.33 | 29.33 | 31.00 | 16.50 | 16.50 | 29.33 |
Cycles executing div or sqrt instructions | 4.00 |
FE+BE cycles | 39.57 |
Stall cycles | 0.00 |
Front-end | 41.00 |
Dispatch | 31.00 |
DIV/SQRT | 4.00 |
Overall L1 | 41.00 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 26% |
load | 25% |
store | 40% |
mul | 33% |
add-sub | 33% |
fma | 0% |
div/sqrt | 0% |
other | 44% |
all | 17% |
load | 23% |
store | 11% |
mul | 33% |
add-sub | 33% |
fma | 0% |
div/sqrt | 0% |
other | 28% |
all | 10% |
load | 6% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 15% |
load | 15% |
store | 17% |
mul | 16% |
add-sub | 16% |
fma | 12% |
div/sqrt | 12% |
other | 18% |
all | 13% |
load | 14% |
store | 13% |
mul | 16% |
add-sub | 16% |
fma | 12% |
div/sqrt | 12% |
other | 14% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB $0x108,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RSI,-0xf0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDX,-0x118(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x468(%RDI),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RCX,-0x120(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %FS:0x28,%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4a40e0 <_ZN11qmcplusplus9TimerTypeINS_8CPUClockEE5startEv> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV 0x470(%RBX),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x478(%RBX),%ESI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA 0x90(%RBX),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x1f3f2(%RIP),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,-0x128(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x47c(%RBX),%ECX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%R14),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %ESI,-0xe4(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x28(%RAX),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %RDX,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 470b4c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP %ESI,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 47064c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
SUB %ESI,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA -0x80(%RBP),%R11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0xb0(%RBP),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA -0xe0(%RBP),%R10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x110(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA 0x6e15e(%RIP),%R13 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,-0x100(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA 0x6e110(%RIP),%R15 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0xf8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R10,-0x108(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JMP 47052e | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x468(%RBX),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CALL 4a4310 <_ZN11qmcplusplus9TimerTypeINS_8CPUClockEE4stopEv> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
CMPL $0x1,0x484(%RBX) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JE 470a79 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x128(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA 0xd0(%RBX),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 46ed80 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10invertPsiMERKNS1_6MatrixIdSaIdEEERS7_> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV 0x484(%RBX),%R13D | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP $0x1,%R13D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 470b9b | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD 0x478(%RBX),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %R13D,%R13D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 470a10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD %R13D,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x140(%RBX),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x118(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%RAX,%RAX,2),%R12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x120(%RBP),%R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOVSXD 0x480(%RBX),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R9,-0xf0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0xd8(%RBX),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x18(%RSI),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R14,-0xf8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x100(%RBX),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x18(%R13),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R11,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x3,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV 0x118(%RBX),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%RDX,%R12,8),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%RDI,8),%R12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA (%R11,%RCX,1),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R14,%RAX,8),%R13 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSD 0x10(%RBX),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
SUB %FS:0x28,%RAX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JNE 470bdd | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0xc0(%RBX),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x5f558(%RIP),%XMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VXORPD %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x100(%RBX),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD (%R11),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VDIVSD %XMM0,%XMM4,%XMM13 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 4 |
VCOMISD %XMM0,%XMM14 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVSD %XMM13,(%R15) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JA 470b81 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CALL 4040b0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VUNPCKLPD %XMM3,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVUPD %XMM0,0x10(%RBX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD (%R15),%XMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x140(%RBX),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x118(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOVSXD 0x478(%RBX),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVDDUP %XMM11,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
MOV -0x120(%RBP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x180(%RBX),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMULPD (%RSI),%XMM12,%XMM1 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x18(%R8),%R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%R12,%R12,2),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD 0x10(%RSI),%XMM11,%XMM2 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x18(%R10),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%R13,%RCX,8),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R14,%R12,8),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VADDPD (%R9),%XMM1,%XMM6 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VUNPCKHPD %XMM1,%XMM1,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVSD %XMM1,%XMM1,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSD 0x10(%R9),%XMM2,%XMM7 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM15,%XMM15,%XMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %XMM6,(%R9) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD %XMM7,0x10(%R9) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD (%RAX),%XMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VFMADD132SD %XMM1,%XMM8,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD (%RDI),%XMM9,%XMM11 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSD %XMM5,%XMM11,%XMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMADD132SD %XMM2,%XMM10,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM2,(%RAX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JMP 470a10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x150(%RBX),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0xe4(%RBP),%EDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0xf0(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
PUSH %R8 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV -0x128(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA 0x110(%RBX),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL %R10 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 3 |
POP %RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
JMP 47064c | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
VXORPD 0x5e127(%RIP),%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
CALL 4040b0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
VMOVSD 0x5f452(%RIP),%XMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 470ab5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV 0x100(%RBX),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 470abe | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 404230 <_Unwind_Resume@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼miniqmcreference::DiracDeterminantRef | 0.11 | 0.11 |
○Loop 862 - SPOSet.h:106-111 - exec | 0 | 0 |
○Loop 858 - SPOSet.h:112-112 - exec | 0 | 0 |
▼Loop 859 - DiracDeterminantRef.cpp:247-252 - exec– | 0 | 0 |
○Loop 861 - inner_product.hpp:81-82 - exec | 0.06 | 0.05 |
○Loop 860 - inner_product.hpp:154-154 - exec | 0.06 | 0.05 |