Function: miniqmcreference::DiracDeterminantRef<qmcplusplus::DelayedUpdate<double, double> >::evalua ... | Module: exec | Source: DiracDeterminantRef.cpp:152-181 [...] | Coverage: 0.51% |
---|
Function: miniqmcreference::DiracDeterminantRef<qmcplusplus::DelayedUpdate<double, double> >::evalua ... | Module: exec | Source: DiracDeterminantRef.cpp:152-181 [...] | Coverage: 0.51% |
---|
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsMatrix.h: 217 - 217 |
-------------------------------------------------------------------------------- |
217: inline Type_t* operator[](size_type i) { return X.data() + i * D2; } |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/SPOSet.h: 106 - 111 |
-------------------------------------------------------------------------------- |
106: for (int iat = first, i = 0; iat < last; ++iat, ++i) |
107: { |
108: ValueVector_t v(logdet[i], OrbitalSetSize); |
109: GradVector_t g(dlogdet[i], OrbitalSetSize); |
110: ValueVector_t l(d2logdet[i], OrbitalSetSize); |
111: evaluate(P, iat, v, g, l); |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Numerics/PETE/OperatorTags.h: 63 - 94 |
-------------------------------------------------------------------------------- |
63: return (a * b); |
[...] |
92: inline typename BinaryReturn<T1, T2, OpAddAssign>::Type_t operator()(const T1& a, const T2& b) const |
93: { |
94: (const_cast<T1&>(a) += b); |
/software/compilers/gcc/gcc-13.1.0-full+isl+binutils/include/c++/13.1.0/bits/new_allocator.h: 100 - 168 |
-------------------------------------------------------------------------------- |
100: ~__new_allocator() _GLIBCXX_USE_NOEXCEPT { } |
[...] |
168: _GLIBCXX_OPERATOR_DELETE(_GLIBCXX_SIZED_DEALLOC(__p, __n)); |
/software/compilers/gcc/gcc-13.1.0-full+isl+binutils/include/c++/13.1.0/bits/unique_ptr.h: 199 - 199 |
-------------------------------------------------------------------------------- |
199: pointer _M_ptr() const noexcept { return std::get<0>(_M_t); } |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 56 - 248 |
-------------------------------------------------------------------------------- |
56: explicit inline Vector(T* ref, size_t n) : nLocal(n), X(ref) {} |
[...] |
144: virtual ~Vector() { free(); } |
[...] |
210: if (nAllocated) |
[...] |
223: return X[i]; |
[...] |
248: inline pointer data() { return X; } |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVector.h: 62 - 62 |
-------------------------------------------------------------------------------- |
62: X[d] = T(0); |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVectorOps.h: 129 - 131 |
-------------------------------------------------------------------------------- |
129: Type_t res = lhs[0] * rhs[0]; |
130: for (unsigned d = 1; d < D; ++d) |
131: res += lhs[d] * rhs[d]; |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/DiracDeterminantRef.cpp: 152 - 181 |
-------------------------------------------------------------------------------- |
152: void DiracDeterminantRef<DU_TYPE>::evaluateGL(ParticleSet& P, |
153: ParticleSet::ParticleGradient& G, |
154: ParticleSet::ParticleLaplacian& L, |
155: bool fromscratch) |
156: { |
157: if (UpdateMode == ORB_PBYP_RATIO) |
158: { //need to compute dpsiM and d2psiM. Do not touch psiM! |
159: SPOVGLTimer->start(); |
160: Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_temp, dpsiM, d2psiM); |
161: SPOVGLTimer->stop(); |
162: } |
163: |
164: if (NumPtcls == 1) |
165: { |
166: ValueType y = psiM(0, 0); |
167: GradType rv = y * dpsiM(0, 0); |
168: G[FirstIndex] += rv; |
169: L[FirstIndex] += y * d2psiM(0, 0) - dot(rv, rv); |
170: } |
171: else |
172: { |
173: for (size_t i = 0, iat = FirstIndex; i < NumPtcls; ++i, ++iat) |
174: { |
175: mValueType dot_temp = simd::dot(psiM[i], d2psiM[i], NumOrbitals); |
176: mGradType rv = simd::dot(psiM[i], dpsiM[i], NumOrbitals); |
177: G[iat] += rv; |
178: L[iat] += dot_temp - dot(rv, rv); |
179: } |
180: } |
181: } |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Platforms/CPU/SIMD/inner_product.hpp: 82 - 156 |
-------------------------------------------------------------------------------- |
82: for (int i = 0; i < n; i++) |
83: res += a[i] * b[i]; |
[...] |
155: for (int i = 0; i < n; i++) |
156: res += a[i] * b[i]; |
0x491a70 LEA 0x8(%RSP),%R10 |
0x491a75 AND $-0x20,%RSP |
0x491a79 PUSHQ -0x8(%R10) |
0x491a7d PUSH %RBP |
0x491a7e MOV %RSP,%RBP |
0x491a81 PUSH %R15 |
0x491a83 MOV %RDI,%R15 |
0x491a86 PUSH %R14 |
0x491a88 PUSH %R13 |
0x491a8a PUSH %R12 |
0x491a8c PUSH %R10 |
0x491a8e PUSH %RBX |
0x491a8f SUB $0xe0,%RSP |
0x491a96 MOV %RDX,-0xd8(%RBP) |
0x491a9d MOV 0xc(%RDI),%EDI |
0x491aa0 MOV %RCX,-0x100(%RBP) |
0x491aa7 TEST %EDI,%EDI |
0x491aa9 JE 492378 |
0x491aaf MOV 0x484(%R15),%EDX |
0x491ab6 MOVSXD 0x478(%R15),%RAX |
0x491abd CMP $0x1,%EDX |
0x491ac0 JE 49229b |
0x491ac6 MOVSXD %EDX,%RBX |
0x491ac9 MOV %RBX,-0xe8(%RBP) |
0x491ad0 TEST %EDX,%EDX |
0x491ad2 JE 492269 |
0x491ad8 MOV 0x158(%R15),%RCX |
0x491adf MOV -0x100(%RBP),%RDI |
0x491ae6 LEA (%RAX,%RAX,2),%R9 |
0x491aea MOV %R15,-0x110(%RBP) |
0x491af1 MOV 0xd8(%R15),%R13 |
0x491af8 MOV -0xd8(%RBP),%R12 |
0x491aff MOV 0x480(%R15),%EBX |
0x491b06 MOV 0x100(%R15),%R11 |
0x491b0d MOV %RCX,-0xf8(%RBP) |
0x491b14 MOV 0x18(%RDI),%RCX |
0x491b18 LEA (,%R13,8),%RSI |
0x491b20 MOV 0x18(%R12),%R14 |
0x491b25 MOV %R13,-0xf0(%RBP) |
0x491b2c MOV %RSI,-0x108(%RBP) |
0x491b33 LEA -0x1(%RBX),%R10D |
0x491b37 MOV %EBX,%R13D |
0x491b3a MOV 0x180(%R15),%R8 |
0x491b41 LEA (%RCX,%RAX,8),%RAX |
0x491b45 MOV %EBX,-0xc8(%RBP) |
0x491b4b VMOVDQA 0xfbf4d(%RIP),%YMM9 |
0x491b53 SHR $0x2,%EBX |
0x491b56 MOV %RAX,-0x100(%RBP) |
0x491b5d VMOVDQA 0xfbf5b(%RIP),%YMM8 |
0x491b65 MOV %R11,%RDX |
0x491b68 LEA (%R14,%R9,8),%R9 |
0x491b6c VMOVDQA 0xfbf6c(%RIP),%YMM7 |
0x491b74 VMOVDQA 0xfbf84(%RIP),%YMM6 |
0x491b7c MOV %R11,-0xd0(%RBP) |
0x491b83 SAL $0x5,%RBX |
0x491b87 VMOVDQA 0xfb771(%RIP),%YMM5 |
0x491b8f VMOVDQA 0xfb789(%RIP),%YMM4 |
0x491b97 MOV %R10D,-0xdc(%RBP) |
0x491b9e AND $-0x4,%R13D |
0x491ba2 XOR %R12D,%R12D |
0x491ba5 XOR %R11D,%R11D |
0x491ba8 XOR %R10D,%R10D |
0x491bab NOPL (%RAX,%RAX,1) |
(980) 0x491bb0 MOV -0xc8(%RBP),%R15D |
(980) 0x491bb7 LEA (,%R12,8),%RCX |
(980) 0x491bbf TEST %R15D,%R15D |
(980) 0x491bc2 JLE 492329 |
(980) 0x491bc8 CMPL $0x2,-0xdc(%RBP) |
(980) 0x491bcf JBE 492348 |
(980) 0x491bd5 LEA -0x20(%RBX),%RDI |
(980) 0x491bd9 LEA (%R8,%RCX,1),%R14 |
(980) 0x491bdd VXORPD %XMM0,%XMM0,%XMM0 |
(980) 0x491be1 XOR %R15D,%R15D |
(980) 0x491be4 SHR $0x5,%RDI |
(980) 0x491be8 INC %RDI |
(980) 0x491beb AND $0x7,%EDI |
(980) 0x491bee JE 491cb0 |
(980) 0x491bf4 CMP $0x1,%RDI |
(980) 0x491bf8 JE 491c93 |
(980) 0x491bfe CMP $0x2,%RDI |
(980) 0x491c02 JE 491c7e |
(980) 0x491c04 CMP $0x3,%RDI |
(980) 0x491c08 JE 491c69 |
(980) 0x491c0a CMP $0x4,%RDI |
(980) 0x491c0e JE 491c55 |
(980) 0x491c10 CMP $0x5,%RDI |
(980) 0x491c14 JE 491c40 |
(980) 0x491c16 CMP $0x6,%RDI |
(980) 0x491c1a JE 491c2b |
(980) 0x491c1c VMOVUPD (%RDX),%YMM13 |
(980) 0x491c20 MOV $0x20,%R15D |
(980) 0x491c26 VMULPD (%R14),%YMM13,%YMM0 |
(980) 0x491c2b VMOVUPD (%RDX,%R15,1),%YMM14 |
(980) 0x491c31 VMULPD (%R14,%R15,1),%YMM14,%YMM15 |
(980) 0x491c37 ADD $0x20,%R15 |
(980) 0x491c3b VADDPD %YMM15,%YMM0,%YMM0 |
(980) 0x491c40 VMOVUPD (%RDX,%R15,1),%YMM2 |
(980) 0x491c46 VMULPD (%R14,%R15,1),%YMM2,%YMM11 |
(980) 0x491c4c ADD $0x20,%R15 |
(980) 0x491c50 VADDPD %YMM11,%YMM0,%YMM0 |
(980) 0x491c55 VMOVUPD (%RDX,%R15,1),%YMM3 |
(980) 0x491c5b VMULPD (%R14,%R15,1),%YMM3,%YMM1 |
(980) 0x491c61 ADD $0x20,%R15 |
(980) 0x491c65 VADDPD %YMM1,%YMM0,%YMM0 |
(980) 0x491c69 VMOVUPD (%RDX,%R15,1),%YMM10 |
(980) 0x491c6f VMULPD (%R14,%R15,1),%YMM10,%YMM12 |
(980) 0x491c75 ADD $0x20,%R15 |
(980) 0x491c79 VADDPD %YMM12,%YMM0,%YMM0 |
(980) 0x491c7e VMOVUPD (%RDX,%R15,1),%YMM13 |
(980) 0x491c84 VMULPD (%R14,%R15,1),%YMM13,%YMM14 |
(980) 0x491c8a ADD $0x20,%R15 |
(980) 0x491c8e VADDPD %YMM14,%YMM0,%YMM0 |
(980) 0x491c93 VMOVUPD (%RDX,%R15,1),%YMM15 |
(980) 0x491c99 VMULPD (%R14,%R15,1),%YMM15,%YMM2 |
(980) 0x491c9f ADD $0x20,%R15 |
(980) 0x491ca3 VADDPD %YMM2,%YMM0,%YMM0 |
(980) 0x491ca7 CMP %RBX,%R15 |
(980) 0x491caa JE 491d6b |
(982) 0x491cb0 VMOVUPD (%RDX,%R15,1),%YMM11 |
(982) 0x491cb6 VMOVUPD 0x20(%RDX,%R15,1),%YMM1 |
(982) 0x491cbd VMOVUPD 0x40(%RDX,%R15,1),%YMM14 |
(982) 0x491cc4 VMULPD (%R14,%R15,1),%YMM11,%YMM3 |
(982) 0x491cca VMULPD 0x20(%R14,%R15,1),%YMM1,%YMM12 |
(982) 0x491cd1 VMULPD 0x40(%R14,%R15,1),%YMM14,%YMM15 |
(982) 0x491cd8 VADDPD %YMM3,%YMM0,%YMM10 |
(982) 0x491cdc VMOVUPD 0x60(%RDX,%R15,1),%YMM0 |
(982) 0x491ce3 VMULPD 0x60(%R14,%R15,1),%YMM0,%YMM11 |
(982) 0x491cea VADDPD %YMM12,%YMM10,%YMM13 |
(982) 0x491cef VMOVUPD 0x80(%RDX,%R15,1),%YMM10 |
(982) 0x491cf9 VMULPD 0x80(%R14,%R15,1),%YMM10,%YMM1 |
(982) 0x491d03 VADDPD %YMM15,%YMM13,%YMM2 |
(982) 0x491d08 VMOVUPD 0xa0(%RDX,%R15,1),%YMM13 |
(982) 0x491d12 VMULPD 0xa0(%R14,%R15,1),%YMM13,%YMM14 |
(982) 0x491d1c VADDPD %YMM11,%YMM2,%YMM3 |
(982) 0x491d21 VMOVUPD 0xc0(%RDX,%R15,1),%YMM2 |
(982) 0x491d2b VMULPD 0xc0(%R14,%R15,1),%YMM2,%YMM0 |
(982) 0x491d35 VADDPD %YMM1,%YMM3,%YMM12 |
(982) 0x491d39 VMOVUPD 0xe0(%RDX,%R15,1),%YMM3 |
(982) 0x491d43 VMULPD 0xe0(%R14,%R15,1),%YMM3,%YMM10 |
(982) 0x491d4d ADD $0x100,%R15 |
(982) 0x491d54 VADDPD %YMM14,%YMM12,%YMM15 |
(982) 0x491d59 VADDPD %YMM0,%YMM15,%YMM11 |
(982) 0x491d5d VADDPD %YMM10,%YMM11,%YMM0 |
(982) 0x491d62 CMP %RBX,%R15 |
(982) 0x491d65 JNE 491cb0 |
(980) 0x491d6b VEXTRACTF64X2 $0x1,%YMM0,%XMM12 |
(980) 0x491d72 VADDPD %XMM0,%XMM12,%XMM13 |
(980) 0x491d76 VUNPCKHPD %XMM13,%XMM13,%XMM1 |
(980) 0x491d7b VADDPD %XMM13,%XMM1,%XMM1 |
(980) 0x491d80 CMP %R13D,-0xc8(%RBP) |
(980) 0x491d87 JE 49227d |
(980) 0x491d8d VADDPD %XMM12,%XMM0,%XMM12 |
(980) 0x491d92 MOV %R13D,%ESI |
(980) 0x491d95 MOV %R13D,%EAX |
(980) 0x491d98 MOV -0xc8(%RBP),%EDI |
(980) 0x491d9e SUB %ESI,%EDI |
(980) 0x491da0 CMP $0x1,%EDI |
(980) 0x491da3 JE 491dd4 |
(980) 0x491da5 MOV -0xd0(%RBP),%R15 |
(980) 0x491dac LEA (%R11,%RSI,1),%R14 |
(980) 0x491db0 ADD %R12,%RSI |
(980) 0x491db3 VMOVUPD (%R15,%R14,8),%XMM14 |
(980) 0x491db9 VFMADD231PD (%R8,%RSI,8),%XMM14,%XMM12 |
(980) 0x491dbf VUNPCKHPD %XMM12,%XMM12,%XMM15 |
(980) 0x491dc4 VADDPD %XMM12,%XMM15,%XMM1 |
(980) 0x491dc9 TEST $0x1,%DIL |
(980) 0x491dcd JE 491de4 |
(980) 0x491dcf AND $-0x2,%EDI |
(980) 0x491dd2 ADD %EDI,%EAX |
(980) 0x491dd4 CLTQ |
(980) 0x491dd6 ADD %R8,%RCX |
(980) 0x491dd9 VMOVSD (%RDX,%RAX,8),%XMM2 |
(980) 0x491dde VFMADD231SD (%RCX,%RAX,8),%XMM2,%XMM1 |
(980) 0x491de4 MOV -0x110(%RBP),%RSI |
(980) 0x491deb MOV 0x118(%RSI),%RCX |
(980) 0x491df2 MOV 0x140(%RSI),%RDI |
(980) 0x491df9 IMUL %R10,%RCX |
(980) 0x491dfd CMPL $0x2,-0xdc(%RBP) |
(980) 0x491e04 JBE 49235a |
(980) 0x491e0a LEA (%RCX,%RCX,2),%R14 |
(980) 0x491e0e VXORPD %XMM3,%XMM3,%XMM3 |
(980) 0x491e12 LEA (%RDX,%RBX,1),%R15 |
(980) 0x491e16 MOV %RDX,%RSI |
(980) 0x491e19 LEA (%RDI,%R14,8),%RAX |
(980) 0x491e1d LEA -0x20(%RBX),%R14 |
(980) 0x491e21 VMOVAPD %YMM3,%YMM11 |
(980) 0x491e25 SHR $0x5,%R14 |
(980) 0x491e29 VMOVAPD %YMM3,%YMM0 |
(980) 0x491e2d INC %R14 |
(980) 0x491e30 AND $0x3,%R14D |
(980) 0x491e34 JE 491f54 |
(980) 0x491e3a CMP $0x1,%R14 |
(980) 0x491e3e JE 491ef5 |
(980) 0x491e44 CMP $0x2,%R14 |
(980) 0x491e48 JE 491e9f |
(980) 0x491e4a VMOVUPD (%RAX),%YMM0 |
(980) 0x491e4e VMOVUPD 0x20(%RAX),%YMM13 |
(980) 0x491e53 LEA 0x20(%RDX),%RSI |
(980) 0x491e57 ADD $0x60,%RAX |
(980) 0x491e5b VMOVUPD -0x20(%RAX),%YMM12 |
(980) 0x491e60 VMOVUPD (%RDX),%YMM2 |
(980) 0x491e64 VMOVAPD %YMM0,%YMM11 |
(980) 0x491e68 VMOVAPD %YMM0,%YMM10 |
(980) 0x491e6c VPERMT2PD %YMM13,%YMM5,%YMM0 |
(980) 0x491e72 VPERMT2PD %YMM13,%YMM9,%YMM11 |
(980) 0x491e78 VPERMT2PD %YMM13,%YMM7,%YMM10 |
(980) 0x491e7e VPERMT2PD %YMM12,%YMM4,%YMM0 |
(980) 0x491e84 VPERMT2PD %YMM12,%YMM8,%YMM11 |
(980) 0x491e8a VFMADD132PD %YMM2,%YMM3,%YMM0 |
(980) 0x491e8f VPERMT2PD %YMM12,%YMM6,%YMM10 |
(980) 0x491e95 VFMADD132PD %YMM2,%YMM3,%YMM11 |
(980) 0x491e9a VFMADD231PD %YMM2,%YMM10,%YMM3 |
(980) 0x491e9f VMOVUPD (%RAX),%YMM2 |
(980) 0x491ea3 VMOVUPD 0x20(%RAX),%YMM15 |
(980) 0x491ea8 ADD $0x20,%RSI |
(980) 0x491eac ADD $0x60,%RAX |
(980) 0x491eb0 VMOVUPD -0x20(%RAX),%YMM14 |
(980) 0x491eb5 VMOVUPD -0x20(%RSI),%YMM10 |
(980) 0x491eba VMOVAPD %YMM2,%YMM13 |
(980) 0x491ebe VMOVAPD %YMM2,%YMM12 |
(980) 0x491ec2 VPERMT2PD %YMM15,%YMM5,%YMM2 |
(980) 0x491ec8 VPERMT2PD %YMM15,%YMM9,%YMM13 |
(980) 0x491ece VPERMT2PD %YMM15,%YMM7,%YMM12 |
(980) 0x491ed4 VPERMT2PD %YMM14,%YMM4,%YMM2 |
(980) 0x491eda VPERMT2PD %YMM14,%YMM8,%YMM13 |
(980) 0x491ee0 VFMADD231PD %YMM10,%YMM2,%YMM0 |
(980) 0x491ee5 VPERMT2PD %YMM14,%YMM6,%YMM12 |
(980) 0x491eeb VFMADD231PD %YMM10,%YMM13,%YMM11 |
(980) 0x491ef0 VFMADD231PD %YMM10,%YMM12,%YMM3 |
(980) 0x491ef5 VMOVUPD (%RAX),%YMM2 |
(980) 0x491ef9 VMOVUPD 0x20(%RAX),%YMM15 |
(980) 0x491efe ADD $0x20,%RSI |
(980) 0x491f02 ADD $0x60,%RAX |
(980) 0x491f06 VMOVUPD -0x20(%RAX),%YMM14 |
(980) 0x491f0b VMOVUPD -0x20(%RSI),%YMM10 |
(980) 0x491f10 VMOVAPD %YMM2,%YMM13 |
(980) 0x491f14 VMOVAPD %YMM2,%YMM12 |
(980) 0x491f18 VPERMT2PD %YMM15,%YMM5,%YMM2 |
(980) 0x491f1e VPERMT2PD %YMM15,%YMM9,%YMM13 |
(980) 0x491f24 VPERMT2PD %YMM15,%YMM7,%YMM12 |
(980) 0x491f2a VPERMT2PD %YMM14,%YMM4,%YMM2 |
(980) 0x491f30 VPERMT2PD %YMM14,%YMM8,%YMM13 |
(980) 0x491f36 VFMADD231PD %YMM10,%YMM2,%YMM0 |
(980) 0x491f3b VPERMT2PD %YMM14,%YMM6,%YMM12 |
(980) 0x491f41 VFMADD231PD %YMM10,%YMM13,%YMM11 |
(980) 0x491f46 VFMADD231PD %YMM10,%YMM12,%YMM3 |
(980) 0x491f4b CMP %R15,%RSI |
(980) 0x491f4e JE 4920b8 |
(981) 0x491f54 VMOVUPD (%RAX),%YMM2 |
(981) 0x491f58 VMOVUPD 0x20(%RAX),%YMM15 |
(981) 0x491f5d SUB $-0x80,%RSI |
(981) 0x491f61 ADD $0x180,%RAX |
(981) 0x491f67 VMOVUPD -0x140(%RAX),%YMM14 |
(981) 0x491f6f VMOVUPD -0x80(%RSI),%YMM13 |
(981) 0x491f74 VMOVAPD %YMM2,%YMM10 |
(981) 0x491f78 VMOVAPD %YMM2,%YMM12 |
(981) 0x491f7c VPERMT2PD %YMM15,%YMM5,%YMM2 |
(981) 0x491f82 VMOVUPD -0x100(%RAX),%YMM16 |
(981) 0x491f89 VPERMT2PD %YMM15,%YMM7,%YMM10 |
(981) 0x491f8f VPERMT2PD %YMM15,%YMM9,%YMM12 |
(981) 0x491f95 VPERMT2PD %YMM14,%YMM4,%YMM2 |
(981) 0x491f9b VMOVUPD -0xa0(%RAX),%YMM17 |
(981) 0x491fa2 VPERMT2PD %YMM14,%YMM6,%YMM10 |
(981) 0x491fa8 VFMADD231PD %YMM13,%YMM2,%YMM0 |
(981) 0x491fad VPERMT2PD %YMM14,%YMM8,%YMM12 |
(981) 0x491fb3 VMOVUPD -0xe0(%RAX),%YMM2 |
(981) 0x491fbb VFMADD231PD %YMM13,%YMM10,%YMM3 |
(981) 0x491fc0 VMOVUPD -0x120(%RAX),%YMM10 |
(981) 0x491fc8 VFMADD132PD %YMM13,%YMM11,%YMM12 |
(981) 0x491fcd VMOVUPD -0x60(%RSI),%YMM14 |
(981) 0x491fd2 VMOVUPD -0x40(%RAX),%YMM15 |
(981) 0x491fd7 VMOVAPD %YMM10,%YMM11 |
(981) 0x491fdc VMOVAPD %YMM10,%YMM13 |
(981) 0x491fe1 VPERMT2PD %YMM16,%YMM5,%YMM10 |
(981) 0x491fe7 VPERMT2PD %YMM16,%YMM7,%YMM11 |
(981) 0x491fed VPERMT2PD %YMM16,%YMM9,%YMM13 |
(981) 0x491ff3 VPERMT2PD %YMM2,%YMM4,%YMM10 |
(981) 0x491ff9 VPERMT2PD %YMM2,%YMM6,%YMM11 |
(981) 0x491fff VFMADD132PD %YMM14,%YMM0,%YMM10 |
(981) 0x492004 VPERMT2PD %YMM2,%YMM8,%YMM13 |
(981) 0x49200a VMOVUPD -0xc0(%RAX),%YMM2 |
(981) 0x492012 VFMADD132PD %YMM14,%YMM3,%YMM11 |
(981) 0x492017 VMOVUPD -0x80(%RAX),%YMM0 |
(981) 0x49201c VFMADD132PD %YMM14,%YMM12,%YMM13 |
(981) 0x492021 VMOVUPD -0x40(%RSI),%YMM14 |
(981) 0x492026 VMOVAPD %YMM2,%YMM3 |
(981) 0x49202a VMOVAPD %YMM2,%YMM12 |
(981) 0x49202e VPERMT2PD %YMM17,%YMM5,%YMM2 |
(981) 0x492034 VPERMT2PD %YMM17,%YMM7,%YMM3 |
(981) 0x49203a VPERMT2PD %YMM17,%YMM9,%YMM12 |
(981) 0x492040 VPERMT2PD %YMM0,%YMM4,%YMM2 |
(981) 0x492046 VPERMT2PD %YMM0,%YMM6,%YMM3 |
(981) 0x49204c VPERMT2PD %YMM0,%YMM8,%YMM12 |
(981) 0x492052 VMOVUPD -0x60(%RAX),%YMM0 |
(981) 0x492057 VFMADD132PD %YMM14,%YMM10,%YMM2 |
(981) 0x49205c VFMADD132PD %YMM14,%YMM11,%YMM3 |
(981) 0x492061 VFMADD132PD %YMM14,%YMM13,%YMM12 |
(981) 0x492066 VMOVAPD %YMM0,%YMM11 |
(981) 0x49206a VMOVUPD -0x20(%RAX),%YMM14 |
(981) 0x49206f VMOVUPD -0x20(%RSI),%YMM13 |
(981) 0x492074 VPERMT2PD %YMM15,%YMM9,%YMM11 |
(981) 0x49207a VPERMT2PD %YMM14,%YMM8,%YMM11 |
(981) 0x492080 VMOVAPD %YMM3,%YMM10 |
(981) 0x492084 VFMADD132PD %YMM13,%YMM12,%YMM11 |
(981) 0x492089 VMOVAPD %YMM0,%YMM3 |
(981) 0x49208d VPERMT2PD %YMM15,%YMM7,%YMM3 |
(981) 0x492093 VPERMT2PD %YMM15,%YMM5,%YMM0 |
(981) 0x492099 VPERMT2PD %YMM14,%YMM6,%YMM3 |
(981) 0x49209f VPERMT2PD %YMM14,%YMM4,%YMM0 |
(981) 0x4920a5 VFMADD132PD %YMM13,%YMM2,%YMM0 |
(981) 0x4920aa VFMADD132PD %YMM13,%YMM10,%YMM3 |
(981) 0x4920af CMP %R15,%RSI |
(981) 0x4920b2 JNE 491f54 |
(980) 0x4920b8 VEXTRACTF64X2 $0x1,%YMM3,%XMM13 |
(980) 0x4920bf VEXTRACTF64X2 $0x1,%YMM11,%XMM14 |
(980) 0x4920c6 VADDPD %XMM3,%XMM13,%XMM12 |
(980) 0x4920ca VADDPD %XMM11,%XMM14,%XMM15 |
(980) 0x4920cf VUNPCKHPD %XMM12,%XMM12,%XMM2 |
(980) 0x4920d4 VADDPD %XMM12,%XMM2,%XMM10 |
(980) 0x4920d9 VUNPCKHPD %XMM15,%XMM15,%XMM12 |
(980) 0x4920de VADDPD %XMM15,%XMM12,%XMM12 |
(980) 0x4920e3 VEXTRACTF64X2 $0x1,%YMM0,%XMM15 |
(980) 0x4920ea VADDPD %XMM0,%XMM15,%XMM18 |
(980) 0x4920f0 VUNPCKHPD %XMM18,%XMM18,%XMM2 |
(980) 0x4920f6 VADDPD %XMM18,%XMM2,%XMM2 |
(980) 0x4920fc VUNPCKLPD %XMM12,%XMM2,%XMM2 |
(980) 0x492101 CMP %R13D,-0xc8(%RBP) |
(980) 0x492108 JE 4921e8 |
(980) 0x49210e VADDPD %XMM0,%XMM15,%XMM15 |
(980) 0x492112 VADDPD %XMM11,%XMM14,%XMM14 |
(980) 0x492117 MOV %R13D,%ESI |
(980) 0x49211a MOV %R13D,%EAX |
(980) 0x49211d VADDPD %XMM3,%XMM13,%XMM13 |
(980) 0x492121 MOV -0xc8(%RBP),%R15D |
(980) 0x492128 SUB %ESI,%R15D |
(980) 0x49212b MOV %R15D,-0xd8(%RBP) |
(980) 0x492132 CMP $0x1,%R15D |
(980) 0x492136 JE 4921c6 |
(980) 0x49213c LEA (%RSI,%RCX,1),%R14 |
(980) 0x492140 ADD %R11,%RSI |
(980) 0x492143 LEA (%R14,%R14,2),%R15 |
(980) 0x492147 MOV -0xd0(%RBP),%R14 |
(980) 0x49214e LEA (%RDI,%R15,8),%R15 |
(980) 0x492152 VMOVUPD (%R15),%XMM3 |
(980) 0x492157 VMOVUPD 0x10(%R15),%XMM11 |
(980) 0x49215d VMOVUPD 0x20(%R15),%XMM12 |
(980) 0x492163 VMOVUPD (%R14,%RSI,8),%XMM0 |
(980) 0x492169 VPERMILPD $0x1,%XMM3,%XMM10 |
(980) 0x49216f MOV -0xd8(%RBP),%R15D |
(980) 0x492176 VUNPCKLPD %XMM12,%XMM10,%XMM2 |
(980) 0x49217b VUNPCKLPD %XMM3,%XMM11,%XMM10 |
(980) 0x49217f VBLENDPD $0x2,%XMM11,%XMM3,%XMM3 |
(980) 0x492185 VFMADD132PD %XMM0,%XMM15,%XMM3 |
(980) 0x49218a VFMADD132PD %XMM0,%XMM14,%XMM2 |
(980) 0x49218f VBLENDPD $0x2,%XMM12,%XMM10,%XMM12 |
(980) 0x492195 MOV %R15D,%ESI |
(980) 0x492198 VFMADD132PD %XMM12,%XMM13,%XMM0 |
(980) 0x49219d AND $0x1,%ESI |
(980) 0x4921a0 VUNPCKHPD %XMM2,%XMM2,%XMM14 |
(980) 0x4921a4 VUNPCKHPD %XMM0,%XMM0,%XMM13 |
(980) 0x4921a8 VADDPD %XMM2,%XMM14,%XMM15 |
(980) 0x4921ac VADDPD %XMM0,%XMM13,%XMM10 |
(980) 0x4921b0 VUNPCKHPD %XMM3,%XMM3,%XMM0 |
(980) 0x4921b4 VADDPD %XMM3,%XMM0,%XMM11 |
(980) 0x4921b8 VUNPCKLPD %XMM15,%XMM11,%XMM2 |
(980) 0x4921bd JE 4921e8 |
(980) 0x4921bf AND $-0x2,%R15D |
(980) 0x4921c3 ADD %R15D,%EAX |
(980) 0x4921c6 CLTQ |
(980) 0x4921c8 ADD %RAX,%RCX |
(980) 0x4921cb VMOVDDUP (%RDX,%RAX,8),%XMM12 |
(980) 0x4921d0 LEA (%RCX,%RCX,2),%RCX |
(980) 0x4921d4 LEA (%RDI,%RCX,8),%RDI |
(980) 0x4921d8 VMOVSD 0x10(%RDI),%XMM3 |
(980) 0x4921dd VFMADD231PD (%RDI),%XMM12,%XMM2 |
(980) 0x4921e2 VFMADD231SD (%RDX,%RAX,8),%XMM3,%XMM10 |
(980) 0x4921e8 VMULSD %XMM2,%XMM2,%XMM3 |
(980) 0x4921ec VUNPCKHPD %XMM2,%XMM2,%XMM0 |
(980) 0x4921f0 VMULSD %XMM0,%XMM0,%XMM0 |
(980) 0x4921f4 VMULSD %XMM10,%XMM10,%XMM11 |
(980) 0x4921f9 VADDPD (%R9),%XMM2,%XMM2 |
(980) 0x4921fe VADDSD 0x10(%R9),%XMM10,%XMM10 |
(980) 0x492204 ADD $0x18,%R9 |
(980) 0x492208 MOV -0x100(%RBP),%RAX |
(980) 0x49220f VADDSD %XMM0,%XMM3,%XMM14 |
(980) 0x492213 MOV -0x108(%RBP),%R14 |
(980) 0x49221a MOV -0xf0(%RBP),%R15 |
(980) 0x492221 MOV -0xf8(%RBP),%RSI |
(980) 0x492228 VMOVUPD %XMM2,-0x18(%R9) |
(980) 0x49222e MOV -0xe8(%RBP),%RCX |
(980) 0x492235 ADD %R14,%RDX |
(980) 0x492238 VMOVSD %XMM10,-0x8(%R9) |
(980) 0x49223e ADD %R15,%R11 |
(980) 0x492241 ADD %RSI,%R12 |
(980) 0x492244 VADDSD (%RAX,%R10,8),%XMM1,%XMM1 |
(980) 0x49224a VSUBSD %XMM14,%XMM1,%XMM15 |
(980) 0x49224f VSUBSD %XMM11,%XMM15,%XMM11 |
(980) 0x492254 VMOVSD %XMM11,(%RAX,%R10,8) |
(980) 0x49225a INC %R10 |
(980) 0x49225d CMP %RCX,%R10 |
(980) 0x492260 JNE 491bb0 |
0x492266 VZEROUPPER |
0x492269 LEA -0x30(%RBP),%RSP |
0x49226d POP %RBX |
0x49226e POP %RSI |
0x49226f POP %R12 |
0x492271 POP %R13 |
0x492273 POP %R14 |
0x492275 POP %R15 |
0x492277 POP %RBP |
0x492278 LEA -0x8(%RSI),%RSP |
0x49227c RET |
(980) 0x49227d MOV -0x110(%RBP),%RCX |
(980) 0x492284 MOV 0x140(%RCX),%RDI |
(980) 0x49228b MOV 0x118(%RCX),%RCX |
(980) 0x492292 IMUL %R10,%RCX |
(980) 0x492296 JMP 491e0a |
0x49229b MOV 0x100(%R15),%RBX |
0x4922a2 MOV 0x140(%R15),%R8 |
0x4922a9 LEA (%RAX,%RAX,2),%R9 |
0x4922ad MOV -0xd8(%RBP),%RDX |
0x4922b4 MOV -0x100(%RBP),%R10 |
0x4922bb VMOVSD (%RBX),%XMM9 |
0x4922bf MOV 0x180(%R15),%R11 |
0x4922c6 MOV 0x18(%RDX),%R13 |
0x4922ca MOV 0x18(%R10),%RDI |
0x4922ce VMULSD 0x10(%R8),%XMM9,%XMM12 |
0x4922d4 VMOVDDUP %XMM9,%XMM8 |
0x4922d9 VMULPD (%R8),%XMM8,%XMM7 |
0x4922de LEA (%R13,%R9,8),%R12 |
0x4922e3 LEA (%RDI,%RAX,8),%RAX |
0x4922e7 VADDSD 0x10(%R12),%XMM12,%XMM3 |
0x4922ee VUNPCKHPD %XMM7,%XMM7,%XMM6 |
0x4922f2 VADDPD (%R12),%XMM7,%XMM4 |
0x4922f8 VMOVSD %XMM7,%XMM7,%XMM5 |
0x4922fc VMULSD %XMM6,%XMM6,%XMM13 |
0x492300 VMOVSD %XMM3,0x10(%R12) |
0x492307 VMOVUPD %XMM4,(%R12) |
0x49230d VFNMADD213SD (%RAX),%XMM12,%XMM12 |
0x492312 VFMADD132SD %XMM7,%XMM13,%XMM5 |
0x492317 VSUBSD %XMM5,%XMM12,%XMM0 |
0x49231b VFMADD132SD (%R11),%XMM0,%XMM9 |
0x492320 VMOVSD %XMM9,(%RAX) |
0x492324 JMP 492269 |
(980) 0x492329 VXORPD %XMM11,%XMM11,%XMM11 |
(980) 0x49232e VXORPD %XMM2,%XMM2,%XMM2 |
(980) 0x492332 VMOVSD %XMM11,%XMM11,%XMM0 |
(980) 0x492336 VMOVSD %XMM11,%XMM11,%XMM3 |
(980) 0x49233a VMOVSD %XMM11,%XMM11,%XMM1 |
(980) 0x49233e VMOVSD %XMM11,%XMM11,%XMM10 |
(980) 0x492343 JMP 4921f9 |
(980) 0x492348 VXORPD %XMM12,%XMM12,%XMM12 |
(980) 0x49234d XOR %ESI,%ESI |
(980) 0x49234f VXORPD %XMM1,%XMM1,%XMM1 |
(980) 0x492353 XOR %EAX,%EAX |
(980) 0x492355 JMP 491d98 |
(980) 0x49235a VXORPD %XMM2,%XMM2,%XMM2 |
(980) 0x49235e XOR %ESI,%ESI |
(980) 0x492360 VXORPD %XMM10,%XMM10,%XMM10 |
(980) 0x492365 XOR %EAX,%EAX |
(980) 0x492367 VMOVAPD %XMM2,%XMM13 |
(980) 0x49236b VMOVAPD %XMM2,%XMM14 |
(980) 0x49236f VMOVAPD %XMM2,%XMM15 |
(980) 0x492373 JMP 492121 |
0x492378 MOV 0x468(%R15),%RDI |
0x49237f MOV %RSI,%R13 |
0x492382 CALL 50f9a0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> |
0x492387 MOV 0x470(%R15),%R12 |
0x49238e MOV 0x47c(%R15),%ECX |
0x492395 MOV 0x478(%R15),%R14D |
0x49239c MOV (%R12),%RAX |
0x4923a0 MOV 0x28(%RAX),%RBX |
0x4923a4 CMP $0x475470,%RBX |
0x4923ab JNE 492549 |
0x4923b1 CMP %R14D,%ECX |
0x4923b4 JLE 4924e9 |
0x4923ba SUB %R14D,%ECX |
0x4923bd XOR %EBX,%EBX |
0x4923bf MOV %RCX,-0xc8(%RBP) |
0x4923c6 NOPW %CS:(%RAX,%RAX,1) |
(983) 0x4923d0 MOV 0x98(%R15),%R8 |
(983) 0x4923d7 MOV 0x118(%R15),%R10 |
(983) 0x4923de MOVQ $0x58c7c8,-0xc0(%RBP) |
(983) 0x4923e9 MOV 0x158(%R15),%RCX |
(983) 0x4923f0 MOV 0xc0(%R15),%R9 |
(983) 0x4923f7 MOVQ $0x58bc98,-0x90(%RBP) |
(983) 0x492402 IMUL %RBX,%R8 |
(983) 0x492406 MOV 0x140(%R15),%RAX |
(983) 0x49240d MOVSXD 0x8(%R12),%RSI |
(983) 0x492412 MOVQ $0x58c7c8,-0x60(%RBP) |
(983) 0x49241a IMUL %RBX,%R10 |
(983) 0x49241e MOVQ $0,-0xb0(%RBP) |
(983) 0x492429 IMUL %RBX,%RCX |
(983) 0x49242d MOV %RSI,-0xb8(%RBP) |
(983) 0x492434 LEA (%R9,%R8,8),%R11 |
(983) 0x492438 MOV 0x180(%R15),%R8 |
(983) 0x49243f MOV %RSI,-0x88(%RBP) |
(983) 0x492446 LEA (%R10,%R10,2),%RDI |
(983) 0x49244a MOV %R11,-0xa8(%RBP) |
(983) 0x492451 MOV (%R12),%R11 |
(983) 0x492455 LEA (%R8,%RCX,8),%R9 |
(983) 0x492459 LEA (%RAX,%RDI,8),%RDX |
(983) 0x49245d MOV %RSI,-0x58(%RBP) |
(983) 0x492461 MOV %R12,%RDI |
(983) 0x492464 MOV %RDX,-0x78(%RBP) |
(983) 0x492468 LEA -0x90(%RBP),%R8 |
(983) 0x49246f LEA (%R14,%RBX,1),%EDX |
(983) 0x492473 MOV %R13,%RSI |
(983) 0x492476 MOV %R9,-0x48(%RBP) |
(983) 0x49247a LEA -0xc0(%RBP),%RCX |
(983) 0x492481 LEA -0x60(%RBP),%R9 |
(983) 0x492485 MOVQ $0,-0x80(%RBP) |
(983) 0x49248d MOVQ $0,-0x50(%RBP) |
(983) 0x492495 CALLQ 0x18(%R11) |
(983) 0x492499 MOV -0x50(%RBP),%RSI |
(983) 0x49249d MOVQ $0x58c7c8,-0x60(%RBP) |
(983) 0x4924a5 TEST %RSI,%RSI |
(983) 0x4924a8 JNE 492533 |
(983) 0x4924ae MOV -0x80(%RBP),%R10 |
(983) 0x4924b2 MOVQ $0x58bc98,-0x90(%RBP) |
(983) 0x4924bd TEST %R10,%R10 |
(983) 0x4924c0 JNE 492520 |
(983) 0x4924c2 MOV -0xb0(%RBP),%RDI |
(983) 0x4924c9 MOVQ $0x58c7c8,-0xc0(%RBP) |
(983) 0x4924d4 TEST %RDI,%RDI |
(983) 0x4924d7 JNE 4924fa |
(983) 0x4924d9 INC %RBX |
(983) 0x4924dc CMP %RBX,-0xc8(%RBP) |
(983) 0x4924e3 JNE 4923d0 |
0x4924e9 MOV 0x468(%R15),%RDI |
0x4924f0 CALL 50fbd0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> |
0x4924f5 JMP 491aaf |
(983) 0x4924fa LEA (,%RDI,8),%RSI |
(983) 0x492502 MOV -0xa8(%RBP),%RDI |
(983) 0x492509 INC %RBX |
(983) 0x49250c CALL 404060 <_ZdlPvm@plt> |
(983) 0x492511 CMP %RBX,-0xc8(%RBP) |
(983) 0x492518 JNE 4923d0 |
0x49251e JMP 4924e9 |
(983) 0x492520 LEA (%R10,%R10,2),%RSI |
(983) 0x492524 MOV -0x78(%RBP),%RDI |
(983) 0x492528 SAL $0x3,%RSI |
(983) 0x49252c CALL 404060 <_ZdlPvm@plt> |
(983) 0x492531 JMP 4924c2 |
(983) 0x492533 MOV -0x48(%RBP),%RDI |
(983) 0x492537 LEA (,%RSI,8),%RSI |
(983) 0x49253f CALL 404060 <_ZdlPvm@plt> |
(983) 0x492544 JMP 4924ae |
0x492549 SUB $0x8,%RSP |
0x49254d LEA 0x150(%R15),%RDX |
0x492554 LEA 0x110(%R15),%R9 |
0x49255b MOV %R13,%RSI |
0x49255e PUSH %RDX |
0x49255f LEA 0x90(%R15),%R8 |
0x492566 MOV %R14D,%EDX |
0x492569 MOV %R12,%RDI |
0x49256c CALL %RBX |
0x49256e POP %RDX |
0x49256f POP %RCX |
0x492570 JMP 4924e9 |
0x492575 MOV %RAX,%R15 |
0x492578 MOV -0x50(%RBP),%R13 |
0x49257c MOVQ $0x58c7c8,-0x60(%RBP) |
0x492584 TEST %R13,%R13 |
0x492587 JE 4925f1 |
0x492589 MOV -0x48(%RBP),%RDI |
0x49258d LEA (,%R13,8),%RSI |
0x492595 VZEROUPPER |
0x492598 CALL 404060 <_ZdlPvm@plt> |
(979) 0x49259d MOV -0x80(%RBP),%R12 |
(979) 0x4925a1 MOVQ $0x58bc98,-0x90(%RBP) |
(979) 0x4925ac TEST %R12,%R12 |
(979) 0x4925af JE 4925be |
(979) 0x4925b1 IMUL $0x18,%R12,%RSI |
(979) 0x4925b5 MOV -0x78(%RBP),%RDI |
(979) 0x4925b9 CALL 404060 <_ZdlPvm@plt> |
(979) 0x4925be MOV -0xb0(%RBP),%R14 |
(979) 0x4925c5 MOVQ $0x58c7c8,-0xc0(%RBP) |
(979) 0x4925d0 TEST %R14,%R14 |
(979) 0x4925d3 JE 4925e9 |
(979) 0x4925d5 MOV -0xa8(%RBP),%RDI |
(979) 0x4925dc LEA (,%R14,8),%RSI |
(979) 0x4925e4 CALL 404060 <_ZdlPvm@plt> |
(979) 0x4925e9 MOV %R15,%RDI |
(979) 0x4925ec CALL 404230 <_Unwind_Resume@plt> |
(979) 0x4925f1 VZEROUPPER |
(979) 0x4925f4 JMP 49259d |
0x4925f6 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | DiracDeterminantRef.cpp:152-181 |
Module | exec |
nb instructions | 143 |
nb uops | 150 |
loop length | 686 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 10 |
used ymm registers | 6 |
used zmm registers | 0 |
nb stack references | 15 |
ADD-SUB / MUL ratio | 1.00 |
micro-operation queue | 25.00 cycles |
front end | 25.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.80 | 6.80 | 17.33 | 17.33 | 14.50 | 6.80 | 6.80 | 14.50 | 14.50 | 14.50 | 6.80 | 17.33 |
cycles | 6.80 | 6.80 | 17.33 | 17.33 | 14.50 | 6.80 | 6.80 | 14.50 | 14.50 | 14.50 | 6.80 | 17.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 24.50-24.55 |
Stall cycles | 0.00 |
Front-end | 25.00 |
Dispatch | 17.33 |
Overall L1 | 25.00 |
all | 30% |
load | 100% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 28% |
all | 18% |
load | 28% |
store | 33% |
mul | 33% |
add-sub | 33% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 26% |
load | 61% |
store | 6% |
mul | 33% |
add-sub | 33% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 20% |
all | 20% |
load | 50% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 14% |
load | 16% |
store | 16% |
mul | 16% |
add-sub | 16% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 18% |
load | 31% |
store | 12% |
mul | 16% |
add-sub | 16% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
LEA 0x8(%RSP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
PUSHQ -0x8(%R10) | 2 | 0 | 0 | 0.33 | 0.33 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0.33 | 5-12 | 0.62 |
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R10 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0xe0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,-0xd8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xc(%RDI),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0x100(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %EDI,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 492378 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x908> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x484(%R15),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0x478(%R15),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP $0x1,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 49229b <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x82b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD %EDX,%RBX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RBX,-0xe8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %EDX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 492269 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x7f9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x158(%R15),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x100(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%RAX,2),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R15,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xd8(%R15),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xd8(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x480(%R15),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x100(%R15),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0xf8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RDI),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%R13,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,-0xf0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,-0x108(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x1(%RBX),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EBX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x180(%R15),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%RAX,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EBX,-0xc8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQA 0xfbf4d(%RIP),%YMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
SHR $0x2,%EBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,-0x100(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQA 0xfbf5b(%RIP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %R11,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA (%R14,%R9,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDQA 0xfbf6c(%RIP),%YMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQA 0xfbf84(%RIP),%YMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %R11,-0xd0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x5,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VMOVDQA 0xfb771(%RIP),%YMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQA 0xfb789(%RIP),%YMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %R10D,-0xdc(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $-0x4,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x30(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
LEA -0x8(%RSI),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV 0x100(%R15),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x140(%R15),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%RAX,2),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xd8(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x100(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RBX),%XMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x180(%R15),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RDX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R10),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD 0x10(%R8),%XMM9,%XMM12 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVDDUP %XMM9,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD (%R8),%XMM8,%XMM7 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
LEA (%R13,%R9,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RDI,%RAX,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VADDSD 0x10(%R12),%XMM12,%XMM3 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VUNPCKHPD %XMM7,%XMM7,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD (%R12),%XMM7,%XMM4 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMOVSD %XMM7,%XMM7,%XMM5 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULSD %XMM6,%XMM6,%XMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM3,0x10(%R12) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVUPD %XMM4,(%R12) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VFNMADD213SD (%RAX),%XMM12,%XMM12 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD132SD %XMM7,%XMM13,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSD %XMM5,%XMM12,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD132SD (%R11),%XMM0,%XMM9 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM9,(%RAX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 492269 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x7f9> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV 0x468(%R15),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 50f9a0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x470(%R15),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x47c(%R15),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x478(%R15),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP $0x475470,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 492549 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0xad9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP %R14D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 4924e9 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0xa79> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
SUB %R14D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,-0xc8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x468(%R15),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 50fbd0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
JMP 491aaf <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x3f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
JMP 4924e9 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0xa79> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x150(%R15),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x110(%R15),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RDX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
LEA 0x90(%R15),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R12,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL %RBX | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 2.14 |
POP %RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 4924e9 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0xa79> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %RAX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x50(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0x58c7c8,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %R13,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 4925f1 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0xb81> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x48(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%R13,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 404060 <_ZdlPvm@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | DiracDeterminantRef.cpp:152-181 |
Module | exec |
nb instructions | 143 |
nb uops | 150 |
loop length | 686 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 10 |
used ymm registers | 6 |
used zmm registers | 0 |
nb stack references | 15 |
ADD-SUB / MUL ratio | 1.00 |
micro-operation queue | 25.00 cycles |
front end | 25.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.80 | 6.80 | 17.33 | 17.33 | 14.50 | 6.80 | 6.80 | 14.50 | 14.50 | 14.50 | 6.80 | 17.33 |
cycles | 6.80 | 6.80 | 17.33 | 17.33 | 14.50 | 6.80 | 6.80 | 14.50 | 14.50 | 14.50 | 6.80 | 17.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 24.50-24.55 |
Stall cycles | 0.00 |
Front-end | 25.00 |
Dispatch | 17.33 |
Overall L1 | 25.00 |
all | 30% |
load | 100% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 28% |
all | 18% |
load | 28% |
store | 33% |
mul | 33% |
add-sub | 33% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 26% |
load | 61% |
store | 6% |
mul | 33% |
add-sub | 33% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 20% |
all | 20% |
load | 50% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 14% |
load | 16% |
store | 16% |
mul | 16% |
add-sub | 16% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 18% |
load | 31% |
store | 12% |
mul | 16% |
add-sub | 16% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
LEA 0x8(%RSP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
PUSHQ -0x8(%R10) | 2 | 0 | 0 | 0.33 | 0.33 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0.33 | 5-12 | 0.62 |
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R10 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0xe0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,-0xd8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xc(%RDI),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0x100(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %EDI,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 492378 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x908> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x484(%R15),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0x478(%R15),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP $0x1,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 49229b <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x82b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD %EDX,%RBX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RBX,-0xe8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %EDX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 492269 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x7f9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x158(%R15),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x100(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%RAX,2),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R15,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xd8(%R15),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xd8(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x480(%R15),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x100(%R15),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0xf8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RDI),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%R13,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,-0xf0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,-0x108(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x1(%RBX),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EBX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x180(%R15),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%RAX,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EBX,-0xc8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQA 0xfbf4d(%RIP),%YMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
SHR $0x2,%EBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,-0x100(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQA 0xfbf5b(%RIP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %R11,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA (%R14,%R9,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDQA 0xfbf6c(%RIP),%YMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQA 0xfbf84(%RIP),%YMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %R11,-0xd0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x5,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VMOVDQA 0xfb771(%RIP),%YMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQA 0xfb789(%RIP),%YMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %R10D,-0xdc(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $-0x4,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x30(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
LEA -0x8(%RSI),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV 0x100(%R15),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x140(%R15),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%RAX,2),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xd8(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x100(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RBX),%XMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x180(%R15),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RDX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R10),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD 0x10(%R8),%XMM9,%XMM12 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVDDUP %XMM9,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD (%R8),%XMM8,%XMM7 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
LEA (%R13,%R9,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RDI,%RAX,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VADDSD 0x10(%R12),%XMM12,%XMM3 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VUNPCKHPD %XMM7,%XMM7,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD (%R12),%XMM7,%XMM4 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMOVSD %XMM7,%XMM7,%XMM5 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULSD %XMM6,%XMM6,%XMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM3,0x10(%R12) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVUPD %XMM4,(%R12) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VFNMADD213SD (%RAX),%XMM12,%XMM12 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD132SD %XMM7,%XMM13,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSD %XMM5,%XMM12,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD132SD (%R11),%XMM0,%XMM9 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM9,(%RAX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 492269 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x7f9> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV 0x468(%R15),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 50f9a0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x470(%R15),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x47c(%R15),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x478(%R15),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP $0x475470,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 492549 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0xad9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP %R14D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 4924e9 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0xa79> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
SUB %R14D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,-0xc8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x468(%R15),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 50fbd0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
JMP 491aaf <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x3f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
JMP 4924e9 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0xa79> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x150(%R15),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x110(%R15),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RDX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
LEA 0x90(%R15),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R12,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL %RBX | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 2.14 |
POP %RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 4924e9 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0xa79> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %RAX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x50(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0x58c7c8,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %R13,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 4925f1 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0xb81> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x48(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%R13,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 404060 <_ZdlPvm@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼miniqmcreference::DiracDeterminantRef | 0.51 | 0.5 |
○Loop 979 - new_allocator.h:100-168 - exec | 0 | 0 |
▼Loop 980 - DiracDeterminantRef.cpp:173-178 - exec– | 0 | 0 |
○Loop 981 - inner_product.hpp:155-155 - exec | 0.32 | 0.28 |
○Loop 982 - inner_product.hpp:82-83 - exec | 0.18 | 0.16 |
○Loop 983 - SPOSet.h:106-111 - exec | 0 | 0 |