Function: _ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLo ... | Module: exec | Source: DiracDeterminantRef.cpp:235-255 [...] | Coverage: 0.08% |
---|
Function: _ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLo ... | Module: exec | Source: DiracDeterminantRef.cpp:235-255 [...] | Coverage: 0.08% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsMatrix.h: 217 - 217 |
-------------------------------------------------------------------------------- |
217: inline Type_t* operator[](size_type i) { return X.data() + i * D2; } |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Platforms/CPU/SIMD/inner_product.hpp: 82 - 155 |
-------------------------------------------------------------------------------- |
82: for (int i = 0; i < n; i++) |
83: res += a[i] * b[i]; |
[...] |
155: for (int i = 0; i < n; i++) |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVectorOps.h: 129 - 131 |
-------------------------------------------------------------------------------- |
129: Type_t res = lhs[0] * rhs[0]; |
130: for (unsigned d = 1; d < D; ++d) |
131: res += lhs[d] * rhs[d]; |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 223 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Numerics/PETE/OperatorTags.h: 63 - 94 |
-------------------------------------------------------------------------------- |
63: return (a * b); |
[...] |
94: (const_cast<T1&>(a) += b); |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/DiracDeterminantRef.cpp: 235 - 255 |
-------------------------------------------------------------------------------- |
235: { |
236: recompute(P); |
237: |
238: if (NumPtcls == 1) |
239: { |
240: ValueType y = psiM(0, 0); |
241: GradType rv = y * dpsiM(0, 0); |
242: G[FirstIndex] += rv; |
243: L[FirstIndex] += y * d2psiM(0, 0) - dot(rv, rv); |
244: } |
245: else |
246: { |
247: for (int i = 0, iat = FirstIndex; i < NumPtcls; i++, iat++) |
248: { |
249: mGradType rv = simd::dot(psiM[i], dpsiM[i], NumOrbitals); |
250: mValueType lap = simd::dot(psiM[i], d2psiM[i], NumOrbitals); |
251: G[iat] += rv; |
252: L[iat] += lap - dot(rv, rv); |
253: } |
254: } |
255: return LogValue; |
0x272ac0 PUSH %RBP |
0x272ac1 MOV %RSP,%RBP |
0x272ac4 PUSH %R15 |
0x272ac6 PUSH %R14 |
0x272ac8 PUSH %R13 |
0x272aca PUSH %R12 |
0x272acc PUSH %RBX |
0x272acd SUB $0x38,%RSP |
0x272ad1 MOV %RCX,%R14 |
0x272ad4 MOV %RDX,%R15 |
0x272ad7 MOV %RDI,%RBX |
0x272ada CALL 275320 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE9recomputeERNS1_11ParticleSetE> |
0x272adf MOV 0x484(%RBX),%EDX |
0x272ae5 CMP $0x1,%RDX |
0x272ae9 JNE 272b5d |
0x272aeb MOV 0x100(%RBX),%RAX |
0x272af2 MOV 0x140(%RBX),%RCX |
0x272af9 MOV 0x18(%R15),%RDX |
0x272afd VMOVDDUP (%RAX),%XMM0 |
0x272b01 MOVSXD 0x478(%RBX),%RAX |
0x272b08 VMULPD (%RCX),%XMM0,%XMM2 |
0x272b0c VMULSD 0x10(%RCX),%XMM0,%XMM1 |
0x272b11 LEA (%RAX,%RAX,2),%RSI |
0x272b15 VADDPD (%RDX,%RSI,8),%XMM2,%XMM3 |
0x272b1a VMULPD %XMM2,%XMM2,%XMM2 |
0x272b1e VMOVUPD %XMM3,(%RDX,%RSI,8) |
0x272b23 VADDSD 0x10(%RDX,%RSI,8),%XMM1,%XMM3 |
0x272b29 VMOVSD %XMM3,0x10(%RDX,%RSI,8) |
0x272b2f VPERMILPD $0x1,%XMM2,%XMM3 |
0x272b35 MOV 0x180(%RBX),%RCX |
0x272b3c VADDSD %XMM2,%XMM3,%XMM2 |
0x272b40 MOV 0x18(%R14),%RDX |
0x272b44 VFMADD231SD %XMM1,%XMM1,%XMM2 |
0x272b49 VFMSUB231SD (%RCX),%XMM0,%XMM2 |
0x272b4e VADDSD (%RDX,%RAX,8),%XMM2,%XMM0 |
0x272b53 VMOVSD %XMM0,(%RDX,%RAX,8) |
0x272b58 JMP 2731f9 |
0x272b5d TEST %EDX,%EDX |
0x272b5f JLE 2731f9 |
0x272b65 MOV 0x480(%RBX),%ECX |
0x272b6b TEST %ECX,%ECX |
0x272b6d JLE 2731f9 |
0x272b73 MOV 0x118(%RBX),%RAX |
0x272b7a MOV 0x18(%R14),%R14 |
0x272b7e VMOVAPD -0x6e566(%RIP),%YMM1 |
0x272b86 VMOVAPD -0x6e60e(%RIP),%YMM2 |
0x272b8e VMOVAPD -0x6e5f6(%RIP),%YMM3 |
0x272b96 VMOVAPD -0x681a0(%RIP),%ZMM4 |
0x272ba0 VMOVAPD -0x6816a(%RIP),%ZMM5 |
0x272baa VMOVAPD -0x68134(%RIP),%ZMM6 |
0x272bb4 VMOVAPD -0x680fe(%RIP),%ZMM7 |
0x272bbe VMOVAPD -0x680c8(%RIP),%ZMM8 |
0x272bc8 VMOVAPD -0x68092(%RIP),%ZMM9 |
0x272bd2 MOV 0x158(%RBX),%R11 |
0x272bd9 MOV 0xd8(%RBX),%RDI |
0x272be0 MOV 0x100(%RBX),%RSI |
0x272be7 MOV 0x180(%RBX),%R8 |
0x272bee MOV 0x18(%R15),%R10 |
0x272bf2 MOV 0x140(%RBX),%R9 |
0x272bf9 MOV %ECX,%R15D |
0x272bfc MOV %ECX,%R12D |
0x272bff AND $-0x20,%R15D |
0x272c03 AND $-0x4,%R12D |
0x272c07 VXORPD %XMM0,%XMM0,%XMM0 |
0x272c0b MOV %RDX,-0x58(%RBP) |
0x272c0f MOV %R15,-0x30(%RBP) |
0x272c13 XOR %R15D,%R15D |
0x272c16 SAL $0x3,%RAX |
0x272c1a MOV %R14,-0x40(%RBP) |
0x272c1e MOVSXD 0x478(%RBX),%R14 |
0x272c25 SAL $0x3,%R11 |
0x272c29 SAL $0x3,%RDI |
0x272c2d LEA 0xc0(%RSI),%R13 |
0x272c34 LEA (%RAX,%RAX,2),%RAX |
0x272c38 MOV %R11,-0x48(%RBP) |
0x272c3c LEA 0xc0(%R8),%R11 |
0x272c43 MOV %RDI,-0x50(%RBP) |
0x272c47 MOV %RAX,-0x38(%RBP) |
0x272c4b MOV %RCX,%RAX |
0x272c4e SHR $0x5,%RAX |
0x272c52 SAL $0x8,%RAX |
0x272c56 JMP 272cd0 |
0x272c58 NOPL (%RAX,%RAX,1) |
(1040) 0x272c60 LEA (%R14,%R14,2),%RDX |
(1040) 0x272c64 MOV -0x48(%RBP),%RDI |
(1040) 0x272c68 ADD -0x38(%RBP),%R9 |
(1040) 0x272c6c INC %R15 |
(1040) 0x272c6f VADDPD (%R10,%RDX,8),%XMM11,%XMM13 |
(1040) 0x272c75 VMULPD %XMM11,%XMM11,%XMM11 |
(1040) 0x272c7a ADD %RDI,%R11 |
(1040) 0x272c7d ADD %RDI,%R8 |
(1040) 0x272c80 VMOVUPD %XMM13,(%R10,%RDX,8) |
(1040) 0x272c86 VADDSD 0x10(%R10,%RDX,8),%XMM10,%XMM13 |
(1040) 0x272c8d VMOVSD %XMM13,0x10(%R10,%RDX,8) |
(1040) 0x272c94 VPERMILPD $0x1,%XMM11,%XMM13 |
(1040) 0x272c9a MOV -0x40(%RBP),%RDX |
(1040) 0x272c9e VADDSD %XMM11,%XMM13,%XMM11 |
(1040) 0x272ca3 VFNMSUB231SD %XMM10,%XMM10,%XMM11 |
(1040) 0x272ca8 VADDSD %XMM11,%XMM12,%XMM10 |
(1040) 0x272cad VADDSD (%RDX,%R14,8),%XMM10,%XMM10 |
(1040) 0x272cb3 VMOVSD %XMM10,(%RDX,%R14,8) |
(1040) 0x272cb9 MOV -0x50(%RBP),%RDX |
(1040) 0x272cbd INC %R14 |
(1040) 0x272cc0 ADD %RDX,%R13 |
(1040) 0x272cc3 ADD %RDX,%RSI |
(1040) 0x272cc6 CMP %R15,-0x58(%RBP) |
(1040) 0x272cca JE 2731f9 |
(1040) 0x272cd0 CMP $0x4,%ECX |
(1040) 0x272cd3 JAE 272cf0 |
(1040) 0x272cd5 VXORPD %XMM11,%XMM11,%XMM11 |
(1040) 0x272cda XOR %EDX,%EDX |
(1040) 0x272cdc JMP 273080 |
0x272ce1 NOPW %CS:(%RAX,%RAX,1) |
(1040) 0x272cf0 CMP $0x20,%ECX |
(1040) 0x272cf3 JAE 272d10 |
(1040) 0x272cf5 VXORPD %XMM11,%XMM11,%XMM11 |
(1040) 0x272cfa VXORPD %XMM10,%XMM10,%XMM10 |
(1040) 0x272cff XOR %EDX,%EDX |
(1040) 0x272d01 JMP 272fb4 |
0x272d06 NOPW %CS:(%RAX,%RAX,1) |
(1040) 0x272d10 VXORPD %XMM11,%XMM11,%XMM11 |
(1040) 0x272d15 VXORPD %XMM12,%XMM12,%XMM12 |
(1040) 0x272d1a VXORPD %XMM13,%XMM13,%XMM13 |
(1040) 0x272d1f VXORPD %XMM14,%XMM14,%XMM14 |
(1040) 0x272d24 VXORPD %XMM15,%XMM15,%XMM15 |
(1040) 0x272d29 VXORPD %XMM16,%XMM16,%XMM16 |
(1040) 0x272d2f VXORPD %XMM17,%XMM17,%XMM17 |
(1040) 0x272d35 VXORPD %XMM18,%XMM18,%XMM18 |
(1040) 0x272d3b VXORPD %XMM10,%XMM10,%XMM10 |
(1040) 0x272d40 VXORPD %XMM19,%XMM19,%XMM19 |
(1040) 0x272d46 VXORPD %XMM20,%XMM20,%XMM20 |
(1040) 0x272d4c VXORPD %XMM21,%XMM21,%XMM21 |
(1040) 0x272d52 XOR %EDX,%EDX |
(1040) 0x272d54 MOV %R9,%RDI |
(1040) 0x272d57 NOPW (%RAX,%RAX,1) |
(1038) 0x272d60 VMOVUPD (%RDI),%ZMM23 |
(1038) 0x272d66 VMOVUPD 0x40(%RDI),%ZMM22 |
(1038) 0x272d6d VMOVUPD 0x80(%RDI),%ZMM27 |
(1038) 0x272d74 VMOVUPD -0xc0(%R13,%RDX,1),%ZMM26 |
(1038) 0x272d7c VMOVUPD -0x80(%R13,%RDX,1),%ZMM28 |
(1038) 0x272d84 VMOVAPD %ZMM23,%ZMM24 |
(1038) 0x272d8a VPERMT2PD %ZMM22,%ZMM4,%ZMM24 |
(1038) 0x272d90 VMOVAPD %ZMM23,%ZMM25 |
(1038) 0x272d96 VPERMT2PD %ZMM22,%ZMM6,%ZMM25 |
(1038) 0x272d9c VPERMT2PD %ZMM23,%ZMM8,%ZMM22 |
(1038) 0x272da2 VPERMT2PD %ZMM27,%ZMM5,%ZMM24 |
(1038) 0x272da8 VPERMT2PD %ZMM27,%ZMM7,%ZMM25 |
(1038) 0x272dae VPERMT2PD %ZMM27,%ZMM9,%ZMM22 |
(1038) 0x272db4 VMOVUPD 0xc0(%RDI),%ZMM27 |
(1038) 0x272dbb VFMADD231PD %ZMM24,%ZMM26,%ZMM11 |
(1038) 0x272dc1 VMOVUPD 0x100(%RDI),%ZMM24 |
(1038) 0x272dc8 VFMADD231PD %ZMM22,%ZMM26,%ZMM10 |
(1038) 0x272dce VFMADD231PD %ZMM25,%ZMM26,%ZMM15 |
(1038) 0x272dd4 VMOVUPD 0x140(%RDI),%ZMM26 |
(1038) 0x272ddb VMOVAPD %ZMM27,%ZMM22 |
(1038) 0x272de1 VMOVAPD %ZMM27,%ZMM23 |
(1038) 0x272de7 VPERMT2PD %ZMM24,%ZMM4,%ZMM22 |
(1038) 0x272ded VPERMT2PD %ZMM24,%ZMM6,%ZMM23 |
(1038) 0x272df3 VPERMT2PD %ZMM27,%ZMM8,%ZMM24 |
(1038) 0x272df9 VMOVUPD 0x180(%RDI),%ZMM27 |
(1038) 0x272e00 VPERMT2PD %ZMM26,%ZMM5,%ZMM22 |
(1038) 0x272e06 VPERMT2PD %ZMM26,%ZMM9,%ZMM24 |
(1038) 0x272e0c VPERMT2PD %ZMM26,%ZMM7,%ZMM23 |
(1038) 0x272e12 VMOVUPD 0x200(%RDI),%ZMM26 |
(1038) 0x272e19 VMOVAPD %ZMM27,%ZMM25 |
(1038) 0x272e1f VFMADD231PD %ZMM22,%ZMM28,%ZMM12 |
(1038) 0x272e25 VMOVUPD 0x1c0(%RDI),%ZMM22 |
(1038) 0x272e2c VFMADD231PD %ZMM24,%ZMM28,%ZMM19 |
(1038) 0x272e32 VMOVAPD %ZMM27,%ZMM24 |
(1038) 0x272e38 VFMADD231PD %ZMM23,%ZMM28,%ZMM16 |
(1038) 0x272e3e VPERMT2PD %ZMM22,%ZMM4,%ZMM24 |
(1038) 0x272e44 VPERMT2PD %ZMM22,%ZMM6,%ZMM25 |
(1038) 0x272e4a VPERMT2PD %ZMM27,%ZMM8,%ZMM22 |
(1038) 0x272e50 VMOVUPD -0x40(%R13,%RDX,1),%ZMM27 |
(1038) 0x272e58 VPERMT2PD %ZMM26,%ZMM5,%ZMM24 |
(1038) 0x272e5e VPERMT2PD %ZMM26,%ZMM7,%ZMM25 |
(1038) 0x272e64 VPERMT2PD %ZMM26,%ZMM9,%ZMM22 |
(1038) 0x272e6a VMOVUPD 0x240(%RDI),%ZMM26 |
(1038) 0x272e71 VFMADD231PD %ZMM24,%ZMM27,%ZMM13 |
(1038) 0x272e77 VMOVUPD 0x280(%RDI),%ZMM24 |
(1038) 0x272e7e VFMADD231PD %ZMM22,%ZMM27,%ZMM20 |
(1038) 0x272e84 VFMADD231PD %ZMM25,%ZMM27,%ZMM17 |
(1038) 0x272e8a VMOVUPD 0x2c0(%RDI),%ZMM27 |
(1038) 0x272e91 ADD $0x300,%RDI |
(1038) 0x272e98 VMOVAPD %ZMM26,%ZMM22 |
(1038) 0x272e9e VMOVAPD %ZMM26,%ZMM23 |
(1038) 0x272ea4 VPERMT2PD %ZMM24,%ZMM4,%ZMM22 |
(1038) 0x272eaa VPERMT2PD %ZMM24,%ZMM6,%ZMM23 |
(1038) 0x272eb0 VPERMT2PD %ZMM26,%ZMM8,%ZMM24 |
(1038) 0x272eb6 VMOVUPD (%R13,%RDX,1),%ZMM26 |
(1038) 0x272ebe ADD $0x100,%RDX |
(1038) 0x272ec5 VPERMT2PD %ZMM27,%ZMM5,%ZMM22 |
(1038) 0x272ecb VPERMT2PD %ZMM27,%ZMM7,%ZMM23 |
(1038) 0x272ed1 VPERMT2PD %ZMM27,%ZMM9,%ZMM24 |
(1038) 0x272ed7 VFMADD231PD %ZMM22,%ZMM26,%ZMM14 |
(1038) 0x272edd VFMADD231PD %ZMM23,%ZMM26,%ZMM18 |
(1038) 0x272ee3 VFMADD231PD %ZMM24,%ZMM26,%ZMM21 |
(1038) 0x272ee9 CMP %RDX,%RAX |
(1038) 0x272eec JNE 272d60 |
(1040) 0x272ef2 VADDPD %ZMM15,%ZMM16,%ZMM15 |
(1040) 0x272ef8 VADDPD %ZMM17,%ZMM18,%ZMM16 |
(1040) 0x272efe VADDPD %ZMM11,%ZMM12,%ZMM11 |
(1040) 0x272f04 VADDPD %ZMM13,%ZMM14,%ZMM13 |
(1040) 0x272f0a VADDPD %ZMM10,%ZMM19,%ZMM10 |
(1040) 0x272f10 VADDPD %ZMM20,%ZMM21,%ZMM19 |
(1040) 0x272f16 MOV -0x30(%RBP),%RDX |
(1040) 0x272f1a VADDPD %ZMM15,%ZMM16,%ZMM15 |
(1040) 0x272f20 VADDPD %ZMM11,%ZMM13,%ZMM11 |
(1040) 0x272f26 VADDPD %ZMM10,%ZMM19,%ZMM10 |
(1040) 0x272f2c VEXTRACTF64X4 $0x1,%ZMM15,%YMM16 |
(1040) 0x272f33 VEXTRACTF64X4 $0x1,%ZMM11,%YMM12 |
(1040) 0x272f3a VEXTRACTF64X4 $0x1,%ZMM10,%YMM19 |
(1040) 0x272f41 VADDPD %ZMM16,%ZMM15,%ZMM15 |
(1040) 0x272f47 VADDPD %ZMM12,%ZMM11,%ZMM11 |
(1040) 0x272f4d VADDPD %ZMM19,%ZMM10,%ZMM10 |
(1040) 0x272f53 VEXTRACTF32X4 $0x1,%YMM15,%XMM16 |
(1040) 0x272f5a VEXTRACTF128 $0x1,%YMM11,%XMM12 |
(1040) 0x272f60 VEXTRACTF32X4 $0x1,%YMM10,%XMM19 |
(1040) 0x272f67 VADDPD %XMM16,%XMM15,%XMM15 |
(1040) 0x272f6d VADDPD %XMM12,%XMM11,%XMM11 |
(1040) 0x272f72 VADDPD %XMM19,%XMM10,%XMM10 |
(1040) 0x272f78 VPERMILPD $0x1,%XMM15,%XMM16 |
(1040) 0x272f7f VPERMILPD $0x1,%XMM11,%XMM12 |
(1040) 0x272f85 VPERMILPD $0x1,%XMM10,%XMM19 |
(1040) 0x272f8c VADDSD %XMM16,%XMM15,%XMM15 |
(1040) 0x272f92 VADDSD %XMM12,%XMM11,%XMM12 |
(1040) 0x272f97 VADDSD %XMM19,%XMM10,%XMM10 |
(1040) 0x272f9d VUNPCKLPD %XMM15,%XMM12,%XMM11 |
(1040) 0x272fa2 CMP %RCX,%RDX |
(1040) 0x272fa5 JE 2730af |
(1040) 0x272fab TEST $0x1c,%CL |
(1040) 0x272fae JE 27306c |
(1040) 0x272fb4 LEA (%RDX,%RDX,2),%RDI |
(1040) 0x272fb8 VMOVQ %XMM11,%XMM12 |
(1040) 0x272fbd VUNPCKHPD %XMM0,%XMM11,%XMM11 |
(1040) 0x272fc1 VMOVQ %XMM10,%XMM10 |
(1040) 0x272fc6 LEA (%R9,%RDI,8),%RDI |
(1040) 0x272fca NOPW (%RAX,%RAX,1) |
(1044) 0x272fd0 VMOVUPD (%RDI),%ZMM14 |
(1044) 0x272fd6 VMOVUPD 0x40(%RDI),%YMM15 |
(1044) 0x272fdb VMOVUPD (%RSI,%RDX,8),%YMM13 |
(1044) 0x272fe0 ADD $0x4,%RDX |
(1044) 0x272fe4 ADD $0x60,%RDI |
(1044) 0x272fe8 VMOVAPD %ZMM14,%ZMM16 |
(1044) 0x272fee VMOVAPD %ZMM14,%ZMM17 |
(1044) 0x272ff4 VPERMT2PD %ZMM15,%ZMM1,%ZMM16 |
(1044) 0x272ffa VPERMT2PD %ZMM15,%ZMM2,%ZMM17 |
(1044) 0x273000 VPERMT2PD %ZMM15,%ZMM3,%ZMM14 |
(1044) 0x273006 VFMADD231PD %YMM16,%YMM13,%YMM12 |
(1044) 0x27300c VFMADD231PD %YMM17,%YMM13,%YMM11 |
(1044) 0x273012 VFMADD231PD %YMM14,%YMM13,%YMM10 |
(1044) 0x273017 CMP %RDX,%R12 |
(1044) 0x27301a JNE 272fd0 |
(1040) 0x27301c VEXTRACTF128 $0x1,%YMM10,%XMM13 |
(1040) 0x273022 VADDPD %YMM13,%YMM10,%YMM10 |
(1040) 0x273027 VPERMILPD $0x1,%XMM10,%XMM13 |
(1040) 0x27302d VADDPD %YMM13,%YMM10,%YMM10 |
(1040) 0x273032 VEXTRACTF128 $0x1,%YMM11,%XMM13 |
(1040) 0x273038 VADDPD %XMM13,%XMM11,%XMM11 |
(1040) 0x27303d VEXTRACTF128 $0x1,%YMM12,%XMM13 |
(1040) 0x273043 VADDPD %XMM13,%XMM12,%XMM12 |
(1040) 0x273048 VUNPCKHPD %XMM11,%XMM12,%XMM13 |
(1040) 0x27304d VUNPCKLPD %XMM11,%XMM12,%XMM11 |
(1040) 0x273052 VADDPD %XMM13,%XMM11,%XMM11 |
(1040) 0x273057 CMP %RCX,%R12 |
(1040) 0x27305a JE 2730af |
(1040) 0x27305c VBROADCASTSD %XMM10,%YMM10 |
(1040) 0x273061 MOV %R12,%RDX |
(1040) 0x273064 VBLENDPD $0xc,%YMM10,%YMM11,%YMM11 |
(1040) 0x27306a JMP 273080 |
(1040) 0x27306c VUNPCKLPD %XMM15,%XMM12,%XMM11 |
(1040) 0x273071 VINSERTF128 $0x1,%XMM10,%YMM11,%YMM10 |
(1040) 0x273077 VPERMILPD $0x2,%YMM10,%YMM11 |
(1040) 0x27307d NOPL (%RAX) |
(1040) 0x273080 LEA (%RDX,%RDX,2),%RDI |
(1040) 0x273084 LEA (%R9,%RDI,8),%RDI |
(1040) 0x273088 NOPL (%RAX,%RAX,1) |
(1043) 0x273090 VPERMILPD $0x2,(%RDI),%YMM10 |
(1043) 0x273096 ADD $0x18,%RDI |
(1043) 0x27309a VFMADD231PD (%RSI,%RDX,8){1to4},%YMM10,%YMM11 |
(1043) 0x2730a1 INC %RDX |
(1043) 0x2730a4 CMP %RDX,%RCX |
(1043) 0x2730a7 JNE 273090 |
(1040) 0x2730a9 VEXTRACTF128 $0x1,%YMM11,%XMM10 |
(1040) 0x2730af CMP $0x4,%ECX |
(1040) 0x2730b2 JAE 2730c0 |
(1040) 0x2730b4 VXORPD %XMM12,%XMM12,%XMM12 |
(1040) 0x2730b9 XOR %EDX,%EDX |
(1040) 0x2730bb JMP 2731e0 |
(1040) 0x2730c0 CMP $0x20,%ECX |
(1040) 0x2730c3 JAE 2730e0 |
(1040) 0x2730c5 VXORPD %XMM12,%XMM12,%XMM12 |
(1040) 0x2730ca XOR %EDI,%EDI |
(1040) 0x2730cc JMP 273195 |
0x2730d1 NOPW %CS:(%RAX,%RAX,1) |
(1040) 0x2730e0 VXORPD %XMM12,%XMM12,%XMM12 |
(1040) 0x2730e5 VXORPD %XMM13,%XMM13,%XMM13 |
(1040) 0x2730ea VXORPD %XMM14,%XMM14,%XMM14 |
(1040) 0x2730ef VXORPD %XMM15,%XMM15,%XMM15 |
(1040) 0x2730f4 XOR %EDX,%EDX |
(1040) 0x2730f6 NOPW %CS:(%RAX,%RAX,1) |
(1039) 0x273100 VMOVUPD -0xc0(%R11,%RDX,1),%ZMM16 |
(1039) 0x273108 VMOVUPD -0x80(%R11,%RDX,1),%ZMM17 |
(1039) 0x273110 VMOVUPD -0x40(%R11,%RDX,1),%ZMM18 |
(1039) 0x273118 VMOVUPD (%R11,%RDX,1),%ZMM19 |
(1039) 0x27311f VFMADD231PD -0xc0(%R13,%RDX,1),%ZMM16,%ZMM12 |
(1039) 0x273127 VFMADD231PD -0x80(%R13,%RDX,1),%ZMM17,%ZMM13 |
(1039) 0x27312f VFMADD231PD -0x40(%R13,%RDX,1),%ZMM18,%ZMM14 |
(1039) 0x273137 VFMADD231PD (%R13,%RDX,1),%ZMM19,%ZMM15 |
(1039) 0x27313f ADD $0x100,%RDX |
(1039) 0x273146 CMP %RDX,%RAX |
(1039) 0x273149 JNE 273100 |
(1040) 0x27314b VADDPD %ZMM12,%ZMM13,%ZMM12 |
(1040) 0x273151 VADDPD %ZMM14,%ZMM15,%ZMM14 |
(1040) 0x273157 MOV -0x30(%RBP),%RDX |
(1040) 0x27315b VADDPD %ZMM12,%ZMM14,%ZMM12 |
(1040) 0x273161 VEXTRACTF64X4 $0x1,%ZMM12,%YMM13 |
(1040) 0x273168 VADDPD %ZMM13,%ZMM12,%ZMM12 |
(1040) 0x27316e VEXTRACTF128 $0x1,%YMM12,%XMM13 |
(1040) 0x273174 VADDPD %XMM13,%XMM12,%XMM12 |
(1040) 0x273179 VPERMILPD $0x1,%XMM12,%XMM13 |
(1040) 0x27317f VADDSD %XMM13,%XMM12,%XMM12 |
(1040) 0x273184 CMP %RCX,%RDX |
(1040) 0x273187 JE 272c60 |
(1040) 0x27318d MOV %RDX,%RDI |
(1040) 0x273190 TEST $0x1c,%CL |
(1040) 0x273193 JE 2731e0 |
(1040) 0x273195 VMOVQ %XMM12,%XMM12 |
(1040) 0x27319a NOPW (%RAX,%RAX,1) |
(1042) 0x2731a0 VMOVUPD (%R8,%RDI,8),%YMM13 |
(1042) 0x2731a6 VFMADD231PD (%RSI,%RDI,8),%YMM13,%YMM12 |
(1042) 0x2731ac ADD $0x4,%RDI |
(1042) 0x2731b0 CMP %RDI,%R12 |
(1042) 0x2731b3 JNE 2731a0 |
(1040) 0x2731b5 VEXTRACTF128 $0x1,%YMM12,%XMM13 |
(1040) 0x2731bb MOV %R12,%RDX |
(1040) 0x2731be VADDPD %XMM13,%XMM12,%XMM12 |
(1040) 0x2731c3 VPERMILPD $0x1,%XMM12,%XMM13 |
(1040) 0x2731c9 VADDSD %XMM13,%XMM12,%XMM12 |
(1040) 0x2731ce CMP %RCX,%R12 |
(1040) 0x2731d1 JE 272c60 |
(1040) 0x2731d7 NOPW (%RAX,%RAX,1) |
(1041) 0x2731e0 VMOVSD (%R8,%RDX,8),%XMM13 |
(1041) 0x2731e6 VFMADD231SD (%RSI,%RDX,8),%XMM13,%XMM12 |
(1041) 0x2731ec INC %RDX |
(1041) 0x2731ef CMP %RDX,%RCX |
(1041) 0x2731f2 JNE 2731e0 |
(1040) 0x2731f4 JMP 272c60 |
0x2731f9 VMOVSD 0x10(%RBX),%XMM0 |
0x2731fe ADD $0x38,%RSP |
0x273202 POP %RBX |
0x273203 POP %R12 |
0x273205 POP %R13 |
0x273207 POP %R14 |
0x273209 POP %R15 |
0x27320b POP %RBP |
0x27320c VZEROUPPER |
0x27320f RET |
Path / |
Source file and lines | DiracDeterminantRef.cpp:235-255 |
Module | exec |
nb instructions | 96 |
nb uops | 93 |
loop length | 479 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 3 |
used zmm registers | 6 |
nb stack references | 6 |
ADD-SUB / MUL ratio | 1.33 |
micro-operation queue | 15.50 cycles |
front end | 15.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.50 | 5.50 | 5.25 | 5.25 | 3.50 | 14.67 | 14.67 | 14.67 | 2.50 | 2.50 | 2.67 | 2.33 | 1.50 | 1.50 |
cycles | 5.50 | 5.50 | 5.25 | 5.25 | 3.50 | 16.67 | 16.67 | 16.67 | 2.50 | 2.50 | 2.67 | 2.33 | 1.50 | 1.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 15.50 |
Dispatch | 16.67 |
Overall L1 | 16.67 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 60% |
load | 64% |
store | 33% |
mul | 66% |
add-sub | 25% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 37% |
load | 52% |
store | 11% |
mul | 66% |
add-sub | 25% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 30% |
all | 11% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 41% |
load | 51% |
store | 16% |
mul | 20% |
add-sub | 15% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 28% |
load | 44% |
store | 13% |
mul | 20% |
add-sub | 15% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RCX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 275320 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE9recomputeERNS1_11ParticleSetE> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x484(%RBX),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP $0x1,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JNE 272b5d <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x9d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x100(%RBX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x140(%RBX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R15),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVDDUP (%RAX),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x478(%RBX),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULPD (%RCX),%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULSD 0x10(%RCX),%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
LEA (%RAX,%RAX,2),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VADDPD (%RDX,%RSI,8),%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMULPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVUPD %XMM3,(%RDX,%RSI,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VADDSD 0x10(%RDX,%RSI,8),%XMM1,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM3,0x10(%RDX,%RSI,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VPERMILPD $0x1,%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOV 0x180(%RBX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VADDSD %XMM2,%XMM3,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOV 0x18(%R14),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VFMADD231SD %XMM1,%XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMSUB231SD (%RCX),%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD (%RDX,%RAX,8),%XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM0,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
JMP 2731f9 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x739> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
TEST %EDX,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 2731f9 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x739> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x480(%RBX),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
TEST %ECX,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 2731f9 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x739> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x118(%RBX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R14),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVAPD -0x6e566(%RIP),%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x6e60e(%RIP),%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x6e5f6(%RIP),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x681a0(%RIP),%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x6816a(%RIP),%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x68134(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x680fe(%RIP),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x680c8(%RIP),%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x68092(%RIP),%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x158(%RBX),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0xd8(%RBX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x100(%RBX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x180(%RBX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R15),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x140(%RBX),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %ECX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %ECX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x20,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
AND $-0x4,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R15,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x3,%RAX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R14,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD 0x478(%RBX),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
SAL $0x3,%R11 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SAL $0x3,%RDI | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA 0xc0(%RSI),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%RAX,2),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA 0xc0(%R8),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RDI,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x5,%RAX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SAL $0x8,%RAX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
JMP 272cd0 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x210> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VMOVSD 0x10(%RBX),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Source file and lines | DiracDeterminantRef.cpp:235-255 |
Module | exec |
nb instructions | 96 |
nb uops | 93 |
loop length | 479 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 3 |
used zmm registers | 6 |
nb stack references | 6 |
ADD-SUB / MUL ratio | 1.33 |
micro-operation queue | 15.50 cycles |
front end | 15.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.50 | 5.50 | 5.25 | 5.25 | 3.50 | 14.67 | 14.67 | 14.67 | 2.50 | 2.50 | 2.67 | 2.33 | 1.50 | 1.50 |
cycles | 5.50 | 5.50 | 5.25 | 5.25 | 3.50 | 16.67 | 16.67 | 16.67 | 2.50 | 2.50 | 2.67 | 2.33 | 1.50 | 1.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 15.50 |
Dispatch | 16.67 |
Overall L1 | 16.67 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 60% |
load | 64% |
store | 33% |
mul | 66% |
add-sub | 25% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 37% |
load | 52% |
store | 11% |
mul | 66% |
add-sub | 25% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 30% |
all | 11% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 41% |
load | 51% |
store | 16% |
mul | 20% |
add-sub | 15% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 28% |
load | 44% |
store | 13% |
mul | 20% |
add-sub | 15% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RCX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 275320 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE9recomputeERNS1_11ParticleSetE> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x484(%RBX),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP $0x1,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JNE 272b5d <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x9d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x100(%RBX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x140(%RBX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R15),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVDDUP (%RAX),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x478(%RBX),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULPD (%RCX),%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULSD 0x10(%RCX),%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
LEA (%RAX,%RAX,2),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VADDPD (%RDX,%RSI,8),%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMULPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVUPD %XMM3,(%RDX,%RSI,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VADDSD 0x10(%RDX,%RSI,8),%XMM1,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM3,0x10(%RDX,%RSI,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VPERMILPD $0x1,%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOV 0x180(%RBX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VADDSD %XMM2,%XMM3,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOV 0x18(%R14),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VFMADD231SD %XMM1,%XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMSUB231SD (%RCX),%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD (%RDX,%RAX,8),%XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM0,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
JMP 2731f9 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x739> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
TEST %EDX,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 2731f9 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x739> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x480(%RBX),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
TEST %ECX,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 2731f9 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x739> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x118(%RBX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R14),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVAPD -0x6e566(%RIP),%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x6e60e(%RIP),%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x6e5f6(%RIP),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x681a0(%RIP),%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x6816a(%RIP),%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x68134(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x680fe(%RIP),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x680c8(%RIP),%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x68092(%RIP),%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x158(%RBX),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0xd8(%RBX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x100(%RBX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x180(%RBX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R15),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x140(%RBX),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %ECX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %ECX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x20,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
AND $-0x4,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R15,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x3,%RAX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R14,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD 0x478(%RBX),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
SAL $0x3,%R11 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SAL $0x3,%RDI | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA 0xc0(%RSI),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%RAX,2),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA 0xc0(%R8),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RDI,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x5,%RAX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SAL $0x8,%RAX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
JMP 272cd0 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x210> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VMOVSD 0x10(%RBX),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE– | 0.08 | 0.16 |
▼Loop 1040 - inner_product.hpp:82-155 - exec– | 0 | 0 |
○Loop 1038 - inner_product.hpp:155-155 - exec | 0.06 | 0.12 |
○Loop 1039 - inner_product.hpp:82-83 - exec | 0.02 | 0.03 |
○Loop 1042 - inner_product.hpp:82-83 - exec | 0 | 0 |
○Loop 1041 - inner_product.hpp:82-83 - exec | 0 | 0 |
○Loop 1043 - inner_product.hpp:155-155 - exec | 0 | 0 |
○Loop 1044 - inner_product.hpp:155-155 - exec | 0 | 0 |