Function: miniqmcreference::DiracDeterminantRef<qmcplusplus::DelayedUpdate<double, double> >::evalua ... | Module: exec | Source: DiracDeterminantRef.cpp:235-255 [...] | Coverage: 0.06% |
---|
Function: miniqmcreference::DiracDeterminantRef<qmcplusplus::DelayedUpdate<double, double> >::evalua ... | Module: exec | Source: DiracDeterminantRef.cpp:235-255 [...] | Coverage: 0.06% |
---|
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/Numerics/PETE/OperatorTags.h: 63 - 94 |
-------------------------------------------------------------------------------- |
63: return (a * b); |
[...] |
94: (const_cast<T1&>(a) += b); |
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsMatrix.h: 217 - 217 |
-------------------------------------------------------------------------------- |
217: inline Type_t* operator[](size_type i) { return X.data() + i * D2; } |
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/Platforms/CPU/SIMD/inner_product.hpp: 82 - 155 |
-------------------------------------------------------------------------------- |
82: for (int i = 0; i < n; i++) |
83: res += a[i] * b[i]; |
[...] |
155: for (int i = 0; i < n; i++) |
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVectorOps.h: 129 - 131 |
-------------------------------------------------------------------------------- |
129: Type_t res = lhs[0] * rhs[0]; |
130: for (unsigned d = 1; d < D; ++d) |
131: res += lhs[d] * rhs[d]; |
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 223 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/DiracDeterminantRef.cpp: 235 - 255 |
-------------------------------------------------------------------------------- |
235: { |
236: recompute(P); |
237: |
238: if (NumPtcls == 1) |
239: { |
240: ValueType y = psiM(0, 0); |
241: GradType rv = y * dpsiM(0, 0); |
242: G[FirstIndex] += rv; |
243: L[FirstIndex] += y * d2psiM(0, 0) - dot(rv, rv); |
244: } |
245: else |
246: { |
247: for (int i = 0, iat = FirstIndex; i < NumPtcls; i++, iat++) |
248: { |
249: mGradType rv = simd::dot(psiM[i], dpsiM[i], NumOrbitals); |
250: mValueType lap = simd::dot(psiM[i], d2psiM[i], NumOrbitals); |
251: G[iat] += rv; |
252: L[iat] += lap - dot(rv, rv); |
253: } |
254: } |
255: return LogValue; |
0x272800 PUSH %RBP |
0x272801 MOV %RSP,%RBP |
0x272804 PUSH %R15 |
0x272806 PUSH %R14 |
0x272808 PUSH %R13 |
0x27280a PUSH %R12 |
0x27280c PUSH %RBX |
0x27280d SUB $0x38,%RSP |
0x272811 MOV %RCX,%R14 |
0x272814 MOV %RDX,%R15 |
0x272817 MOV %RDI,%RBX |
0x27281a CALL 275060 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE9recomputeERNS1_11ParticleSetE> |
0x27281f MOV 0x484(%RBX),%EDX |
0x272825 CMP $0x1,%RDX |
0x272829 JNE 27289d |
0x27282b MOV 0x100(%RBX),%RAX |
0x272832 MOV 0x140(%RBX),%RCX |
0x272839 MOV 0x18(%R15),%RDX |
0x27283d VMOVDDUP (%RAX),%XMM0 |
0x272841 MOVSXD 0x478(%RBX),%RAX |
0x272848 VMULPD (%RCX),%XMM0,%XMM2 |
0x27284c VMULSD 0x10(%RCX),%XMM0,%XMM1 |
0x272851 LEA (%RAX,%RAX,2),%RSI |
0x272855 VADDPD (%RDX,%RSI,8),%XMM2,%XMM3 |
0x27285a VMULPD %XMM2,%XMM2,%XMM2 |
0x27285e VMOVUPD %XMM3,(%RDX,%RSI,8) |
0x272863 VADDSD 0x10(%RDX,%RSI,8),%XMM1,%XMM3 |
0x272869 VMOVSD %XMM3,0x10(%RDX,%RSI,8) |
0x27286f VPERMILPD $0x1,%XMM2,%XMM3 |
0x272875 MOV 0x180(%RBX),%RCX |
0x27287c VADDSD %XMM2,%XMM3,%XMM2 |
0x272880 MOV 0x18(%R14),%RDX |
0x272884 VFMADD231SD %XMM1,%XMM1,%XMM2 |
0x272889 VFMSUB231SD (%RCX),%XMM0,%XMM2 |
0x27288e VADDSD (%RDX,%RAX,8),%XMM2,%XMM0 |
0x272893 VMOVSD %XMM0,(%RDX,%RAX,8) |
0x272898 JMP 272f39 |
0x27289d TEST %EDX,%EDX |
0x27289f JLE 272f39 |
0x2728a5 MOV 0x480(%RBX),%ECX |
0x2728ab TEST %ECX,%ECX |
0x2728ad JLE 272f39 |
0x2728b3 MOV 0x118(%RBX),%RAX |
0x2728ba MOV 0x18(%R14),%R14 |
0x2728be VMOVAPD -0x6e246(%RIP),%YMM1 |
0x2728c6 VMOVAPD -0x6e2ee(%RIP),%YMM2 |
0x2728ce VMOVAPD -0x6e2d6(%RIP),%YMM3 |
0x2728d6 VMOVAPD -0x681a0(%RIP),%ZMM4 |
0x2728e0 VMOVAPD -0x6816a(%RIP),%ZMM5 |
0x2728ea VMOVAPD -0x68134(%RIP),%ZMM6 |
0x2728f4 VMOVAPD -0x680fe(%RIP),%ZMM7 |
0x2728fe VMOVAPD -0x680c8(%RIP),%ZMM8 |
0x272908 VMOVAPD -0x68092(%RIP),%ZMM9 |
0x272912 MOV 0x158(%RBX),%R11 |
0x272919 MOV 0xd8(%RBX),%RDI |
0x272920 MOV 0x100(%RBX),%RSI |
0x272927 MOV 0x180(%RBX),%R8 |
0x27292e MOV 0x18(%R15),%R10 |
0x272932 MOV 0x140(%RBX),%R9 |
0x272939 MOV %ECX,%R15D |
0x27293c MOV %ECX,%R12D |
0x27293f AND $-0x20,%R15D |
0x272943 AND $-0x4,%R12D |
0x272947 VXORPD %XMM0,%XMM0,%XMM0 |
0x27294b MOV %RDX,-0x58(%RBP) |
0x27294f MOV %R15,-0x30(%RBP) |
0x272953 XOR %R15D,%R15D |
0x272956 SAL $0x3,%RAX |
0x27295a MOV %R14,-0x40(%RBP) |
0x27295e MOVSXD 0x478(%RBX),%R14 |
0x272965 SAL $0x3,%R11 |
0x272969 SAL $0x3,%RDI |
0x27296d LEA 0xc0(%RSI),%R13 |
0x272974 LEA (%RAX,%RAX,2),%RAX |
0x272978 MOV %R11,-0x48(%RBP) |
0x27297c LEA 0xc0(%R8),%R11 |
0x272983 MOV %RDI,-0x50(%RBP) |
0x272987 MOV %RAX,-0x38(%RBP) |
0x27298b MOV %RCX,%RAX |
0x27298e SHR $0x5,%RAX |
0x272992 SAL $0x8,%RAX |
0x272996 JMP 272a10 |
0x272998 NOPL (%RAX,%RAX,1) |
(1040) 0x2729a0 LEA (%R14,%R14,2),%RDX |
(1040) 0x2729a4 MOV -0x48(%RBP),%RDI |
(1040) 0x2729a8 ADD -0x38(%RBP),%R9 |
(1040) 0x2729ac INC %R15 |
(1040) 0x2729af VADDPD (%R10,%RDX,8),%XMM11,%XMM13 |
(1040) 0x2729b5 VMULPD %XMM11,%XMM11,%XMM11 |
(1040) 0x2729ba ADD %RDI,%R11 |
(1040) 0x2729bd ADD %RDI,%R8 |
(1040) 0x2729c0 VMOVUPD %XMM13,(%R10,%RDX,8) |
(1040) 0x2729c6 VADDSD 0x10(%R10,%RDX,8),%XMM10,%XMM13 |
(1040) 0x2729cd VMOVSD %XMM13,0x10(%R10,%RDX,8) |
(1040) 0x2729d4 VPERMILPD $0x1,%XMM11,%XMM13 |
(1040) 0x2729da MOV -0x40(%RBP),%RDX |
(1040) 0x2729de VADDSD %XMM11,%XMM13,%XMM11 |
(1040) 0x2729e3 VFNMSUB231SD %XMM10,%XMM10,%XMM11 |
(1040) 0x2729e8 VADDSD %XMM11,%XMM12,%XMM10 |
(1040) 0x2729ed VADDSD (%RDX,%R14,8),%XMM10,%XMM10 |
(1040) 0x2729f3 VMOVSD %XMM10,(%RDX,%R14,8) |
(1040) 0x2729f9 MOV -0x50(%RBP),%RDX |
(1040) 0x2729fd INC %R14 |
(1040) 0x272a00 ADD %RDX,%R13 |
(1040) 0x272a03 ADD %RDX,%RSI |
(1040) 0x272a06 CMP %R15,-0x58(%RBP) |
(1040) 0x272a0a JE 272f39 |
(1040) 0x272a10 CMP $0x4,%ECX |
(1040) 0x272a13 JAE 272a30 |
(1040) 0x272a15 VXORPD %XMM11,%XMM11,%XMM11 |
(1040) 0x272a1a XOR %EDX,%EDX |
(1040) 0x272a1c JMP 272dc0 |
0x272a21 NOPW %CS:(%RAX,%RAX,1) |
(1040) 0x272a30 CMP $0x20,%ECX |
(1040) 0x272a33 JAE 272a50 |
(1040) 0x272a35 VXORPD %XMM11,%XMM11,%XMM11 |
(1040) 0x272a3a VXORPD %XMM10,%XMM10,%XMM10 |
(1040) 0x272a3f XOR %EDX,%EDX |
(1040) 0x272a41 JMP 272cf4 |
0x272a46 NOPW %CS:(%RAX,%RAX,1) |
(1040) 0x272a50 VXORPD %XMM11,%XMM11,%XMM11 |
(1040) 0x272a55 VXORPD %XMM12,%XMM12,%XMM12 |
(1040) 0x272a5a VXORPD %XMM13,%XMM13,%XMM13 |
(1040) 0x272a5f VXORPD %XMM14,%XMM14,%XMM14 |
(1040) 0x272a64 VXORPD %XMM15,%XMM15,%XMM15 |
(1040) 0x272a69 VXORPD %XMM16,%XMM16,%XMM16 |
(1040) 0x272a6f VXORPD %XMM17,%XMM17,%XMM17 |
(1040) 0x272a75 VXORPD %XMM18,%XMM18,%XMM18 |
(1040) 0x272a7b VXORPD %XMM10,%XMM10,%XMM10 |
(1040) 0x272a80 VXORPD %XMM19,%XMM19,%XMM19 |
(1040) 0x272a86 VXORPD %XMM20,%XMM20,%XMM20 |
(1040) 0x272a8c VXORPD %XMM21,%XMM21,%XMM21 |
(1040) 0x272a92 XOR %EDX,%EDX |
(1040) 0x272a94 MOV %R9,%RDI |
(1040) 0x272a97 NOPW (%RAX,%RAX,1) |
(1038) 0x272aa0 VMOVUPD (%RDI),%ZMM23 |
(1038) 0x272aa6 VMOVUPD 0x40(%RDI),%ZMM22 |
(1038) 0x272aad VMOVUPD 0x80(%RDI),%ZMM27 |
(1038) 0x272ab4 VMOVUPD -0xc0(%R13,%RDX,1),%ZMM26 |
(1038) 0x272abc VMOVUPD -0x80(%R13,%RDX,1),%ZMM28 |
(1038) 0x272ac4 VMOVAPD %ZMM23,%ZMM24 |
(1038) 0x272aca VPERMT2PD %ZMM22,%ZMM4,%ZMM24 |
(1038) 0x272ad0 VMOVAPD %ZMM23,%ZMM25 |
(1038) 0x272ad6 VPERMT2PD %ZMM22,%ZMM6,%ZMM25 |
(1038) 0x272adc VPERMT2PD %ZMM23,%ZMM8,%ZMM22 |
(1038) 0x272ae2 VPERMT2PD %ZMM27,%ZMM5,%ZMM24 |
(1038) 0x272ae8 VPERMT2PD %ZMM27,%ZMM7,%ZMM25 |
(1038) 0x272aee VPERMT2PD %ZMM27,%ZMM9,%ZMM22 |
(1038) 0x272af4 VMOVUPD 0xc0(%RDI),%ZMM27 |
(1038) 0x272afb VFMADD231PD %ZMM24,%ZMM26,%ZMM11 |
(1038) 0x272b01 VMOVUPD 0x100(%RDI),%ZMM24 |
(1038) 0x272b08 VFMADD231PD %ZMM22,%ZMM26,%ZMM10 |
(1038) 0x272b0e VFMADD231PD %ZMM25,%ZMM26,%ZMM15 |
(1038) 0x272b14 VMOVUPD 0x140(%RDI),%ZMM26 |
(1038) 0x272b1b VMOVAPD %ZMM27,%ZMM22 |
(1038) 0x272b21 VMOVAPD %ZMM27,%ZMM23 |
(1038) 0x272b27 VPERMT2PD %ZMM24,%ZMM4,%ZMM22 |
(1038) 0x272b2d VPERMT2PD %ZMM24,%ZMM6,%ZMM23 |
(1038) 0x272b33 VPERMT2PD %ZMM27,%ZMM8,%ZMM24 |
(1038) 0x272b39 VMOVUPD 0x180(%RDI),%ZMM27 |
(1038) 0x272b40 VPERMT2PD %ZMM26,%ZMM5,%ZMM22 |
(1038) 0x272b46 VPERMT2PD %ZMM26,%ZMM9,%ZMM24 |
(1038) 0x272b4c VPERMT2PD %ZMM26,%ZMM7,%ZMM23 |
(1038) 0x272b52 VMOVUPD 0x200(%RDI),%ZMM26 |
(1038) 0x272b59 VMOVAPD %ZMM27,%ZMM25 |
(1038) 0x272b5f VFMADD231PD %ZMM22,%ZMM28,%ZMM12 |
(1038) 0x272b65 VMOVUPD 0x1c0(%RDI),%ZMM22 |
(1038) 0x272b6c VFMADD231PD %ZMM24,%ZMM28,%ZMM19 |
(1038) 0x272b72 VMOVAPD %ZMM27,%ZMM24 |
(1038) 0x272b78 VFMADD231PD %ZMM23,%ZMM28,%ZMM16 |
(1038) 0x272b7e VPERMT2PD %ZMM22,%ZMM4,%ZMM24 |
(1038) 0x272b84 VPERMT2PD %ZMM22,%ZMM6,%ZMM25 |
(1038) 0x272b8a VPERMT2PD %ZMM27,%ZMM8,%ZMM22 |
(1038) 0x272b90 VMOVUPD -0x40(%R13,%RDX,1),%ZMM27 |
(1038) 0x272b98 VPERMT2PD %ZMM26,%ZMM5,%ZMM24 |
(1038) 0x272b9e VPERMT2PD %ZMM26,%ZMM7,%ZMM25 |
(1038) 0x272ba4 VPERMT2PD %ZMM26,%ZMM9,%ZMM22 |
(1038) 0x272baa VMOVUPD 0x240(%RDI),%ZMM26 |
(1038) 0x272bb1 VFMADD231PD %ZMM24,%ZMM27,%ZMM13 |
(1038) 0x272bb7 VMOVUPD 0x280(%RDI),%ZMM24 |
(1038) 0x272bbe VFMADD231PD %ZMM22,%ZMM27,%ZMM20 |
(1038) 0x272bc4 VFMADD231PD %ZMM25,%ZMM27,%ZMM17 |
(1038) 0x272bca VMOVUPD 0x2c0(%RDI),%ZMM27 |
(1038) 0x272bd1 ADD $0x300,%RDI |
(1038) 0x272bd8 VMOVAPD %ZMM26,%ZMM22 |
(1038) 0x272bde VMOVAPD %ZMM26,%ZMM23 |
(1038) 0x272be4 VPERMT2PD %ZMM24,%ZMM4,%ZMM22 |
(1038) 0x272bea VPERMT2PD %ZMM24,%ZMM6,%ZMM23 |
(1038) 0x272bf0 VPERMT2PD %ZMM26,%ZMM8,%ZMM24 |
(1038) 0x272bf6 VMOVUPD (%R13,%RDX,1),%ZMM26 |
(1038) 0x272bfe ADD $0x100,%RDX |
(1038) 0x272c05 VPERMT2PD %ZMM27,%ZMM5,%ZMM22 |
(1038) 0x272c0b VPERMT2PD %ZMM27,%ZMM7,%ZMM23 |
(1038) 0x272c11 VPERMT2PD %ZMM27,%ZMM9,%ZMM24 |
(1038) 0x272c17 VFMADD231PD %ZMM22,%ZMM26,%ZMM14 |
(1038) 0x272c1d VFMADD231PD %ZMM23,%ZMM26,%ZMM18 |
(1038) 0x272c23 VFMADD231PD %ZMM24,%ZMM26,%ZMM21 |
(1038) 0x272c29 CMP %RDX,%RAX |
(1038) 0x272c2c JNE 272aa0 |
(1040) 0x272c32 VADDPD %ZMM15,%ZMM16,%ZMM15 |
(1040) 0x272c38 VADDPD %ZMM17,%ZMM18,%ZMM16 |
(1040) 0x272c3e VADDPD %ZMM11,%ZMM12,%ZMM11 |
(1040) 0x272c44 VADDPD %ZMM13,%ZMM14,%ZMM13 |
(1040) 0x272c4a VADDPD %ZMM10,%ZMM19,%ZMM10 |
(1040) 0x272c50 VADDPD %ZMM20,%ZMM21,%ZMM19 |
(1040) 0x272c56 MOV -0x30(%RBP),%RDX |
(1040) 0x272c5a VADDPD %ZMM15,%ZMM16,%ZMM15 |
(1040) 0x272c60 VADDPD %ZMM11,%ZMM13,%ZMM11 |
(1040) 0x272c66 VADDPD %ZMM10,%ZMM19,%ZMM10 |
(1040) 0x272c6c VEXTRACTF64X4 $0x1,%ZMM15,%YMM16 |
(1040) 0x272c73 VEXTRACTF64X4 $0x1,%ZMM11,%YMM12 |
(1040) 0x272c7a VEXTRACTF64X4 $0x1,%ZMM10,%YMM19 |
(1040) 0x272c81 VADDPD %ZMM16,%ZMM15,%ZMM15 |
(1040) 0x272c87 VADDPD %ZMM12,%ZMM11,%ZMM11 |
(1040) 0x272c8d VADDPD %ZMM19,%ZMM10,%ZMM10 |
(1040) 0x272c93 VEXTRACTF32X4 $0x1,%YMM15,%XMM16 |
(1040) 0x272c9a VEXTRACTF128 $0x1,%YMM11,%XMM12 |
(1040) 0x272ca0 VEXTRACTF32X4 $0x1,%YMM10,%XMM19 |
(1040) 0x272ca7 VADDPD %XMM16,%XMM15,%XMM15 |
(1040) 0x272cad VADDPD %XMM12,%XMM11,%XMM11 |
(1040) 0x272cb2 VADDPD %XMM19,%XMM10,%XMM10 |
(1040) 0x272cb8 VPERMILPD $0x1,%XMM15,%XMM16 |
(1040) 0x272cbf VPERMILPD $0x1,%XMM11,%XMM12 |
(1040) 0x272cc5 VPERMILPD $0x1,%XMM10,%XMM19 |
(1040) 0x272ccc VADDSD %XMM16,%XMM15,%XMM15 |
(1040) 0x272cd2 VADDSD %XMM12,%XMM11,%XMM12 |
(1040) 0x272cd7 VADDSD %XMM19,%XMM10,%XMM10 |
(1040) 0x272cdd VUNPCKLPD %XMM15,%XMM12,%XMM11 |
(1040) 0x272ce2 CMP %RCX,%RDX |
(1040) 0x272ce5 JE 272def |
(1040) 0x272ceb TEST $0x1c,%CL |
(1040) 0x272cee JE 272dac |
(1040) 0x272cf4 LEA (%RDX,%RDX,2),%RDI |
(1040) 0x272cf8 VMOVQ %XMM11,%XMM12 |
(1040) 0x272cfd VUNPCKHPD %XMM0,%XMM11,%XMM11 |
(1040) 0x272d01 VMOVQ %XMM10,%XMM10 |
(1040) 0x272d06 LEA (%R9,%RDI,8),%RDI |
(1040) 0x272d0a NOPW (%RAX,%RAX,1) |
(1044) 0x272d10 VMOVUPD (%RDI),%ZMM14 |
(1044) 0x272d16 VMOVUPD 0x40(%RDI),%YMM15 |
(1044) 0x272d1b VMOVUPD (%RSI,%RDX,8),%YMM13 |
(1044) 0x272d20 ADD $0x4,%RDX |
(1044) 0x272d24 ADD $0x60,%RDI |
(1044) 0x272d28 VMOVAPD %ZMM14,%ZMM16 |
(1044) 0x272d2e VMOVAPD %ZMM14,%ZMM17 |
(1044) 0x272d34 VPERMT2PD %ZMM15,%ZMM1,%ZMM16 |
(1044) 0x272d3a VPERMT2PD %ZMM15,%ZMM2,%ZMM17 |
(1044) 0x272d40 VPERMT2PD %ZMM15,%ZMM3,%ZMM14 |
(1044) 0x272d46 VFMADD231PD %YMM16,%YMM13,%YMM12 |
(1044) 0x272d4c VFMADD231PD %YMM17,%YMM13,%YMM11 |
(1044) 0x272d52 VFMADD231PD %YMM14,%YMM13,%YMM10 |
(1044) 0x272d57 CMP %RDX,%R12 |
(1044) 0x272d5a JNE 272d10 |
(1040) 0x272d5c VEXTRACTF128 $0x1,%YMM10,%XMM13 |
(1040) 0x272d62 VADDPD %YMM13,%YMM10,%YMM10 |
(1040) 0x272d67 VPERMILPD $0x1,%XMM10,%XMM13 |
(1040) 0x272d6d VADDPD %YMM13,%YMM10,%YMM10 |
(1040) 0x272d72 VEXTRACTF128 $0x1,%YMM11,%XMM13 |
(1040) 0x272d78 VADDPD %XMM13,%XMM11,%XMM11 |
(1040) 0x272d7d VEXTRACTF128 $0x1,%YMM12,%XMM13 |
(1040) 0x272d83 VADDPD %XMM13,%XMM12,%XMM12 |
(1040) 0x272d88 VUNPCKHPD %XMM11,%XMM12,%XMM13 |
(1040) 0x272d8d VUNPCKLPD %XMM11,%XMM12,%XMM11 |
(1040) 0x272d92 VADDPD %XMM13,%XMM11,%XMM11 |
(1040) 0x272d97 CMP %RCX,%R12 |
(1040) 0x272d9a JE 272def |
(1040) 0x272d9c VBROADCASTSD %XMM10,%YMM10 |
(1040) 0x272da1 MOV %R12,%RDX |
(1040) 0x272da4 VBLENDPD $0xc,%YMM10,%YMM11,%YMM11 |
(1040) 0x272daa JMP 272dc0 |
(1040) 0x272dac VUNPCKLPD %XMM15,%XMM12,%XMM11 |
(1040) 0x272db1 VINSERTF128 $0x1,%XMM10,%YMM11,%YMM10 |
(1040) 0x272db7 VPERMILPD $0x2,%YMM10,%YMM11 |
(1040) 0x272dbd NOPL (%RAX) |
(1040) 0x272dc0 LEA (%RDX,%RDX,2),%RDI |
(1040) 0x272dc4 LEA (%R9,%RDI,8),%RDI |
(1040) 0x272dc8 NOPL (%RAX,%RAX,1) |
(1043) 0x272dd0 VPERMILPD $0x2,(%RDI),%YMM10 |
(1043) 0x272dd6 ADD $0x18,%RDI |
(1043) 0x272dda VFMADD231PD (%RSI,%RDX,8){1to4},%YMM10,%YMM11 |
(1043) 0x272de1 INC %RDX |
(1043) 0x272de4 CMP %RDX,%RCX |
(1043) 0x272de7 JNE 272dd0 |
(1040) 0x272de9 VEXTRACTF128 $0x1,%YMM11,%XMM10 |
(1040) 0x272def CMP $0x4,%ECX |
(1040) 0x272df2 JAE 272e00 |
(1040) 0x272df4 VXORPD %XMM12,%XMM12,%XMM12 |
(1040) 0x272df9 XOR %EDX,%EDX |
(1040) 0x272dfb JMP 272f20 |
(1040) 0x272e00 CMP $0x20,%ECX |
(1040) 0x272e03 JAE 272e20 |
(1040) 0x272e05 VXORPD %XMM12,%XMM12,%XMM12 |
(1040) 0x272e0a XOR %EDI,%EDI |
(1040) 0x272e0c JMP 272ed5 |
0x272e11 NOPW %CS:(%RAX,%RAX,1) |
(1040) 0x272e20 VXORPD %XMM12,%XMM12,%XMM12 |
(1040) 0x272e25 VXORPD %XMM13,%XMM13,%XMM13 |
(1040) 0x272e2a VXORPD %XMM14,%XMM14,%XMM14 |
(1040) 0x272e2f VXORPD %XMM15,%XMM15,%XMM15 |
(1040) 0x272e34 XOR %EDX,%EDX |
(1040) 0x272e36 NOPW %CS:(%RAX,%RAX,1) |
(1039) 0x272e40 VMOVUPD -0xc0(%R11,%RDX,1),%ZMM16 |
(1039) 0x272e48 VMOVUPD -0x80(%R11,%RDX,1),%ZMM17 |
(1039) 0x272e50 VMOVUPD -0x40(%R11,%RDX,1),%ZMM18 |
(1039) 0x272e58 VMOVUPD (%R11,%RDX,1),%ZMM19 |
(1039) 0x272e5f VFMADD231PD -0xc0(%R13,%RDX,1),%ZMM16,%ZMM12 |
(1039) 0x272e67 VFMADD231PD -0x80(%R13,%RDX,1),%ZMM17,%ZMM13 |
(1039) 0x272e6f VFMADD231PD -0x40(%R13,%RDX,1),%ZMM18,%ZMM14 |
(1039) 0x272e77 VFMADD231PD (%R13,%RDX,1),%ZMM19,%ZMM15 |
(1039) 0x272e7f ADD $0x100,%RDX |
(1039) 0x272e86 CMP %RDX,%RAX |
(1039) 0x272e89 JNE 272e40 |
(1040) 0x272e8b VADDPD %ZMM12,%ZMM13,%ZMM12 |
(1040) 0x272e91 VADDPD %ZMM14,%ZMM15,%ZMM14 |
(1040) 0x272e97 MOV -0x30(%RBP),%RDX |
(1040) 0x272e9b VADDPD %ZMM12,%ZMM14,%ZMM12 |
(1040) 0x272ea1 VEXTRACTF64X4 $0x1,%ZMM12,%YMM13 |
(1040) 0x272ea8 VADDPD %ZMM13,%ZMM12,%ZMM12 |
(1040) 0x272eae VEXTRACTF128 $0x1,%YMM12,%XMM13 |
(1040) 0x272eb4 VADDPD %XMM13,%XMM12,%XMM12 |
(1040) 0x272eb9 VPERMILPD $0x1,%XMM12,%XMM13 |
(1040) 0x272ebf VADDSD %XMM13,%XMM12,%XMM12 |
(1040) 0x272ec4 CMP %RCX,%RDX |
(1040) 0x272ec7 JE 2729a0 |
(1040) 0x272ecd MOV %RDX,%RDI |
(1040) 0x272ed0 TEST $0x1c,%CL |
(1040) 0x272ed3 JE 272f20 |
(1040) 0x272ed5 VMOVQ %XMM12,%XMM12 |
(1040) 0x272eda NOPW (%RAX,%RAX,1) |
(1042) 0x272ee0 VMOVUPD (%R8,%RDI,8),%YMM13 |
(1042) 0x272ee6 VFMADD231PD (%RSI,%RDI,8),%YMM13,%YMM12 |
(1042) 0x272eec ADD $0x4,%RDI |
(1042) 0x272ef0 CMP %RDI,%R12 |
(1042) 0x272ef3 JNE 272ee0 |
(1040) 0x272ef5 VEXTRACTF128 $0x1,%YMM12,%XMM13 |
(1040) 0x272efb MOV %R12,%RDX |
(1040) 0x272efe VADDPD %XMM13,%XMM12,%XMM12 |
(1040) 0x272f03 VPERMILPD $0x1,%XMM12,%XMM13 |
(1040) 0x272f09 VADDSD %XMM13,%XMM12,%XMM12 |
(1040) 0x272f0e CMP %RCX,%R12 |
(1040) 0x272f11 JE 2729a0 |
(1040) 0x272f17 NOPW (%RAX,%RAX,1) |
(1041) 0x272f20 VMOVSD (%R8,%RDX,8),%XMM13 |
(1041) 0x272f26 VFMADD231SD (%RSI,%RDX,8),%XMM13,%XMM12 |
(1041) 0x272f2c INC %RDX |
(1041) 0x272f2f CMP %RDX,%RCX |
(1041) 0x272f32 JNE 272f20 |
(1040) 0x272f34 JMP 2729a0 |
0x272f39 VMOVSD 0x10(%RBX),%XMM0 |
0x272f3e ADD $0x38,%RSP |
0x272f42 POP %RBX |
0x272f43 POP %R12 |
0x272f45 POP %R13 |
0x272f47 POP %R14 |
0x272f49 POP %R15 |
0x272f4b POP %RBP |
0x272f4c VZEROUPPER |
0x272f4f RET |
Path / |
Source file and lines | DiracDeterminantRef.cpp:235-255 |
Module | exec |
nb instructions | 96 |
nb uops | 93 |
loop length | 479 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 3 |
used zmm registers | 6 |
nb stack references | 6 |
ADD-SUB / MUL ratio | 1.33 |
micro-operation queue | 15.50 cycles |
front end | 15.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.50 | 5.50 | 5.25 | 5.25 | 3.50 | 14.67 | 14.67 | 14.67 | 2.50 | 2.50 | 2.67 | 2.33 | 1.50 | 1.50 |
cycles | 5.50 | 5.50 | 5.25 | 5.25 | 3.50 | 16.67 | 16.67 | 16.67 | 2.50 | 2.50 | 2.67 | 2.33 | 1.50 | 1.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 15.50 |
Dispatch | 16.67 |
Overall L1 | 16.67 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 60% |
load | 64% |
store | 33% |
mul | 66% |
add-sub | 25% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 37% |
load | 52% |
store | 11% |
mul | 66% |
add-sub | 25% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 30% |
all | 11% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 41% |
load | 51% |
store | 16% |
mul | 20% |
add-sub | 15% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 28% |
load | 44% |
store | 13% |
mul | 20% |
add-sub | 15% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RCX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 275060 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE9recomputeERNS1_11ParticleSetE> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x484(%RBX),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP $0x1,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JNE 27289d <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x9d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x100(%RBX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x140(%RBX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R15),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVDDUP (%RAX),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x478(%RBX),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULPD (%RCX),%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULSD 0x10(%RCX),%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
LEA (%RAX,%RAX,2),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VADDPD (%RDX,%RSI,8),%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMULPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVUPD %XMM3,(%RDX,%RSI,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VADDSD 0x10(%RDX,%RSI,8),%XMM1,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM3,0x10(%RDX,%RSI,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VPERMILPD $0x1,%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOV 0x180(%RBX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VADDSD %XMM2,%XMM3,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOV 0x18(%R14),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VFMADD231SD %XMM1,%XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMSUB231SD (%RCX),%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD (%RDX,%RAX,8),%XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM0,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
JMP 272f39 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x739> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
TEST %EDX,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 272f39 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x739> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x480(%RBX),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
TEST %ECX,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 272f39 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x739> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x118(%RBX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R14),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVAPD -0x6e246(%RIP),%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x6e2ee(%RIP),%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x6e2d6(%RIP),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x681a0(%RIP),%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x6816a(%RIP),%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x68134(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x680fe(%RIP),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x680c8(%RIP),%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x68092(%RIP),%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x158(%RBX),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0xd8(%RBX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x100(%RBX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x180(%RBX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R15),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x140(%RBX),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %ECX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %ECX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x20,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
AND $-0x4,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R15,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x3,%RAX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R14,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD 0x478(%RBX),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
SAL $0x3,%R11 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SAL $0x3,%RDI | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA 0xc0(%RSI),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%RAX,2),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA 0xc0(%R8),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RDI,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x5,%RAX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SAL $0x8,%RAX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
JMP 272a10 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x210> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VMOVSD 0x10(%RBX),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Source file and lines | DiracDeterminantRef.cpp:235-255 |
Module | exec |
nb instructions | 96 |
nb uops | 93 |
loop length | 479 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 3 |
used zmm registers | 6 |
nb stack references | 6 |
ADD-SUB / MUL ratio | 1.33 |
micro-operation queue | 15.50 cycles |
front end | 15.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.50 | 5.50 | 5.25 | 5.25 | 3.50 | 14.67 | 14.67 | 14.67 | 2.50 | 2.50 | 2.67 | 2.33 | 1.50 | 1.50 |
cycles | 5.50 | 5.50 | 5.25 | 5.25 | 3.50 | 16.67 | 16.67 | 16.67 | 2.50 | 2.50 | 2.67 | 2.33 | 1.50 | 1.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 15.50 |
Dispatch | 16.67 |
Overall L1 | 16.67 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 60% |
load | 64% |
store | 33% |
mul | 66% |
add-sub | 25% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 37% |
load | 52% |
store | 11% |
mul | 66% |
add-sub | 25% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 30% |
all | 11% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 41% |
load | 51% |
store | 16% |
mul | 20% |
add-sub | 15% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 28% |
load | 44% |
store | 13% |
mul | 20% |
add-sub | 15% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RCX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 275060 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE9recomputeERNS1_11ParticleSetE> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x484(%RBX),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP $0x1,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JNE 27289d <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x9d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x100(%RBX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x140(%RBX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R15),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVDDUP (%RAX),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x478(%RBX),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULPD (%RCX),%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULSD 0x10(%RCX),%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
LEA (%RAX,%RAX,2),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VADDPD (%RDX,%RSI,8),%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMULPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVUPD %XMM3,(%RDX,%RSI,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VADDSD 0x10(%RDX,%RSI,8),%XMM1,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM3,0x10(%RDX,%RSI,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VPERMILPD $0x1,%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOV 0x180(%RBX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VADDSD %XMM2,%XMM3,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOV 0x18(%R14),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VFMADD231SD %XMM1,%XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMSUB231SD (%RCX),%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD (%RDX,%RAX,8),%XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM0,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
JMP 272f39 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x739> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
TEST %EDX,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 272f39 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x739> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x480(%RBX),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
TEST %ECX,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 272f39 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x739> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x118(%RBX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R14),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVAPD -0x6e246(%RIP),%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x6e2ee(%RIP),%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x6e2d6(%RIP),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x681a0(%RIP),%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x6816a(%RIP),%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x68134(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x680fe(%RIP),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x680c8(%RIP),%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD -0x68092(%RIP),%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x158(%RBX),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0xd8(%RBX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x100(%RBX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x180(%RBX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R15),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x140(%RBX),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %ECX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %ECX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x20,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
AND $-0x4,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R15,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x3,%RAX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R14,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD 0x478(%RBX),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
SAL $0x3,%R11 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SAL $0x3,%RDI | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA 0xc0(%RSI),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%RAX,2),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA 0xc0(%R8),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RDI,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x5,%RAX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SAL $0x8,%RAX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
JMP 272a10 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x210> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VMOVSD 0x10(%RBX),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼miniqmcreference::DiracDeterminantRef | 0.06 | 0.11 |
▼Loop 1040 - inner_product.hpp:82-155 - exec– | 0 | 0 |
○Loop 1038 - inner_product.hpp:155-155 - exec | 0.04 | 0.08 |
○Loop 1039 - inner_product.hpp:82-83 - exec | 0.02 | 0.03 |
○Loop 1042 - inner_product.hpp:82-83 - exec | 0 | 0 |
○Loop 1041 - inner_product.hpp:82-83 - exec | 0 | 0 |
○Loop 1043 - inner_product.hpp:155-155 - exec | 0 | 0 |
○Loop 1044 - inner_product.hpp:155-155 - exec | 0 | 0 |