Function: miniqmcreference::einspline_spo_ref<double>::evaluate(qmcplusplus::ParticleSet const&, int ... | Module: exec | Source: einspline_spo_ref.hpp:203-230 [...] | Coverage: 0.94% |
---|
Function: miniqmcreference::einspline_spo_ref<double>::evaluate(qmcplusplus::ParticleSet const&, int ... | Module: exec | Source: einspline_spo_ref.hpp:203-230 [...] | Coverage: 0.94% |
---|
/software/compilers/gcc/gcc-13.1.0-full+isl+binutils/include/c++/13.1.0/bits/stl_algobase.h: 238 - 238 |
-------------------------------------------------------------------------------- |
238: if (__b < __a) |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/VectorSoAContainer.h: 231 - 271 |
-------------------------------------------------------------------------------- |
231: inline const AoSElement_t operator[](size_t i) const { return AoSElement_t(myData + i, nGhosts); } |
[...] |
265: inline T* data() { return myData; } |
[...] |
271: inline T* restrict data(size_t i) { return myData + i * nGhosts; } |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/einspline_spo_ref.hpp: 203 - 230 |
-------------------------------------------------------------------------------- |
203: ScopedTimer local_timer(timer); |
204: |
205: auto u = Lattice.toUnit_floor(P.activeR(iat)); |
206: for (int i = 0; i < nBlocks; ++i) |
207: MultiBsplineEvalRef::evaluate_vgh(einsplines[i], u[0], u[1], u[2], psi[i].data(), grad[i].data(), hess[i].data(), |
208: nSplinesPerBlock); |
209: } |
210: |
211: inline void evaluate(const ParticleSet& P, |
[...] |
219: for (int i = 0; i < nBlocks; ++i) |
220: { |
221: // in real simulation, phase needs to be applied. Here just fake computation |
222: const int first = i * nBlocks; |
223: for (int j = first; j < std::min((i + 1) * nSplinesPerBlock, OrbitalSetSize); j++) |
224: { |
225: psi_v[j] = psi[i][j - first]; |
226: dpsi_v[j] = grad[i][j - first]; |
227: d2psi_v[j] = hess[i].data(0)[j - first]; |
228: } |
229: } |
230: } |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 143 - 143 |
-------------------------------------------------------------------------------- |
143: return (active_ptcl_ == iat) ? active_pos_ : R[iat]; |
/software/compilers/gcc/gcc-13.1.0-full+isl+binutils/include/c++/13.1.0/bits/stl_vector.h: 1124 - 1256 |
-------------------------------------------------------------------------------- |
1124: return *(this->_M_impl._M_start + __n); |
[...] |
1256: { return _M_data_ptr(this->_M_impl._M_start); } |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Particle/Lattice/CrystalLattice.h: 170 - 173 |
-------------------------------------------------------------------------------- |
170: if (-std::numeric_limits<T1>::epsilon() < val_dot[i] && val_dot[i] < 0) |
171: val_dot[i] = T1(0.0); |
172: else |
173: val_dot[i] -= std::floor(val_dot[i]); |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 229 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
[...] |
229: return X[i]; |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVector.h: 145 - 145 |
-------------------------------------------------------------------------------- |
145: X[i] = base[i * offset]; |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVectorTensorOps.h: 150 - 152 |
-------------------------------------------------------------------------------- |
150: return TinyVector<Type_t, 3>(lhs[0] * rhs[0] + lhs[1] * rhs[3] + lhs[2] * rhs[6], |
151: lhs[0] * rhs[1] + lhs[1] * rhs[4] + lhs[2] * rhs[7], |
152: lhs[0] * rhs[2] + lhs[1] * rhs[5] + lhs[2] * rhs[8]); |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Utilities/NewTimer.h: 242 - 249 |
-------------------------------------------------------------------------------- |
242: ScopeGuard(TIMER& t) : timer(t) { timer.start(); } |
[...] |
249: ~ScopeGuard() { timer.stop(); } |
0x47f1a0 PUSH %RBP |
0x47f1a1 MOV %RSP,%RBP |
0x47f1a4 PUSH %R15 |
0x47f1a6 PUSH %R14 |
0x47f1a8 PUSH %R13 |
0x47f1aa MOVSXD %EDX,%R13 |
0x47f1ad PUSH %R12 |
0x47f1af MOV %RSI,%R12 |
0x47f1b2 PUSH %RBX |
0x47f1b3 MOV %RDI,%RBX |
0x47f1b6 SUB $0x68,%RSP |
0x47f1ba MOV %RCX,-0x78(%RBP) |
0x47f1be MOV 0x358(%RDI),%R14 |
0x47f1c5 MOV %R8,-0x80(%RBP) |
0x47f1c9 MOV %R9,-0x88(%RBP) |
0x47f1d0 MOV %R14,%RDI |
0x47f1d3 CALL 50f9a0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> |
0x47f1d8 LEA 0x128(%R12),%RCX |
0x47f1e0 CMP 0x124(%R12),%R13D |
0x47f1e8 JE 47f1f8 |
0x47f1ea MOV 0x40(%R12),%RAX |
0x47f1ef LEA (%R13,%R13,2),%RDX |
0x47f1f4 LEA (%RAX,%RDX,8),%RCX |
0x47f1f8 VMOVUPD (%RCX),%XMM0 |
0x47f1fc VMOVSD 0x10(%RCX),%XMM4 |
0x47f201 VUNPCKHPD %XMM0,%XMM0,%XMM1 |
0x47f205 VMOVSD %XMM0,%XMM0,%XMM2 |
0x47f209 VMULSD 0x100(%RBX),%XMM1,%XMM3 |
0x47f211 VFMADD132SD 0xe8(%RBX),%XMM3,%XMM2 |
0x47f21a VFMADD231SD 0x118(%RBX),%XMM4,%XMM2 |
0x47f223 VCOMISD 0x10e5b5(%RIP),%XMM2 |
0x47f22b JBE 47f23b |
0x47f22d VXORPD %XMM5,%XMM5,%XMM5 |
0x47f231 VCOMISD %XMM2,%XMM5 |
0x47f235 JA 47f679 |
0x47f23b VRNDSCALESD $0x9,%XMM2,%XMM2,%XMM6 |
0x47f242 VSUBSD %XMM6,%XMM2,%XMM2 |
0x47f246 MOV 0x30(%RBX),%R9D |
0x47f24a TEST %R9D,%R9D |
0x47f24d JLE 47f394 |
0x47f253 VPERMILPD $0,%XMM0,%XMM9 |
0x47f259 VPERMILPD $0x3,%XMM0,%XMM8 |
0x47f25f VMOVDDUP %XMM4,%XMM7 |
0x47f263 VMOVDDUP 0x10e575(%RIP),%XMM13 |
0x47f26b XOR %R15D,%R15D |
0x47f26e VMULPD 0xd8(%RBX),%XMM9,%XMM10 |
0x47f276 VXORPD %XMM12,%XMM12,%XMM12 |
0x47f27b VFMADD231PD 0xf0(%RBX),%XMM8,%XMM10 |
0x47f284 VFMADD231PD 0x108(%RBX),%XMM7,%XMM10 |
0x47f28d VRNDSCALEPD $0x9,%XMM10,%XMM11 |
0x47f294 VCMPPD $0x1,%XMM12,%XMM10,%K1 |
0x47f29b VCMPPD $0xe,%XMM13,%XMM10,%K0{%K1} |
0x47f2a2 KNOTB %K0,%K2 |
0x47f2a6 VSUBPD %XMM11,%XMM10,%XMM4{%K2}{z} |
0x47f2ac VUNPCKHPD %XMM4,%XMM4,%XMM14 |
0x47f2b0 VMOVQ %XMM4,%R12 |
0x47f2b5 VMOVSD %XMM14,%XMM14,%XMM1 |
(891) 0x47f2b9 MOV 0x310(%RBX),%R13 |
(891) 0x47f2c0 MOV 0x340(%RBX),%R8 |
(891) 0x47f2c7 LEA (%R15,%R15,2),%R11 |
(891) 0x47f2cb LEA (%R15,%R15,4),%RDI |
(891) 0x47f2cf MOV 0x328(%RBX),%R10 |
(891) 0x47f2d6 MOV 0x2f8(%RBX),%R9 |
(891) 0x47f2dd SAL $0x3,%RDI |
(891) 0x47f2e1 VMOVQ %R12,%XMM0 |
(891) 0x47f2e6 LEA (%R13,%R11,8),%RAX |
(891) 0x47f2eb MOV 0x18(%R8,%RDI,1),%RCX |
(891) 0x47f2f0 MOVSXD 0x40(%RBX),%R8 |
(891) 0x47f2f4 VMOVSD %XMM2,-0x40(%RBP) |
(891) 0x47f2f9 MOV 0x18(%R10,%RDI,1),%RDX |
(891) 0x47f2fe MOV (%RAX),%RSI |
(891) 0x47f301 VMOVSD %XMM1,-0x38(%RBP) |
(891) 0x47f306 MOV (%R9,%R15,8),%RDI |
(891) 0x47f30a CALL 47e520 <_ZN16miniqmcreference19MultiBsplineEvalRef12evaluate_vghIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_S9_S9_m> |
(891) 0x47f30f LEA 0x1(%R15),%R8 |
(891) 0x47f313 VMOVSD -0x38(%RBP),%XMM1 |
(891) 0x47f318 VMOVSD -0x40(%RBP),%XMM2 |
(891) 0x47f31d CMP %R8D,0x30(%RBX) |
(891) 0x47f321 JLE 47f394 |
(891) 0x47f323 MOV 0x328(%RBX),%RSI |
(891) 0x47f32a MOV 0x310(%RBX),%RDI |
(891) 0x47f331 LEA (%R8,%R8,4),%RDX |
(891) 0x47f335 LEA (%R8,%R8,2),%R11 |
(891) 0x47f339 SAL $0x3,%RDX |
(891) 0x47f33d MOV 0x340(%RBX),%RCX |
(891) 0x47f344 MOV 0x2f8(%RBX),%RAX |
(891) 0x47f34b VMOVQ %R12,%XMM0 |
(891) 0x47f350 MOV 0x18(%RSI,%RDX,1),%R10 |
(891) 0x47f355 LEA (%RDI,%R11,8),%R13 |
(891) 0x47f359 ADD $0x2,%R15 |
(891) 0x47f35d VMOVSD %XMM2,-0x40(%RBP) |
(891) 0x47f362 MOV 0x18(%RCX,%RDX,1),%RCX |
(891) 0x47f367 MOV (%RAX,%R8,8),%RDI |
(891) 0x47f36b VMOVSD %XMM1,-0x38(%RBP) |
(891) 0x47f370 MOV (%R13),%RSI |
(891) 0x47f374 MOVSXD 0x40(%RBX),%R8 |
(891) 0x47f378 MOV %R10,%RDX |
(891) 0x47f37b CALL 47e520 <_ZN16miniqmcreference19MultiBsplineEvalRef12evaluate_vghIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_S9_S9_m> |
(891) 0x47f380 CMP %R15D,0x30(%RBX) |
(891) 0x47f384 VMOVSD -0x38(%RBP),%XMM1 |
(891) 0x47f389 VMOVSD -0x40(%RBP),%XMM2 |
(891) 0x47f38e JG 47f2b9 |
0x47f394 MOV %R14,%RDI |
0x47f397 CALL 50fbd0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> |
0x47f39c MOV 0x30(%RBX),%R14D |
0x47f3a0 MOV %R14D,-0x54(%RBP) |
0x47f3a4 TEST %R14D,%R14D |
0x47f3a7 JLE 47f66a |
0x47f3ad MOVSXD -0x54(%RBP),%R9 |
0x47f3b1 MOV 0x8(%RBX),%R12D |
0x47f3b5 MOVQ $0,-0x48(%RBP) |
0x47f3bd XOR %R14D,%R14D |
0x47f3c0 MOV 0x40(%RBX),%R15D |
0x47f3c4 MOVQ $0,-0x50(%RBP) |
0x47f3cc LEA (%R9,%R9,2),%RDX |
0x47f3d0 LEA (,%R9,8),%R8 |
0x47f3d8 MOV %R12D,-0x58(%RBP) |
0x47f3dc SAL $0x3,%RDX |
0x47f3e0 MOV %R8,-0x60(%RBP) |
0x47f3e4 MOV %RDX,-0x70(%RBP) |
0x47f3e8 MOVL $0,-0x38(%RBP) |
0x47f3ef MOV %R15D,-0x64(%RBP) |
0x47f3f3 MOV %R15D,-0x40(%RBP) |
0x47f3f7 XOR %R15D,%R15D |
0x47f3fa NOPW (%RAX,%RAX,1) |
(889) 0x47f400 MOV -0x40(%RBP),%ECX |
(889) 0x47f403 MOV -0x58(%RBP),%R12D |
(889) 0x47f407 CMP %R12D,%ECX |
(889) 0x47f40a CMOVLE %ECX,%R12D |
(889) 0x47f40e CMP %R12D,-0x38(%RBP) |
(889) 0x47f412 JGE 47f639 |
(889) 0x47f418 MOV 0x310(%RBX),%R10 |
(889) 0x47f41f MOV 0x328(%RBX),%RDI |
(889) 0x47f426 MOV 0x340(%RBX),%R13 |
(889) 0x47f42d MOV -0x38(%RBP),%R9D |
(889) 0x47f431 MOV (%R10,%R14,1),%R11 |
(889) 0x47f435 MOV -0x78(%RBP),%R10 |
(889) 0x47f439 ADD %R15,%RDI |
(889) 0x47f43c MOV 0x18(%RDI),%RSI |
(889) 0x47f440 MOVSXD 0x8(%RDI),%RCX |
(889) 0x47f444 SUB %R9D,%R12D |
(889) 0x47f447 MOV -0x50(%RBP),%RDI |
(889) 0x47f44b MOV 0x18(%R13,%R15,1),%R8 |
(889) 0x47f450 SAL $0x3,%R12 |
(889) 0x47f454 MOV -0x88(%RBP),%R13 |
(889) 0x47f45b MOV 0x18(%R10),%R10 |
(889) 0x47f45f LEA (%RSI,%RCX,8),%R9 |
(889) 0x47f463 SAL $0x4,%RCX |
(889) 0x47f467 MOV -0x80(%RBP),%RAX |
(889) 0x47f46b MOV -0x48(%RBP),%RDX |
(889) 0x47f46f ADD %RSI,%RCX |
(889) 0x47f472 ADD %RDI,%R10 |
(889) 0x47f475 ADD 0x18(%R13),%RDI |
(889) 0x47f479 LEA -0x8(%R12),%R13 |
(889) 0x47f47e SHR $0x3,%R13 |
(889) 0x47f482 ADD 0x18(%RAX),%RDX |
(889) 0x47f486 XOR %EAX,%EAX |
(889) 0x47f488 INC %R13 |
(889) 0x47f48b AND $0x3,%R13D |
(889) 0x47f48f JE 47f54f |
(889) 0x47f495 CMP $0x1,%R13 |
(889) 0x47f499 JE 47f50d |
(889) 0x47f49b CMP $0x2,%R13 |
(889) 0x47f49f JE 47f4d4 |
(889) 0x47f4a1 VMOVSD (%R11),%XMM15 |
(889) 0x47f4a6 ADD $0x18,%RDX |
(889) 0x47f4aa MOV $0x8,%EAX |
(889) 0x47f4af VMOVSD %XMM15,(%R10) |
(889) 0x47f4b4 VMOVSD (%RSI),%XMM0 |
(889) 0x47f4b8 VMOVSD (%RCX),%XMM2 |
(889) 0x47f4bc VMOVHPD (%R9),%XMM0,%XMM1 |
(889) 0x47f4c1 VMOVSD %XMM2,-0x8(%RDX) |
(889) 0x47f4c6 VMOVUPD %XMM1,-0x18(%RDX) |
(889) 0x47f4cb VMOVSD (%R8),%XMM3 |
(889) 0x47f4d0 VMOVSD %XMM3,(%RDI) |
(889) 0x47f4d4 VMOVSD (%R11,%RAX,1),%XMM5 |
(889) 0x47f4da ADD $0x18,%RDX |
(889) 0x47f4de VMOVSD %XMM5,(%R10,%RAX,1) |
(889) 0x47f4e4 VMOVSD (%RSI,%RAX,1),%XMM7 |
(889) 0x47f4e9 VMOVSD (%RCX,%RAX,1),%XMM6 |
(889) 0x47f4ee VMOVHPD (%R9,%RAX,1),%XMM7,%XMM8 |
(889) 0x47f4f4 VMOVSD %XMM6,-0x8(%RDX) |
(889) 0x47f4f9 VMOVUPD %XMM8,-0x18(%RDX) |
(889) 0x47f4fe VMOVSD (%R8,%RAX,1),%XMM9 |
(889) 0x47f504 VMOVSD %XMM9,(%RDI,%RAX,1) |
(889) 0x47f509 ADD $0x8,%RAX |
(889) 0x47f50d VMOVSD (%R11,%RAX,1),%XMM10 |
(889) 0x47f513 ADD $0x18,%RDX |
(889) 0x47f517 VMOVSD %XMM10,(%R10,%RAX,1) |
(889) 0x47f51d VMOVSD (%RSI,%RAX,1),%XMM12 |
(889) 0x47f522 VMOVSD (%RCX,%RAX,1),%XMM11 |
(889) 0x47f527 VMOVHPD (%R9,%RAX,1),%XMM12,%XMM13 |
(889) 0x47f52d VMOVSD %XMM11,-0x8(%RDX) |
(889) 0x47f532 VMOVUPD %XMM13,-0x18(%RDX) |
(889) 0x47f537 VMOVSD (%R8,%RAX,1),%XMM14 |
(889) 0x47f53d VMOVSD %XMM14,(%RDI,%RAX,1) |
(889) 0x47f542 ADD $0x8,%RAX |
(889) 0x47f546 CMP %R12,%RAX |
(889) 0x47f549 JE 47f639 |
(890) 0x47f54f VMOVSD (%R11,%RAX,1),%XMM4 |
(890) 0x47f555 ADD $0x60,%RDX |
(890) 0x47f559 VMOVSD %XMM4,(%R10,%RAX,1) |
(890) 0x47f55f VMOVSD (%RSI,%RAX,1),%XMM2 |
(890) 0x47f564 VMOVSD (%RCX,%RAX,1),%XMM15 |
(890) 0x47f569 VMOVHPD (%R9,%RAX,1),%XMM2,%XMM0 |
(890) 0x47f56f VMOVSD %XMM15,-0x50(%RDX) |
(890) 0x47f574 VMOVUPD %XMM0,-0x60(%RDX) |
(890) 0x47f579 VMOVSD (%R8,%RAX,1),%XMM1 |
(890) 0x47f57f VMOVSD %XMM1,(%RDI,%RAX,1) |
(890) 0x47f584 VMOVSD 0x8(%R11,%RAX,1),%XMM3 |
(890) 0x47f58b VMOVSD %XMM3,0x8(%RAX,%R10,1) |
(890) 0x47f592 VMOVSD 0x8(%RSI,%RAX,1),%XMM6 |
(890) 0x47f598 VMOVSD 0x8(%RCX,%RAX,1),%XMM5 |
(890) 0x47f59e VMOVHPD 0x8(%R9,%RAX,1),%XMM6,%XMM7 |
(890) 0x47f5a5 VMOVSD %XMM5,-0x38(%RDX) |
(890) 0x47f5aa VMOVUPD %XMM7,-0x48(%RDX) |
(890) 0x47f5af VMOVSD 0x8(%R8,%RAX,1),%XMM8 |
(890) 0x47f5b6 VMOVSD %XMM8,0x8(%RAX,%RDI,1) |
(890) 0x47f5bc VMOVSD 0x10(%R11,%RAX,1),%XMM9 |
(890) 0x47f5c3 VMOVSD %XMM9,0x10(%RAX,%R10,1) |
(890) 0x47f5ca VMOVSD 0x10(%RSI,%RAX,1),%XMM11 |
(890) 0x47f5d0 VMOVSD 0x10(%RCX,%RAX,1),%XMM10 |
(890) 0x47f5d6 VMOVHPD 0x10(%R9,%RAX,1),%XMM11,%XMM12 |
(890) 0x47f5dd VMOVSD %XMM10,-0x20(%RDX) |
(890) 0x47f5e2 VMOVUPD %XMM12,-0x30(%RDX) |
(890) 0x47f5e7 VMOVSD 0x10(%R8,%RAX,1),%XMM13 |
(890) 0x47f5ee VMOVSD %XMM13,0x10(%RAX,%RDI,1) |
(890) 0x47f5f4 VMOVSD 0x18(%R11,%RAX,1),%XMM14 |
(890) 0x47f5fb VMOVSD %XMM14,0x18(%RAX,%R10,1) |
(890) 0x47f602 VMOVSD 0x18(%RSI,%RAX,1),%XMM15 |
(890) 0x47f608 VMOVSD 0x18(%RCX,%RAX,1),%XMM4 |
(890) 0x47f60e VMOVHPD 0x18(%R9,%RAX,1),%XMM15,%XMM2 |
(890) 0x47f615 VMOVUPD %XMM2,-0x18(%RDX) |
(890) 0x47f61a VMOVSD %XMM4,-0x8(%RDX) |
(890) 0x47f61f VMOVSD 0x18(%R8,%RAX,1),%XMM0 |
(890) 0x47f626 ADD $0x20,%RAX |
(890) 0x47f62a VMOVSD %XMM0,-0x8(%RAX,%RDI,1) |
(890) 0x47f630 CMP %R12,%RAX |
(890) 0x47f633 JNE 47f54f |
(889) 0x47f639 MOV -0x64(%RBP),%R12D |
(889) 0x47f63d MOV -0x54(%RBP),%R11D |
(889) 0x47f641 ADD $0x18,%R14 |
(889) 0x47f645 ADD $0x28,%R15 |
(889) 0x47f649 MOV -0x60(%RBP),%RSI |
(889) 0x47f64d MOV -0x70(%RBP),%RCX |
(889) 0x47f651 ADD %R12D,-0x40(%RBP) |
(889) 0x47f655 ADD %R11D,-0x38(%RBP) |
(889) 0x47f659 ADD %RSI,-0x50(%RBP) |
(889) 0x47f65d ADD %RCX,-0x48(%RBP) |
(889) 0x47f661 CMP %R14,%RCX |
(889) 0x47f664 JNE 47f400 |
0x47f66a ADD $0x68,%RSP |
0x47f66e POP %RBX |
0x47f66f POP %R12 |
0x47f671 POP %R13 |
0x47f673 POP %R14 |
0x47f675 POP %R15 |
0x47f677 POP %RBP |
0x47f678 RET |
0x47f679 VMOVSD %XMM5,%XMM5,%XMM2 |
0x47f67d JMP 47f246 |
0x47f682 NOPW %CS:(%RAX,%RAX,1) |
0x47f68c NOPL (%RAX) |
Path / |
Source file and lines | einspline_spo_ref.hpp:203-230 |
Module | exec |
nb instructions | 92 |
nb uops | 97 |
loop length | 427 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 15 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 12 |
ADD-SUB / MUL ratio | 1.00 |
micro-operation queue | 16.17 cycles |
front end | 16.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.83 | 8.67 | 8.33 | 8.33 | 10.00 | 8.50 | 7.00 | 10.00 | 10.00 | 10.00 | 4.00 | 8.33 |
cycles | 8.83 | 8.67 | 8.33 | 8.33 | 10.00 | 8.50 | 7.00 | 10.00 | 10.00 | 10.00 | 4.00 | 8.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 16.25 |
Stall cycles | 0.00 |
Front-end | 16.17 |
Dispatch | 10.00 |
Overall L1 | 16.17 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 44% |
load | 40% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 43% |
all | 24% |
load | 28% |
store | 0% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 30% |
all | 8% |
load | 7% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 18% |
load | 17% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 18% |
add-sub | 18% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 17% |
all | 13% |
load | 14% |
store | 8% |
mul | 18% |
add-sub | 18% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOVSXD %EDX,%R13 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB $0x68,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x358(%RDI),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 50f9a0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA 0x128(%R12),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP 0x124(%R12),%R13D | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 47f1f8 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x58> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x40(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R13,%R13,2),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RAX,%RDX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD (%RCX),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0x10(%RCX),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VUNPCKHPD %XMM0,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVSD %XMM0,%XMM0,%XMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULSD 0x100(%RBX),%XMM1,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD132SD 0xe8(%RBX),%XMM3,%XMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD231SD 0x118(%RBX),%XMM4,%XMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VCOMISD 0x10e5b5(%RIP),%XMM2 | 2 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
JBE 47f23b <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x9b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VXORPD %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCOMISD %XMM2,%XMM5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JA 47f679 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x4d9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VRNDSCALESD $0x9,%XMM2,%XMM2,%XMM6 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM6,%XMM2,%XMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOV 0x30(%RBX),%R9D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %R9D,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 47f394 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x1f4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPERMILPD $0,%XMM0,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPERMILPD $0x3,%XMM0,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVDDUP %XMM4,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVDDUP 0x10e575(%RIP),%XMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMULPD 0xd8(%RBX),%XMM9,%XMM10 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VXORPD %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VFMADD231PD 0xf0(%RBX),%XMM8,%XMM10 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD231PD 0x108(%RBX),%XMM7,%XMM10 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VRNDSCALEPD $0x9,%XMM10,%XMM11 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VCMPPD $0x1,%XMM12,%XMM10,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VCMPPD $0xe,%XMM13,%XMM10,%K0{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KNOTB %K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSUBPD %XMM11,%XMM10,%XMM4{%K2}{z} | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VUNPCKHPD %XMM4,%XMM4,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVQ %XMM4,%R12 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVSD %XMM14,%XMM14,%XMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 50fbd0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x30(%RBX),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R14D,-0x54(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %R14D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 47f66a <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x4ca> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD -0x54(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RBX),%R12D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x40(%RBX),%R15D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R9,%R9,2),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%R9,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R12D,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R8,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15D,-0x64(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15D,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x68,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
VMOVSD %XMM5,%XMM5,%XMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JMP 47f246 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0xa6> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | einspline_spo_ref.hpp:203-230 |
Module | exec |
nb instructions | 92 |
nb uops | 97 |
loop length | 427 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 15 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 12 |
ADD-SUB / MUL ratio | 1.00 |
micro-operation queue | 16.17 cycles |
front end | 16.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.83 | 8.67 | 8.33 | 8.33 | 10.00 | 8.50 | 7.00 | 10.00 | 10.00 | 10.00 | 4.00 | 8.33 |
cycles | 8.83 | 8.67 | 8.33 | 8.33 | 10.00 | 8.50 | 7.00 | 10.00 | 10.00 | 10.00 | 4.00 | 8.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 16.25 |
Stall cycles | 0.00 |
Front-end | 16.17 |
Dispatch | 10.00 |
Overall L1 | 16.17 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 44% |
load | 40% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 43% |
all | 24% |
load | 28% |
store | 0% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 30% |
all | 8% |
load | 7% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 18% |
load | 17% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 18% |
add-sub | 18% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 17% |
all | 13% |
load | 14% |
store | 8% |
mul | 18% |
add-sub | 18% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOVSXD %EDX,%R13 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB $0x68,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x358(%RDI),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 50f9a0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA 0x128(%R12),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP 0x124(%R12),%R13D | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 47f1f8 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x58> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x40(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R13,%R13,2),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RAX,%RDX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD (%RCX),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0x10(%RCX),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VUNPCKHPD %XMM0,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVSD %XMM0,%XMM0,%XMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULSD 0x100(%RBX),%XMM1,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD132SD 0xe8(%RBX),%XMM3,%XMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD231SD 0x118(%RBX),%XMM4,%XMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VCOMISD 0x10e5b5(%RIP),%XMM2 | 2 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
JBE 47f23b <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x9b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VXORPD %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCOMISD %XMM2,%XMM5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JA 47f679 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x4d9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VRNDSCALESD $0x9,%XMM2,%XMM2,%XMM6 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM6,%XMM2,%XMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOV 0x30(%RBX),%R9D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %R9D,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 47f394 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x1f4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPERMILPD $0,%XMM0,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPERMILPD $0x3,%XMM0,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVDDUP %XMM4,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVDDUP 0x10e575(%RIP),%XMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMULPD 0xd8(%RBX),%XMM9,%XMM10 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VXORPD %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VFMADD231PD 0xf0(%RBX),%XMM8,%XMM10 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD231PD 0x108(%RBX),%XMM7,%XMM10 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VRNDSCALEPD $0x9,%XMM10,%XMM11 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VCMPPD $0x1,%XMM12,%XMM10,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VCMPPD $0xe,%XMM13,%XMM10,%K0{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KNOTB %K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSUBPD %XMM11,%XMM10,%XMM4{%K2}{z} | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VUNPCKHPD %XMM4,%XMM4,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVQ %XMM4,%R12 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVSD %XMM14,%XMM14,%XMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 50fbd0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x30(%RBX),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R14D,-0x54(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %R14D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 47f66a <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x4ca> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD -0x54(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RBX),%R12D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x40(%RBX),%R15D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R9,%R9,2),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%R9,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R12D,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R8,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15D,-0x64(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15D,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x68,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
VMOVSD %XMM5,%XMM5,%XMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JMP 47f246 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0xa6> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼miniqmcreference::einspline_spo_ref | 0.94 | 0.93 |
▼Loop 889 - einspline_spo_ref.hpp:219-227 - exec– | 0.01 | 0.01 |
○Loop 890 - einspline_spo_ref.hpp:223-227 - exec | 0.91 | 0.79 |
○Loop 891 - einspline_spo_ref.hpp:206-207 - exec | 0 | 0 |