Function: _ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6 ... | Module: exec | Source: einspline_spo_ref.hpp:203-230 [...] | Coverage: 0.4% |
---|
Function: _ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6 ... | Module: exec | Source: einspline_spo_ref.hpp:203-230 [...] | Coverage: 0.4% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/VectorSoAContainer.h: 231 - 271 |
-------------------------------------------------------------------------------- |
231: inline const AoSElement_t operator[](size_t i) const { return AoSElement_t(myData + i, nGhosts); } |
[...] |
265: inline T* data() { return myData; } |
[...] |
271: inline T* restrict data(size_t i) { return myData + i * nGhosts; } |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Utilities/NewTimer.h: 242 - 249 |
-------------------------------------------------------------------------------- |
242: ScopeGuard(TIMER& t) : timer(t) { timer.start(); } |
[...] |
249: ~ScopeGuard() { timer.stop(); } |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVector.h: 145 - 145 |
-------------------------------------------------------------------------------- |
145: X[i] = base[i * offset]; |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 143 - 143 |
-------------------------------------------------------------------------------- |
143: return (active_ptcl_ == iat) ? active_pos_ : R[iat]; |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 229 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
[...] |
229: return X[i]; |
/cluster/comp/gcc/13.2.0/include/c++/13.2.0/bits/stl_vector.h: 1126 - 1258 |
-------------------------------------------------------------------------------- |
1126: return *(this->_M_impl._M_start + __n); |
[...] |
1258: { return _M_data_ptr(this->_M_impl._M_start); } |
/cluster/comp/gcc/13.2.0/include/c++/13.2.0/bits/stl_algobase.h: 238 - 238 |
-------------------------------------------------------------------------------- |
238: if (__b < __a) |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVectorTensorOps.h: 150 - 152 |
-------------------------------------------------------------------------------- |
150: return TinyVector<Type_t, 3>(lhs[0] * rhs[0] + lhs[1] * rhs[3] + lhs[2] * rhs[6], |
151: lhs[0] * rhs[1] + lhs[1] * rhs[4] + lhs[2] * rhs[7], |
152: lhs[0] * rhs[2] + lhs[1] * rhs[5] + lhs[2] * rhs[8]); |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/einspline_spo_ref.hpp: 203 - 230 |
-------------------------------------------------------------------------------- |
203: ScopedTimer local_timer(timer); |
204: |
205: auto u = Lattice.toUnit_floor(P.activeR(iat)); |
206: for (int i = 0; i < nBlocks; ++i) |
207: MultiBsplineEvalRef::evaluate_vgh(einsplines[i], u[0], u[1], u[2], psi[i].data(), grad[i].data(), hess[i].data(), |
208: nSplinesPerBlock); |
209: } |
210: |
211: inline void evaluate(const ParticleSet& P, |
[...] |
219: for (int i = 0; i < nBlocks; ++i) |
220: { |
221: // in real simulation, phase needs to be applied. Here just fake computation |
222: const int first = i * nBlocks; |
223: for (int j = first; j < std::min((i + 1) * nSplinesPerBlock, OrbitalSetSize); j++) |
224: { |
225: psi_v[j] = psi[i][j - first]; |
226: dpsi_v[j] = grad[i][j - first]; |
227: d2psi_v[j] = hess[i].data(0)[j - first]; |
228: } |
229: } |
230: } |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Particle/Lattice/CrystalLattice.h: 170 - 173 |
-------------------------------------------------------------------------------- |
170: if (-std::numeric_limits<T1>::epsilon() < val_dot[i] && val_dot[i] < 0) |
171: val_dot[i] = T1(0.0); |
172: else |
173: val_dot[i] -= std::floor(val_dot[i]); |
0x4825d0 PUSH %RBP |
0x4825d1 MOV %RSP,%RBP |
0x4825d4 PUSH %R15 |
0x4825d6 PUSH %R14 |
0x4825d8 PUSH %R13 |
0x4825da PUSH %R12 |
0x4825dc PUSH %RBX |
0x4825dd MOV %RSI,%R13 |
0x4825e0 MOV %RDI,%RBX |
0x4825e3 SUB $0x48,%RSP |
0x4825e7 MOV 0x358(%RDI),%R12 |
0x4825ee MOV %RCX,-0x60(%RBP) |
0x4825f2 MOVSXD %EDX,%R15 |
0x4825f5 MOV %R8,-0x68(%RBP) |
0x4825f9 MOV %R9,-0x70(%RBP) |
0x4825fd MOV %R12,%RDI |
0x482600 CALL 51ed20 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> |
0x482605 LEA 0x128(%R13),%RCX |
0x48260c CMP 0x124(%R13),%R15D |
0x482613 JE 482621 |
0x482615 MOV 0x40(%R13),%RAX |
0x482619 LEA (%R15,%R15,2),%RDX |
0x48261d LEA (%RAX,%RDX,8),%RCX |
0x482621 VMOVUPD (%RCX),%XMM1 |
0x482625 VMOVSD 0x10(%RCX),%XMM4 |
0x48262a VMOVSD %XMM1,%XMM1,%XMM0 |
0x48262e VUNPCKHPD %XMM1,%XMM1,%XMM2 |
0x482632 VMULSD 0x100(%RBX),%XMM2,%XMM3 |
0x48263a VFMADD132SD 0xe8(%RBX),%XMM3,%XMM0 |
0x482643 VFMADD231SD 0x118(%RBX),%XMM4,%XMM0 |
0x48264c VCOMISD 0x12248c(%RIP),%XMM0 |
0x482654 JBE 482664 |
0x482656 VXORPD %XMM5,%XMM5,%XMM5 |
0x48265a VCOMISD %XMM0,%XMM5 |
0x48265e JA 482a8d |
0x482664 VRNDSCALESD $0x9,%XMM0,%XMM0,%XMM6 |
0x48266b VSUBSD %XMM6,%XMM0,%XMM7 |
0x48266f MOV 0x30(%RBX),%R8D |
0x482673 TEST %R8D,%R8D |
0x482676 JLE 4827a9 |
0x48267c VPERMILPD $0,%XMM1,%XMM10 |
0x482682 VMULPD 0xd8(%RBX),%XMM10,%XMM11 |
0x48268a VPERMILPD $0x3,%XMM1,%XMM9 |
0x482690 VMOVDDUP %XMM4,%XMM8 |
0x482694 VFMADD231PD 0xf0(%RBX),%XMM9,%XMM11 |
0x48269d VXORPD %XMM13,%XMM13,%XMM13 |
0x4826a2 VFMADD231PD 0x108(%RBX),%XMM8,%XMM11 |
0x4826ab VCMPPD $0x1,%XMM13,%XMM11,%K1 |
0x4826b2 VRNDSCALEPD $0x9,%XMM11,%XMM12 |
0x4826b9 VMOVDDUP 0x12241f(%RIP),%XMM14 |
0x4826c1 VMOVSD %XMM7,-0x48(%RBP) |
0x4826c6 VCMPPD $0xe,%XMM14,%XMM11,%K0{%K1} |
0x4826cd KNOTB %K0,%K2 |
0x4826d1 XOR %R14D,%R14D |
0x4826d4 VSUBPD %XMM12,%XMM11,%XMM15{%K2}{z} |
0x4826da VMOVHPD %XMM15,-0x38(%RBP) |
0x4826df VMOVLPD %XMM15,-0x40(%RBP) |
(900) 0x4826e4 MOV 0x310(%RBX),%R11 |
(900) 0x4826eb MOV 0x340(%RBX),%RDI |
(900) 0x4826f2 MOV 0x328(%RBX),%R9 |
(900) 0x4826f9 LEA (%R14,%R14,4),%RSI |
(900) 0x4826fd MOV 0x2f8(%RBX),%R15 |
(900) 0x482704 LEA (%R14,%R14,2),%R10 |
(900) 0x482708 MOVSXD 0x40(%RBX),%R8 |
(900) 0x48270c VMOVSD -0x48(%RBP),%XMM2 |
(900) 0x482711 VMOVSD -0x38(%RBP),%XMM1 |
(900) 0x482716 VMOVSD -0x40(%RBP),%XMM0 |
(900) 0x48271b SAL $0x3,%RSI |
(900) 0x48271f LEA (%R11,%R10,8),%R13 |
(900) 0x482723 MOV 0x18(%RDI,%RSI,1),%RCX |
(900) 0x482728 MOV 0x18(%R9,%RSI,1),%RDX |
(900) 0x48272d MOV (%R15,%R14,8),%RDI |
(900) 0x482731 MOV (%R13),%RSI |
(900) 0x482735 CALL 481910 <_ZN16miniqmcreference19MultiBsplineEvalRef12evaluate_vghIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_S9_S9_m> |
(900) 0x48273a LEA 0x1(%R14),%RAX |
(900) 0x48273e CMP %EAX,0x30(%RBX) |
(900) 0x482741 JLE 4827a9 |
(900) 0x482743 MOV 0x328(%RBX),%R8 |
(900) 0x48274a MOV 0x310(%RBX),%RSI |
(900) 0x482751 LEA (%RAX,%RAX,4),%RDX |
(900) 0x482755 LEA (%RAX,%RAX,2),%RDI |
(900) 0x482759 MOV 0x340(%RBX),%RCX |
(900) 0x482760 MOV 0x2f8(%RBX),%R11 |
(900) 0x482767 VMOVSD -0x48(%RBP),%XMM2 |
(900) 0x48276c VMOVSD -0x38(%RBP),%XMM1 |
(900) 0x482771 VMOVSD -0x40(%RBP),%XMM0 |
(900) 0x482776 ADD $0x2,%R14 |
(900) 0x48277a SAL $0x3,%RDX |
(900) 0x48277e LEA (%RSI,%RDI,8),%R10 |
(900) 0x482782 MOV 0x18(%R8,%RDX,1),%R9 |
(900) 0x482787 MOVSXD 0x40(%RBX),%R8 |
(900) 0x48278b MOV 0x18(%RCX,%RDX,1),%RCX |
(900) 0x482790 MOV (%R10),%RSI |
(900) 0x482793 MOV (%R11,%RAX,8),%RDI |
(900) 0x482797 MOV %R9,%RDX |
(900) 0x48279a CALL 481910 <_ZN16miniqmcreference19MultiBsplineEvalRef12evaluate_vghIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_S9_S9_m> |
(900) 0x48279f CMP %R14D,0x30(%RBX) |
(900) 0x4827a3 JG 4826e4 |
0x4827a9 MOV %R12,%RDI |
0x4827ac CALL 51ef50 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> |
0x4827b1 MOV 0x30(%RBX),%R12D |
0x4827b5 MOV %R12D,-0x4c(%RBP) |
0x4827b9 TEST %R12D,%R12D |
0x4827bc JLE 482a7e |
0x4827c2 MOVSXD -0x4c(%RBP),%R14 |
0x4827c6 VMOVD 0x40(%RBX),%XMM3 |
0x4827cb VMOVD 0x8(%RBX),%XMM2 |
0x4827d0 XOR %R10D,%R10D |
0x4827d3 VMOVDQA %XMM3,%XMM7 |
0x4827d7 XOR %R11D,%R11D |
0x4827da MOVQ $0,-0x40(%RBP) |
0x4827e2 MOVQ $0,-0x48(%RBP) |
0x4827ea MOVL $0,-0x38(%RBP) |
0x4827f1 LEA (,%R14,8),%R13 |
0x4827f9 LEA (%R14,%R14,2),%R15 |
0x4827fd LEA (,%R15,8),%RAX |
0x482805 VMOVQ %R13,%XMM1 |
0x48280a MOV %RAX,-0x58(%RBP) |
0x48280e XCHG %AX,%AX |
(898) 0x482810 VPMINSD %XMM2,%XMM7,%XMM0 |
(898) 0x482815 VMOVD %XMM0,%ESI |
(898) 0x482819 CMP %ESI,-0x38(%RBP) |
(898) 0x48281c JGE 482a52 |
(898) 0x482822 MOV -0x48(%RBP),%R8 |
(898) 0x482826 MOV 0x340(%RBX),%RDI |
(898) 0x48282d MOV 0x328(%RBX),%RCX |
(898) 0x482834 XOR %EAX,%EAX |
(898) 0x482836 MOV -0x38(%RBP),%R13D |
(898) 0x48283a MOV 0x310(%RBX),%RDX |
(898) 0x482841 MOV -0x68(%RBP),%R14 |
(898) 0x482845 ADD %R8,%RCX |
(898) 0x482848 MOV 0x18(%RDI,%R8,1),%R15 |
(898) 0x48284d MOV -0x70(%RBP),%RDI |
(898) 0x482851 MOV -0x60(%RBP),%R8 |
(898) 0x482855 MOV 0x18(%RCX),%R12 |
(898) 0x482859 SUB %R13D,%ESI |
(898) 0x48285c MOVSXD 0x8(%RCX),%RCX |
(898) 0x482860 MOV (%RDX,%R10,1),%R9 |
(898) 0x482864 MOV -0x40(%RBP),%RDX |
(898) 0x482868 ADD 0x18(%R14),%RDX |
(898) 0x48286c SAL $0x3,%RSI |
(898) 0x482870 MOV 0x18(%RDI),%R14 |
(898) 0x482874 MOV 0x18(%R8),%R8 |
(898) 0x482878 LEA -0x8(%RSI),%RDI |
(898) 0x48287c LEA (%R12,%RCX,8),%R13 |
(898) 0x482880 SHR $0x3,%RDI |
(898) 0x482884 SAL $0x4,%RCX |
(898) 0x482888 INC %RDI |
(898) 0x48288b ADD %R12,%RCX |
(898) 0x48288e ADD %R11,%R8 |
(898) 0x482891 ADD %R11,%R14 |
(898) 0x482894 AND $0x3,%EDI |
(898) 0x482897 JE 482960 |
(898) 0x48289d CMP $0x1,%RDI |
(898) 0x4828a1 JE 48291c |
(898) 0x4828a3 CMP $0x2,%RDI |
(898) 0x4828a7 JE 4828e0 |
(898) 0x4828a9 VMOVSD (%R9),%XMM4 |
(898) 0x4828ae VMOVSD %XMM4,(%R8) |
(898) 0x4828b3 VMOVSD (%R12),%XMM6 |
(898) 0x4828b9 VMOVHPD (%R13),%XMM6,%XMM8 |
(898) 0x4828bf VMOVSD (%RCX),%XMM5 |
(898) 0x4828c3 ADD $0x18,%RDX |
(898) 0x4828c7 MOV $0x8,%EAX |
(898) 0x4828cc VMOVUPD %XMM8,-0x18(%RDX) |
(898) 0x4828d1 VMOVSD %XMM5,-0x8(%RDX) |
(898) 0x4828d6 VMOVSD (%R15),%XMM9 |
(898) 0x4828db VMOVSD %XMM9,(%R14) |
(898) 0x4828e0 VMOVSD (%R9,%RAX,1),%XMM10 |
(898) 0x4828e6 VMOVSD %XMM10,(%R8,%RAX,1) |
(898) 0x4828ec VMOVSD (%R12,%RAX,1),%XMM12 |
(898) 0x4828f2 VMOVHPD (%R13,%RAX,1),%XMM12,%XMM13 |
(898) 0x4828f9 VMOVSD (%RCX,%RAX,1),%XMM11 |
(898) 0x4828fe ADD $0x18,%RDX |
(898) 0x482902 VMOVUPD %XMM13,-0x18(%RDX) |
(898) 0x482907 VMOVSD %XMM11,-0x8(%RDX) |
(898) 0x48290c VMOVSD (%R15,%RAX,1),%XMM14 |
(898) 0x482912 VMOVSD %XMM14,(%R14,%RAX,1) |
(898) 0x482918 ADD $0x8,%RAX |
(898) 0x48291c VMOVSD (%R9,%RAX,1),%XMM15 |
(898) 0x482922 VMOVSD %XMM15,(%R8,%RAX,1) |
(898) 0x482928 VMOVSD (%R12,%RAX,1),%XMM4 |
(898) 0x48292e VMOVHPD (%R13,%RAX,1),%XMM4,%XMM5 |
(898) 0x482935 VMOVSD (%RCX,%RAX,1),%XMM0 |
(898) 0x48293a VMOVUPD %XMM5,(%RDX) |
(898) 0x48293e VMOVSD %XMM0,0x10(%RDX) |
(898) 0x482943 VMOVSD (%R15,%RAX,1),%XMM6 |
(898) 0x482949 VMOVSD %XMM6,(%R14,%RAX,1) |
(898) 0x48294f ADD $0x8,%RAX |
(898) 0x482953 ADD $0x18,%RDX |
(898) 0x482957 CMP %RAX,%RSI |
(898) 0x48295a JE 482a52 |
(899) 0x482960 VMOVSD (%R9,%RAX,1),%XMM8 |
(899) 0x482966 VMOVSD %XMM8,(%R8,%RAX,1) |
(899) 0x48296c VMOVSD (%R12,%RAX,1),%XMM10 |
(899) 0x482972 VMOVHPD (%R13,%RAX,1),%XMM10,%XMM11 |
(899) 0x482979 VMOVSD (%RCX,%RAX,1),%XMM9 |
(899) 0x48297e VMOVUPD %XMM11,(%RDX) |
(899) 0x482982 VMOVSD %XMM9,0x10(%RDX) |
(899) 0x482987 VMOVSD (%R15,%RAX,1),%XMM12 |
(899) 0x48298d VMOVSD %XMM12,(%R14,%RAX,1) |
(899) 0x482993 VMOVSD 0x8(%R9,%RAX,1),%XMM13 |
(899) 0x48299a VMOVSD %XMM13,0x8(%RAX,%R8,1) |
(899) 0x4829a1 VMOVSD 0x8(%R12,%RAX,1),%XMM15 |
(899) 0x4829a8 VMOVHPD 0x8(%R13,%RAX,1),%XMM15,%XMM0 |
(899) 0x4829af VMOVSD 0x8(%RCX,%RAX,1),%XMM14 |
(899) 0x4829b5 VMOVSD %XMM14,0x28(%RDX) |
(899) 0x4829ba ADD $0x60,%RDX |
(899) 0x4829be VMOVUPD %XMM0,-0x48(%RDX) |
(899) 0x4829c3 VMOVSD 0x8(%R15,%RAX,1),%XMM4 |
(899) 0x4829ca VMOVSD %XMM4,0x8(%RAX,%R14,1) |
(899) 0x4829d1 VMOVSD 0x10(%R9,%RAX,1),%XMM5 |
(899) 0x4829d8 VMOVSD %XMM5,0x10(%RAX,%R8,1) |
(899) 0x4829df VMOVSD 0x10(%R12,%RAX,1),%XMM6 |
(899) 0x4829e6 VMOVHPD 0x10(%R13,%RAX,1),%XMM6,%XMM9 |
(899) 0x4829ed VMOVSD 0x10(%RCX,%RAX,1),%XMM8 |
(899) 0x4829f3 VMOVUPD %XMM9,-0x30(%RDX) |
(899) 0x4829f8 VMOVSD %XMM8,-0x20(%RDX) |
(899) 0x4829fd VMOVSD 0x10(%R15,%RAX,1),%XMM10 |
(899) 0x482a04 VMOVSD %XMM10,0x10(%RAX,%R14,1) |
(899) 0x482a0b VMOVSD 0x18(%R9,%RAX,1),%XMM11 |
(899) 0x482a12 VMOVSD %XMM11,0x18(%RAX,%R8,1) |
(899) 0x482a19 VMOVSD 0x18(%R12,%RAX,1),%XMM13 |
(899) 0x482a20 VMOVHPD 0x18(%R13,%RAX,1),%XMM13,%XMM14 |
(899) 0x482a27 VMOVSD 0x18(%RCX,%RAX,1),%XMM12 |
(899) 0x482a2d VMOVUPD %XMM14,-0x18(%RDX) |
(899) 0x482a32 VMOVSD %XMM12,-0x8(%RDX) |
(899) 0x482a37 VMOVSD 0x18(%R15,%RAX,1),%XMM15 |
(899) 0x482a3e ADD $0x20,%RAX |
(899) 0x482a42 VMOVSD %XMM15,-0x8(%RAX,%R14,1) |
(899) 0x482a49 CMP %RAX,%RSI |
(899) 0x482a4c JNE 482960 |
(898) 0x482a52 MOV -0x58(%RBP),%R12 |
(898) 0x482a56 MOV -0x4c(%RBP),%ESI |
(898) 0x482a59 VMOVQ %XMM1,%R9 |
(898) 0x482a5e ADD $0x18,%R10 |
(898) 0x482a62 VPADDD %XMM3,%XMM7,%XMM7 |
(898) 0x482a66 ADD %R9,%R11 |
(898) 0x482a69 ADD %ESI,-0x38(%RBP) |
(898) 0x482a6c ADDQ $0x28,-0x48(%RBP) |
(898) 0x482a71 ADD %R12,-0x40(%RBP) |
(898) 0x482a75 CMP %R10,%R12 |
(898) 0x482a78 JNE 482810 |
0x482a7e ADD $0x48,%RSP |
0x482a82 POP %RBX |
0x482a83 POP %R12 |
0x482a85 POP %R13 |
0x482a87 POP %R14 |
0x482a89 POP %R15 |
0x482a8b POP %RBP |
0x482a8c RET |
0x482a8d VMOVSD %XMM5,%XMM5,%XMM7 |
0x482a91 JMP 48266f |
0x482a96 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | einspline_spo_ref.hpp:203-230 |
Module | exec |
nb instructions | 89 |
nb uops | 92 |
loop length | 413 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 16 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 8 |
ADD-SUB / MUL ratio | 1.00 |
micro-operation queue | 15.33 cycles |
front end | 15.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.75 | 4.75 | 4.50 | 4.50 | 4.50 | 10.33 | 10.33 | 10.33 | 5.50 | 5.58 | 5.50 | 5.42 | 2.50 | 2.50 |
cycles | 4.75 | 4.75 | 4.50 | 4.50 | 4.50 | 10.33 | 10.33 | 10.33 | 5.50 | 5.58 | 5.50 | 5.42 | 2.50 | 2.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 15.33 |
Dispatch | 10.33 |
Overall L1 | 15.33 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 14% |
all | 42% |
load | 40% |
store | 0% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 50% |
all | 27% |
load | 25% |
store | 0% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 38% |
all | 9% |
load | 7% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 17% |
load | 17% |
store | 12% |
mul | 18% |
add-sub | 18% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
all | 14% |
load | 13% |
store | 10% |
mul | 18% |
add-sub | 18% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB $0x48,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x358(%RDI),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RCX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD %EDX,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R8,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R9,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R12,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 51ed20 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x128(%R13),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP 0x124(%R13),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JE 482621 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x51> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x40(%R13),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA (%R15,%R15,2),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%RDX,8),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VMOVUPD (%RCX),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD 0x10(%RCX),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM1,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VUNPCKHPD %XMM1,%XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VMULSD 0x100(%RBX),%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD132SD 0xe8(%RBX),%XMM3,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD 0x118(%RBX),%XMM4,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCOMISD 0x12248c(%RIP),%XMM0 | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 6 | 1 |
JBE 482664 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x94> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VXORPD %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCOMISD %XMM0,%XMM5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 6 | 1 |
JA 482a8d <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x4bd> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VRNDSCALESD $0x9,%XMM0,%XMM0,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VSUBSD %XMM6,%XMM0,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOV 0x30(%RBX),%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
TEST %R8D,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 4827a9 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x1d9> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPERMILPD $0,%XMM1,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VMULPD 0xd8(%RBX),%XMM10,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPERMILPD $0x3,%XMM1,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VMOVDDUP %XMM4,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VFMADD231PD 0xf0(%RBX),%XMM9,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VFMADD231PD 0x108(%RBX),%XMM8,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCMPPD $0x1,%XMM13,%XMM11,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VRNDSCALEPD $0x9,%XMM11,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVDDUP 0x12241f(%RIP),%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM7,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VCMPPD $0xe,%XMM14,%XMM11,%K0{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
KNOTB %K0,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VSUBPD %XMM12,%XMM11,%XMM15{%K2}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVHPD %XMM15,-0x38(%RBP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 9-11 | 1 |
VMOVLPD %XMM15,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
MOV %R12,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 51ef50 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x30(%RBX),%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R12D,-0x4c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
TEST %R12D,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 482a7e <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x4ae> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOVSXD -0x4c(%RBP),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVD 0x40(%RBX),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 0.50 |
VMOVD 0x8(%RBX),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 0.50 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVDQA %XMM3,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOVQ $0,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVQ $0,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVL $0,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA (,%R14,8),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%R14,%R14,2),%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (,%R15,8),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VMOVQ %R13,%XMM1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
MOV %RAX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
ADD $0x48,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM5,%XMM5,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
JMP 48266f <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x9f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | einspline_spo_ref.hpp:203-230 |
Module | exec |
nb instructions | 89 |
nb uops | 92 |
loop length | 413 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 16 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 8 |
ADD-SUB / MUL ratio | 1.00 |
micro-operation queue | 15.33 cycles |
front end | 15.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.75 | 4.75 | 4.50 | 4.50 | 4.50 | 10.33 | 10.33 | 10.33 | 5.50 | 5.58 | 5.50 | 5.42 | 2.50 | 2.50 |
cycles | 4.75 | 4.75 | 4.50 | 4.50 | 4.50 | 10.33 | 10.33 | 10.33 | 5.50 | 5.58 | 5.50 | 5.42 | 2.50 | 2.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 15.33 |
Dispatch | 10.33 |
Overall L1 | 15.33 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 14% |
all | 42% |
load | 40% |
store | 0% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 50% |
all | 27% |
load | 25% |
store | 0% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 38% |
all | 9% |
load | 7% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 17% |
load | 17% |
store | 12% |
mul | 18% |
add-sub | 18% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
all | 14% |
load | 13% |
store | 10% |
mul | 18% |
add-sub | 18% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB $0x48,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x358(%RDI),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RCX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD %EDX,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R8,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R9,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R12,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 51ed20 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x128(%R13),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP 0x124(%R13),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JE 482621 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x51> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x40(%R13),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA (%R15,%R15,2),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%RDX,8),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VMOVUPD (%RCX),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD 0x10(%RCX),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM1,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VUNPCKHPD %XMM1,%XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VMULSD 0x100(%RBX),%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD132SD 0xe8(%RBX),%XMM3,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD 0x118(%RBX),%XMM4,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCOMISD 0x12248c(%RIP),%XMM0 | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 6 | 1 |
JBE 482664 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x94> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VXORPD %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCOMISD %XMM0,%XMM5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 6 | 1 |
JA 482a8d <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x4bd> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VRNDSCALESD $0x9,%XMM0,%XMM0,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VSUBSD %XMM6,%XMM0,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOV 0x30(%RBX),%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
TEST %R8D,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 4827a9 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x1d9> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPERMILPD $0,%XMM1,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VMULPD 0xd8(%RBX),%XMM10,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPERMILPD $0x3,%XMM1,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VMOVDDUP %XMM4,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VFMADD231PD 0xf0(%RBX),%XMM9,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VFMADD231PD 0x108(%RBX),%XMM8,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCMPPD $0x1,%XMM13,%XMM11,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VRNDSCALEPD $0x9,%XMM11,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVDDUP 0x12241f(%RIP),%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM7,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VCMPPD $0xe,%XMM14,%XMM11,%K0{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
KNOTB %K0,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VSUBPD %XMM12,%XMM11,%XMM15{%K2}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVHPD %XMM15,-0x38(%RBP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 9-11 | 1 |
VMOVLPD %XMM15,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
MOV %R12,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 51ef50 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x30(%RBX),%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R12D,-0x4c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
TEST %R12D,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 482a7e <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x4ae> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOVSXD -0x4c(%RBP),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVD 0x40(%RBX),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 0.50 |
VMOVD 0x8(%RBX),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 0.50 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVDQA %XMM3,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOVQ $0,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVQ $0,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVL $0,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA (,%R14,8),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%R14,%R14,2),%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (,%R15,8),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VMOVQ %R13,%XMM1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
MOV %RAX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
ADD $0x48,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM5,%XMM5,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
JMP 48266f <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x9f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_– | 0.4 | 0.82 |
▼Loop 898 - einspline_spo_ref.hpp:219-227 - exec– | 0 | 0.01 |
○Loop 899 - einspline_spo_ref.hpp:223-227 - exec | 0.39 | 0.73 |
○Loop 900 - einspline_spo_ref.hpp:206-207 - exec | 0 | 0.01 |