Function: miniqmcreference::einspline_spo_ref<double>::evaluate(qmcplusplus::ParticleSet const&, int ... | Module: exec | Source: einspline_spo_ref.hpp:172-189 [...] | Coverage: 26.95% |
---|
Function: miniqmcreference::einspline_spo_ref<double>::evaluate(qmcplusplus::ParticleSet const&, int ... | Module: exec | Source: einspline_spo_ref.hpp:172-189 [...] | Coverage: 26.95% |
---|
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/einspline_spo_ref.hpp: 172 - 189 |
-------------------------------------------------------------------------------- |
172: ScopedTimer local_timer(timer); |
173: |
174: auto u = Lattice.toUnit_floor(P.activeR(iat)); |
175: for (int i = 0; i < nBlocks; ++i) |
176: MultiBsplineEvalRef::evaluate_v(einsplines[i], u[0], u[1], u[2], psi[i].data(), nSplinesPerBlock); |
177: } |
178: |
179: inline void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi_v) |
180: { |
181: evaluate_v(P, iat); |
182: |
183: for (int i = 0; i < nBlocks; ++i) |
184: { |
185: // in real simulation, phase needs to be applied. Here just fake computation |
186: const int first = i * nBlocks; |
187: std::copy_n(psi[i].data(), std::min((i + 1) * nSplinesPerBlock, OrbitalSetSize) - first, psi_v.data() + first); |
188: } |
189: } |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 143 - 143 |
-------------------------------------------------------------------------------- |
143: return (active_ptcl_ == iat) ? active_pos_ : R[iat]; |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/Tensor.h: 213 - 213 |
-------------------------------------------------------------------------------- |
213: inline Type_t operator[](unsigned int i) const { return X[i]; } |
/usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/bits/stl_algobase.h: 200 - 696 |
-------------------------------------------------------------------------------- |
200: if (__b < __a) |
[...] |
366: const ptrdiff_t _Num = __last - __first; |
367: if (_Num) |
368: __builtin_memmove(__result, __first, sizeof(_Tp) * _Num); |
[...] |
695: for (; __first != __last; ++__first) |
696: *__first = __tmp; |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Particle/Lattice/CrystalLattice.h: 170 - 170 |
-------------------------------------------------------------------------------- |
170: if (-std::numeric_limits<T1>::epsilon() < val_dot[i] && val_dot[i] < 0) |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineRef.hpp: 42 - 71 |
-------------------------------------------------------------------------------- |
42: x -= spline_m->x_grid.start; |
43: y -= spline_m->y_grid.start; |
44: z -= spline_m->z_grid.start; |
45: T tx, ty, tz; |
46: int ix, iy, iz; |
47: spline2::getSplineBound(x * spline_m->x_grid.delta_inv, tx, ix, spline_m->x_grid.num - 1); |
48: spline2::getSplineBound(y * spline_m->y_grid.delta_inv, ty, iy, spline_m->y_grid.num - 1); |
49: spline2::getSplineBound(z * spline_m->z_grid.delta_inv, tz, iz, spline_m->z_grid.num - 1); |
[...] |
56: const intptr_t xs = spline_m->x_stride; |
57: const intptr_t ys = spline_m->y_stride; |
58: const intptr_t zs = spline_m->z_stride; |
59: |
60: constexpr T zero(0); |
61: std::fill(vals, vals + num_splines, zero); |
62: |
63: for (size_t i = 0; i < 4; i++) |
64: for (size_t j = 0; j < 4; j++) |
65: { |
66: const T pre00 = a[i] * b[j]; |
67: const T* restrict coefs = spline_m->coefs + (ix + i) * xs + (iy + j) * ys + iz * zs; |
68: for (size_t n = 0; n < num_splines; n++) |
69: vals[n] += pre00 * |
70: (c[0] * coefs[n] + c[1] * coefs[n + zs] + c[2] * coefs[n + 2 * zs] + |
71: c[3] * coefs[n + 3 * zs]); |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVector.h: 61 - 61 |
-------------------------------------------------------------------------------- |
61: for (size_t d = 0; d < D; ++d) |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVectorTensorOps.h: 150 - 152 |
-------------------------------------------------------------------------------- |
150: return TinyVector<Type_t, 3>(lhs[0] * rhs[0] + lhs[1] * rhs[3] + lhs[2] * rhs[6], |
151: lhs[0] * rhs[1] + lhs[1] * rhs[4] + lhs[2] * rhs[7], |
152: lhs[0] * rhs[2] + lhs[1] * rhs[5] + lhs[2] * rhs[8]); |
/usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/bits/stl_vector.h: 933 - 1056 |
-------------------------------------------------------------------------------- |
933: return *(this->_M_impl._M_start + __n); |
[...] |
1056: { return _M_data_ptr(this->_M_impl._M_start); } |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineData.hpp: 54 - 57 |
-------------------------------------------------------------------------------- |
54: a[0] = ((A00 * tx + A01) * tx + A02) * tx + A03; |
55: a[1] = ((A10 * tx + A11) * tx + A12) * tx + A13; |
56: a[2] = ((A20 * tx + A21) * tx + A22) * tx + A23; |
57: a[3] = ((A30 * tx + A31) * tx + A32) * tx + A33; |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineEvalHelper.hpp: 45 - 47 |
-------------------------------------------------------------------------------- |
45: T sf = std::floor(x); |
46: T dx2 = x - sf; |
47: int ind2 = std::min(std::max(0, static_cast<int>(sf)), nmax); |
/usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/bits/stl_algo.h: 782 - 782 |
-------------------------------------------------------------------------------- |
782: { return std::copy(__first, __first + __n, __result); } |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Utilities/NewTimer.h: 242 - 249 |
-------------------------------------------------------------------------------- |
242: ScopeGuard(TIMER& t) : timer(t) { timer.start(); } |
[...] |
249: ~ScopeGuard() { timer.stop(); } |
0x434dc0 PUSH %RBP |
0x434dc1 MOV %RSP,%RBP |
0x434dc4 PUSH %R15 |
0x434dc6 PUSH %R14 |
0x434dc8 PUSH %R13 |
0x434dca PUSH %R12 |
0x434dcc PUSH %RBX |
0x434dcd SUB $0xf8,%RSP |
0x434dd4 MOV %RCX,-0x70(%RBP) |
0x434dd8 MOV %EDX,%R12D |
0x434ddb MOV %RSI,%R13 |
0x434dde MOV %RDI,%RBX |
0x434de1 MOV 0x358(%RDI),%RDI |
0x434de8 MOV %RDI,-0x78(%RBP) |
0x434dec CALL 481d70 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> |
0x434df1 MOVSXD %R12D,%RCX |
0x434df4 LEA (%RCX,%RCX,2),%RAX |
0x434df8 SAL $0x3,%RAX |
0x434dfc ADD 0x40(%R13),%RAX |
0x434e00 LEA 0x128(%R13),%RDX |
0x434e07 CMP %ECX,0x124(%R13) |
0x434e0e CMOVE %RDX,%RAX |
0x434e12 MOV %RBX,-0x48(%RBP) |
0x434e16 MOV 0x30(%RBX),%ECX |
0x434e19 MOV %RCX,-0x50(%RBP) |
0x434e1d TEST %ECX,%ECX |
0x434e1f JLE 435408 |
0x434e25 MOV -0x48(%RBP),%RCX |
0x434e29 VMOVSD 0xf0(%RCX),%XMM0 |
0x434e31 VMOVUPD 0xe0(%RCX),%XMM1 |
0x434e39 VMOVUPD (%RAX),%XMM2 |
0x434e3d VMULSD 0xd8(%RCX),%XMM2,%XMM3 |
0x434e45 VFMADD231SD 0x8(%RAX),%XMM0,%XMM3 |
0x434e4b VMOVDDUP 0x10(%RAX),%XMM0 |
0x434e50 VFMADD231SD 0x108(%RCX),%XMM0,%XMM3 |
0x434e59 VMOVSD 0xf8(%RCX),%XMM4 |
0x434e61 VSHUFPD $0x1,%XMM2,%XMM2,%XMM5 |
0x434e66 VPUNPCKLQDQ 0xe8(%RCX),%XMM4,%XMM4 |
0x434e6e VMULPD %XMM5,%XMM4,%XMM4 |
0x434e72 VMOVHPD 0x100(%RCX),%XMM1,%XMM1 |
0x434e7a VFMADD213PD %XMM4,%XMM2,%XMM1 |
0x434e7f VFMADD231PD 0x110(%RCX),%XMM0,%XMM1 |
0x434e88 VMOVSD 0xb14c0(%RIP),%XMM0 |
0x434e90 VCMPPD $0x1,%XMM3,%XMM0,%K1 |
0x434e97 VXORPD %XMM2,%XMM2,%XMM2 |
0x434e9b VROUNDSD $0x9,%XMM3,%XMM3,%XMM4 |
0x434ea1 VCMPPD $0x1,%XMM2,%XMM3,%K1{%K1} |
0x434ea8 VSUBSD %XMM4,%XMM3,%XMM3 |
0x434eac VMOVSD %XMM2,%XMM3,%XMM3{%K1} |
0x434eb2 VMOVUPD %XMM3,-0xe0(%RBP) |
0x434eba VFPCLASSPD $0x50,%XMM1,%K1 |
0x434ec1 VROUNDSD $0x9,%XMM1,%XMM1,%XMM3 |
0x434ec7 VSUBSD %XMM3,%XMM1,%XMM17 |
0x434ecd VMOVAPD %XMM17,%XMM3 |
0x434ed3 VMOVSD %XMM2,%XMM3,%XMM3{%K1} |
0x434ed9 KMOVD %K1,%EAX |
0x434edd AND $0x2,%AL |
0x434edf SHR $0x1,%AL |
0x434ee1 VCMPSD $0x1,%XMM1,%XMM0,%K1 |
0x434ee8 VMOVSD %XMM3,%XMM17,%XMM17{%K1} |
0x434eee VSHUFPD $0x1,%XMM1,%XMM1,%XMM1 |
0x434ef3 VROUNDSD $0x9,%XMM1,%XMM1,%XMM3 |
0x434ef9 VSUBSD %XMM3,%XMM1,%XMM18 |
0x434eff KMOVD %EAX,%K1 |
0x434f03 VMOVAPD %XMM18,%XMM3 |
0x434f09 VMOVSD %XMM2,%XMM3,%XMM3{%K1} |
0x434f0f VCMPSD $0x1,%XMM1,%XMM0,%K1 |
0x434f16 VMOVSD %XMM3,%XMM18,%XMM18{%K1} |
0x434f1c MOV 0x2f8(%RCX),%RAX |
0x434f23 MOV %RAX,-0x88(%RBP) |
0x434f2a MOV 0x310(%RCX),%RAX |
0x434f31 MOV %RAX,-0x80(%RBP) |
0x434f35 MOVSXD 0x40(%RCX),%RBX |
0x434f39 LEA (,%RBX,8),%RDX |
0x434f41 CMP $0x1,%RBX |
0x434f45 MOV %RBX,%R15 |
0x434f48 ADC $0,%R15 |
0x434f4c DECQ -0x50(%RBP) |
0x434f50 MOV %R15,%RAX |
0x434f53 SHR $0x1,%RAX |
0x434f56 MOV %RAX,-0xc8(%RBP) |
0x434f5d MOV %R15,-0xd0(%RBP) |
0x434f64 AND $-0x2,%R15 |
0x434f68 XOR %ECX,%ECX |
0x434f6a VMOVSD 0xb13e4(%RIP),%XMM19 |
0x434f74 VMOVSD 0xae65a(%RIP),%XMM20 |
0x434f7e VMOVDDUP 0xae650(%RIP),%XMM16 |
0x434f88 VMOVUPD 0xb14ce(%RIP),%XMM21 |
0x434f92 VMOVUPD 0xb14d4(%RIP),%XMM22 |
0x434f9c MOV %RDX,-0x58(%RBP) |
0x434fa0 JMP 434fcc |
0x434fa2 NOPW %CS:(%RAX,%RAX,1) |
(765) 0x434fb0 MOV -0x90(%RBP),%RCX |
(765) 0x434fb7 LEA 0x1(%RCX),%RAX |
(765) 0x434fbb CMP -0x50(%RBP),%RCX |
(765) 0x434fbf MOV %RAX,%RCX |
(765) 0x434fc2 MOV -0x58(%RBP),%RDX |
(765) 0x434fc6 JE 435408 |
(765) 0x434fcc MOV -0x88(%RBP),%RAX |
(765) 0x434fd3 MOV (%RAX,%RCX,8),%R14 |
(765) 0x434fd7 MOV %RCX,-0x90(%RBP) |
(765) 0x434fde LEA (%RCX,%RCX,2),%RAX |
(765) 0x434fe2 VMOVUPD -0xe0(%RBP),%XMM0 |
(765) 0x434fea VSUBSD 0x28(%R14),%XMM0,%XMM0 |
(765) 0x434ff0 VSUBSD 0x50(%R14),%XMM17,%XMM1 |
(765) 0x434ff7 MOV -0x80(%RBP),%RCX |
(765) 0x434ffb MOV (%RCX,%RAX,8),%R13 |
(765) 0x434fff VSUBSD 0x78(%R14),%XMM18,%XMM2 |
(765) 0x435006 VMULSD 0x48(%R14),%XMM0,%XMM0 |
(765) 0x43500c MOVSXD 0x38(%R14),%R12 |
(765) 0x435010 VRNDSCALESD $0x9,%XMM0,%XMM0,%XMM28 |
(765) 0x435017 VSUBSD %XMM28,%XMM0,%XMM0 |
(765) 0x43501d VMULSD 0x70(%R14),%XMM1,%XMM1 |
(765) 0x435023 MOVSXD 0x60(%R14),%R8 |
(765) 0x435027 VRNDSCALESD $0x9,%XMM1,%XMM1,%XMM29 |
(765) 0x43502e VMULSD 0x98(%R14),%XMM2,%XMM2 |
(765) 0x435037 VRNDSCALESD $0x9,%XMM2,%XMM2,%XMM30 |
(765) 0x43503e MOVSXD 0x88(%R14),%R11 |
(765) 0x435045 VMULSD %XMM19,%XMM0,%XMM3 |
(765) 0x43504b VSUBSD %XMM3,%XMM20,%XMM4 |
(765) 0x435051 VMULSD %XMM0,%XMM0,%XMM5 |
(765) 0x435055 VMOVDDUP %XMM0,%XMM6 |
(765) 0x435059 VMOVAPD %XMM16,%XMM8 |
(765) 0x43505f VBLENDPD $0x1,%XMM4,%XMM8,%XMM4 |
(765) 0x435065 VMULPD %XMM4,%XMM6,%XMM4 |
(765) 0x435069 VADDPD %XMM21,%XMM4,%XMM7 |
(765) 0x43506f VPUNPCKLQDQ %XMM5,%XMM6,%XMM6 |
(765) 0x435073 VFMADD213PD %XMM22,%XMM7,%XMM6 |
(765) 0x435079 VMOVUPD %XMM6,-0x120(%RBP) |
(765) 0x435081 VSHUFPD $0x1,%XMM4,%XMM4,%XMM4 |
(765) 0x435086 VSUBSD %XMM4,%XMM20,%XMM4 |
(765) 0x43508c VFMADD213SD %XMM20,%XMM0,%XMM4 |
(765) 0x435092 VFMADD213SD %XMM19,%XMM0,%XMM4 |
(765) 0x435098 VMOVSD %XMM4,-0x110(%RBP) |
(765) 0x4350a0 VSUBSD %XMM29,%XMM1,%XMM0 |
(765) 0x4350a6 VMULSD %XMM5,%XMM3,%XMM1 |
(765) 0x4350aa VMOVSD %XMM1,-0x108(%RBP) |
(765) 0x4350b2 VMULSD %XMM19,%XMM0,%XMM1 |
(765) 0x4350b8 VSUBSD %XMM1,%XMM20,%XMM3 |
(765) 0x4350be VMULSD %XMM0,%XMM0,%XMM4 |
(765) 0x4350c2 VMOVDDUP %XMM0,%XMM5 |
(765) 0x4350c6 VBLENDPD $0x1,%XMM3,%XMM8,%XMM3 |
(765) 0x4350cc VMULPD %XMM3,%XMM5,%XMM3 |
(765) 0x4350d0 VADDPD %XMM21,%XMM3,%XMM6 |
(765) 0x4350d6 VPUNPCKLQDQ %XMM4,%XMM5,%XMM5 |
(765) 0x4350da VFMADD213PD %XMM22,%XMM6,%XMM5 |
(765) 0x4350e0 VMOVUPD %XMM5,-0x100(%RBP) |
(765) 0x4350e8 VSHUFPD $0x1,%XMM3,%XMM3,%XMM3 |
(765) 0x4350ed VSUBSD %XMM3,%XMM20,%XMM3 |
(765) 0x4350f3 VFMADD213SD %XMM20,%XMM0,%XMM3 |
(765) 0x4350f9 VFMADD213SD %XMM19,%XMM0,%XMM3 |
(765) 0x4350ff VMOVSD %XMM3,-0xf0(%RBP) |
(765) 0x435107 VMULSD %XMM4,%XMM1,%XMM0 |
(765) 0x43510b VMOVSD %XMM0,-0xe8(%RBP) |
(765) 0x435113 VSUBSD %XMM30,%XMM2,%XMM23 |
(765) 0x435119 VMULSD %XMM19,%XMM23,%XMM31 |
(765) 0x43511f VSUBSD %XMM31,%XMM20,%XMM26 |
(765) 0x435125 VFMADD213SD 0xae4c9(%RIP),%XMM23,%XMM26 |
(765) 0x43512f VMULSD %XMM20,%XMM23,%XMM25 |
(765) 0x435135 VADDSD 0xb1221(%RIP),%XMM25,%XMM27 |
(765) 0x43513f VMULSD %XMM23,%XMM23,%XMM24 |
(765) 0x435145 VFMADD213SD 0xb1219(%RIP),%XMM24,%XMM27 |
(765) 0x43514f MOV 0x10(%R14),%RAX |
(765) 0x435153 MOV %RAX,-0x60(%RBP) |
(765) 0x435157 MOV 0x18(%R14),%RAX |
(765) 0x43515b MOV %RAX,-0x68(%RBP) |
(765) 0x43515f MOV 0x20(%R14),%R10 |
(765) 0x435163 TEST %RDX,%RDX |
(765) 0x435166 JE 43518e |
(765) 0x435168 MOV %R13,%RDI |
(765) 0x43516b XOR %ESI,%ESI |
(765) 0x43516d MOV -0x58(%RBP),%RDX |
(765) 0x435171 MOV %R11,-0x40(%RBP) |
(765) 0x435175 MOV %R8,-0x38(%RBP) |
(765) 0x435179 MOV %R10,-0x30(%RBP) |
(765) 0x43517d CALL 4e17d0 <__intel_avx_rep_memset> |
(765) 0x435182 MOV -0x30(%RBP),%R10 |
(765) 0x435186 MOV -0x38(%RBP),%R8 |
(765) 0x43518a MOV -0x40(%RBP),%R11 |
(765) 0x43518e VCVTTSD2SI %XMM28,%EDX |
(765) 0x435194 VCVTTSD2SI %XMM29,%ESI |
(765) 0x43519a VCVTTSD2SI %XMM30,%ECX |
(765) 0x4351a0 VFMADD213SD %XMM19,%XMM23,%XMM26 |
(765) 0x4351a6 VSUBSD %XMM25,%XMM20,%XMM0 |
(765) 0x4351ac VFMADD213SD %XMM20,%XMM23,%XMM0 |
(765) 0x4351b2 VFMADD213SD %XMM19,%XMM23,%XMM0 |
(765) 0x4351b8 MOV 0x8(%R14),%RAX |
(765) 0x4351bc VMULSD %XMM24,%XMM31,%XMM1 |
(765) 0x4351c2 DEC %R11 |
(765) 0x4351c5 MOV %ECX,%EDI |
(765) 0x4351c7 SAR $0x1f,%EDI |
(765) 0x4351ca ANDN %ECX,%EDI,%ECX |
(765) 0x4351cf CMP %RCX,%R11 |
(765) 0x4351d2 CMOVGE %RCX,%R11 |
(765) 0x4351d6 MOV %R10,%RCX |
(765) 0x4351d9 IMUL %R11,%RCX |
(765) 0x4351dd DEC %R8 |
(765) 0x4351e0 MOV %ESI,%EDI |
(765) 0x4351e2 SAR $0x1f,%EDI |
(765) 0x4351e5 ANDN %ESI,%EDI,%ESI |
(765) 0x4351ea CMP %RSI,%R8 |
(765) 0x4351ed CMOVGE %RSI,%R8 |
(765) 0x4351f1 MOV -0x68(%RBP),%RDI |
(765) 0x4351f5 IMUL %RDI,%R8 |
(765) 0x4351f9 DEC %R12 |
(765) 0x4351fc MOV %EDX,%ESI |
(765) 0x4351fe SAR $0x1f,%ESI |
(765) 0x435201 ANDN %EDX,%ESI,%EDX |
(765) 0x435206 CMP %RDX,%R12 |
(765) 0x435209 CMOVGE %RDX,%R12 |
(765) 0x43520d MOV -0x60(%RBP),%RSI |
(765) 0x435211 IMUL %RSI,%R12 |
(765) 0x435215 ADD %R8,%R12 |
(765) 0x435218 MOV %R12,%R8 |
(765) 0x43521b LEA (%R10,%R10,2),%R12 |
(765) 0x43521f VMOVDDUP %XMM27,%XMM2 |
(765) 0x435225 VMOVDDUP %XMM26,%XMM3 |
(765) 0x43522b VMOVDDUP %XMM0,%XMM4 |
(765) 0x43522f VMOVDDUP %XMM1,%XMM5 |
(765) 0x435233 LEA (%R8,%RCX,1),%RDX |
(765) 0x435237 LEA (%RDX,%R15,1),%R9 |
(765) 0x43523b MOV %R9,-0x40(%RBP) |
(765) 0x43523f LEA (%R10,%RCX,1),%R9 |
(765) 0x435243 ADD %R8,%R9 |
(765) 0x435246 ADD %R15,%R9 |
(765) 0x435249 MOV %R9,-0x38(%RBP) |
(765) 0x43524d LEA (%RCX,%R10,2),%R9 |
(765) 0x435251 ADD %R8,%R9 |
(765) 0x435254 ADD %R15,%R9 |
(765) 0x435257 MOV %R9,-0x30(%RBP) |
(765) 0x43525b ADD %RCX,%R12 |
(765) 0x43525e ADD %R8,%R12 |
(765) 0x435261 ADD %R15,%R12 |
(765) 0x435264 LEA 0x3(%R11),%RCX |
(765) 0x435268 IMUL %R10,%RCX |
(765) 0x43526c ADD %R8,%RCX |
(765) 0x43526f LEA (%RAX,%RCX,8),%R9 |
(765) 0x435273 LEA (,%RSI,8),%RCX |
(765) 0x43527b MOV %RCX,-0x98(%RBP) |
(765) 0x435282 LEA 0x2(%R11),%RCX |
(765) 0x435286 IMUL %R10,%RCX |
(765) 0x43528a INC %R11 |
(765) 0x43528d IMUL %R10,%R11 |
(765) 0x435291 LEA (,%RDI,8),%R10 |
(765) 0x435299 ADD %R8,%RCX |
(765) 0x43529c LEA (%RAX,%RCX,8),%R14 |
(765) 0x4352a0 ADD %R8,%R11 |
(765) 0x4352a3 LEA (%RAX,%R11,8),%RCX |
(765) 0x4352a7 LEA (%RAX,%RDX,8),%R8 |
(765) 0x4352ab MOV %R9,%RDX |
(765) 0x4352ae MOV %RCX,%R9 |
(765) 0x4352b1 XOR %ECX,%ECX |
(765) 0x4352b3 JMP 435304 |
0x4352b5 NOPW %CS:(%RAX,%RAX,1) |
(766) 0x4352c0 MOV -0xa0(%RBP),%RDI |
(766) 0x4352c7 LEA 0x1(%RDI),%RCX |
(766) 0x4352cb MOV -0xc0(%RBP),%RDX |
(766) 0x4352d2 MOV -0x98(%RBP),%RSI |
(766) 0x4352d9 ADD %RSI,%RDX |
(766) 0x4352dc MOV -0xb8(%RBP),%R14 |
(766) 0x4352e3 ADD %RSI,%R14 |
(766) 0x4352e6 MOV -0xb0(%RBP),%R9 |
(766) 0x4352ed ADD %RSI,%R9 |
(766) 0x4352f0 MOV -0xa8(%RBP),%R8 |
(766) 0x4352f7 ADD %RSI,%R8 |
(766) 0x4352fa CMP $0x3,%RDI |
(766) 0x4352fe JE 434fb0 |
(766) 0x435304 VMOVSD -0x120(%RBP,%RCX,8),%XMM6 |
(766) 0x43530d MOV -0x60(%RBP),%R11 |
(766) 0x435311 MOV %RCX,-0xa0(%RBP) |
(766) 0x435318 IMUL %RCX,%R11 |
(766) 0x43531c MOV %R8,-0xa8(%RBP) |
(766) 0x435323 MOV %R9,-0xb0(%RBP) |
(766) 0x43532a MOV %R14,-0xb8(%RBP) |
(766) 0x435331 MOV %RDX,-0xc0(%RBP) |
(766) 0x435338 MOV %RDX,%RCX |
(766) 0x43533b XOR %EDX,%EDX |
(766) 0x43533d JMP 43535d |
0x43533f NOP |
(767) 0x435340 LEA 0x1(%RDX),%RSI |
(767) 0x435344 ADD %R10,%RCX |
(767) 0x435347 ADD %R10,%R14 |
(767) 0x43534a ADD %R10,%R9 |
(767) 0x43534d ADD %R10,%R8 |
(767) 0x435350 CMP $0x3,%RDX |
(767) 0x435354 MOV %RSI,%RDX |
(767) 0x435357 JE 4352c0 |
(767) 0x43535d TEST %EBX,%EBX |
(767) 0x43535f JE 435340 |
(767) 0x435361 VMULSD -0x100(%RBP,%RDX,8),%XMM6,%XMM7 |
(767) 0x43536a CMP $0x1,%EBX |
(767) 0x43536d JE 4353af |
(767) 0x43536f VMOVDDUP %XMM7,%XMM8 |
(767) 0x435373 MOV -0xc8(%RBP),%RSI |
(767) 0x43537a XOR %EDI,%EDI |
(767) 0x43537c NOPL (%RAX) |
(768) 0x435380 VMULPD (%R8,%RDI,1),%XMM3,%XMM9 |
(768) 0x435386 VFMADD231PD (%R9,%RDI,1),%XMM2,%XMM9 |
(768) 0x43538c VFMADD231PD (%R14,%RDI,1),%XMM4,%XMM9 |
(768) 0x435392 VFMADD231PD (%RCX,%RDI,1),%XMM5,%XMM9 |
(768) 0x435398 VFMADD213PD (%R13,%RDI,1),%XMM8,%XMM9 |
(768) 0x43539f VMOVUPD %XMM9,(%R13,%RDI,1) |
(768) 0x4353a6 ADD $0x10,%RDI |
(768) 0x4353aa DEC %RSI |
(768) 0x4353ad JNE 435380 |
(767) 0x4353af CMP -0xd0(%RBP),%R15 |
(767) 0x4353b6 JE 435340 |
(767) 0x4353b8 MOV -0x68(%RBP),%RSI |
(767) 0x4353bc IMUL %RDX,%RSI |
(767) 0x4353c0 ADD %R11,%RSI |
(767) 0x4353c3 MOV -0x40(%RBP),%RDI |
(767) 0x4353c7 ADD %RSI,%RDI |
(767) 0x4353ca VMULSD (%RAX,%RDI,8),%XMM26,%XMM8 |
(767) 0x4353d1 MOV -0x38(%RBP),%RDI |
(767) 0x4353d5 ADD %RSI,%RDI |
(767) 0x4353d8 VFMADD231SD (%RAX,%RDI,8),%XMM27,%XMM8 |
(767) 0x4353df MOV -0x30(%RBP),%RDI |
(767) 0x4353e3 ADD %RSI,%RDI |
(767) 0x4353e6 VFMADD231SD (%RAX,%RDI,8),%XMM0,%XMM8 |
(767) 0x4353ec ADD %R12,%RSI |
(767) 0x4353ef VFMADD231SD (%RAX,%RSI,8),%XMM1,%XMM8 |
(767) 0x4353f5 VFMADD213SD (%R13,%R15,8),%XMM7,%XMM8 |
(767) 0x4353fc VMOVSD %XMM8,(%R13,%R15,8) |
(767) 0x435403 JMP 435340 |
0x435408 MOV -0x78(%RBP),%RDI |
0x43540c CALL 481f60 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> |
0x435411 MOV -0x48(%RBP),%R12 |
0x435415 MOV 0x30(%R12),%EAX |
0x43541a TEST %EAX,%EAX |
0x43541c MOV -0x70(%RBP),%R15 |
0x435420 JLE 435486 |
0x435422 XOR %EBX,%EBX |
0x435424 XOR %R14D,%R14D |
0x435427 JMP 43543c |
0x435429 NOPL (%RAX) |
(764) 0x435430 MOVSXD %EAX,%RCX |
(764) 0x435433 ADD $0x18,%RBX |
(764) 0x435437 CMP %RCX,%R14 |
(764) 0x43543a JGE 435486 |
(764) 0x43543c MOV %R14D,%ECX |
(764) 0x43543f IMUL %EAX,%ECX |
(764) 0x435442 INC %R14 |
(764) 0x435445 MOV 0x40(%R12),%EDX |
(764) 0x43544a IMUL %R14D,%EDX |
(764) 0x43544e MOV 0x8(%R12),%ESI |
(764) 0x435453 CMP %EDX,%ESI |
(764) 0x435455 CMOVL %ESI,%EDX |
(764) 0x435458 SUB %ECX,%EDX |
(764) 0x43545a JE 435430 |
(764) 0x43545c MOVSXD %EDX,%RDX |
(764) 0x43545f SAL $0x3,%RDX |
(764) 0x435463 MOV 0x310(%R12),%RAX |
(764) 0x43546b MOV (%RAX,%RBX,1),%RSI |
(764) 0x43546f MOVSXD %ECX,%RDI |
(764) 0x435472 SAL $0x3,%RDI |
(764) 0x435476 ADD 0x18(%R15),%RDI |
(764) 0x43547a CALL 403750 <memmove@plt> |
(764) 0x43547f MOV 0x30(%R12),%EAX |
(764) 0x435484 JMP 435430 |
0x435486 ADD $0xf8,%RSP |
0x43548d POP %RBX |
0x43548e POP %R12 |
0x435490 POP %R13 |
0x435492 POP %R14 |
0x435494 POP %R15 |
0x435496 POP %RBP |
0x435497 RET |
0x435498 MOV %RAX,%RDI |
0x43549b CALL 409f00 <__clang_call_terminate> |
Path / |
Source file and lines | einspline_spo_ref.hpp:172-189 |
Module | exec |
nb instructions | 115 |
nb uops | 123 |
loop length | 574 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 13 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
ADD-SUB / MUL ratio | 1.50 |
micro-operation queue | 20.50 cycles |
front end | 20.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 10.67 | 10.67 | 12.33 | 12.33 | 10.00 | 10.67 | 9.00 | 10.00 | 10.00 | 10.00 | 9.00 | 12.33 |
cycles | 10.67 | 10.67 | 12.33 | 12.33 | 10.00 | 10.67 | 9.00 | 10.00 | 10.00 | 10.00 | 9.00 | 12.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 20.52-20.49 |
Stall cycles | 0.00 |
Front-end | 20.50 |
Dispatch | 12.33 |
Overall L1 | 20.50 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 40% |
load | 31% |
store | 100% |
mul | 50% |
add-sub | 0% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 44% |
all | 26% |
load | 25% |
store | 9% |
mul | 50% |
add-sub | 0% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 30% |
all | 11% |
load | 10% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 17% |
load | 16% |
store | 25% |
mul | 18% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
all | 15% |
load | 15% |
store | 13% |
mul | 18% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0xf8,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x358(%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 481d70 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOVSXD %R12D,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
LEA (%RCX,%RCX,2),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD 0x40(%R13),%RAX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x128(%R13),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %ECX,0x124(%R13) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
CMOVE %RDX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RBX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RBX),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %ECX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 435408 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x648> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x48(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0xf0(%RCX),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0xe0(%RCX),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD (%RAX),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMULSD 0xd8(%RCX),%XMM2,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD231SD 0x8(%RAX),%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVDDUP 0x10(%RAX),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD231SD 0x108(%RCX),%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD 0xf8(%RCX),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSHUFPD $0x1,%XMM2,%XMM2,%XMM5 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ 0xe8(%RCX),%XMM4,%XMM4 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.50 |
VMULPD %XMM5,%XMM4,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVHPD 0x100(%RCX),%XMM1,%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4-12 | 1 |
VFMADD213PD %XMM4,%XMM2,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x110(%RCX),%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD 0xb14c0(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VCMPPD $0x1,%XMM3,%XMM0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VROUNDSD $0x9,%XMM3,%XMM3,%XMM4 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VCMPPD $0x1,%XMM2,%XMM3,%K1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VSUBSD %XMM4,%XMM3,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM2,%XMM3,%XMM3{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVUPD %XMM3,-0xe0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VFPCLASSPD $0x50,%XMM1,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VROUNDSD $0x9,%XMM1,%XMM1,%XMM3 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM3,%XMM1,%XMM17 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD %XMM17,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVSD %XMM2,%XMM3,%XMM3{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KMOVD %K1,%EAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
AND $0x2,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.25 |
SHR $0x1,%AL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VCMPSD $0x1,%XMM1,%XMM0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVSD %XMM3,%XMM17,%XMM17{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VSHUFPD $0x1,%XMM1,%XMM1,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VROUNDSD $0x9,%XMM1,%XMM1,%XMM3 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM3,%XMM1,%XMM18 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
KMOVD %EAX,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD %XMM18,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVSD %XMM2,%XMM3,%XMM3{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VCMPSD $0x1,%XMM1,%XMM0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVSD %XMM3,%XMM18,%XMM18{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV 0x2f8(%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x310(%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x40(%RCX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RBX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP $0x1,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RBX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADC $0,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DECQ -0x50(%RBP) | 3 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x1,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,-0xc8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0xd0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $-0x2,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0xb13e4(%RIP),%XMM19 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0xae65a(%RIP),%XMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0xae650(%RIP),%XMM16 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0xb14ce(%RIP),%XMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0xb14d4(%RIP),%XMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %RDX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 434fcc <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x20c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x78(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 481f60 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x48(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EAX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV -0x70(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JLE 435486 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x6c6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43543c <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x67c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0xf8,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 409f00 <__clang_call_terminate> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
Source file and lines | einspline_spo_ref.hpp:172-189 |
Module | exec |
nb instructions | 115 |
nb uops | 123 |
loop length | 574 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 13 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
ADD-SUB / MUL ratio | 1.50 |
micro-operation queue | 20.50 cycles |
front end | 20.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 10.67 | 10.67 | 12.33 | 12.33 | 10.00 | 10.67 | 9.00 | 10.00 | 10.00 | 10.00 | 9.00 | 12.33 |
cycles | 10.67 | 10.67 | 12.33 | 12.33 | 10.00 | 10.67 | 9.00 | 10.00 | 10.00 | 10.00 | 9.00 | 12.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 20.52-20.49 |
Stall cycles | 0.00 |
Front-end | 20.50 |
Dispatch | 12.33 |
Overall L1 | 20.50 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 40% |
load | 31% |
store | 100% |
mul | 50% |
add-sub | 0% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 44% |
all | 26% |
load | 25% |
store | 9% |
mul | 50% |
add-sub | 0% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 30% |
all | 11% |
load | 10% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 17% |
load | 16% |
store | 25% |
mul | 18% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
all | 15% |
load | 15% |
store | 13% |
mul | 18% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0xf8,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x358(%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 481d70 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOVSXD %R12D,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
LEA (%RCX,%RCX,2),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD 0x40(%R13),%RAX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x128(%R13),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %ECX,0x124(%R13) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
CMOVE %RDX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RBX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RBX),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %ECX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 435408 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x648> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x48(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0xf0(%RCX),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0xe0(%RCX),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD (%RAX),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMULSD 0xd8(%RCX),%XMM2,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD231SD 0x8(%RAX),%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVDDUP 0x10(%RAX),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD231SD 0x108(%RCX),%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD 0xf8(%RCX),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSHUFPD $0x1,%XMM2,%XMM2,%XMM5 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ 0xe8(%RCX),%XMM4,%XMM4 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.50 |
VMULPD %XMM5,%XMM4,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVHPD 0x100(%RCX),%XMM1,%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4-12 | 1 |
VFMADD213PD %XMM4,%XMM2,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x110(%RCX),%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD 0xb14c0(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VCMPPD $0x1,%XMM3,%XMM0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VROUNDSD $0x9,%XMM3,%XMM3,%XMM4 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VCMPPD $0x1,%XMM2,%XMM3,%K1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VSUBSD %XMM4,%XMM3,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM2,%XMM3,%XMM3{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVUPD %XMM3,-0xe0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VFPCLASSPD $0x50,%XMM1,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VROUNDSD $0x9,%XMM1,%XMM1,%XMM3 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM3,%XMM1,%XMM17 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD %XMM17,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVSD %XMM2,%XMM3,%XMM3{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KMOVD %K1,%EAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
AND $0x2,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.25 |
SHR $0x1,%AL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VCMPSD $0x1,%XMM1,%XMM0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVSD %XMM3,%XMM17,%XMM17{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VSHUFPD $0x1,%XMM1,%XMM1,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VROUNDSD $0x9,%XMM1,%XMM1,%XMM3 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM3,%XMM1,%XMM18 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
KMOVD %EAX,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD %XMM18,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVSD %XMM2,%XMM3,%XMM3{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VCMPSD $0x1,%XMM1,%XMM0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVSD %XMM3,%XMM18,%XMM18{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV 0x2f8(%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x310(%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x40(%RCX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RBX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP $0x1,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RBX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADC $0,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DECQ -0x50(%RBP) | 3 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x1,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,-0xc8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0xd0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $-0x2,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0xb13e4(%RIP),%XMM19 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0xae65a(%RIP),%XMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0xae650(%RIP),%XMM16 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0xb14ce(%RIP),%XMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0xb14d4(%RIP),%XMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %RDX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 434fcc <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x20c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x78(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 481f60 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x48(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EAX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV -0x70(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JLE 435486 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x6c6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43543c <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x67c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0xf8,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 409f00 <__clang_call_terminate> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼miniqmcreference::einspline_spo_ref | 26.95 | 21.59 |
▼Loop 765 - MultiBsplineRef.hpp:42-71 - exec– | 0.01 | 0.01 |
▼Loop 766 - MultiBsplineRef.hpp:63-71 - exec– | 0.01 | 0 |
▼Loop 767 - MultiBsplineRef.hpp:64-71 - exec– | 0 | 0 |
○Loop 768 - MultiBsplineRef.hpp:68-70 - exec | 26.89 | 21.16 |
○Loop 764 - einspline_spo_ref.hpp:183-187 - exec | 0 | 0.01 |