Function: qmcplusplus::BsplineFunctor<double>::evaluateV(int, int, int, double const*, double*) cons ... | Module: exec | Source: BsplineFunctor.h:223-263 [...] | Coverage: 0.07% |
---|
Function: qmcplusplus::BsplineFunctor<double>::evaluateV(int, int, int, double const*, double*) cons ... | Module: exec | Source: BsplineFunctor.h:223-263 [...] | Coverage: 0.07% |
---|
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 223 - 263 |
-------------------------------------------------------------------------------- |
223: inline T BsplineFunctor<T>::evaluateV(const int iat, |
[...] |
232: int iCount = 0; |
233: const int iLimit = iEnd - iStart; |
234: |
235: #pragma vector always |
236: for (int jat = 0; jat < iLimit; jat++) |
237: { |
238: real_type r = distArray[jat]; |
239: // pick the distances smaller than the cutoff and avoid the reference atom |
240: if (r < cutoff_radius && iStart + jat != iat) |
241: distArrayCompressed[iCount++] = distArray[jat]; |
242: } |
243: |
244: real_type d = 0.0; |
245: //#pragma omp simd reduction(+:d) |
246: for (int jat = 0; jat < iCount; jat++) |
247: { |
248: real_type r = distArrayCompressed[jat]; |
249: r *= DeltaRInv; |
250: int i = (int)r; |
251: real_type t = r - real_type(i); |
252: real_type tp0 = t * t * t; |
253: real_type tp1 = t * t; |
254: real_type tp2 = t; |
255: |
256: real_type d1 = SplineCoefs[i + 0] * (A[0] * tp0 + A[1] * tp1 + A[2] * tp2 + A[3]); |
257: real_type d2 = SplineCoefs[i + 1] * (A[4] * tp0 + A[5] * tp1 + A[6] * tp2 + A[7]); |
258: real_type d3 = SplineCoefs[i + 2] * (A[8] * tp0 + A[9] * tp1 + A[10] * tp2 + A[11]); |
259: real_type d4 = SplineCoefs[i + 3] * (A[12] * tp0 + A[13] * tp1 + A[14] * tp2 + A[15]); |
260: d += (d1 + d2 + d3 + d4); |
261: } |
262: return d; |
263: } |
/usr/include/c++/13.1.1/bits/stl_vector.h: 1142 - 1145 |
-------------------------------------------------------------------------------- |
1142: operator[](size_type __n) const _GLIBCXX_NOEXCEPT |
1143: { |
1144: __glibcxx_requires_subscript(__n); |
1145: return *(this->_M_impl._M_start + __n); |
0x44e180 MOVSXD %EDX,%RAX |
0x44e183 MOV %ECX,%R11D |
0x44e186 MOV %RAX,%R10 |
0x44e189 CMP %EAX,%ECX |
0x44e18b JLE 44e6e0 |
0x44e191 LEA (%R8,%RAX,8),%RAX |
0x44e195 MOV %R11D,%R8D |
0x44e198 VMOVSD 0x8(%RDI),%XMM0 |
0x44e19d MOV %R10D,%EDX |
0x44e1a0 SUB %R10D,%R8D |
0x44e1a3 XOR %ECX,%ECX |
0x44e1a5 AND $0x7,%R8D |
0x44e1a9 JE 44e2ad |
0x44e1af CMP $0x1,%R8D |
0x44e1b3 JE 44e285 |
0x44e1b9 CMP $0x2,%R8D |
0x44e1bd JE 44e266 |
0x44e1c3 CMP $0x3,%R8D |
0x44e1c7 JE 44e252 |
0x44e1cd CMP $0x4,%R8D |
0x44e1d1 JE 44e233 |
0x44e1d3 CMP $0x5,%R8D |
0x44e1d7 JE 44e21f |
0x44e1d9 CMP $0x6,%R8D |
0x44e1dd JE 44e200 |
0x44e1df VMOVSD (%RAX),%XMM1 |
0x44e1e3 VCOMISD %XMM1,%XMM0 |
0x44e1e7 JBE 44e1f8 |
0x44e1e9 CMP %R10D,%ESI |
0x44e1ec JE 44e1f8 |
0x44e1ee VMOVSD %XMM1,(%R9) |
0x44e1f3 MOV $0x1,%ECX |
0x44e1f8 ADD $0x8,%RAX |
0x44e1fc LEA 0x1(%R10),%EDX |
0x44e200 VMOVSD (%RAX),%XMM2 |
0x44e204 VCOMISD %XMM2,%XMM0 |
0x44e208 JBE 44e219 |
0x44e20a CMP %EDX,%ESI |
0x44e20c JE 44e219 |
0x44e20e MOVSXD %ECX,%R10 |
0x44e211 INC %ECX |
0x44e213 VMOVSD %XMM2,(%R9,%R10,8) |
0x44e219 ADD $0x8,%RAX |
0x44e21d INC %EDX |
0x44e21f VMOVSD (%RAX),%XMM3 |
0x44e223 VCOMISD %XMM3,%XMM0 |
0x44e227 JA 44e6c0 |
0x44e22d ADD $0x8,%RAX |
0x44e231 INC %EDX |
0x44e233 VMOVSD (%RAX),%XMM4 |
0x44e237 VCOMISD %XMM4,%XMM0 |
0x44e23b JBE 44e24c |
0x44e23d CMP %EDX,%ESI |
0x44e23f JE 44e24c |
0x44e241 MOVSXD %ECX,%R10 |
0x44e244 INC %ECX |
0x44e246 VMOVSD %XMM4,(%R9,%R10,8) |
0x44e24c ADD $0x8,%RAX |
0x44e250 INC %EDX |
0x44e252 VMOVSD (%RAX),%XMM5 |
0x44e256 VCOMISD %XMM5,%XMM0 |
0x44e25a JA 44e6a0 |
0x44e260 ADD $0x8,%RAX |
0x44e264 INC %EDX |
0x44e266 VMOVSD (%RAX),%XMM6 |
0x44e26a VCOMISD %XMM6,%XMM0 |
0x44e26e JBE 44e27f |
0x44e270 CMP %EDX,%ESI |
0x44e272 JE 44e27f |
0x44e274 MOVSXD %ECX,%R10 |
0x44e277 INC %ECX |
0x44e279 VMOVSD %XMM6,(%R9,%R10,8) |
0x44e27f ADD $0x8,%RAX |
0x44e283 INC %EDX |
0x44e285 VMOVSD (%RAX),%XMM7 |
0x44e289 VCOMISD %XMM7,%XMM0 |
0x44e28d JBE 44e29e |
0x44e28f CMP %EDX,%ESI |
0x44e291 JE 44e29e |
0x44e293 MOVSXD %ECX,%R8 |
0x44e296 INC %ECX |
0x44e298 VMOVSD %XMM7,(%R9,%R8,8) |
0x44e29e INC %EDX |
0x44e2a0 ADD $0x8,%RAX |
0x44e2a4 CMP %R11D,%EDX |
0x44e2a7 JE 44e3be |
(644) 0x44e2ad VMOVSD (%RAX),%XMM8 |
(644) 0x44e2b1 VCOMISD %XMM8,%XMM0 |
(644) 0x44e2b6 JBE 44e2c7 |
(644) 0x44e2b8 CMP %EDX,%ESI |
(644) 0x44e2ba JE 44e2c7 |
(644) 0x44e2bc MOVSXD %ECX,%R10 |
(644) 0x44e2bf INC %ECX |
(644) 0x44e2c1 VMOVSD %XMM8,(%R9,%R10,8) |
(644) 0x44e2c7 VMOVSD 0x8(%RAX),%XMM9 |
(644) 0x44e2cc INC %EDX |
(644) 0x44e2ce LEA 0x8(%RAX),%R8 |
(644) 0x44e2d2 VCOMISD %XMM9,%XMM0 |
(644) 0x44e2d7 JBE 44e2e8 |
(644) 0x44e2d9 CMP %EDX,%ESI |
(644) 0x44e2db JE 44e2e8 |
(644) 0x44e2dd MOVSXD %ECX,%RAX |
(644) 0x44e2e0 INC %ECX |
(644) 0x44e2e2 VMOVSD %XMM9,(%R9,%RAX,8) |
(644) 0x44e2e8 VMOVSD 0x8(%R8),%XMM10 |
(644) 0x44e2ee LEA 0x1(%RDX),%R10D |
(644) 0x44e2f2 VCOMISD %XMM10,%XMM0 |
(644) 0x44e2f7 JBE 44e309 |
(644) 0x44e2f9 CMP %R10D,%ESI |
(644) 0x44e2fc JE 44e309 |
(644) 0x44e2fe MOVSXD %ECX,%RAX |
(644) 0x44e301 INC %ECX |
(644) 0x44e303 VMOVSD %XMM10,(%R9,%RAX,8) |
(644) 0x44e309 VMOVSD 0x10(%R8),%XMM11 |
(644) 0x44e30f LEA 0x2(%RDX),%R10D |
(644) 0x44e313 VCOMISD %XMM11,%XMM0 |
(644) 0x44e318 JBE 44e32a |
(644) 0x44e31a CMP %R10D,%ESI |
(644) 0x44e31d JE 44e32a |
(644) 0x44e31f MOVSXD %ECX,%RAX |
(644) 0x44e322 INC %ECX |
(644) 0x44e324 VMOVSD %XMM11,(%R9,%RAX,8) |
(644) 0x44e32a VMOVSD 0x18(%R8),%XMM12 |
(644) 0x44e330 LEA 0x3(%RDX),%R10D |
(644) 0x44e334 VCOMISD %XMM12,%XMM0 |
(644) 0x44e339 JBE 44e34b |
(644) 0x44e33b CMP %R10D,%ESI |
(644) 0x44e33e JE 44e34b |
(644) 0x44e340 MOVSXD %ECX,%RAX |
(644) 0x44e343 INC %ECX |
(644) 0x44e345 VMOVSD %XMM12,(%R9,%RAX,8) |
(644) 0x44e34b VMOVSD 0x20(%R8),%XMM13 |
(644) 0x44e351 LEA 0x4(%RDX),%R10D |
(644) 0x44e355 VCOMISD %XMM13,%XMM0 |
(644) 0x44e35a JBE 44e36c |
(644) 0x44e35c CMP %R10D,%ESI |
(644) 0x44e35f JE 44e36c |
(644) 0x44e361 MOVSXD %ECX,%RAX |
(644) 0x44e364 INC %ECX |
(644) 0x44e366 VMOVSD %XMM13,(%R9,%RAX,8) |
(644) 0x44e36c VMOVSD 0x28(%R8),%XMM14 |
(644) 0x44e372 LEA 0x5(%RDX),%R10D |
(644) 0x44e376 VCOMISD %XMM14,%XMM0 |
(644) 0x44e37b JBE 44e38d |
(644) 0x44e37d CMP %R10D,%ESI |
(644) 0x44e380 JE 44e38d |
(644) 0x44e382 MOVSXD %ECX,%RAX |
(644) 0x44e385 INC %ECX |
(644) 0x44e387 VMOVSD %XMM14,(%R9,%RAX,8) |
(644) 0x44e38d VMOVSD 0x30(%R8),%XMM15 |
(644) 0x44e393 LEA 0x6(%RDX),%R10D |
(644) 0x44e397 VCOMISD %XMM15,%XMM0 |
(644) 0x44e39c JBE 44e3ae |
(644) 0x44e39e CMP %R10D,%ESI |
(644) 0x44e3a1 JE 44e3ae |
(644) 0x44e3a3 MOVSXD %ECX,%RAX |
(644) 0x44e3a6 INC %ECX |
(644) 0x44e3a8 VMOVSD %XMM15,(%R9,%RAX,8) |
(644) 0x44e3ae ADD $0x7,%EDX |
(644) 0x44e3b1 LEA 0x38(%R8),%RAX |
(644) 0x44e3b5 CMP %R11D,%EDX |
(644) 0x44e3b8 JNE 44e2ad |
0x44e3be TEST %ECX,%ECX |
0x44e3c0 JLE 44e6e0 |
0x44e3c6 VMOVSD 0x238(%RDI),%XMM21 |
0x44e3cd MOV 0x218(%RDI),%RSI |
0x44e3d4 VXORPD %XMM0,%XMM0,%XMM0 |
0x44e3d8 VMOVSD 0x18(%RDI),%XMM17 |
0x44e3df VMOVSD 0x20(%RDI),%XMM18 |
0x44e3e6 VMOVSD 0x28(%RDI),%XMM16 |
0x44e3ed VMOVSD 0x30(%RDI),%XMM15 |
0x44e3f2 VMOVSD 0x38(%RDI),%XMM13 |
0x44e3f7 VMOVSD 0x40(%RDI),%XMM14 |
0x44e3fc VMOVSD 0x48(%RDI),%XMM12 |
0x44e401 VMOVSD 0x50(%RDI),%XMM11 |
0x44e406 VMOVSD 0x58(%RDI),%XMM9 |
0x44e40b VMOVSD 0x60(%RDI),%XMM10 |
0x44e410 VMOVSD 0x68(%RDI),%XMM8 |
0x44e415 VMOVSD 0x70(%RDI),%XMM7 |
0x44e41a VMOVSD 0x78(%RDI),%XMM5 |
0x44e41f VMOVSD 0x80(%RDI),%XMM6 |
0x44e427 VMOVSD 0x88(%RDI),%XMM4 |
0x44e42f VMOVSD 0x90(%RDI),%XMM3 |
0x44e437 MOVSXD %ECX,%RDI |
0x44e43a LEA (%R9,%RDI,8),%R11 |
0x44e43e AND $0x1,%EDI |
0x44e441 JE 44e510 |
0x44e447 VMULSD (%R9),%XMM21,%XMM0 |
0x44e44d VMOVSD %XMM16,%XMM16,%XMM22 |
0x44e453 VMOVSD %XMM12,%XMM12,%XMM20 |
0x44e459 ADD $0x8,%R9 |
0x44e45d VMOVSD %XMM8,%XMM8,%XMM23 |
0x44e463 VRNDSCALESD $0xb,%XMM0,%XMM0,%XMM1 |
0x44e46a VCVTTSD2SI %XMM0,%ECX |
0x44e46e VSUBSD %XMM1,%XMM0,%XMM1 |
0x44e472 VMULSD %XMM1,%XMM1,%XMM19 |
0x44e478 VFMADD132SD %XMM1,%XMM15,%XMM22 |
0x44e47e VFMADD132SD %XMM1,%XMM11,%XMM20 |
0x44e484 VFMADD132SD %XMM1,%XMM7,%XMM23 |
0x44e48a MOVSXD %ECX,%RDX |
0x44e48d VMULSD %XMM1,%XMM19,%XMM2 |
0x44e493 VFMADD132SD %XMM4,%XMM3,%XMM1 |
0x44e498 VMULSD %XMM18,%XMM19,%XMM0 |
0x44e49e VMULSD %XMM10,%XMM19,%XMM25 |
0x44e4a4 VMULSD %XMM6,%XMM19,%XMM28 |
0x44e4aa VFMADD231SD %XMM2,%XMM17,%XMM0 |
0x44e4b0 VFMADD231SD %XMM2,%XMM9,%XMM25 |
0x44e4b6 VADDSD %XMM0,%XMM22,%XMM24 |
0x44e4bc VMULSD %XMM14,%XMM19,%XMM0 |
0x44e4c2 VADDSD %XMM23,%XMM25,%XMM26 |
0x44e4c8 VMULSD 0x10(%RSI,%RDX,8),%XMM26,%XMM27 |
0x44e4d0 VFMADD231SD %XMM2,%XMM13,%XMM0 |
0x44e4d5 VFMADD132SD %XMM5,%XMM28,%XMM2 |
0x44e4db VADDSD %XMM1,%XMM2,%XMM1 |
0x44e4df VADDSD %XMM20,%XMM0,%XMM0 |
0x44e4e5 VMULSD 0x18(%RSI,%RDX,8),%XMM1,%XMM2 |
0x44e4eb VFMADD132SD 0x8(%RSI,%RDX,8),%XMM27,%XMM0 |
0x44e4f3 VFMADD231SD (%RSI,%RDX,8),%XMM24,%XMM2 |
0x44e4fa VADDSD %XMM2,%XMM0,%XMM0 |
0x44e4fe CMP %R9,%R11 |
0x44e501 JE 44e6e5 |
0x44e507 NOPW (%RAX,%RAX,1) |
(645) 0x44e510 VMULSD (%R9),%XMM21,%XMM2 |
(645) 0x44e516 VMOVSD %XMM16,%XMM16,%XMM19 |
(645) 0x44e51c VMOVSD %XMM12,%XMM12,%XMM22 |
(645) 0x44e522 ADD $0x10,%R9 |
(645) 0x44e526 VMOVSD %XMM8,%XMM8,%XMM24 |
(645) 0x44e52c VRNDSCALESD $0xb,%XMM2,%XMM2,%XMM29 |
(645) 0x44e533 VCVTTSD2SI %XMM2,%R8D |
(645) 0x44e537 VSUBSD %XMM29,%XMM2,%XMM30 |
(645) 0x44e53d VMULSD %XMM30,%XMM30,%XMM31 |
(645) 0x44e543 VFMADD132SD %XMM30,%XMM15,%XMM19 |
(645) 0x44e549 VFMADD132SD %XMM30,%XMM11,%XMM22 |
(645) 0x44e54f VFMADD132SD %XMM30,%XMM7,%XMM24 |
(645) 0x44e555 MOVSXD %R8D,%R10 |
(645) 0x44e558 VMULSD %XMM30,%XMM31,%XMM1 |
(645) 0x44e55e VFMADD132SD %XMM4,%XMM3,%XMM30 |
(645) 0x44e564 VMULSD %XMM18,%XMM31,%XMM2 |
(645) 0x44e56a VMULSD %XMM10,%XMM31,%XMM20 |
(645) 0x44e570 VMULSD %XMM6,%XMM31,%XMM27 |
(645) 0x44e576 VFMADD231SD %XMM1,%XMM17,%XMM2 |
(645) 0x44e57c VFMADD231SD %XMM1,%XMM9,%XMM20 |
(645) 0x44e582 VADDSD %XMM2,%XMM19,%XMM25 |
(645) 0x44e588 VMULSD %XMM14,%XMM31,%XMM2 |
(645) 0x44e58e VMOVSD %XMM12,%XMM12,%XMM19 |
(645) 0x44e594 VADDSD %XMM24,%XMM20,%XMM23 |
(645) 0x44e59a VMULSD 0x10(%RSI,%R10,8),%XMM23,%XMM26 |
(645) 0x44e5a2 VFMADD231SD %XMM1,%XMM13,%XMM2 |
(645) 0x44e5a7 VFMADD132SD %XMM5,%XMM27,%XMM1 |
(645) 0x44e5ad VADDSD %XMM30,%XMM1,%XMM1 |
(645) 0x44e5b3 VADDSD %XMM22,%XMM2,%XMM2 |
(645) 0x44e5b9 VMOVSD %XMM16,%XMM16,%XMM30 |
(645) 0x44e5bf VMOVSD %XMM8,%XMM8,%XMM22 |
(645) 0x44e5c5 VMULSD 0x18(%RSI,%R10,8),%XMM1,%XMM1 |
(645) 0x44e5cc VFMADD132SD 0x8(%RSI,%R10,8),%XMM26,%XMM2 |
(645) 0x44e5d4 VFMADD231SD (%RSI,%R10,8),%XMM25,%XMM1 |
(645) 0x44e5db VADDSD %XMM1,%XMM2,%XMM2 |
(645) 0x44e5df VADDSD %XMM2,%XMM0,%XMM28 |
(645) 0x44e5e5 VMULSD -0x8(%R9),%XMM21,%XMM0 |
(645) 0x44e5ec VRNDSCALESD $0xb,%XMM0,%XMM0,%XMM1 |
(645) 0x44e5f3 VCVTTSD2SI %XMM0,%EAX |
(645) 0x44e5f7 VSUBSD %XMM1,%XMM0,%XMM2 |
(645) 0x44e5fb VMULSD %XMM2,%XMM2,%XMM29 |
(645) 0x44e601 VFMADD132SD %XMM2,%XMM15,%XMM30 |
(645) 0x44e607 VFMADD132SD %XMM2,%XMM11,%XMM19 |
(645) 0x44e60d VFMADD132SD %XMM2,%XMM7,%XMM22 |
(645) 0x44e613 MOVSXD %EAX,%RDI |
(645) 0x44e616 VMULSD %XMM2,%XMM29,%XMM1 |
(645) 0x44e61c VFMADD132SD %XMM4,%XMM3,%XMM2 |
(645) 0x44e621 VMULSD %XMM18,%XMM29,%XMM0 |
(645) 0x44e627 VMULSD %XMM10,%XMM29,%XMM25 |
(645) 0x44e62d VMULSD %XMM6,%XMM29,%XMM23 |
(645) 0x44e633 VFMADD231SD %XMM1,%XMM17,%XMM0 |
(645) 0x44e639 VFMADD231SD %XMM1,%XMM9,%XMM25 |
(645) 0x44e63f VADDSD %XMM0,%XMM30,%XMM31 |
(645) 0x44e645 VMULSD %XMM14,%XMM29,%XMM0 |
(645) 0x44e64b VADDSD %XMM22,%XMM25,%XMM20 |
(645) 0x44e651 VMULSD 0x10(%RSI,%RDI,8),%XMM20,%XMM24 |
(645) 0x44e659 VFMADD231SD %XMM1,%XMM13,%XMM0 |
(645) 0x44e65e VFMADD132SD %XMM5,%XMM23,%XMM1 |
(645) 0x44e664 VADDSD %XMM2,%XMM1,%XMM2 |
(645) 0x44e668 VADDSD %XMM19,%XMM0,%XMM0 |
(645) 0x44e66e VMULSD 0x18(%RSI,%RDI,8),%XMM2,%XMM1 |
(645) 0x44e674 VFMADD132SD 0x8(%RSI,%RDI,8),%XMM24,%XMM0 |
(645) 0x44e67c VFMADD231SD (%RSI,%RDI,8),%XMM31,%XMM1 |
(645) 0x44e683 VADDSD %XMM1,%XMM0,%XMM0 |
(645) 0x44e687 VADDSD %XMM0,%XMM28,%XMM0 |
(645) 0x44e68d CMP %R9,%R11 |
(645) 0x44e690 JNE 44e510 |
0x44e696 RET |
0x44e697 NOPW (%RAX,%RAX,1) |
0x44e6a0 CMP %EDX,%ESI |
0x44e6a2 JE 44e260 |
0x44e6a8 MOVSXD %ECX,%R8 |
0x44e6ab INC %ECX |
0x44e6ad VMOVSD %XMM5,(%R9,%R8,8) |
0x44e6b3 JMP 44e260 |
0x44e6b8 NOPL (%RAX,%RAX,1) |
0x44e6c0 CMP %EDX,%ESI |
0x44e6c2 JE 44e22d |
0x44e6c8 MOVSXD %ECX,%R8 |
0x44e6cb INC %ECX |
0x44e6cd VMOVSD %XMM3,(%R9,%R8,8) |
0x44e6d3 JMP 44e22d |
0x44e6d8 NOPL (%RAX,%RAX,1) |
0x44e6e0 VXORPD %XMM0,%XMM0,%XMM0 |
0x44e6e4 RET |
0x44e6e5 RET |
0x44e6e6 NOPW %CS:(%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►64.29+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:219 | exec |
○ | main._omp_fn.1 | stl_vector.h:1126 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
►21.43+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:219 | exec |
○ | main._omp_fn.1 | miniqmc.cpp:486 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
►14.29+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:219 | exec |
○ | main._omp_fn.1 | stl_vector.h:1123 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | BsplineFunctor.h:223-263 |
Module | exec |
nb instructions | 166 |
nb uops | 168 |
loop length | 729 |
used x86 registers | 9 |
used mmx registers | 0 |
used xmm registers | 29 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
ADD-SUB / MUL ratio | 0.67 |
micro-operation queue | 43.00 cycles |
front end | 43.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 30.75 | 30.75 | 15.50 | 15.50 | 7.00 | 30.75 | 30.75 | 10.00 |
cycles | 30.75 | 30.75 | 15.50 | 15.50 | 7.00 | 30.75 | 30.75 | 10.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 47.51 |
Stall cycles | 3.96 |
ROB full (events) | 2.52-2.53 |
RS full (events) | 4.23 |
Front-end | 43.00 |
Dispatch | 30.75 |
Overall L1 | 43.00 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 2% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
all | 2% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 6% |
all | 6% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 6% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
all | 11% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
MOVSXD %EDX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %ECX,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RAX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP %EAX,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 44e6e0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA (%R8,%RAX,8),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSD 0x8(%RDI),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R10D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %R10D,%R8D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $0x7,%R8D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e2ad | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x1,%R8D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e285 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x2,%R8D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e266 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x3,%R8D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e252 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x4,%R8D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e233 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x5,%R8D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e21f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x6,%R8D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e200 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVSD (%RAX),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VCOMISD %XMM1,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JBE 44e1f8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP %R10D,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e1f8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVSD %XMM1,(%R9) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV $0x1,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x1(%R10),%EDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD (%RAX),%XMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VCOMISD %XMM2,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JBE 44e219 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP %EDX,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e219 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD %ECX,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD %XMM2,(%R9,%R10,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%RAX),%XMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VCOMISD %XMM3,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JA 44e6c0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%RAX),%XMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VCOMISD %XMM4,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JBE 44e24c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP %EDX,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e24c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD %ECX,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD %XMM4,(%R9,%R10,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%RAX),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VCOMISD %XMM5,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JA 44e6a0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%RAX),%XMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VCOMISD %XMM6,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JBE 44e27f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP %EDX,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e27f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD %ECX,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD %XMM6,(%R9,%R10,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%RAX),%XMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VCOMISD %XMM7,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JBE 44e29e | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP %EDX,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e29e | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD %ECX,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD %XMM7,(%R9,%R8,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %R11D,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e3be | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
TEST %ECX,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 44e6e0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVSD 0x238(%RDI),%XMM21 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x218(%RDI),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSD 0x18(%RDI),%XMM17 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x20(%RDI),%XMM18 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x28(%RDI),%XMM16 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x30(%RDI),%XMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x38(%RDI),%XMM13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x40(%RDI),%XMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x48(%RDI),%XMM12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x50(%RDI),%XMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x58(%RDI),%XMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x60(%RDI),%XMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x68(%RDI),%XMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x70(%RDI),%XMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x78(%RDI),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x80(%RDI),%XMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x88(%RDI),%XMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x90(%RDI),%XMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOVSXD %ECX,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%R9,%RDI,8),%R11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $0x1,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e510 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMULSD (%R9),%XMM21,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM16,%XMM16,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVSD %XMM12,%XMM12,%XMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
ADD $0x8,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD %XMM8,%XMM8,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VRNDSCALESD $0xb,%XMM0,%XMM0,%XMM1 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VCVTTSD2SI %XMM0,%ECX | 2 | 1.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | 1 |
VSUBSD %XMM1,%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM1,%XMM1,%XMM19 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM1,%XMM15,%XMM22 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM1,%XMM11,%XMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM1,%XMM7,%XMM23 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD %ECX,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMULSD %XMM1,%XMM19,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM4,%XMM3,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM18,%XMM19,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM10,%XMM19,%XMM25 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM6,%XMM19,%XMM28 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM2,%XMM17,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM2,%XMM9,%XMM25 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM0,%XMM22,%XMM24 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM14,%XMM19,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM23,%XMM25,%XMM26 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD 0x10(%RSI,%RDX,8),%XMM26,%XMM27 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM2,%XMM13,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM5,%XMM28,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM1,%XMM2,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM20,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD 0x18(%RSI,%RDX,8),%XMM1,%XMM2 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD 0x8(%RSI,%RDX,8),%XMM27,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD (%RSI,%RDX,8),%XMM24,%XMM2 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM2,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R9,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e6e5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP %EDX,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e260 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD %ECX,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD %XMM5,(%R9,%R8,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JMP 44e260 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP %EDX,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e22d | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD %ECX,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD %XMM3,(%R9,%R8,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JMP 44e22d | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | BsplineFunctor.h:223-263 |
Module | exec |
nb instructions | 166 |
nb uops | 168 |
loop length | 729 |
used x86 registers | 9 |
used mmx registers | 0 |
used xmm registers | 29 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
ADD-SUB / MUL ratio | 0.67 |
micro-operation queue | 43.00 cycles |
front end | 43.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 30.75 | 30.75 | 15.50 | 15.50 | 7.00 | 30.75 | 30.75 | 10.00 |
cycles | 30.75 | 30.75 | 15.50 | 15.50 | 7.00 | 30.75 | 30.75 | 10.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 47.51 |
Stall cycles | 3.96 |
ROB full (events) | 2.52-2.53 |
RS full (events) | 4.23 |
Front-end | 43.00 |
Dispatch | 30.75 |
Overall L1 | 43.00 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 2% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
all | 2% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 6% |
all | 6% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 6% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
all | 11% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
MOVSXD %EDX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %ECX,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RAX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP %EAX,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 44e6e0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA (%R8,%RAX,8),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSD 0x8(%RDI),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R10D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %R10D,%R8D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $0x7,%R8D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e2ad | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x1,%R8D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e285 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x2,%R8D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e266 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x3,%R8D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e252 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x4,%R8D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e233 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x5,%R8D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e21f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x6,%R8D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e200 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVSD (%RAX),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VCOMISD %XMM1,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JBE 44e1f8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP %R10D,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e1f8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVSD %XMM1,(%R9) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV $0x1,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x1(%R10),%EDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD (%RAX),%XMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VCOMISD %XMM2,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JBE 44e219 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP %EDX,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e219 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD %ECX,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD %XMM2,(%R9,%R10,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%RAX),%XMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VCOMISD %XMM3,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JA 44e6c0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%RAX),%XMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VCOMISD %XMM4,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JBE 44e24c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP %EDX,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e24c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD %ECX,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD %XMM4,(%R9,%R10,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%RAX),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VCOMISD %XMM5,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JA 44e6a0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%RAX),%XMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VCOMISD %XMM6,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JBE 44e27f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP %EDX,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e27f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD %ECX,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD %XMM6,(%R9,%R10,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%RAX),%XMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VCOMISD %XMM7,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JBE 44e29e | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP %EDX,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e29e | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD %ECX,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD %XMM7,(%R9,%R8,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %R11D,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e3be | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
TEST %ECX,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 44e6e0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVSD 0x238(%RDI),%XMM21 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x218(%RDI),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSD 0x18(%RDI),%XMM17 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x20(%RDI),%XMM18 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x28(%RDI),%XMM16 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x30(%RDI),%XMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x38(%RDI),%XMM13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x40(%RDI),%XMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x48(%RDI),%XMM12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x50(%RDI),%XMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x58(%RDI),%XMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x60(%RDI),%XMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x68(%RDI),%XMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x70(%RDI),%XMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x78(%RDI),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x80(%RDI),%XMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x88(%RDI),%XMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x90(%RDI),%XMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOVSXD %ECX,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%R9,%RDI,8),%R11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $0x1,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e510 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMULSD (%R9),%XMM21,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM16,%XMM16,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVSD %XMM12,%XMM12,%XMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
ADD $0x8,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD %XMM8,%XMM8,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VRNDSCALESD $0xb,%XMM0,%XMM0,%XMM1 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VCVTTSD2SI %XMM0,%ECX | 2 | 1.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | 1 |
VSUBSD %XMM1,%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM1,%XMM1,%XMM19 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM1,%XMM15,%XMM22 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM1,%XMM11,%XMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM1,%XMM7,%XMM23 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD %ECX,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMULSD %XMM1,%XMM19,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM4,%XMM3,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM18,%XMM19,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM10,%XMM19,%XMM25 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM6,%XMM19,%XMM28 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM2,%XMM17,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM2,%XMM9,%XMM25 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM0,%XMM22,%XMM24 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM14,%XMM19,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM23,%XMM25,%XMM26 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD 0x10(%RSI,%RDX,8),%XMM26,%XMM27 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM2,%XMM13,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM5,%XMM28,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM1,%XMM2,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM20,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD 0x18(%RSI,%RDX,8),%XMM1,%XMM2 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD 0x8(%RSI,%RDX,8),%XMM27,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD (%RSI,%RDX,8),%XMM24,%XMM2 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM2,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R9,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e6e5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP %EDX,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e260 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD %ECX,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD %XMM5,(%R9,%R8,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JMP 44e260 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP %EDX,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 44e22d | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD %ECX,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD %XMM3,(%R9,%R8,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JMP 44e22d | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼qmcplusplus::BsplineFunctor | 0.07 | 0.07 |
○Loop 644 - BsplineFunctor.h:236-241 - exec | 0.06 | 0.05 |
○Loop 645 - BsplineFunctor.h:246-260 - exec | 0.02 | 0.01 |