Function: qmcplusplus::BsplineFunctor<double>::evaluateVGL(int, int, int, double const*, double*, do ... | Module: exec | Source: BsplineFunctor.h:275-339 [...] | Coverage: 0.75% |
---|
Function: qmcplusplus::BsplineFunctor<double>::evaluateVGL(int, int, int, double const*, double*, do ... | Module: exec | Source: BsplineFunctor.h:275-339 [...] | Coverage: 0.75% |
---|
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 275 - 339 |
-------------------------------------------------------------------------------- |
275: { |
276: real_type dSquareDeltaRinv = DeltaRInv * DeltaRInv; |
[...] |
284: int iLimit = iEnd - iStart; |
285: const real_type* distArray = _distArray + iStart; |
[...] |
291: for (int jat = 0; jat < iLimit; jat++) |
292: { |
293: real_type r = distArray[jat]; |
294: if (r < cutoff_radius && iStart + jat != iat) |
295: { |
296: distIndices[iCount] = jat; |
297: distArrayCompressed[iCount] = r; |
298: iCount++; |
299: } |
300: } |
301: |
302: #pragma omp simd |
303: for (int j = 0; j < iCount; j++) |
304: { |
305: real_type r = distArrayCompressed[j]; |
306: int iScatter = distIndices[j]; |
307: real_type rinv = cOne / r; |
308: r *= DeltaRInv; |
309: int iGather = (int)r; |
310: real_type t = r - real_type(iGather); |
311: real_type tp0 = t * t * t; |
312: real_type tp1 = t * t; |
313: real_type tp2 = t; |
314: |
315: real_type sCoef0 = SplineCoefs[iGather + 0]; |
316: real_type sCoef1 = SplineCoefs[iGather + 1]; |
317: real_type sCoef2 = SplineCoefs[iGather + 2]; |
318: real_type sCoef3 = SplineCoefs[iGather + 3]; |
319: |
320: // clang-format off |
321: laplArray[iScatter] = dSquareDeltaRinv * |
322: (sCoef0*( d2A[ 2]*tp2 + d2A[ 3])+ |
323: sCoef1*( d2A[ 6]*tp2 + d2A[ 7])+ |
324: sCoef2*( d2A[10]*tp2 + d2A[11])+ |
325: sCoef3*( d2A[14]*tp2 + d2A[15])); |
326: |
327: gradArray[iScatter] = DeltaRInv * rinv * |
328: (sCoef0*( dA[ 1]*tp1 + dA[ 2]*tp2 + dA[ 3])+ |
329: sCoef1*( dA[ 5]*tp1 + dA[ 6]*tp2 + dA[ 7])+ |
330: sCoef2*( dA[ 9]*tp1 + dA[10]*tp2 + dA[11])+ |
331: sCoef3*( dA[13]*tp1 + dA[14]*tp2 + dA[15])); |
332: |
333: valArray[iScatter] = (sCoef0*(A[ 0]*tp0 + A[ 1]*tp1 + A[ 2]*tp2 + A[ 3])+ |
334: sCoef1*(A[ 4]*tp0 + A[ 5]*tp1 + A[ 6]*tp2 + A[ 7])+ |
335: sCoef2*(A[ 8]*tp0 + A[ 9]*tp1 + A[10]*tp2 + A[11])+ |
336: sCoef3*(A[12]*tp0 + A[13]*tp1 + A[14]*tp2 + A[15])); |
337: // clang-format on |
338: } |
339: } |
/usr/lib64/gcc/x86_64-pc-linux-gnu/13.1.1/../../../../include/c++/13.1.1/bits/stl_vector.h: 1145 - 1145 |
-------------------------------------------------------------------------------- |
1145: return *(this->_M_impl._M_start + __n); |
0x41d9b0 PUSH %RBP |
0x41d9b1 MOV %RSP,%RBP |
0x41d9b4 PUSH %R15 |
0x41d9b6 PUSH %R14 |
0x41d9b8 PUSH %R13 |
0x41d9ba PUSH %R12 |
0x41d9bc PUSH %RBX |
0x41d9bd AND $-0x20,%RSP |
0x41d9c1 SUB $0x4c0,%RSP |
0x41d9c8 SUB %EDX,%ECX |
0x41d9ca TEST %ECX,%ECX |
0x41d9cc JLE 41e76a |
0x41d9d2 MOV 0x28(%RBP),%R12 |
0x41d9d6 MOV 0x20(%RBP),%R14 |
0x41d9da VMOVSD 0x238(%RDI),%XMM23 |
0x41d9e1 MOVSXD %EDX,%R11 |
0x41d9e4 VMOVSD 0x8(%RDI),%XMM17 |
0x41d9eb MOV %ECX,%R15D |
0x41d9ee MOV $-0x10,%R10D |
0x41d9f4 AND %R15,%R10 |
0x41d9f7 JE 41e2ca |
0x41d9fd VPBROADCASTD %ESI,%YMM2 |
0x41da03 VBROADCASTSD %XMM17,%YMM3 |
0x41da09 VMOVQ %R12,%XMM4 |
0x41da0e VMOVQ %R14,%XMM5 |
0x41da13 MOV %EDX,%R12D |
0x41da16 MOV %R11,0x40(%RSP) |
0x41da1b MOV %R8,0x20(%RSP) |
0x41da20 LEA (%R8,%R11,8),%R13 |
0x41da24 VPXOR %XMM6,%XMM6,%XMM6 |
0x41da28 XOR %EBX,%EBX |
0x41da2a VMOVDQU 0x7a84e(%RIP),%YMM7 |
0x41da32 VMOVDQU 0x7a866(%RIP),%YMM8 |
0x41da3a VPXOR %XMM11,%XMM11,%XMM11 |
0x41da3f VPXOR %XMM10,%XMM10,%XMM10 |
0x41da44 NOPW %CS:(%RAX,%RAX,1) |
(299) 0x41da50 VMOVUPD (%R13,%RBX,8),%YMM9 |
(299) 0x41da57 VMOVUPD 0x20(%R13,%RBX,8),%YMM12 |
(299) 0x41da5e VMOVUPD 0x40(%R13,%RBX,8),%YMM13 |
(299) 0x41da65 VMOVUPD 0x60(%R13,%RBX,8),%YMM14 |
(299) 0x41da6c LEA (%R12,%RBX,1),%EAX |
(299) 0x41da70 VPBROADCASTD %EAX,%YMM15 |
(299) 0x41da76 VPADDD %YMM7,%YMM15,%YMM16 |
(299) 0x41da7c VPADDD %YMM8,%YMM15,%YMM15 |
(299) 0x41da81 VPCMPNEQD %YMM2,%YMM15,%K5 |
(299) 0x41da88 VPCMPNEQD %YMM2,%YMM16,%K0 |
(299) 0x41da8f VCMPPD $0x1,%YMM3,%YMM13,%K7 |
(299) 0x41da96 VCMPPD $0x1,%YMM3,%YMM14,%K6 |
(299) 0x41da9d KSHIFTLB $0x4,%K6,%K1 |
(299) 0x41daa3 KORB %K1,%K7,%K4 |
(299) 0x41daa7 VCMPPD $0x1,%YMM3,%YMM9,%K3 |
(299) 0x41daae VCMPPD $0x1,%YMM3,%YMM12,%K1 |
(299) 0x41dab5 KSHIFTLB $0x4,%K1,%K2 |
(299) 0x41dabb KORB %K2,%K3,%K2 |
(299) 0x41dabf VPMOVSXDQ %XMM10,%XMM0 |
(299) 0x41dac4 VPSLLQ $0x2,%XMM0,%XMM1 |
(299) 0x41dac9 VPADDQ %XMM1,%XMM4,%XMM1 |
(299) 0x41dacd VMOVQ %XMM1,%RAX |
(299) 0x41dad2 VPBROADCASTD %EBX,%YMM1 |
(299) 0x41dad8 VPADDD %YMM7,%YMM1,%YMM15 |
(299) 0x41dadc VPADDD %YMM1,%YMM8,%YMM1 |
(299) 0x41dae0 KANDB %K0,%K2,%K2 |
(299) 0x41dae4 KMOVB %K2,%R11D |
(299) 0x41dae8 XOR %ECX,%ECX |
(299) 0x41daea POPCNT %R11D,%ECX |
(299) 0x41daef KANDB %K5,%K4,%K4 |
(299) 0x41daf3 VPCOMPRESSD %YMM1,(%RAX,%RCX,4){%K4} |
(299) 0x41dafa VPCOMPRESSD %YMM15,(%RAX){%K2} |
(299) 0x41db00 VPSLLQ $0x3,%XMM0,%XMM0 |
(299) 0x41db05 VPADDQ %XMM0,%XMM5,%XMM0 |
(299) 0x41db09 VMOVQ %XMM0,%RAX |
(299) 0x41db0e KANDW %K5,%K7,%K7 |
(299) 0x41db12 VCOMPRESSPD %YMM13,(%RAX,%RCX,8){%K7} |
(299) 0x41db19 KMOVB %K7,0x1f(%RSP) |
(299) 0x41db1f KSHIFTRB $0x4,%K5,%K5 |
(299) 0x41db25 KANDW %K5,%K6,%K5 |
(299) 0x41db29 LEA (%RAX,%RCX,8),%R11 |
(299) 0x41db2d MOVZX 0x1f(%RSP),%ECX |
(299) 0x41db32 POPCNT %ECX,%ECX |
(299) 0x41db36 VCOMPRESSPD %YMM14,(%R11,%RCX,8){%K5} |
(299) 0x41db3d KANDW %K0,%K3,%K3 |
(299) 0x41db41 KSHIFTRB $0x4,%K0,%K0 |
(299) 0x41db47 KANDW %K0,%K1,%K1 |
(299) 0x41db4b KMOVB %K3,0x1e(%RSP) |
(299) 0x41db51 MOVZX 0x1e(%RSP),%ECX |
(299) 0x41db56 POPCNT %ECX,%ECX |
(299) 0x41db5a VCOMPRESSPD %YMM12,(%RAX,%RCX,8){%K1} |
(299) 0x41db61 VCOMPRESSPD %YMM9,(%RAX){%K3} |
(299) 0x41db67 VPMOVM2D %K2,%YMM0 |
(299) 0x41db6d VPMOVM2D %K4,%YMM1 |
(299) 0x41db73 VPSUBD %YMM0,%YMM10,%YMM0 |
(299) 0x41db77 VPSUBD %YMM1,%YMM6,%YMM1 |
(299) 0x41db7b VPADDD %YMM1,%YMM0,%YMM0 |
(299) 0x41db7f VEXTRACTI128 $0x1,%YMM0,%XMM1 |
(299) 0x41db85 VPADDD %YMM1,%YMM0,%YMM0 |
(299) 0x41db89 VPSHUFD $-0x12,%XMM0,%XMM1 |
(299) 0x41db8e VPADDD %YMM1,%YMM0,%YMM0 |
(299) 0x41db92 VPSHUFD $0x55,%XMM0,%XMM1 |
(299) 0x41db97 VPADDD %YMM1,%YMM0,%YMM9 |
(299) 0x41db9b VPBLENDW $0x3,%XMM9,%XMM11,%XMM10 |
(299) 0x41dba1 ADD $0x10,%RBX |
(299) 0x41dba5 CMP %R10,%RBX |
(299) 0x41dba8 JB 41da50 |
0x41dbae VMOVD %XMM9,%EBX |
0x41dbb2 CMP %R15,%R10 |
0x41dbb5 MOV 0x40(%RSP),%R11 |
0x41dbba MOV 0x20(%RSP),%R8 |
0x41dbbf MOV 0x28(%RBP),%R12 |
0x41dbc3 JNE 41e2cf |
0x41dbc9 TEST %EBX,%EBX |
0x41dbcb JLE 41e76a |
0x41dbd1 MOV 0x18(%RBP),%RDX |
0x41dbd5 MOV 0x10(%RBP),%RSI |
0x41dbd9 MOV 0x218(%RDI),%RAX |
0x41dbe0 VMOVSD 0x128(%RDI),%XMM25 |
0x41dbe7 VMOVSD 0x130(%RDI),%XMM9 |
0x41dbef VMOVSD 0x148(%RDI),%XMM13 |
0x41dbf7 VMOVSD 0x150(%RDI),%XMM22 |
0x41dbfe VMOVSD 0x168(%RDI),%XMM15 |
0x41dc06 VMOVSD 0x170(%RDI),%XMM10 |
0x41dc0e VMOVSD 0x188(%RDI),%XMM31 |
0x41dc15 VMOVSD 0x190(%RDI),%XMM17 |
0x41dc1c VMOVSD 0xa0(%RDI),%XMM27 |
0x41dc23 VMOVSD 0xa8(%RDI),%XMM0 |
0x41dc2b VMOVUPS %XMM0,0x40(%RSP) |
0x41dc31 VMOVSD 0xb0(%RDI),%XMM0 |
0x41dc39 VMOVUPS %XMM0,0x20(%RSP) |
0x41dc3f VMOVSD 0xc0(%RDI),%XMM0 |
0x41dc47 VMOVUPS %XMM0,0xe0(%RSP) |
0x41dc50 VMOVSD 0xc8(%RDI),%XMM0 |
0x41dc58 VMOVUPS %XMM0,0x100(%RSP) |
0x41dc61 VMOVSD 0xd0(%RDI),%XMM0 |
0x41dc69 VMOVUPS %XMM0,0xa0(%RSP) |
0x41dc72 VMOVSD 0xe0(%RDI),%XMM0 |
0x41dc7a VMOVUPS %XMM0,0xc0(%RSP) |
0x41dc83 VMOVSD 0xe8(%RDI),%XMM12 |
0x41dc8b VMOVSD 0xf0(%RDI),%XMM18 |
0x41dc92 VMOVSD 0x100(%RDI),%XMM30 |
0x41dc99 VMOVSD 0x108(%RDI),%XMM26 |
0x41dca0 VMOVSD 0x110(%RDI),%XMM19 |
0x41dca7 VMOVSD 0x18(%RDI),%XMM20 |
0x41dcae VMOVSD 0x20(%RDI),%XMM5 |
0x41dcb3 VMOVSD 0x28(%RDI),%XMM2 |
0x41dcb8 VMOVSD 0x30(%RDI),%XMM3 |
0x41dcbd VMOVSD 0x38(%RDI),%XMM6 |
0x41dcc2 VMOVSD 0x40(%RDI),%XMM1 |
0x41dcc7 VMOVSD 0x48(%RDI),%XMM8 |
0x41dccc VMOVSD 0x50(%RDI),%XMM28 |
0x41dcd3 VMOVSD 0x58(%RDI),%XMM11 |
0x41dcd8 VMOVSD 0x60(%RDI),%XMM7 |
0x41dcdd VMOVSD 0x68(%RDI),%XMM0 |
0x41dce2 VMOVSD 0x70(%RDI),%XMM29 |
0x41dce9 MOV %EBX,%EBX |
0x41dceb MOV $-0x4,%ECX |
0x41dcf0 VMOVSD 0x78(%RDI),%XMM4 |
0x41dcf5 VMOVSD 0x80(%RDI),%XMM16 |
0x41dcfc VMOVSD 0x88(%RDI),%XMM24 |
0x41dd03 VMOVSD 0x90(%RDI),%XMM14 |
0x41dd0b VMULSD %XMM23,%XMM23,%XMM21 |
0x41dd11 AND %RBX,%RCX |
0x41dd14 VMOVUPS %XMM4,0x80(%RSP) |
0x41dd1d VMOVUPS %XMM0,0x60(%RSP) |
0x41dd23 JE 41e310 |
0x41dd29 VMOVAPD %XMM1,%XMM4 |
0x41dd2d VMOVAPD %XMM26,%XMM1 |
0x41dd33 VMOVAPD %XMM2,%XMM26 |
0x41dd39 VMOVAPD %XMM12,%XMM2 |
0x41dd3d VMOVAPD %XMM5,%XMM12 |
0x41dd41 VBROADCASTSD %XMM23,%YMM23 |
0x41dd47 VBROADCASTSD %XMM25,%YMM0 |
0x41dd4d VMOVUPD %YMM0,0x3c0(%RSP) |
0x41dd56 VBROADCASTSD %XMM9,%YMM0 |
0x41dd5b VMOVUPD %YMM0,0x1e0(%RSP) |
0x41dd64 VBROADCASTSD %XMM13,%YMM0 |
0x41dd69 VMOVUPS %YMM0,0x200(%RSP) |
0x41dd72 VBROADCASTSD %XMM22,%YMM0 |
0x41dd78 VMOVUPD %YMM0,0x1c0(%RSP) |
0x41dd81 VBROADCASTSD %XMM15,%YMM0 |
0x41dd86 VMOVUPD %YMM0,0x3a0(%RSP) |
0x41dd8f VBROADCASTSD %XMM10,%YMM0 |
0x41dd94 VMOVUPD %YMM0,0x1a0(%RSP) |
0x41dd9d VBROADCASTSD %XMM31,%YMM0 |
0x41dda3 VMOVUPD %YMM0,0x3e0(%RSP) |
0x41ddac VBROADCASTSD %XMM17,%YMM0 |
0x41ddb2 VMOVUPD %YMM0,0x140(%RSP) |
0x41ddbb VBROADCASTSD %XMM21,%YMM0 |
0x41ddc1 VMOVUPD %YMM0,0x160(%RSP) |
0x41ddca VBROADCASTSD %XMM27,%YMM0 |
0x41ddd0 VMOVUPD %YMM0,0x180(%RSP) |
0x41ddd9 VBROADCASTSD 0x40(%RSP),%YMM0 |
0x41dde0 VMOVUPS %YMM0,0x120(%RSP) |
0x41dde9 VBROADCASTSD 0x20(%RSP),%YMM0 |
0x41ddf0 VMOVUPS %YMM0,0x380(%RSP) |
0x41ddf9 VBROADCASTSD 0xe0(%RSP),%YMM0 |
0x41de03 VMOVUPS %YMM0,0xe0(%RSP) |
0x41de0c VBROADCASTSD 0x100(%RSP),%YMM0 |
0x41de16 VMOVUPS %YMM0,0x100(%RSP) |
0x41de1f VBROADCASTSD 0xa0(%RSP),%YMM0 |
0x41de29 VMOVUPS %YMM0,0x20(%RSP) |
0x41de2f VBROADCASTSD 0xc0(%RSP),%YMM0 |
0x41de39 VMOVUPS %YMM0,0x40(%RSP) |
0x41de3f VBROADCASTSD %XMM2,%YMM5 |
0x41de44 VBROADCASTSD %XMM18,%YMM13 |
0x41de4a VBROADCASTSD %XMM30,%YMM15 |
0x41de50 VBROADCASTSD %XMM1,%YMM1 |
0x41de55 VBROADCASTSD %XMM19,%YMM10 |
0x41de5b VBROADCASTSD %XMM20,%YMM27 |
0x41de61 VBROADCASTSD %XMM12,%YMM9 |
0x41de66 VBROADCASTSD %XMM26,%YMM31 |
0x41de6c VBROADCASTSD %XMM3,%YMM30 |
0x41de72 VBROADCASTSD %XMM6,%YMM12 |
0x41de77 VBROADCASTSD %XMM4,%YMM3 |
0x41de7c VBROADCASTSD %XMM8,%YMM4 |
0x41de81 VMOVAPD %XMM11,%XMM0 |
0x41de85 VBROADCASTSD %XMM28,%YMM11 |
0x41de8b VBROADCASTSD %XMM0,%YMM8 |
0x41de90 VBROADCASTSD %XMM7,%YMM2 |
0x41de95 VBROADCASTSD 0x60(%RSP),%YMM28 |
0x41de9d VBROADCASTSD %XMM29,%YMM0 |
0x41dea3 VBROADCASTSD 0x80(%RSP),%YMM29 |
0x41deab VBROADCASTSD %XMM16,%YMM18 |
0x41deb1 VBROADCASTSD %XMM24,%YMM16 |
0x41deb7 VBROADCASTSD %XMM14,%YMM14 |
0x41debc XOR %EDI,%EDI |
0x41debe VPBROADCASTQ %R11,%YMM7 |
0x41dec4 VPBROADCASTQ %RAX,%YMM6 |
0x41deca VMOVDQU %YMM6,0x400(%RSP) |
0x41ded3 VPBROADCASTQ %RDX,%YMM6 |
0x41ded9 VMOVDQU %YMM6,0x420(%RSP) |
0x41dee2 VPBROADCASTQ %RSI,%YMM6 |
0x41dee8 VMOVDQU %YMM6,0x460(%RSP) |
0x41def1 VPBROADCASTQ %R9,%YMM6 |
0x41def7 VMOVDQU %YMM6,0xa0(%RSP) |
(297) 0x41df00 VMOVUPD (%R14,%RDI,8),%YMM19 |
(297) 0x41df07 VMULPD %YMM23,%YMM19,%YMM6 |
(297) 0x41df0d VCVTTPD2DQ %YMM6,%XMM21 |
(297) 0x41df13 KXNORW %K0,%K0,%K1 |
(297) 0x41df17 VXORPD %XMM24,%XMM24,%XMM24 |
(297) 0x41df1d KXNORW %K0,%K0,%K2 |
(297) 0x41df21 VGATHERDPD (%RAX,%XMM21,8),%YMM24{%K1} |
(297) 0x41df28 VXORPD %XMM22,%XMM22,%XMM22 |
(297) 0x41df2e VGATHERDPD 0x8(%RAX,%XMM21,8),%YMM22{%K2} |
(297) 0x41df36 VRNDSCALEPD $0xb,%YMM6,%YMM20 |
(297) 0x41df3d VSUBPD %YMM20,%YMM6,%YMM6 |
(297) 0x41df43 VMOVAPD %YMM6,%YMM26 |
(297) 0x41df49 VMOVAPD %YMM6,%YMM25 |
(297) 0x41df4f VMOVUPD 0x120(%RSP),%YMM17 |
(297) 0x41df57 VFMADD132PD 0x180(%RSP),%YMM17,%YMM25 |
(297) 0x41df5f VFMADD213PD 0x380(%RSP),%YMM6,%YMM25 |
(297) 0x41df67 VMOVAPD %YMM6,%YMM20 |
(297) 0x41df6d VMOVUPD 0x3c0(%RSP),%YMM17 |
(297) 0x41df75 VFMADD213PD 0x1e0(%RSP),%YMM17,%YMM20 |
(297) 0x41df7d VMULPD %YMM24,%YMM20,%YMM20 |
(297) 0x41df83 VMOVUPD 0x200(%RSP),%YMM17 |
(297) 0x41df8b VFMADD213PD 0x1c0(%RSP),%YMM17,%YMM26 |
(297) 0x41df93 VFMADD213PD %YMM20,%YMM22,%YMM26 |
(297) 0x41df99 VMOVAPD %YMM6,%YMM20 |
(297) 0x41df9f VFMADD213PD %YMM9,%YMM27,%YMM20 |
(297) 0x41dfa5 VFMADD213PD %YMM31,%YMM6,%YMM20 |
(297) 0x41dfab VFMADD213PD %YMM30,%YMM6,%YMM20 |
(297) 0x41dfb1 VMULPD %YMM24,%YMM25,%YMM25 |
(297) 0x41dfb7 VMULPD %YMM24,%YMM20,%YMM20 |
(297) 0x41dfbd VMOVAPD %YMM6,%YMM24 |
(297) 0x41dfc3 VMOVUPD 0xe0(%RSP),%YMM17 |
(297) 0x41dfcb VFMADD213PD 0x100(%RSP),%YMM17,%YMM24 |
(297) 0x41dfd3 VFMADD213PD 0x20(%RSP),%YMM6,%YMM24 |
(297) 0x41dfdb VFMADD213PD %YMM25,%YMM22,%YMM24 |
(297) 0x41dfe1 VMOVAPD %YMM6,%YMM25 |
(297) 0x41dfe7 VFMADD213PD %YMM3,%YMM12,%YMM25 |
(297) 0x41dfed VFMADD213PD %YMM4,%YMM6,%YMM25 |
(297) 0x41dff3 VFMADD213PD %YMM11,%YMM6,%YMM25 |
(297) 0x41dff9 KXNORW %K0,%K0,%K1 |
(297) 0x41dffd VFMADD213PD %YMM20,%YMM22,%YMM25 |
(297) 0x41e003 VXORPD %XMM20,%XMM20,%XMM20 |
(297) 0x41e009 VGATHERDPD 0x10(%RAX,%XMM21,8),%YMM20{%K1} |
(297) 0x41e011 KXNORW %K0,%K0,%K1 |
(297) 0x41e015 VXORPD %XMM22,%XMM22,%XMM22 |
(297) 0x41e01b VGATHERDPD 0x18(%RAX,%XMM21,8),%YMM22{%K1} |
(297) 0x41e023 VMOVAPD %YMM6,%YMM21 |
(297) 0x41e029 VMOVUPD 0x3a0(%RSP),%YMM17 |
(297) 0x41e031 VFMADD213PD 0x1a0(%RSP),%YMM17,%YMM21 |
(297) 0x41e039 VFMADD213PD %YMM26,%YMM20,%YMM21 |
(297) 0x41e03f VMOVAPD %YMM6,%YMM26 |
(297) 0x41e045 VFMADD132PD 0x40(%RSP),%YMM5,%YMM26 |
(297) 0x41e04d VFMADD213PD %YMM13,%YMM6,%YMM26 |
(297) 0x41e053 VFMADD213PD %YMM24,%YMM20,%YMM26 |
(297) 0x41e059 VMOVAPD %YMM6,%YMM24 |
(297) 0x41e05f VFMADD213PD %YMM2,%YMM8,%YMM24 |
(297) 0x41e065 VFMADD213PD %YMM28,%YMM6,%YMM24 |
(297) 0x41e06b VFMADD213PD %YMM0,%YMM6,%YMM24 |
(297) 0x41e071 VFMADD213PD %YMM25,%YMM20,%YMM24 |
(297) 0x41e077 VMOVAPD %YMM6,%YMM20 |
(297) 0x41e07d VMOVAPD %YMM6,%YMM25 |
(297) 0x41e083 VFMADD213PD %YMM1,%YMM15,%YMM20 |
(297) 0x41e089 VFMADD213PD %YMM18,%YMM29,%YMM25 |
(297) 0x41e08f VFMADD213PD %YMM10,%YMM6,%YMM20 |
(297) 0x41e095 VFMADD213PD %YMM16,%YMM6,%YMM25 |
(297) 0x41e09b VFMADD213PD %YMM14,%YMM6,%YMM25 |
(297) 0x41e0a1 VMOVUPD 0x3e0(%RSP),%YMM17 |
(297) 0x41e0a9 VFMADD213PD 0x140(%RSP),%YMM17,%YMM6 |
(297) 0x41e0b1 VFMADD213PD %YMM21,%YMM22,%YMM6 |
(297) 0x41e0b7 VFMADD213PD %YMM26,%YMM22,%YMM20 |
(297) 0x41e0bd VFMADD213PD %YMM24,%YMM22,%YMM25 |
(297) 0x41e0c3 VMULPD %YMM23,%YMM20,%YMM20 |
(297) 0x41e0c9 VDIVPD %YMM19,%YMM20,%YMM19 |
(297) 0x41e0cf VPMOVSXDQ (%R12,%RDI,4),%YMM20 |
(297) 0x41e0d6 VMULPD 0x160(%RSP),%YMM6,%YMM6 |
(297) 0x41e0df VPADDQ %YMM20,%YMM7,%YMM20 |
(297) 0x41e0e5 KXNORW %K0,%K0,%K1 |
(297) 0x41e0e9 VSCATTERQPD %YMM6,(%RDX,%YMM20,8){%K1} |
(297) 0x41e0f0 KXNORW %K0,%K0,%K1 |
(297) 0x41e0f4 VSCATTERQPD %YMM19,(%RSI,%YMM20,8){%K1} |
(297) 0x41e0fb KXNORW %K0,%K0,%K1 |
(297) 0x41e0ff VSCATTERQPD %YMM25,(%R9,%YMM20,8){%K1} |
(297) 0x41e106 ADD $0x4,%RDI |
(297) 0x41e10a CMP %RCX,%RDI |
(297) 0x41e10d JB 41df00 |
0x41e113 VMOVDQU %YMM7,0x440(%RSP) |
0x41e11c VMOVUPD %YMM13,0x480(%RSP) |
0x41e125 VMOVUPD %YMM10,0xc0(%RSP) |
0x41e12e VMOVUPD 0xe0(%RSP),%YMM26 |
0x41e136 VMOVAPD %YMM15,%YMM21 |
0x41e13c VMOVAPD %YMM4,%YMM24 |
0x41e142 VMOVAPD %YMM11,%YMM20 |
0x41e148 VMOVAPD %YMM28,%YMM19 |
0x41e14e VMOVAPD %YMM23,%YMM6 |
0x41e154 VMOVUPD 0x100(%RSP),%YMM7 |
0x41e15d VMOVUPD 0x140(%RSP),%YMM4 |
0x41e166 VMOVUPD 0x1c0(%RSP),%YMM28 |
0x41e16e VMOVUPD 0x1a0(%RSP),%YMM11 |
0x41e177 VMOVUPD 0x120(%RSP),%YMM10 |
0x41e180 VMOVUPD 0x1e0(%RSP),%YMM23 |
0x41e188 VMOVUPD 0x160(%RSP),%YMM15 |
0x41e191 VMOVUPD 0x180(%RSP),%YMM13 |
0x41e19a CMP %RBX,%RCX |
0x41e19d JE 41e76a |
0x41e1a3 VMOVAPD %YMM0,%YMM22 |
0x41e1a9 VPBROADCASTQ %RBX,%YMM0 |
0x41e1af VMOVUPD %YMM27,0x320(%RSP) |
0x41e1b7 VMOVUPD %YMM29,0x80(%RSP) |
0x41e1bf VMOVUPD %YMM8,0x340(%RSP) |
0x41e1c8 VMOVUPD %YMM12,0x300(%RSP) |
0x41e1d1 VMOVUPD %YMM30,0x2e0(%RSP) |
0x41e1d9 VMOVUPD %YMM31,0x2c0(%RSP) |
0x41e1e1 VMOVUPD %YMM14,0x60(%RSP) |
0x41e1e7 VMOVUPD %YMM16,0x360(%RSP) |
0x41e1ef VMOVAPD %YMM15,%YMM17 |
0x41e1f5 VMOVAPD %YMM23,%YMM8 |
0x41e1fb VMOVAPD %YMM10,%YMM23 |
0x41e201 VMOVAPD %YMM11,%YMM10 |
0x41e206 VMOVAPD %YMM28,%YMM11 |
0x41e20c VMOVAPD %YMM4,%YMM28 |
0x41e212 VMOVAPD %YMM5,%YMM4 |
0x41e216 VMOVAPD %YMM7,%YMM5 |
0x41e21a VMOVAPD %YMM9,%YMM7 |
0x41e21e VMOVAPD %YMM1,%YMM9 |
0x41e222 VMOVAPD %YMM2,%YMM1 |
0x41e226 VMOVAPD %YMM3,%YMM2 |
0x41e22a VMOVAPD %YMM18,%YMM3 |
0x41e230 VMOVUPD %YMM22,0x2a0(%RSP) |
0x41e238 VMOVAPD %YMM6,%YMM31 |
0x41e23e VMOVUPD %YMM19,0x280(%RSP) |
0x41e246 VMOVUPD %YMM20,0x260(%RSP) |
0x41e24e VMOVDQA64 %YMM0,%YMM20 |
0x41e254 VMOVUPD %YMM24,0x220(%RSP) |
0x41e25c VMOVUPD %YMM21,0x240(%RSP) |
0x41e264 VMOVAPD %YMM26,%YMM16 |
0x41e26a VMOVAPD %YMM13,%YMM26 |
0x41e270 VMOVUPD 0x380(%RSP),%YMM12 |
0x41e279 VMOVUPD 0x3e0(%RSP),%YMM27 |
0x41e281 VMOVUPD 0x3c0(%RSP),%YMM25 |
0x41e289 VMOVUPD 0x480(%RSP),%YMM0 |
0x41e292 VMOVUPD 0x3a0(%RSP),%YMM29 |
0x41e29a VMOVUPD 0x200(%RSP),%YMM15 |
0x41e2a3 VMOVDQU 0x460(%RSP),%YMM13 |
0x41e2ac VMOVDQU 0x440(%RSP),%YMM14 |
0x41e2b5 VMOVDQU64 0x420(%RSP),%YMM18 |
0x41e2bd VMOVDQU64 0x400(%RSP),%YMM21 |
0x41e2c5 JMP 41e56b |
0x41e2ca XOR %R10D,%R10D |
0x41e2cd XOR %EBX,%EBX |
0x41e2cf SUB %EDX,%ESI |
0x41e2d1 LEA (%R8,%R11,8),%RAX |
0x41e2d5 JMP 41e2ec |
0x41e2d7 NOPW (%RAX,%RAX,1) |
(298) 0x41e2e0 INC %R10 |
(298) 0x41e2e3 CMP %R10,%R15 |
(298) 0x41e2e6 JE 41dbc9 |
(298) 0x41e2ec VMOVSD (%RAX,%R10,8),%XMM0 |
(298) 0x41e2f2 VUCOMISD %XMM0,%XMM17 |
(298) 0x41e2f8 JBE 41e2e0 |
(298) 0x41e2fa CMP %R10D,%ESI |
(298) 0x41e2fd JE 41e2e0 |
(298) 0x41e2ff MOVSXD %EBX,%RBX |
(298) 0x41e302 MOV %R10D,(%R12,%RBX,4) |
(298) 0x41e306 VMOVSD %XMM0,(%R14,%RBX,8) |
(298) 0x41e30c INC %EBX |
(298) 0x41e30e JMP 41e2e0 |
0x41e310 VBROADCASTSD %XMM23,%YMM0 |
0x41e316 VMOVUPD %YMM0,0x1e0(%RSP) |
0x41e31f VMOVUPD %XMM28,0x1a0(%RSP) |
0x41e327 VMOVUPD %XMM2,0x140(%RSP) |
0x41e330 VMOVUPD %XMM5,0x120(%RSP) |
0x41e339 VBROADCASTSD %XMM25,%YMM25 |
0x41e33f VMOVUPD %XMM1,0x180(%RSP) |
0x41e348 VMOVUPD %XMM6,0x160(%RSP) |
0x41e351 VMOVAPD %XMM3,%XMM6 |
0x41e355 VBROADCASTSD %XMM9,%YMM9 |
0x41e35a VBROADCASTSD %XMM13,%YMM0 |
0x41e35f VMOVUPS %YMM0,0x200(%RSP) |
0x41e368 VMOVUPD %XMM24,0x1c0(%RSP) |
0x41e370 VMOVAPD %XMM11,%XMM13 |
0x41e375 VBROADCASTSD %XMM22,%YMM11 |
0x41e37b VBROADCASTSD %XMM15,%YMM22 |
0x41e381 VMOVAPD %XMM7,%XMM15 |
0x41e385 VBROADCASTSD %XMM10,%YMM10 |
0x41e38a VMOVAPD %XMM16,%XMM24 |
0x41e390 VBROADCASTSD %XMM31,%YMM2 |
0x41e396 VBROADCASTSD %XMM17,%YMM28 |
0x41e39c VBROADCASTSD %XMM21,%YMM17 |
0x41e3a2 VBROADCASTSD %XMM27,%YMM16 |
0x41e3a8 VMOVAPD %XMM8,%XMM0 |
0x41e3ac VBROADCASTSD 0x40(%RSP),%YMM23 |
0x41e3b4 VBROADCASTSD 0x20(%RSP),%YMM1 |
0x41e3bb VMOVAPD %XMM18,%XMM7 |
0x41e3c1 VBROADCASTSD 0xe0(%RSP),%YMM18 |
0x41e3c9 VBROADCASTSD 0x100(%RSP),%YMM5 |
0x41e3d3 VBROADCASTSD 0xa0(%RSP),%YMM3 |
0x41e3dd VMOVUPS %YMM3,0x20(%RSP) |
0x41e3e3 VBROADCASTSD 0xc0(%RSP),%YMM3 |
0x41e3ed VMOVUPS %YMM3,0x40(%RSP) |
0x41e3f3 VMOVAPD %XMM12,%XMM3 |
0x41e3f7 VMOVAPD %YMM1,%YMM12 |
0x41e3fb VBROADCASTSD %XMM3,%YMM4 |
0x41e400 VBROADCASTSD %XMM7,%YMM3 |
0x41e405 VBROADCASTSD %XMM30,%YMM1 |
0x41e40b VMOVUPD %YMM1,0x240(%RSP) |
0x41e414 VMOVAPD %YMM9,%YMM8 |
0x41e419 VBROADCASTSD %XMM26,%YMM9 |
0x41e41f VBROADCASTSD %XMM19,%YMM1 |
0x41e425 VMOVUPD %YMM1,0xc0(%RSP) |
0x41e42e VMOVAPD %YMM2,%YMM27 |
0x41e434 VBROADCASTSD %XMM20,%YMM1 |
0x41e43a VMOVUPD %YMM1,0x320(%RSP) |
0x41e443 VBROADCASTSD 0x120(%RSP),%YMM7 |
0x41e44d VBROADCASTSD 0x140(%RSP),%YMM1 |
0x41e457 VMOVAPD %YMM16,%YMM26 |
0x41e45d VMOVUPS %YMM1,0x2c0(%RSP) |
0x41e466 VBROADCASTSD %XMM6,%YMM1 |
0x41e46b VMOVUPD %YMM1,0x2e0(%RSP) |
0x41e474 VBROADCASTSD 0x160(%RSP),%YMM1 |
0x41e47e VMOVUPS %YMM1,0x300(%RSP) |
0x41e487 VBROADCASTSD 0x180(%RSP),%YMM2 |
0x41e491 VMOVAPD %YMM18,%YMM16 |
0x41e497 VBROADCASTSD %XMM0,%YMM0 |
0x41e49c VMOVUPD %YMM0,0x220(%RSP) |
0x41e4a5 VMOVAPD %YMM3,%YMM0 |
0x41e4a9 VBROADCASTSD 0x1a0(%RSP),%YMM1 |
0x41e4b3 VMOVUPS %YMM1,0x260(%RSP) |
0x41e4bc VBROADCASTSD %XMM13,%YMM1 |
0x41e4c1 VMOVUPD %YMM1,0x340(%RSP) |
0x41e4ca VBROADCASTSD %XMM15,%YMM1 |
0x41e4cf VBROADCASTSD 0x60(%RSP),%YMM3 |
0x41e4d6 VMOVUPD 0x1e0(%RSP),%YMM31 |
0x41e4de VMOVUPD 0x200(%RSP),%YMM15 |
0x41e4e7 VMOVUPS %YMM3,0x280(%RSP) |
0x41e4f0 VBROADCASTSD %XMM29,%YMM3 |
0x41e4f6 VMOVUPD %YMM3,0x2a0(%RSP) |
0x41e4ff VBROADCASTSD 0x80(%RSP),%YMM3 |
0x41e509 VMOVUPS %YMM3,0x80(%RSP) |
0x41e512 VBROADCASTSD %XMM24,%YMM3 |
0x41e518 VBROADCASTSD 0x1c0(%RSP),%YMM6 |
0x41e522 VMOVUPS %YMM6,0x360(%RSP) |
0x41e52b VBROADCASTSD %XMM14,%YMM6 |
0x41e530 VMOVAPD %YMM22,%YMM29 |
0x41e536 VMOVUPD %YMM6,0x60(%RSP) |
0x41e53c XOR %ECX,%ECX |
0x41e53e VPBROADCASTQ %RBX,%YMM20 |
0x41e544 VPBROADCASTQ %RAX,%YMM21 |
0x41e54a VPBROADCASTQ %R11,%YMM14 |
0x41e550 VPBROADCASTQ %RDX,%YMM18 |
0x41e556 VPBROADCASTQ %RSI,%YMM13 |
0x41e55c VPBROADCASTQ %R9,%YMM6 |
0x41e562 VMOVDQU %YMM6,0xa0(%RSP) |
0x41e56b VPBROADCASTQ %RCX,%YMM19 |
0x41e571 VPADDQ 0x79c85(%RIP),%YMM19,%YMM19 |
0x41e57b VPCMPLTUQ %YMM20,%YMM19,%K1 |
0x41e582 VMOVUPD (%R14,%RCX,8),%YMM30{%K1}{z} |
0x41e589 VMULPD %YMM31,%YMM30,%YMM20 |
0x41e58f VCVTTPD2DQ %YMM20,%XMM6 |
0x41e595 VPMOVSXDQ %XMM6,%YMM6 |
0x41e59a VPSLLQ $0x3,%YMM6,%YMM6 |
0x41e59f VPADDQ %YMM6,%YMM21,%YMM6 |
0x41e5a5 KMOVQ %K1,%K2 |
0x41e5aa VXORPD %XMM22,%XMM22,%XMM22 |
0x41e5b0 VGATHERQPD (,%YMM6,1),%YMM22{%K2} |
0x41e5bb KMOVQ %K1,%K2 |
0x41e5c0 VPXORD %XMM19,%XMM19,%XMM19 |
0x41e5c6 VGATHERQPD 0x8(,%YMM6,1),%YMM19{%K2} |
0x41e5d1 KMOVQ %K1,%K2 |
0x41e5d6 VPXORD %XMM21,%XMM21,%XMM21 |
0x41e5dc VGATHERQPD 0x10(,%YMM6,1),%YMM21{%K2} |
0x41e5e7 KMOVQ %K1,%K2 |
0x41e5ec VXORPD %XMM24,%XMM24,%XMM24 |
0x41e5f2 VGATHERQPD 0x18(,%YMM6,1),%YMM24{%K2} |
0x41e5fd VRNDSCALEPD $0xb,%YMM20,%YMM6 |
0x41e604 VSUBPD %YMM6,%YMM20,%YMM20 |
0x41e60a VFMADD231PD %YMM25,%YMM20,%YMM8 |
0x41e610 VFMADD231PD %YMM15,%YMM20,%YMM11 |
0x41e616 VMULPD %YMM22,%YMM8,%YMM6 |
0x41e61c VFMADD213PD %YMM6,%YMM19,%YMM11 |
0x41e622 VFMADD231PD %YMM29,%YMM20,%YMM10 |
0x41e628 VFMADD213PD %YMM11,%YMM21,%YMM10 |
0x41e62e VFMADD231PD %YMM27,%YMM20,%YMM28 |
0x41e634 VFMADD213PD %YMM10,%YMM24,%YMM28 |
0x41e63a VMULPD %YMM28,%YMM17,%YMM8 |
0x41e640 VMOVDQU32 (%R12,%RCX,4),%XMM6{%K1}{z} |
0x41e647 VPMOVSXDQ %XMM6,%YMM6 |
0x41e64c VPADDQ %YMM6,%YMM14,%YMM14{%K1} |
0x41e652 VPSLLQ $0x3,%YMM14,%YMM6 |
0x41e658 VPADDQ %YMM6,%YMM18,%YMM10 |
0x41e65e KMOVQ %K1,%K2 |
0x41e663 VSCATTERQPD %YMM8,(,%YMM10,1){%K2} |
0x41e66e VFMADD231PD %YMM26,%YMM20,%YMM23 |
0x41e674 VFMADD213PD %YMM12,%YMM20,%YMM23 |
0x41e67a VFMADD231PD %YMM16,%YMM20,%YMM5 |
0x41e680 VFMADD213PD 0x20(%RSP),%YMM20,%YMM5 |
0x41e688 VMULPD %YMM22,%YMM23,%YMM8 |
0x41e68e VFMADD213PD %YMM8,%YMM19,%YMM5 |
0x41e694 VFMADD231PD 0x40(%RSP),%YMM20,%YMM4 |
0x41e69c VFMADD213PD %YMM0,%YMM20,%YMM4 |
0x41e6a2 VFMADD213PD %YMM5,%YMM21,%YMM4 |
0x41e6a8 VFMADD231PD 0x240(%RSP),%YMM20,%YMM9 |
0x41e6b0 VFMADD213PD 0xc0(%RSP),%YMM20,%YMM9 |
0x41e6b8 VFMADD213PD %YMM4,%YMM24,%YMM9 |
0x41e6be VMULPD %YMM31,%YMM9,%YMM0 |
0x41e6c4 VDIVPD %YMM30,%YMM0,%YMM0 |
0x41e6ca VPADDQ %YMM6,%YMM13,%YMM4 |
0x41e6ce KMOVQ %K1,%K2 |
0x41e6d3 VSCATTERQPD %YMM0,(,%YMM4,1){%K2} |
0x41e6de VFMADD231PD 0x320(%RSP),%YMM20,%YMM7 |
0x41e6e6 VFMADD213PD 0x2c0(%RSP),%YMM20,%YMM7 |
0x41e6ee VFMADD213PD 0x2e0(%RSP),%YMM20,%YMM7 |
0x41e6f6 VMULPD %YMM22,%YMM7,%YMM0 |
0x41e6fc VFMADD231PD 0x300(%RSP),%YMM20,%YMM2 |
0x41e704 VFMADD213PD 0x220(%RSP),%YMM20,%YMM2 |
0x41e70c VFMADD213PD 0x260(%RSP),%YMM20,%YMM2 |
0x41e714 VFMADD213PD %YMM0,%YMM19,%YMM2 |
0x41e71a VFMADD231PD 0x340(%RSP),%YMM20,%YMM1 |
0x41e722 VFMADD213PD 0x280(%RSP),%YMM20,%YMM1 |
0x41e72a VFMADD213PD 0x2a0(%RSP),%YMM20,%YMM1 |
0x41e732 VFMADD213PD %YMM2,%YMM21,%YMM1 |
0x41e738 VFMADD231PD 0x80(%RSP),%YMM20,%YMM3 |
0x41e740 VFMADD213PD 0x360(%RSP),%YMM20,%YMM3 |
0x41e748 VFMADD213PD 0x60(%RSP),%YMM20,%YMM3 |
0x41e750 VFMADD213PD %YMM1,%YMM24,%YMM3 |
0x41e756 VPADDQ 0xa0(%RSP),%YMM6,%YMM0 |
0x41e75f VSCATTERQPD %YMM3,(,%YMM0,1){%K1} |
0x41e76a LEA -0x28(%RBP),%RSP |
0x41e76e POP %RBX |
0x41e76f POP %R12 |
0x41e771 POP %R13 |
0x41e773 POP %R14 |
0x41e775 POP %R15 |
0x41e777 POP %RBP |
0x41e778 VZEROUPPER |
0x41e77b RET |
0x41e77c NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►61.39+ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:271 | exec |
○ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:207 | exec |
○ | main.extracted.104 | refwrap.h:347 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | main | miniqmc.cpp:404 | exec |
○ | __libc_init_first | libc.so.6 | |
►26.73+ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:271 | exec |
○ | qmcplusplus::WaveFunction::acc[...] | NewTimer.h:249 | exec |
○ | main.extracted.104 | refwrap.h:347 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | main | miniqmc.cpp:404 | exec |
○ | __libc_init_first | libc.so.6 | |
►5.94+ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:271 | exec |
○ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:409 | exec |
○ | qmcplusplus::WaveFunction::eva[...] | WaveFunction.cpp:181 | exec |
○ | main.extracted.107 | miniqmc.cpp:375 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | main | miniqmc.cpp:374 | exec |
○ | __libc_init_first | libc.so.6 | |
►4.95+ | miniqmcreference::OneBodyJastr[...] | OneBodyJastrowRef.h:222 | exec |
○ | miniqmcreference::OneBodyJastr[...] | OneBodyJastrowRef.h:190 | exec |
○ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:207 | exec |
○ | main.extracted.104 | refwrap.h:347 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | main | miniqmc.cpp:404 | exec |
○ | __libc_init_first | libc.so.6 |
Path / |
Source file and lines | BsplineFunctor.h:275-339 |
Module | exec |
nb instructions | 404 |
nb uops | 474 |
loop length | 2607 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 32 |
used ymm registers | 32 |
used zmm registers | 0 |
nb stack references | 41 |
ADD-SUB / MUL ratio | 0.14 |
micro-operation queue | 118.50 cycles |
front end | 118.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 34.67 | 34.33 | 74.50 | 74.17 | 89.00 | 81.00 | 22.00 | 74.33 |
cycles | 34.67 | 34.33 | 74.50 | 74.17 | 89.00 | 81.00 | 22.00 | 74.33 |
Cycles executing div or sqrt instructions | 8.00 |
FE+BE cycles | 167.50-168.50 |
Stall cycles | 63.99-65.00 |
LB full (events) | 68.99-69.99 |
LM full (events) | 0.02 |
SB full (events) | 0.99 |
Front-end | 118.50 |
Dispatch | 89.00 |
DIV/SQRT | 8.00 |
Overall L1 | 118.50 |
all | 50% |
load | 100% |
store | 75% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 30% |
all | 60% |
load | 38% |
store | 100% |
mul | 85% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 39% |
all | 58% |
load | 43% |
store | 97% |
mul | 85% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 37% |
all | 26% |
load | 47% |
store | 40% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 17% |
all | 32% |
load | 27% |
store | 44% |
mul | 44% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 24% |
all | 31% |
load | 28% |
store | 44% |
mul | 44% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 22% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
AND $-0x20,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SUB $0x4c0,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SUB %EDX,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
TEST %ECX,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 41e76a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x28(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x20(%RBP),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x238(%RDI),%XMM23 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOVSXD %EDX,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD 0x8(%RDI),%XMM17 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %ECX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $-0x10,%R10D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND %R15,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 41e2ca | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VPBROADCASTD %ESI,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM17,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVQ %R12,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVQ %R14,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
MOV %EDX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R11,0x40(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R8,0x20(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA (%R8,%R11,8),%R13 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPXOR %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVDQU 0x7a84e(%RIP),%YMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVDQU 0x7a866(%RIP),%YMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VPXOR %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPXOR %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVD %XMM9,%EBX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP %R15,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x40(%RSP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x20(%RSP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x28(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JNE 41e2cf | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
TEST %EBX,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 41e76a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x18(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x10(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x218(%RDI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x128(%RDI),%XMM25 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x130(%RDI),%XMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x148(%RDI),%XMM13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x150(%RDI),%XMM22 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x168(%RDI),%XMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x170(%RDI),%XMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x188(%RDI),%XMM31 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x190(%RDI),%XMM17 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0xa0(%RDI),%XMM27 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0xa8(%RDI),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVUPS %XMM0,0x40(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD 0xb0(%RDI),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVUPS %XMM0,0x20(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD 0xc0(%RDI),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVUPS %XMM0,0xe0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD 0xc8(%RDI),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVUPS %XMM0,0x100(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD 0xd0(%RDI),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVUPS %XMM0,0xa0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD 0xe0(%RDI),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVUPS %XMM0,0xc0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD 0xe8(%RDI),%XMM12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0xf0(%RDI),%XMM18 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x100(%RDI),%XMM30 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x108(%RDI),%XMM26 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x110(%RDI),%XMM19 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x18(%RDI),%XMM20 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x20(%RDI),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x28(%RDI),%XMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x30(%RDI),%XMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x38(%RDI),%XMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x40(%RDI),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x48(%RDI),%XMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x50(%RDI),%XMM28 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x58(%RDI),%XMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x60(%RDI),%XMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x68(%RDI),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x70(%RDI),%XMM29 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $-0x4,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD 0x78(%RDI),%XMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x80(%RDI),%XMM16 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x88(%RDI),%XMM24 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x90(%RDI),%XMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMULSD %XMM23,%XMM23,%XMM21 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
AND %RBX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVUPS %XMM4,0x80(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPS %XMM0,0x60(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JE 41e310 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVAPD %XMM1,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %XMM26,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %XMM2,%XMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %XMM12,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %XMM5,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD %XMM23,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM25,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x3c0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM9,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x1e0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM13,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPS %YMM0,0x200(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM22,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x1c0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM15,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x3a0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM10,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x1a0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM31,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x3e0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM17,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x140(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM21,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x160(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM27,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x180(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD 0x40(%RSP),%YMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPS %YMM0,0x120(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD 0x20(%RSP),%YMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPS %YMM0,0x380(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD 0xe0(%RSP),%YMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPS %YMM0,0xe0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD 0x100(%RSP),%YMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPS %YMM0,0x100(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD 0xa0(%RSP),%YMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPS %YMM0,0x20(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD 0xc0(%RSP),%YMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPS %YMM0,0x40(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM18,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM30,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM1,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM19,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM20,%YMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM12,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM26,%YMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%YMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM6,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM4,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM8,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVAPD %XMM11,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD %XMM28,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM0,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM7,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD 0x60(%RSP),%YMM28 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSD %XMM29,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD 0x80(%RSP),%YMM29 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSD %XMM16,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM24,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM14,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPBROADCASTQ %R11,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RAX,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVDQU %YMM6,0x400(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VPBROADCASTQ %RDX,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVDQU %YMM6,0x420(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VPBROADCASTQ %RSI,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVDQU %YMM6,0x460(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VPBROADCASTQ %R9,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVDQU %YMM6,0xa0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VMOVDQU %YMM7,0x440(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VMOVUPD %YMM13,0x480(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %YMM10,0xc0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD 0xe0(%RSP),%YMM26 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %YMM15,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM4,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM11,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM28,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM23,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVUPD 0x100(%RSP),%YMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x140(%RSP),%YMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x1c0(%RSP),%YMM28 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x1a0(%RSP),%YMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x120(%RSP),%YMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x1e0(%RSP),%YMM23 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x160(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x180(%RSP),%YMM13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
CMP %RBX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 41e76a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVAPD %YMM0,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPBROADCASTQ %RBX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVUPD %YMM27,0x320(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %YMM29,0x80(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %YMM8,0x340(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %YMM12,0x300(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %YMM30,0x2e0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %YMM31,0x2c0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %YMM14,0x60(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %YMM16,0x360(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVAPD %YMM15,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM23,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM10,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM11,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM28,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM4,%YMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM5,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM7,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM9,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM1,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM2,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM3,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM18,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVUPD %YMM22,0x2a0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVAPD %YMM6,%YMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVUPD %YMM19,0x280(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %YMM20,0x260(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVDQA64 %YMM0,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVUPD %YMM24,0x220(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %YMM21,0x240(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVAPD %YMM26,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM13,%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVUPD 0x380(%RSP),%YMM12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x3e0(%RSP),%YMM27 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x3c0(%RSP),%YMM25 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x480(%RSP),%YMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x3a0(%RSP),%YMM29 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x200(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVDQU 0x460(%RSP),%YMM13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVDQU 0x440(%RSP),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVDQU64 0x420(%RSP),%YMM18 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVDQU64 0x400(%RSP),%YMM21 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
JMP 41e56b | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %EDX,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%R8,%R11,8),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 41e2ec | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD %XMM23,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x1e0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %XMM28,0x1a0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %XMM2,0x140(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %XMM5,0x120(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM25,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM1,0x180(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %XMM6,0x160(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVAPD %XMM3,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD %XMM9,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM13,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPS %YMM0,0x200(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %XMM24,0x1c0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVAPD %XMM11,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD %XMM22,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM15,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVAPD %XMM7,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD %XMM10,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVAPD %XMM16,%XMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD %XMM31,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM17,%YMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM21,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM27,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVAPD %XMM8,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD 0x40(%RSP),%YMM23 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSD 0x20(%RSP),%YMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVAPD %XMM18,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD 0xe0(%RSP),%YMM18 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSD 0x100(%RSP),%YMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSD 0xa0(%RSP),%YMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPS %YMM3,0x20(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD 0xc0(%RSP),%YMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPS %YMM3,0x40(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVAPD %XMM12,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM1,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD %XMM3,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM7,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM30,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM1,0x240(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVAPD %YMM9,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD %XMM26,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM19,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM1,0xc0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVAPD %YMM2,%YMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD %XMM20,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM1,0x320(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD 0x120(%RSP),%YMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSD 0x140(%RSP),%YMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVAPD %YMM16,%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVUPS %YMM1,0x2c0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM6,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM1,0x2e0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD 0x160(%RSP),%YMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPS %YMM1,0x300(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD 0x180(%RSP),%YMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVAPD %YMM18,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD %XMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x220(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVAPD %YMM3,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD 0x1a0(%RSP),%YMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPS %YMM1,0x260(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM13,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM1,0x340(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM15,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD 0x60(%RSP),%YMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPD 0x1e0(%RSP),%YMM31 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x200(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPS %YMM3,0x280(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM29,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM3,0x2a0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD 0x80(%RSP),%YMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPS %YMM3,0x80(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM24,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD 0x1c0(%RSP),%YMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPS %YMM6,0x360(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM14,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVAPD %YMM22,%YMM29 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVUPD %YMM6,0x60(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPBROADCASTQ %RBX,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RAX,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R11,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RDX,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RSI,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R9,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVDQU %YMM6,0xa0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VPBROADCASTQ %RCX,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPADDQ 0x79c85(%RIP),%YMM19,%YMM19 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VPCMPLTUQ %YMM20,%YMM19,%K1 | |||||||||||
VMOVUPD (%R14,%RCX,8),%YMM30{%K1}{z} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %YMM31,%YMM30,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTTPD2DQ %YMM20,%XMM6 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VPMOVSXDQ %XMM6,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPSLLQ $0x3,%YMM6,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %YMM6,%YMM21,%YMM6 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM22,%XMM22,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VGATHERQPD (,%YMM6,1),%YMM22{%K2} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPXORD %XMM19,%XMM19,%XMM19 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VGATHERQPD 0x8(,%YMM6,1),%YMM19{%K2} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPXORD %XMM21,%XMM21,%XMM21 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VGATHERQPD 0x10(,%YMM6,1),%YMM21{%K2} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM24,%XMM24,%XMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VGATHERQPD 0x18(,%YMM6,1),%YMM24{%K2} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VRNDSCALEPD $0xb,%YMM20,%YMM6 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBPD %YMM6,%YMM20,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM25,%YMM20,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM15,%YMM20,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM22,%YMM8,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM6,%YMM19,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM29,%YMM20,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM11,%YMM21,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM27,%YMM20,%YMM28 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM10,%YMM24,%YMM28 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM28,%YMM17,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDQU32 (%R12,%RCX,4),%XMM6{%K1}{z} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPMOVSXDQ %XMM6,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPADDQ %YMM6,%YMM14,%YMM14{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPSLLQ $0x3,%YMM14,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %YMM6,%YMM18,%YMM10 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM8,(,%YMM10,1){%K2} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
VFMADD231PD %YMM26,%YMM20,%YMM23 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM12,%YMM20,%YMM23 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM16,%YMM20,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0x20(%RSP),%YMM20,%YMM5 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM22,%YMM23,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM8,%YMM19,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x40(%RSP),%YMM20,%YMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM0,%YMM20,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM5,%YMM21,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x240(%RSP),%YMM20,%YMM9 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0xc0(%RSP),%YMM20,%YMM9 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM4,%YMM24,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM31,%YMM9,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVPD %YMM30,%YMM0,%YMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 8 |
VPADDQ %YMM6,%YMM13,%YMM4 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM0,(,%YMM4,1){%K2} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
VFMADD231PD 0x320(%RSP),%YMM20,%YMM7 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0x2c0(%RSP),%YMM20,%YMM7 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0x2e0(%RSP),%YMM20,%YMM7 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM22,%YMM7,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x300(%RSP),%YMM20,%YMM2 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0x220(%RSP),%YMM20,%YMM2 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0x260(%RSP),%YMM20,%YMM2 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM0,%YMM19,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x340(%RSP),%YMM20,%YMM1 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0x280(%RSP),%YMM20,%YMM1 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0x2a0(%RSP),%YMM20,%YMM1 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM2,%YMM21,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x80(%RSP),%YMM20,%YMM3 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0x360(%RSP),%YMM20,%YMM3 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0x60(%RSP),%YMM20,%YMM3 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM1,%YMM24,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPADDQ 0xa0(%RSP),%YMM6,%YMM0 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VSCATTERQPD %YMM3,(,%YMM0,1){%K1} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | BsplineFunctor.h:275-339 |
Module | exec |
nb instructions | 404 |
nb uops | 474 |
loop length | 2607 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 32 |
used ymm registers | 32 |
used zmm registers | 0 |
nb stack references | 41 |
ADD-SUB / MUL ratio | 0.14 |
micro-operation queue | 118.50 cycles |
front end | 118.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 34.67 | 34.33 | 74.50 | 74.17 | 89.00 | 81.00 | 22.00 | 74.33 |
cycles | 34.67 | 34.33 | 74.50 | 74.17 | 89.00 | 81.00 | 22.00 | 74.33 |
Cycles executing div or sqrt instructions | 8.00 |
FE+BE cycles | 167.50-168.50 |
Stall cycles | 63.99-65.00 |
LB full (events) | 68.99-69.99 |
LM full (events) | 0.02 |
SB full (events) | 0.99 |
Front-end | 118.50 |
Dispatch | 89.00 |
DIV/SQRT | 8.00 |
Overall L1 | 118.50 |
all | 50% |
load | 100% |
store | 75% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 30% |
all | 60% |
load | 38% |
store | 100% |
mul | 85% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 39% |
all | 58% |
load | 43% |
store | 97% |
mul | 85% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 37% |
all | 26% |
load | 47% |
store | 40% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 17% |
all | 32% |
load | 27% |
store | 44% |
mul | 44% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 24% |
all | 31% |
load | 28% |
store | 44% |
mul | 44% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 22% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
AND $-0x20,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SUB $0x4c0,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SUB %EDX,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
TEST %ECX,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 41e76a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x28(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x20(%RBP),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x238(%RDI),%XMM23 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOVSXD %EDX,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD 0x8(%RDI),%XMM17 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %ECX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $-0x10,%R10D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND %R15,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 41e2ca | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VPBROADCASTD %ESI,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM17,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVQ %R12,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVQ %R14,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
MOV %EDX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R11,0x40(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R8,0x20(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA (%R8,%R11,8),%R13 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPXOR %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVDQU 0x7a84e(%RIP),%YMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVDQU 0x7a866(%RIP),%YMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VPXOR %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPXOR %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVD %XMM9,%EBX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP %R15,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x40(%RSP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x20(%RSP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x28(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JNE 41e2cf | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
TEST %EBX,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 41e76a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x18(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x10(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x218(%RDI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x128(%RDI),%XMM25 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x130(%RDI),%XMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x148(%RDI),%XMM13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x150(%RDI),%XMM22 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x168(%RDI),%XMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x170(%RDI),%XMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x188(%RDI),%XMM31 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x190(%RDI),%XMM17 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0xa0(%RDI),%XMM27 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0xa8(%RDI),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVUPS %XMM0,0x40(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD 0xb0(%RDI),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVUPS %XMM0,0x20(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD 0xc0(%RDI),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVUPS %XMM0,0xe0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD 0xc8(%RDI),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVUPS %XMM0,0x100(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD 0xd0(%RDI),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVUPS %XMM0,0xa0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD 0xe0(%RDI),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVUPS %XMM0,0xc0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD 0xe8(%RDI),%XMM12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0xf0(%RDI),%XMM18 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x100(%RDI),%XMM30 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x108(%RDI),%XMM26 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x110(%RDI),%XMM19 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x18(%RDI),%XMM20 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x20(%RDI),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x28(%RDI),%XMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x30(%RDI),%XMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x38(%RDI),%XMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x40(%RDI),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x48(%RDI),%XMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x50(%RDI),%XMM28 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x58(%RDI),%XMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x60(%RDI),%XMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x68(%RDI),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x70(%RDI),%XMM29 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $-0x4,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD 0x78(%RDI),%XMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x80(%RDI),%XMM16 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x88(%RDI),%XMM24 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x90(%RDI),%XMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMULSD %XMM23,%XMM23,%XMM21 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
AND %RBX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVUPS %XMM4,0x80(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPS %XMM0,0x60(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JE 41e310 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVAPD %XMM1,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %XMM26,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %XMM2,%XMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %XMM12,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %XMM5,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD %XMM23,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM25,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x3c0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM9,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x1e0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM13,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPS %YMM0,0x200(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM22,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x1c0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM15,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x3a0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM10,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x1a0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM31,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x3e0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM17,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x140(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM21,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x160(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM27,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x180(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD 0x40(%RSP),%YMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPS %YMM0,0x120(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD 0x20(%RSP),%YMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPS %YMM0,0x380(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD 0xe0(%RSP),%YMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPS %YMM0,0xe0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD 0x100(%RSP),%YMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPS %YMM0,0x100(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD 0xa0(%RSP),%YMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPS %YMM0,0x20(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD 0xc0(%RSP),%YMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPS %YMM0,0x40(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM18,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM30,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM1,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM19,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM20,%YMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM12,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM26,%YMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%YMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM6,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM4,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM8,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVAPD %XMM11,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD %XMM28,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM0,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM7,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD 0x60(%RSP),%YMM28 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSD %XMM29,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD 0x80(%RSP),%YMM29 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSD %XMM16,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM24,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM14,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPBROADCASTQ %R11,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RAX,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVDQU %YMM6,0x400(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VPBROADCASTQ %RDX,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVDQU %YMM6,0x420(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VPBROADCASTQ %RSI,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVDQU %YMM6,0x460(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VPBROADCASTQ %R9,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVDQU %YMM6,0xa0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VMOVDQU %YMM7,0x440(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VMOVUPD %YMM13,0x480(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %YMM10,0xc0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD 0xe0(%RSP),%YMM26 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %YMM15,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM4,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM11,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM28,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM23,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVUPD 0x100(%RSP),%YMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x140(%RSP),%YMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x1c0(%RSP),%YMM28 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x1a0(%RSP),%YMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x120(%RSP),%YMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x1e0(%RSP),%YMM23 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x160(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x180(%RSP),%YMM13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
CMP %RBX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 41e76a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVAPD %YMM0,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPBROADCASTQ %RBX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVUPD %YMM27,0x320(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %YMM29,0x80(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %YMM8,0x340(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %YMM12,0x300(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %YMM30,0x2e0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %YMM31,0x2c0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %YMM14,0x60(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %YMM16,0x360(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVAPD %YMM15,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM23,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM10,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM11,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM28,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM4,%YMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM5,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM7,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM9,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM1,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM2,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM3,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM18,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVUPD %YMM22,0x2a0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVAPD %YMM6,%YMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVUPD %YMM19,0x280(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %YMM20,0x260(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVDQA64 %YMM0,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVUPD %YMM24,0x220(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %YMM21,0x240(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVAPD %YMM26,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM13,%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVUPD 0x380(%RSP),%YMM12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x3e0(%RSP),%YMM27 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x3c0(%RSP),%YMM25 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x480(%RSP),%YMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x3a0(%RSP),%YMM29 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x200(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVDQU 0x460(%RSP),%YMM13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVDQU 0x440(%RSP),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVDQU64 0x420(%RSP),%YMM18 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVDQU64 0x400(%RSP),%YMM21 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
JMP 41e56b | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %EDX,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%R8,%R11,8),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 41e2ec | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD %XMM23,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x1e0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %XMM28,0x1a0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %XMM2,0x140(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %XMM5,0x120(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM25,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM1,0x180(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %XMM6,0x160(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVAPD %XMM3,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD %XMM9,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM13,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPS %YMM0,0x200(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD %XMM24,0x1c0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVAPD %XMM11,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD %XMM22,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM15,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVAPD %XMM7,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD %XMM10,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVAPD %XMM16,%XMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD %XMM31,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM17,%YMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM21,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM27,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVAPD %XMM8,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD 0x40(%RSP),%YMM23 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSD 0x20(%RSP),%YMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVAPD %XMM18,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD 0xe0(%RSP),%YMM18 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSD 0x100(%RSP),%YMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSD 0xa0(%RSP),%YMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPS %YMM3,0x20(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD 0xc0(%RSP),%YMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPS %YMM3,0x40(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVAPD %XMM12,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM1,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD %XMM3,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM7,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM30,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM1,0x240(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVAPD %YMM9,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD %XMM26,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM19,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM1,0xc0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVAPD %YMM2,%YMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD %XMM20,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM1,0x320(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD 0x120(%RSP),%YMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSD 0x140(%RSP),%YMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVAPD %YMM16,%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVUPS %YMM1,0x2c0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM6,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM1,0x2e0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD 0x160(%RSP),%YMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPS %YMM1,0x300(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD 0x180(%RSP),%YMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVAPD %YMM18,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD %XMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x220(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVAPD %YMM3,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD 0x1a0(%RSP),%YMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPS %YMM1,0x260(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM13,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM1,0x340(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM15,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD 0x60(%RSP),%YMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPD 0x1e0(%RSP),%YMM31 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPD 0x200(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPS %YMM3,0x280(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM29,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM3,0x2a0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD 0x80(%RSP),%YMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPS %YMM3,0x80(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM24,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD 0x1c0(%RSP),%YMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVUPS %YMM6,0x360(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSD %XMM14,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVAPD %YMM22,%YMM29 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVUPD %YMM6,0x60(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPBROADCASTQ %RBX,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RAX,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R11,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RDX,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RSI,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R9,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVDQU %YMM6,0xa0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VPBROADCASTQ %RCX,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPADDQ 0x79c85(%RIP),%YMM19,%YMM19 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VPCMPLTUQ %YMM20,%YMM19,%K1 | |||||||||||
VMOVUPD (%R14,%RCX,8),%YMM30{%K1}{z} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %YMM31,%YMM30,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTTPD2DQ %YMM20,%XMM6 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VPMOVSXDQ %XMM6,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPSLLQ $0x3,%YMM6,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %YMM6,%YMM21,%YMM6 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM22,%XMM22,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VGATHERQPD (,%YMM6,1),%YMM22{%K2} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPXORD %XMM19,%XMM19,%XMM19 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VGATHERQPD 0x8(,%YMM6,1),%YMM19{%K2} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPXORD %XMM21,%XMM21,%XMM21 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VGATHERQPD 0x10(,%YMM6,1),%YMM21{%K2} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM24,%XMM24,%XMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VGATHERQPD 0x18(,%YMM6,1),%YMM24{%K2} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VRNDSCALEPD $0xb,%YMM20,%YMM6 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBPD %YMM6,%YMM20,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM25,%YMM20,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM15,%YMM20,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM22,%YMM8,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM6,%YMM19,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM29,%YMM20,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM11,%YMM21,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM27,%YMM20,%YMM28 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM10,%YMM24,%YMM28 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM28,%YMM17,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDQU32 (%R12,%RCX,4),%XMM6{%K1}{z} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPMOVSXDQ %XMM6,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPADDQ %YMM6,%YMM14,%YMM14{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPSLLQ $0x3,%YMM14,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %YMM6,%YMM18,%YMM10 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM8,(,%YMM10,1){%K2} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
VFMADD231PD %YMM26,%YMM20,%YMM23 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM12,%YMM20,%YMM23 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM16,%YMM20,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0x20(%RSP),%YMM20,%YMM5 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM22,%YMM23,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM8,%YMM19,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x40(%RSP),%YMM20,%YMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM0,%YMM20,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM5,%YMM21,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x240(%RSP),%YMM20,%YMM9 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0xc0(%RSP),%YMM20,%YMM9 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM4,%YMM24,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM31,%YMM9,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVPD %YMM30,%YMM0,%YMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 8 |
VPADDQ %YMM6,%YMM13,%YMM4 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM0,(,%YMM4,1){%K2} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
VFMADD231PD 0x320(%RSP),%YMM20,%YMM7 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0x2c0(%RSP),%YMM20,%YMM7 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0x2e0(%RSP),%YMM20,%YMM7 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM22,%YMM7,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x300(%RSP),%YMM20,%YMM2 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0x220(%RSP),%YMM20,%YMM2 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0x260(%RSP),%YMM20,%YMM2 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM0,%YMM19,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x340(%RSP),%YMM20,%YMM1 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0x280(%RSP),%YMM20,%YMM1 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0x2a0(%RSP),%YMM20,%YMM1 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM2,%YMM21,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x80(%RSP),%YMM20,%YMM3 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0x360(%RSP),%YMM20,%YMM3 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0x60(%RSP),%YMM20,%YMM3 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM1,%YMM24,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPADDQ 0xa0(%RSP),%YMM6,%YMM0 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VSCATTERQPD %YMM3,(,%YMM0,1){%K1} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼qmcplusplus::BsplineFunctor | 0.75 | 0.51 |
○Loop 299 - BsplineFunctor.h:291-297 - exec | 0.62 | 0.42 |
○Loop 297 - BsplineFunctor.h:302-335 - exec | 0.07 | 0.05 |
○Loop 298 - BsplineFunctor.h:291-298 - exec | 0 | 0 |