Loop Id: 585 | Module: exec | Source: BsplineFunctor.h:305-336 | Coverage: 0.06% |
---|
Loop Id: 585 | Module: exec | Source: BsplineFunctor.h:305-336 | Coverage: 0.06% |
---|
0x447e50 VMOVAPD (%RSI,%RDX,2),%YMM12 [6] |
0x447e55 VMOVAPD 0x20(%RSI,%RDX,2),%YMM11 [6] |
0x447e5b KMOVB %K1,%K2 |
0x447e5f KMOVB %K1,%K4 |
0x447e63 KMOVB %K1,%K3 |
0x447e67 KMOVB %K1,%K6 |
0x447e6b KMOVB %K1,%K5 |
0x447e6f KMOVB %K1,%K7 |
0x447e73 VMULPD %YMM18,%YMM12,%YMM0 |
0x447e79 VMOVAPD 0x3c8(%RSP),%YMM24 [15] |
0x447e84 VMOVDQA (%R10,%RDX,1),%YMM4 [16] |
0x447e8a ADD $0x20,%RDX |
0x447e8e VMULPD %YMM18,%YMM11,%YMM1 |
0x447e94 VDIVPD %YMM12,%YMM25,%YMM12 |
0x447e9a VRNDSCALEPD $0xb,%YMM0,%YMM9 |
0x447ea1 VCVTTPD2DQ %YMM0,%XMM6 |
0x447ea5 VGATHERDPD (%RAX,%XMM6,8),%YMM22{%K2} [1] |
0x447eac KMOVB %K1,%K2 |
0x447eb0 VRNDSCALEPD $0xb,%YMM1,%YMM8 |
0x447eb7 VCVTTPD2DQ %YMM1,%XMM5 |
0x447ebb VINSERTI128 $0x1,%XMM5,%YMM6,%YMM13 |
0x447ec1 VMOVAPD 0x468(%RSP),%YMM5 [15] |
0x447eca VPADDD 0x108(%RSP),%YMM13,%YMM10 [15] |
0x447ed3 VPADDD 0xe8(%RSP),%YMM13,%YMM6 [15] |
0x447edc VPERM2I128 $0x11,%YMM13,%YMM13,%YMM14 |
0x447ee2 VPADDD 0xc8(%RSP),%YMM13,%YMM13 [15] |
0x447eeb VGATHERDPD (%RAX,%XMM14,8),%YMM21{%K3} [4] |
0x447ef2 KMOVB %K1,%K3 |
0x447ef6 VGATHERDPD (%RAX,%XMM10,8),%YMM23{%K4} [7] |
0x447efd VPERM2I128 $0x11,%YMM10,%YMM10,%YMM7 |
0x447f03 VMOVAPD 0x4a8(%RSP),%YMM10 [15] |
0x447f0c VPERM2I128 $0x11,%YMM6,%YMM6,%YMM14 |
0x447f12 VPERM2I128 $0x11,%YMM13,%YMM13,%YMM15 |
0x447f18 VGATHERDPD (%RAX,%XMM13,8),%YMM20{%K2} [17] |
0x447f1f KMOVB %K1,%K4 |
0x447f23 KMOVB %K1,%K2 |
0x447f27 VSUBPD %YMM9,%YMM0,%YMM0 |
0x447f2c VGATHERDPD (%RAX,%XMM6,8),%YMM9{%K6} [11] |
0x447f33 VMOVAPD 0x488(%RSP),%YMM6 [15] |
0x447f3c VGATHERDPD (%RAX,%XMM15,8),%YMM13{%K3} [5] |
0x447f43 VSUBPD %YMM8,%YMM1,%YMM1 |
0x447f48 VMOVAPD 0x448(%RSP),%YMM15 [15] |
0x447f51 VGATHERDPD (%RAX,%XMM7,8),%YMM8{%K5} [14] |
0x447f58 VGATHERDPD (%RAX,%XMM14,8),%YMM7{%K7} [3] |
0x447f5f VMOVAPD %YMM10,%YMM14 |
0x447f64 KMOVB %K1,%K5 |
0x447f68 KMOVB %K1,%K6 |
0x447f6c KMOVB %K1,%K7 |
0x447f70 VMULPD %YMM0,%YMM0,%YMM3 |
0x447f74 VFMADD132PD %YMM0,%YMM6,%YMM14 |
0x447f79 KMOVB %K1,%K3 |
0x447f7d VMULPD %YMM1,%YMM1,%YMM2 |
0x447f81 VFMADD132PD %YMM1,%YMM6,%YMM10 |
0x447f86 VMOVAPD %YMM5,%YMM6 |
0x447f8a VFMADD132PD %YMM0,%YMM15,%YMM6 |
0x447f8f VFMADD132PD %YMM1,%YMM15,%YMM5 |
0x447f94 VMOVAPD 0x428(%RSP),%YMM15 [15] |
0x447f9d VMOVAPD %YMM15,%YMM19 |
0x447fa3 VMULPD %YMM3,%YMM0,%YMM17 |
0x447fa9 VMULPD %YMM2,%YMM1,%YMM16 |
0x447faf VMULPD %YMM5,%YMM8,%YMM5 |
0x447fb3 VMULPD %YMM6,%YMM23,%YMM6 |
0x447fb9 VDIVPD %YMM11,%YMM25,%YMM11 |
0x447fbf VFMADD132PD %YMM21,%YMM5,%YMM10 |
0x447fc5 VMOVAPD 0x3e8(%RSP),%YMM5 [15] |
0x447fce VFMADD132PD %YMM22,%YMM6,%YMM14 |
0x447fd4 VMOVAPD 0x408(%RSP),%YMM6 [15] |
0x447fdd VFMADD132PD %YMM0,%YMM6,%YMM19 |
0x447fe3 VFMADD132PD %YMM1,%YMM6,%YMM15 |
0x447fe8 VMOVAPD %YMM5,%YMM6 |
0x447fec VFMADD132PD %YMM0,%YMM24,%YMM6 |
0x447ff2 VFMADD132PD %YMM1,%YMM24,%YMM5 |
0x447ff8 VMULPD %YMM18,%YMM12,%YMM12 |
0x447ffe VMULPD %YMM6,%YMM20,%YMM6 |
0x448004 VMULPD %YMM5,%YMM13,%YMM5 |
0x448008 VFMADD231PD %YMM19,%YMM9,%YMM6 |
0x44800e VMOVDQA32 %XMM4,%XMM19 |
0x448014 VFMADD231PD %YMM15,%YMM7,%YMM5 |
0x448019 VMOVAPD 0x3a8(%RSP),%YMM15 [15] |
0x448022 VADDPD %YMM14,%YMM6,%YMM14 |
0x448027 VADDPD %YMM10,%YMM5,%YMM10 |
0x44802c VMULPD %YMM15,%YMM14,%YMM6 |
0x448031 VMULPD %YMM15,%YMM10,%YMM5 |
0x448036 VMULPD %YMM18,%YMM11,%YMM11 |
0x44803c VSCATTERDPD %YMM6,(%RBX,%XMM4,8){%K4} [2] |
0x448043 VPERM2I128 $0x11,%YMM4,%YMM4,%YMM4 |
0x448049 VSCATTERDPD %YMM5,(%RBX,%XMM4,8){%K5} [9] |
0x448050 VMOVAPD 0x368(%RSP),%YMM5 [15] |
0x448059 VMOVAPD 0x348(%RSP),%YMM14 [15] |
0x448062 VMOVAPD 0x388(%RSP),%YMM15 [15] |
0x44806b VMOVAPD 0x308(%RSP),%YMM10 [15] |
0x448074 VMOVAPD %YMM5,%YMM6 |
0x448078 VFMADD132PD %YMM1,%YMM14,%YMM5 |
0x44807d VFMADD132PD %YMM0,%YMM14,%YMM6 |
0x448082 VMOVAPD %YMM10,%YMM14 |
0x448087 VFMADD231PD %YMM15,%YMM2,%YMM5 |
0x44808c VFMADD231PD %YMM15,%YMM3,%YMM6 |
0x448091 VMOVAPD 0x2e8(%RSP),%YMM15 [15] |
0x44809a VFMADD132PD %YMM0,%YMM15,%YMM14 |
0x44809f VFMADD132PD %YMM1,%YMM15,%YMM10 |
0x4480a4 VMOVAPD 0x328(%RSP),%YMM15 [15] |
0x4480ad VFMADD231PD %YMM15,%YMM3,%YMM14 |
0x4480b2 VFMADD231PD %YMM15,%YMM2,%YMM10 |
0x4480b7 VMULPD %YMM14,%YMM23,%YMM14 |
0x4480bd VMULPD %YMM10,%YMM8,%YMM10 |
0x4480c2 VFMADD132PD %YMM22,%YMM14,%YMM6 |
0x4480c8 VMOVAPD 0x2a8(%RSP),%YMM14 [15] |
0x4480d1 VMOVAPD 0x288(%RSP),%YMM15 [15] |
0x4480da VFMADD132PD %YMM21,%YMM10,%YMM5 |
0x4480e0 VMOVAPD 0x2c8(%RSP),%YMM10 [15] |
0x4480e9 VMOVAPD %YMM14,%YMM24 |
0x4480ef VFMADD132PD %YMM1,%YMM15,%YMM14 |
0x4480f4 VFMADD132PD %YMM0,%YMM15,%YMM24 |
0x4480fa VMOVAPD 0x268(%RSP),%YMM15 [15] |
0x448103 VFMADD231PD %YMM10,%YMM2,%YMM14 |
0x448108 VFMADD231PD %YMM10,%YMM3,%YMM24 |
0x44810e VMOVAPD %YMM15,%YMM10 |
0x448113 VFMADD213PD 0x4c8(%RSP),%YMM0,%YMM10 [15] |
0x44811d VFMADD213PD 0x4c8(%RSP),%YMM1,%YMM15 [15] |
0x448127 VFMADD231PD 0x4e8(%RSP),%YMM3,%YMM10 [15] |
0x448131 VFMADD231PD 0x4e8(%RSP),%YMM2,%YMM15 [15] |
0x44813b VMULPD %YMM10,%YMM20,%YMM10 |
0x448141 VMULPD %YMM15,%YMM13,%YMM15 |
0x448146 VFMADD231PD %YMM24,%YMM9,%YMM10 |
0x44814c VFMADD132PD %YMM7,%YMM15,%YMM14 |
0x448151 VADDPD %YMM10,%YMM6,%YMM6 |
0x448156 VADDPD %YMM14,%YMM5,%YMM5 |
0x44815b VMULPD %YMM12,%YMM6,%YMM12 |
0x448160 VMULPD %YMM11,%YMM5,%YMM11 |
0x448165 VSCATTERDPD %YMM12,(%R8,%XMM19,8){%K6} [13] |
0x44816c VSCATTERDPD %YMM11,(%R8,%XMM4,8){%K7} [12] |
0x448173 VMOVAPD 0x228(%RSP),%YMM14 [15] |
0x44817c VMOVAPD 0x208(%RSP),%YMM6 [15] |
0x448185 VMOVAPD 0x248(%RSP),%YMM15 [15] |
0x44818e VMOVAPD 0x1e8(%RSP),%YMM12 [15] |
0x448197 VMULPD %YMM14,%YMM3,%YMM10 |
0x44819c VMOVAPD %YMM6,%YMM11 |
0x4481a0 VMULPD %YMM14,%YMM2,%YMM5 |
0x4481a5 VFMADD132PD %YMM0,%YMM12,%YMM11 |
0x4481aa VMULPD %YMM31,%YMM2,%YMM14 |
0x4481b0 VFMADD132PD %YMM1,%YMM12,%YMM6 |
0x4481b5 VMULPD %YMM31,%YMM3,%YMM12 |
0x4481bb VFMADD231PD %YMM15,%YMM17,%YMM10 |
0x4481c1 VFMADD231PD %YMM15,%YMM16,%YMM5 |
0x4481c7 VMOVAPD 0x188(%RSP),%YMM15 [15] |
0x4481d0 VADDPD %YMM10,%YMM11,%YMM11 |
0x4481d5 VMOVAPD 0x1c8(%RSP),%YMM10 [15] |
0x4481de VADDPD %YMM5,%YMM6,%YMM6 |
0x4481e2 VFMADD231PD %YMM10,%YMM17,%YMM12 |
0x4481e8 VFMADD231PD %YMM10,%YMM16,%YMM14 |
0x4481ee VMOVAPD 0x1a8(%RSP),%YMM10 [15] |
0x4481f7 VMOVAPD %YMM10,%YMM5 |
0x4481fb VFMADD132PD %YMM1,%YMM15,%YMM10 |
0x448200 VFMADD132PD %YMM0,%YMM15,%YMM5 |
0x448205 VADDPD %YMM14,%YMM10,%YMM14 |
0x44820a VMOVAPD 0x168(%RSP),%YMM10 [15] |
0x448213 VADDPD %YMM12,%YMM5,%YMM12 |
0x448218 VMULPD %YMM14,%YMM8,%YMM8 |
0x44821d VMOVAPD 0x128(%RSP),%YMM14 [15] |
0x448226 VMULPD %YMM23,%YMM12,%YMM5 |
0x44822c VMOVAPD %YMM14,%YMM12 |
0x448231 VFMADD132PD %YMM1,%YMM30,%YMM14 |
0x448237 VFMADD132PD %YMM0,%YMM30,%YMM12 |
0x44823d VFMADD132PD %YMM27,%YMM26,%YMM1 |
0x448243 VFMADD132PD %YMM27,%YMM26,%YMM0 |
0x448249 VFMADD231PD %YMM6,%YMM21,%YMM8 |
0x44824f VMOVAPD 0x148(%RSP),%YMM6 [15] |
0x448258 VFMADD231PD %YMM11,%YMM22,%YMM5 |
0x44825e VMULPD %YMM6,%YMM2,%YMM15 |
0x448262 VMULPD %YMM6,%YMM3,%YMM11 |
0x448266 VMULPD %YMM28,%YMM2,%YMM2 |
0x44826c VMULPD %YMM28,%YMM3,%YMM3 |
0x448272 VFMADD231PD %YMM10,%YMM16,%YMM15 |
0x448278 VFMADD231PD %YMM10,%YMM17,%YMM11 |
0x44827e VFMADD231PD %YMM29,%YMM16,%YMM2 |
0x448284 VFMADD231PD %YMM29,%YMM17,%YMM3 |
0x44828a VADDPD %YMM15,%YMM14,%YMM6 |
0x44828f VADDPD %YMM11,%YMM12,%YMM11 |
0x448294 VADDPD %YMM1,%YMM2,%YMM1 |
0x448298 VADDPD %YMM3,%YMM0,%YMM0 |
0x44829c VMULPD %YMM1,%YMM13,%YMM13 |
0x4482a0 VMULPD %YMM20,%YMM0,%YMM10 |
0x4482a6 VFMADD132PD %YMM6,%YMM13,%YMM7 |
0x4482ab VFMADD132PD %YMM11,%YMM10,%YMM9 |
0x4482b0 VADDPD %YMM8,%YMM7,%YMM7 |
0x4482b5 VADDPD %YMM5,%YMM9,%YMM9 |
0x4482b9 VSCATTERDPD %YMM9,(%RDI,%XMM19,8){%K2} [8] |
0x4482c0 VSCATTERDPD %YMM7,(%RDI,%XMM4,8){%K3} [10] |
0x4482c7 CMP %RDX,%R14 |
0x4482ca JNE 447e50 |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 305 - 336 |
-------------------------------------------------------------------------------- |
305: real_type r = distArrayCompressed[j]; |
306: int iScatter = distIndices[j]; |
307: real_type rinv = cOne / r; |
308: r *= DeltaRInv; |
309: int iGather = (int)r; |
310: real_type t = r - real_type(iGather); |
311: real_type tp0 = t * t * t; |
312: real_type tp1 = t * t; |
313: real_type tp2 = t; |
314: |
315: real_type sCoef0 = SplineCoefs[iGather + 0]; |
316: real_type sCoef1 = SplineCoefs[iGather + 1]; |
317: real_type sCoef2 = SplineCoefs[iGather + 2]; |
318: real_type sCoef3 = SplineCoefs[iGather + 3]; |
319: |
320: // clang-format off |
321: laplArray[iScatter] = dSquareDeltaRinv * |
322: (sCoef0*( d2A[ 2]*tp2 + d2A[ 3])+ |
323: sCoef1*( d2A[ 6]*tp2 + d2A[ 7])+ |
324: sCoef2*( d2A[10]*tp2 + d2A[11])+ |
325: sCoef3*( d2A[14]*tp2 + d2A[15])); |
326: |
327: gradArray[iScatter] = DeltaRInv * rinv * |
328: (sCoef0*( dA[ 1]*tp1 + dA[ 2]*tp2 + dA[ 3])+ |
329: sCoef1*( dA[ 5]*tp1 + dA[ 6]*tp2 + dA[ 7])+ |
330: sCoef2*( dA[ 9]*tp1 + dA[10]*tp2 + dA[11])+ |
331: sCoef3*( dA[13]*tp1 + dA[14]*tp2 + dA[15])); |
332: |
333: valArray[iScatter] = (sCoef0*(A[ 0]*tp0 + A[ 1]*tp1 + A[ 2]*tp2 + A[ 3])+ |
334: sCoef1*(A[ 4]*tp0 + A[ 5]*tp1 + A[ 6]*tp2 + A[ 7])+ |
335: sCoef2*(A[ 8]*tp0 + A[ 9]*tp1 + A[10]*tp2 + A[11])+ |
336: sCoef3*(A[12]*tp0 + A[13]*tp1 + A[14]*tp2 + A[15])); |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►36.36+ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:266 | exec |
○ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:300 | exec |
○ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:207 | exec |
○ | main._omp_fn.1 | miniqmc.cpp:438 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
►27.27+ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:266 | exec |
○ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:312 | exec |
○ | qmcplusplus::WaveFunction::acc[...] | NewTimer.h:249 | exec |
○ | main._omp_fn.1 | stl_vector.h:1126 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
►27.27+ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:266 | exec |
○ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:300 | exec |
○ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:207 | exec |
○ | main._omp_fn.1 | miniqmc.cpp:438 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
►9.09+ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:266 | exec |
○ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:312 | exec |
○ | qmcplusplus::WaveFunction::acc[...] | NewTimer.h:249 | exec |
○ | main._omp_fn.1 | stl_vector.h:1126 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.05 |
CQA speedup if FP arith vectorized | 1.51 |
CQA speedup if fully vectorized | 2.20 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.25 |
Bottlenecks | micro-operation queue |
Function | qmcplusplus::BsplineFunctor |
Source | BsplineFunctor.h:305-336 |
Source loop unroll info | unrolled by 8 |
Source loop unroll confidence level | high |
Unroll/vectorization loop type | main |
Unroll factor | 8 |
CQA cycles | 81.25 |
CQA cycles if no scalar integer | 77.75 |
CQA cycles if FP arith vectorized | 53.67 |
CQA cycles if fully vectorized | 36.88 |
Front-end cycles | 81.25 |
DIV/SQRT cycles | 16.00 |
P0 cycles | 65.00 |
P1 cycles | 65.00 |
P2 cycles | 47.50 |
P3 cycles | 47.50 |
P4 cycles | 24.00 |
P5 cycles | 25.00 |
P6 cycles | 2.00 |
P7 cycles | 0.00 |
Inter-iter dependencies cycles | 20 |
FE+BE cycles (UFS) | 82.02 |
Stall cycles (UFS) | 29.76 |
Nb insns | 190.00 |
Nb uops | 325.00 |
Nb loads | 47.00 |
Nb stores | 6.00 |
Nb stack references | 34.00 |
FLOP/cycle | 7.58 |
Nb FLOP add-sub | 64.00 |
Nb FLOP mul | 128.00 |
Nb FLOP fma | 208.00 |
Nb FLOP div | 8.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 20.87 |
Bytes prefetched | 0.00 |
Bytes loaded | 1504.00 |
Bytes stored | 192.00 |
Stride 0 | 9.00 |
Stride 1 | 2.00 |
Stride n | 0.00 |
Stride unknown | 0.00 |
Stride indirect | 3.00 |
Vectorization ratio all | 100.00 |
Vectorization ratio load | 100.00 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 100.00 |
Vector-efficiency ratio all | 48.99 |
Vector-efficiency ratio load | 50.00 |
Vector-efficiency ratio store | 50.00 |
Vector-efficiency ratio mul | 50.00 |
Vector-efficiency ratio add_sub | 50.00 |
Vector-efficiency ratio fma | 50.00 |
Vector-efficiency ratio div_sqrt | 50.00 |
Vector-efficiency ratio other | 44.17 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.05 |
CQA speedup if FP arith vectorized | 1.51 |
CQA speedup if fully vectorized | 2.20 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.25 |
Bottlenecks | micro-operation queue |
Function | qmcplusplus::BsplineFunctor |
Source | BsplineFunctor.h:305-336 |
Source loop unroll info | unrolled by 8 |
Source loop unroll confidence level | high |
Unroll/vectorization loop type | main |
Unroll factor | 8 |
CQA cycles | 81.25 |
CQA cycles if no scalar integer | 77.75 |
CQA cycles if FP arith vectorized | 53.67 |
CQA cycles if fully vectorized | 36.88 |
Front-end cycles | 81.25 |
DIV/SQRT cycles | 16.00 |
P0 cycles | 65.00 |
P1 cycles | 65.00 |
P2 cycles | 47.50 |
P3 cycles | 47.50 |
P4 cycles | 24.00 |
P5 cycles | 25.00 |
P6 cycles | 2.00 |
P7 cycles | 0.00 |
Inter-iter dependencies cycles | 20 |
FE+BE cycles (UFS) | 82.02 |
Stall cycles (UFS) | 29.76 |
Nb insns | 190.00 |
Nb uops | 325.00 |
Nb loads | 47.00 |
Nb stores | 6.00 |
Nb stack references | 34.00 |
FLOP/cycle | 7.58 |
Nb FLOP add-sub | 64.00 |
Nb FLOP mul | 128.00 |
Nb FLOP fma | 208.00 |
Nb FLOP div | 8.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 20.87 |
Bytes prefetched | 0.00 |
Bytes loaded | 1504.00 |
Bytes stored | 192.00 |
Stride 0 | 9.00 |
Stride 1 | 2.00 |
Stride n | 0.00 |
Stride unknown | 0.00 |
Stride indirect | 3.00 |
Vectorization ratio all | 100.00 |
Vectorization ratio load | 100.00 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 100.00 |
Vector-efficiency ratio all | 48.99 |
Vector-efficiency ratio load | 50.00 |
Vector-efficiency ratio store | 50.00 |
Vector-efficiency ratio mul | 50.00 |
Vector-efficiency ratio add_sub | 50.00 |
Vector-efficiency ratio fma | 50.00 |
Vector-efficiency ratio div_sqrt | 50.00 |
Vector-efficiency ratio other | 44.17 |
Path / |
Function | qmcplusplus::BsplineFunctor |
Source file and lines | BsplineFunctor.h:305-336 |
Module | exec |
nb instructions | 190 |
nb uops | 325 |
loop length | 1152 |
used x86 registers | 9 |
used mmx registers | 0 |
used xmm registers | 9 |
used ymm registers | 32 |
used zmm registers | 0 |
nb stack references | 34 |
ADD-SUB / MUL ratio | 0.50 |
micro-operation queue | 81.25 cycles |
front end | 81.25 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 65.00 | 65.00 | 47.50 | 47.50 | 24.00 | 25.00 | 2.00 | 0.00 |
cycles | 65.00 | 65.00 | 47.50 | 47.50 | 24.00 | 25.00 | 2.00 | 0.00 |
Cycles executing div or sqrt instructions | 16.00 |
Longest recurrence chain latency (RecMII) | 20.00 |
FE+BE cycles | 82.02 |
Stall cycles | 29.76 |
RS full (events) | 4.59 |
PRF_FLOAT full (events) | 31.54 |
Front-end | 81.25 |
Dispatch | 65.00 |
DIV/SQRT | 16.00 |
Data deps. | 20.00 |
Overall L1 | 81.25 |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 100% |
all | 100% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 100% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 34% |
load | 50% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 25% |
all | 50% |
load | 50% |
store | 50% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 50% |
all | 48% |
load | 50% |
store | 50% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 44% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
VMOVAPD (%RSI,%RDX,2),%YMM12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x20(%RSI,%RDX,2),%YMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %YMM18,%YMM12,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x3c8(%RSP),%YMM24 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVDQA (%R10,%RDX,1),%YMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
ADD $0x20,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMULPD %YMM18,%YMM11,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVPD %YMM12,%YMM25,%YMM12 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 8 |
VRNDSCALEPD $0xb,%YMM0,%YMM9 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VCVTTPD2DQ %YMM0,%XMM6 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VGATHERDPD (%RAX,%XMM6,8),%YMM22{%K2} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VRNDSCALEPD $0xb,%YMM1,%YMM8 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VCVTTPD2DQ %YMM1,%XMM5 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VINSERTI128 $0x1,%XMM5,%YMM6,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVAPD 0x468(%RSP),%YMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VPADDD 0x108(%RSP),%YMM13,%YMM10 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VPADDD 0xe8(%RSP),%YMM13,%YMM6 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VPERM2I128 $0x11,%YMM13,%YMM13,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPADDD 0xc8(%RSP),%YMM13,%YMM13 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VGATHERDPD (%RAX,%XMM14,8),%YMM21{%K3} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERDPD (%RAX,%XMM10,8),%YMM23{%K4} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VPERM2I128 $0x11,%YMM10,%YMM10,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVAPD 0x4a8(%RSP),%YMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VPERM2I128 $0x11,%YMM6,%YMM6,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPERM2I128 $0x11,%YMM13,%YMM13,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VGATHERDPD (%RAX,%XMM13,8),%YMM20{%K2} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSUBPD %YMM9,%YMM0,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VGATHERDPD (%RAX,%XMM6,8),%YMM9{%K6} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VMOVAPD 0x488(%RSP),%YMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VGATHERDPD (%RAX,%XMM15,8),%YMM13{%K3} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VSUBPD %YMM8,%YMM1,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x448(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VGATHERDPD (%RAX,%XMM7,8),%YMM8{%K5} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VGATHERDPD (%RAX,%XMM14,8),%YMM7{%K7} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VMOVAPD %YMM10,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %YMM0,%YMM0,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM6,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %YMM1,%YMM1,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM6,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM5,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %YMM0,%YMM15,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM15,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x428(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %YMM15,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULPD %YMM3,%YMM0,%YMM17 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM2,%YMM1,%YMM16 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM5,%YMM8,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM6,%YMM23,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVPD %YMM11,%YMM25,%YMM11 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 8 |
VFMADD132PD %YMM21,%YMM5,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x3e8(%RSP),%YMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %YMM22,%YMM6,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x408(%RSP),%YMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %YMM0,%YMM6,%YMM19 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM6,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM5,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %YMM0,%YMM24,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM24,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM18,%YMM12,%YMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM6,%YMM20,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM5,%YMM13,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM19,%YMM9,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDQA32 %XMM4,%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD231PD %YMM15,%YMM7,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x3a8(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %YMM14,%YMM6,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM10,%YMM5,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM15,%YMM14,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM15,%YMM10,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM18,%YMM11,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSCATTERDPD %YMM6,(%RBX,%XMM4,8){%K4} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
VPERM2I128 $0x11,%YMM4,%YMM4,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VSCATTERDPD %YMM5,(%RBX,%XMM4,8){%K5} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
VMOVAPD 0x368(%RSP),%YMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x348(%RSP),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x388(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x308(%RSP),%YMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %YMM5,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %YMM1,%YMM14,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM14,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM10,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD231PD %YMM15,%YMM2,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM15,%YMM3,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x2e8(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %YMM0,%YMM15,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM15,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x328(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD231PD %YMM15,%YMM3,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM15,%YMM2,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM14,%YMM23,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM10,%YMM8,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM22,%YMM14,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x2a8(%RSP),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x288(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %YMM21,%YMM10,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x2c8(%RSP),%YMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %YMM14,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %YMM1,%YMM15,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM15,%YMM24 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x268(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD231PD %YMM10,%YMM2,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM10,%YMM3,%YMM24 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM15,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD213PD 0x4c8(%RSP),%YMM0,%YMM10 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0x4c8(%RSP),%YMM1,%YMM15 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x4e8(%RSP),%YMM3,%YMM10 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x4e8(%RSP),%YMM2,%YMM15 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM10,%YMM20,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM15,%YMM13,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM24,%YMM9,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM7,%YMM15,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM10,%YMM6,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM14,%YMM5,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM12,%YMM6,%YMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM11,%YMM5,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSCATTERDPD %YMM12,(%R8,%XMM19,8){%K6} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
VSCATTERDPD %YMM11,(%R8,%XMM4,8){%K7} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
VMOVAPD 0x228(%RSP),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x208(%RSP),%YMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x248(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x1e8(%RSP),%YMM12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %YMM14,%YMM3,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM6,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULPD %YMM14,%YMM2,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM12,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM31,%YMM2,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM12,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM31,%YMM3,%YMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM15,%YMM17,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM15,%YMM16,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x188(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %YMM10,%YMM11,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x1c8(%RSP),%YMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %YMM5,%YMM6,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM10,%YMM17,%YMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM10,%YMM16,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x1a8(%RSP),%YMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %YMM10,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %YMM1,%YMM15,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM15,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM14,%YMM10,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x168(%RSP),%YMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %YMM12,%YMM5,%YMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM14,%YMM8,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x128(%RSP),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %YMM23,%YMM12,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM14,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %YMM1,%YMM30,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM30,%YMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM27,%YMM26,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM27,%YMM26,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM6,%YMM21,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x148(%RSP),%YMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD231PD %YMM11,%YMM22,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM6,%YMM2,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM6,%YMM3,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM28,%YMM2,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM28,%YMM3,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM10,%YMM16,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM10,%YMM17,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM29,%YMM16,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM29,%YMM17,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM15,%YMM14,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM11,%YMM12,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM1,%YMM2,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM3,%YMM0,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM1,%YMM13,%YMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM20,%YMM0,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM6,%YMM13,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM11,%YMM10,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM8,%YMM7,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM5,%YMM9,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSCATTERDPD %YMM9,(%RDI,%XMM19,8){%K2} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
VSCATTERDPD %YMM7,(%RDI,%XMM4,8){%K3} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
CMP %RDX,%R14 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 447e50 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
Function | qmcplusplus::BsplineFunctor |
Source file and lines | BsplineFunctor.h:305-336 |
Module | exec |
nb instructions | 190 |
nb uops | 325 |
loop length | 1152 |
used x86 registers | 9 |
used mmx registers | 0 |
used xmm registers | 9 |
used ymm registers | 32 |
used zmm registers | 0 |
nb stack references | 34 |
ADD-SUB / MUL ratio | 0.50 |
micro-operation queue | 81.25 cycles |
front end | 81.25 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 65.00 | 65.00 | 47.50 | 47.50 | 24.00 | 25.00 | 2.00 | 0.00 |
cycles | 65.00 | 65.00 | 47.50 | 47.50 | 24.00 | 25.00 | 2.00 | 0.00 |
Cycles executing div or sqrt instructions | 16.00 |
Longest recurrence chain latency (RecMII) | 20.00 |
FE+BE cycles | 82.02 |
Stall cycles | 29.76 |
RS full (events) | 4.59 |
PRF_FLOAT full (events) | 31.54 |
Front-end | 81.25 |
Dispatch | 65.00 |
DIV/SQRT | 16.00 |
Data deps. | 20.00 |
Overall L1 | 81.25 |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 100% |
all | 100% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 100% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 34% |
load | 50% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 25% |
all | 50% |
load | 50% |
store | 50% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 50% |
all | 48% |
load | 50% |
store | 50% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 44% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
VMOVAPD (%RSI,%RDX,2),%YMM12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x20(%RSI,%RDX,2),%YMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %YMM18,%YMM12,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x3c8(%RSP),%YMM24 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVDQA (%R10,%RDX,1),%YMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
ADD $0x20,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMULPD %YMM18,%YMM11,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVPD %YMM12,%YMM25,%YMM12 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 8 |
VRNDSCALEPD $0xb,%YMM0,%YMM9 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VCVTTPD2DQ %YMM0,%XMM6 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VGATHERDPD (%RAX,%XMM6,8),%YMM22{%K2} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VRNDSCALEPD $0xb,%YMM1,%YMM8 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VCVTTPD2DQ %YMM1,%XMM5 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VINSERTI128 $0x1,%XMM5,%YMM6,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVAPD 0x468(%RSP),%YMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VPADDD 0x108(%RSP),%YMM13,%YMM10 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VPADDD 0xe8(%RSP),%YMM13,%YMM6 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VPERM2I128 $0x11,%YMM13,%YMM13,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPADDD 0xc8(%RSP),%YMM13,%YMM13 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VGATHERDPD (%RAX,%XMM14,8),%YMM21{%K3} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERDPD (%RAX,%XMM10,8),%YMM23{%K4} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VPERM2I128 $0x11,%YMM10,%YMM10,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVAPD 0x4a8(%RSP),%YMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VPERM2I128 $0x11,%YMM6,%YMM6,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPERM2I128 $0x11,%YMM13,%YMM13,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VGATHERDPD (%RAX,%XMM13,8),%YMM20{%K2} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSUBPD %YMM9,%YMM0,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VGATHERDPD (%RAX,%XMM6,8),%YMM9{%K6} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VMOVAPD 0x488(%RSP),%YMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VGATHERDPD (%RAX,%XMM15,8),%YMM13{%K3} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VSUBPD %YMM8,%YMM1,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x448(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VGATHERDPD (%RAX,%XMM7,8),%YMM8{%K5} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VGATHERDPD (%RAX,%XMM14,8),%YMM7{%K7} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VMOVAPD %YMM10,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %YMM0,%YMM0,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM6,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %YMM1,%YMM1,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM6,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM5,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %YMM0,%YMM15,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM15,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x428(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %YMM15,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULPD %YMM3,%YMM0,%YMM17 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM2,%YMM1,%YMM16 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM5,%YMM8,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM6,%YMM23,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVPD %YMM11,%YMM25,%YMM11 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 8 |
VFMADD132PD %YMM21,%YMM5,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x3e8(%RSP),%YMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %YMM22,%YMM6,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x408(%RSP),%YMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %YMM0,%YMM6,%YMM19 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM6,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM5,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %YMM0,%YMM24,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM24,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM18,%YMM12,%YMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM6,%YMM20,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM5,%YMM13,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM19,%YMM9,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDQA32 %XMM4,%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD231PD %YMM15,%YMM7,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x3a8(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %YMM14,%YMM6,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM10,%YMM5,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM15,%YMM14,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM15,%YMM10,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM18,%YMM11,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSCATTERDPD %YMM6,(%RBX,%XMM4,8){%K4} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
VPERM2I128 $0x11,%YMM4,%YMM4,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VSCATTERDPD %YMM5,(%RBX,%XMM4,8){%K5} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
VMOVAPD 0x368(%RSP),%YMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x348(%RSP),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x388(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x308(%RSP),%YMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %YMM5,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %YMM1,%YMM14,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM14,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM10,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD231PD %YMM15,%YMM2,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM15,%YMM3,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x2e8(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %YMM0,%YMM15,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM15,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x328(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD231PD %YMM15,%YMM3,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM15,%YMM2,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM14,%YMM23,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM10,%YMM8,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM22,%YMM14,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x2a8(%RSP),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x288(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %YMM21,%YMM10,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x2c8(%RSP),%YMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %YMM14,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %YMM1,%YMM15,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM15,%YMM24 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x268(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD231PD %YMM10,%YMM2,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM10,%YMM3,%YMM24 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM15,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD213PD 0x4c8(%RSP),%YMM0,%YMM10 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0x4c8(%RSP),%YMM1,%YMM15 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x4e8(%RSP),%YMM3,%YMM10 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x4e8(%RSP),%YMM2,%YMM15 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM10,%YMM20,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM15,%YMM13,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM24,%YMM9,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM7,%YMM15,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM10,%YMM6,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM14,%YMM5,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM12,%YMM6,%YMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM11,%YMM5,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSCATTERDPD %YMM12,(%R8,%XMM19,8){%K6} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
VSCATTERDPD %YMM11,(%R8,%XMM4,8){%K7} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
VMOVAPD 0x228(%RSP),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x208(%RSP),%YMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x248(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x1e8(%RSP),%YMM12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %YMM14,%YMM3,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM6,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULPD %YMM14,%YMM2,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM12,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM31,%YMM2,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM12,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM31,%YMM3,%YMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM15,%YMM17,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM15,%YMM16,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x188(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %YMM10,%YMM11,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x1c8(%RSP),%YMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %YMM5,%YMM6,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM10,%YMM17,%YMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM10,%YMM16,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x1a8(%RSP),%YMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %YMM10,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %YMM1,%YMM15,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM15,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM14,%YMM10,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x168(%RSP),%YMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %YMM12,%YMM5,%YMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM14,%YMM8,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x128(%RSP),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %YMM23,%YMM12,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM14,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %YMM1,%YMM30,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM30,%YMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM27,%YMM26,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM27,%YMM26,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM6,%YMM21,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x148(%RSP),%YMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD231PD %YMM11,%YMM22,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM6,%YMM2,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM6,%YMM3,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM28,%YMM2,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM28,%YMM3,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM10,%YMM16,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM10,%YMM17,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM29,%YMM16,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM29,%YMM17,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM15,%YMM14,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM11,%YMM12,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM1,%YMM2,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM3,%YMM0,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM1,%YMM13,%YMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM20,%YMM0,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM6,%YMM13,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM11,%YMM10,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM8,%YMM7,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM5,%YMM9,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSCATTERDPD %YMM9,(%RDI,%XMM19,8){%K2} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
VSCATTERDPD %YMM7,(%RDI,%XMM4,8){%K3} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
CMP %RDX,%R14 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 447e50 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |